root/crmd/lrm.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. lrm_connection_destroy
  2. make_stop_id
  3. copy_instance_keys
  4. copy_meta_keys
  5. history_remove_recurring_op
  6. history_free_recurring_ops
  7. history_free
  8. update_history_cache
  9. send_task_ok_ack
  10. lrm_op_callback
  11. do_lrm_control
  12. lrm_state_verify_stopped
  13. build_parameter_list
  14. append_restart_list
  15. append_secure_list
  16. build_operation_update
  17. is_rsc_active
  18. build_active_RAs
  19. do_lrm_query_internal
  20. do_lrm_query
  21. notify_deleted
  22. lrm_remove_deleted_rsc
  23. lrm_remove_deleted_op
  24. delete_rsc_status
  25. delete_rsc_entry
  26. erase_lrm_history_by_op
  27. erase_lrm_history_by_id
  28. last_failed_matches_op
  29. lrm_clear_last_failure
  30. cancel_op
  31. cancel_action_by_key
  32. cancel_op_key
  33. get_lrm_resource
  34. delete_resource
  35. get_fake_call_id
  36. force_reprobe
  37. synthesize_lrmd_failure
  38. do_lrm_invoke
  39. construct_op
  40. send_direct_ack
  41. verify_stopped
  42. stop_recurring_action_by_rsc
  43. stop_recurring_actions
  44. record_pending_op
  45. do_lrm_rsc_op
  46. cib_rsc_callback
  47. do_update_resource
  48. do_lrm_event
  49. unescape_newlines
  50. process_lrm_event

   1 /*
   2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This software is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 
  19 #include <crm_internal.h>
  20 
  21 #include <sys/param.h>
  22 #include <sys/types.h>
  23 #include <sys/wait.h>
  24 
  25 #include <crm/crm.h>
  26 #include <crm/services.h>
  27 
  28 #include <crm/msg_xml.h>
  29 #include <crm/common/xml.h>
  30 
  31 #include <crmd.h>
  32 #include <crmd_fsa.h>
  33 #include <crmd_messages.h>
  34 #include <crmd_callbacks.h>
  35 #include <crmd_lrm.h>
  36 #include <regex.h>
  37 #include <crm/pengine/rules.h>
  38 
  39 #define START_DELAY_THRESHOLD 5 * 60 * 1000
  40 #define MAX_LRM_REG_FAILS 30
  41 
  42 #define s_if_plural(i) (((i) == 1)? "" : "s")
  43 
  44 struct delete_event_s {
  45     int rc;
  46     const char *rsc;
  47     lrm_state_t *lrm_state;
  48 };
  49 
  50 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
  51 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
  52 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
  53 static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
  54                              const char *user_name);
  55 
  56 static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op,
  57                                        const char *rsc_id, const char *operation);
  58 static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation,
  59                           xmlNode * msg, xmlNode * request);
  60 
  61 void send_direct_ack(const char *to_host, const char *to_sys,
  62                      lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id);
  63 
  64 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
  65                                          int log_level);
  66 static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op);
  67 
  68 static void
  69 lrm_connection_destroy(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  70 {
  71     if (is_set(fsa_input_register, R_LRM_CONNECTED)) {
  72         crm_crit("LRM Connection failed");
  73         register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
  74         clear_bit(fsa_input_register, R_LRM_CONNECTED);
  75 
  76     } else {
  77         crm_info("LRM Connection disconnected");
  78     }
  79 
  80 }
  81 
  82 static char *
  83 make_stop_id(const char *rsc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
  84 {
  85     char *op_id = NULL;
  86 
  87     op_id = calloc(1, strlen(rsc) + 34);
  88     if (op_id != NULL) {
  89         snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id);
  90     }
  91     return op_id;
  92 }
  93 
  94 static void
  95 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  96 {
  97     if (strstr(key, CRM_META "_") == NULL) {
  98         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
  99     }
 100 }
 101 
 102 static void
 103 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 104 {
 105     if (strstr(key, CRM_META "_") != NULL) {
 106         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
 107     }
 108 }
 109 
 110 /*!
 111  * \internal
 112  * \brief Remove a recurring operation from a resource's history
 113  *
 114  * \param[in,out] history  Resource history to modify
 115  * \param[in]     op       Operation to remove
 116  *
 117  * \return TRUE if the operation was found and removed, FALSE otherwise
 118  */
 119 static gboolean
 120 history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 121 {
 122     GList *iter;
 123 
 124     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
 125         lrmd_event_data_t *existing = iter->data;
 126 
 127         if ((op->interval == existing->interval)
 128             && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE)
 129             && safe_str_eq(op->op_type, existing->op_type)) {
 130 
 131             history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
 132             lrmd_free_event(existing);
 133             return TRUE;
 134         }
 135     }
 136     return FALSE;
 137 }
 138 
 139 /*!
 140  * \internal
 141  * \brief Free all recurring operations in resource history
 142  *
 143  * \param[in,out] history  Resource history to modify
 144  */
 145 static void
 146 history_free_recurring_ops(rsc_history_t *history)
     /* [previous][next][first][last][top][bottom][index][help] */
 147 {
 148     GList *iter;
 149 
 150     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
 151         lrmd_free_event(iter->data);
 152     }
 153     g_list_free(history->recurring_op_list);
 154     history->recurring_op_list = NULL;
 155 }
 156 
 157 /*!
 158  * \internal
 159  * \brief Free resource history
 160  *
 161  * \param[in,out] history  Resource history to free
 162  */
 163 void
 164 history_free(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 165 {
 166     rsc_history_t *history = (rsc_history_t*)data;
 167 
 168     if (history->stop_params) {
 169         g_hash_table_destroy(history->stop_params);
 170     }
 171 
 172     /* Don't need to free history->rsc.id because it's set to history->id */
 173     free(history->rsc.type);
 174     free(history->rsc.class);
 175     free(history->rsc.provider);
 176 
 177     lrmd_free_event(history->failed);
 178     lrmd_free_event(history->last);
 179     free(history->id);
 180     history_free_recurring_ops(history);
 181     free(history);
 182 }
 183 
 184 static void
 185 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 186 {
 187     int target_rc = 0;
 188     rsc_history_t *entry = NULL;
 189 
 190     if (op->rsc_deleted) {
 191         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
 192         delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL);
 193         return;
 194     }
 195 
 196     if (safe_str_eq(op->op_type, RSC_NOTIFY)) {
 197         return;
 198     }
 199 
 200     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 201 
 202     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
 203     if (entry == NULL && rsc) {
 204         entry = calloc(1, sizeof(rsc_history_t));
 205         entry->id = strdup(op->rsc_id);
 206         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 207 
 208         entry->rsc.id = entry->id;
 209         entry->rsc.type = strdup(rsc->type);
 210         entry->rsc.class = strdup(rsc->class);
 211         if (rsc->provider) {
 212             entry->rsc.provider = strdup(rsc->provider);
 213         } else {
 214             entry->rsc.provider = NULL;
 215         }
 216 
 217     } else if (entry == NULL) {
 218         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
 219         return;
 220     }
 221 
 222     entry->last_callid = op->call_id;
 223     target_rc = rsc_op_expected_rc(op);
 224     if (op->op_status == PCMK_LRM_OP_CANCELLED) {
 225         if (op->interval > 0) {
 226             crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type,
 227                       op->interval);
 228             history_remove_recurring_op(entry, op);
 229             return;
 230         } else {
 231             crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval,
 232                       op->rc, op->op_status);
 233         }
 234 
 235     } else if (did_rsc_op_fail(op, target_rc)) {
 236         /* Store failed monitors here, otherwise the block below will cause them
 237          * to be forgotten when a stop happens.
 238          */
 239         if (entry->failed) {
 240             lrmd_free_event(entry->failed);
 241         }
 242         entry->failed = lrmd_copy_event(op);
 243 
 244     } else if (op->interval == 0) {
 245         if (entry->last) {
 246             lrmd_free_event(entry->last);
 247         }
 248         entry->last = lrmd_copy_event(op);
 249 
 250         if (op->params &&
 251             (safe_str_eq(CRMD_ACTION_START, op->op_type) ||
 252              safe_str_eq("reload", op->op_type) ||
 253              safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) {
 254 
 255             if (entry->stop_params) {
 256                 g_hash_table_destroy(entry->stop_params);
 257             }
 258             entry->stop_params = crm_str_table_new();
 259 
 260             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
 261         }
 262     }
 263 
 264     if (op->interval > 0) {
 265         /* Ensure there are no duplicates */
 266         history_remove_recurring_op(entry, op);
 267 
 268         crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval);
 269         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 270 
 271     } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) {
 272         crm_trace("Dropping %d recurring ops because of: %s_%s_%d",
 273                   g_list_length(entry->recurring_op_list), op->rsc_id,
 274                   op->op_type, op->interval);
 275         history_free_recurring_ops(entry);
 276     }
 277 }
 278 
 279 /*!
 280  * \internal
 281  * \brief Send a direct OK ack for a resource task
 282  *
 283  * \param[in] lrm_state  LRM connection
 284  * \param[in] input      Input message being ack'ed
 285  * \param[in] rsc_id     ID of affected resource
 286  * \param[in] rsc        Affected resource (if available)
 287  * \param[in] task       Operation task being ack'ed
 288  * \param[in] ack_host   Name of host to send ack to
 289  * \param[in] ack_sys    IPC system name to ack
 290  */
 291 static void
 292 send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input,
     /* [previous][next][first][last][top][bottom][index][help] */
 293                  const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task,
 294                  const char *ack_host, const char *ack_sys)
 295 {
 296     lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
 297 
 298     CRM_ASSERT(op != NULL);
 299     op->rc = PCMK_OCF_OK;
 300     op->op_status = PCMK_LRM_OP_DONE;
 301     send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id);
 302     lrmd_free_event(op);
 303 }
 304 
 305 void
 306 lrm_op_callback(lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 307 {
 308     const char *nodename = NULL;
 309     lrm_state_t *lrm_state = NULL;
 310 
 311     CRM_CHECK(op != NULL, return);
 312 
 313     /* determine the node name for this connection. */
 314     nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname;
 315 
 316     if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) {
 317         /* if this is the local lrmd ipc connection, set the right bits in the
 318          * crmd when the connection goes down */
 319         lrm_connection_destroy();
 320         return;
 321     } else if (op->type != lrmd_event_exec_complete) {
 322         /* we only need to process execution results */
 323         return;
 324     }
 325 
 326     lrm_state = lrm_state_find(nodename);
 327     CRM_ASSERT(lrm_state != NULL);
 328 
 329     process_lrm_event(lrm_state, op, NULL);
 330 }
 331 
 332 /*       A_LRM_CONNECT  */
 333 void
 334 do_lrm_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 335                enum crmd_fsa_cause cause,
 336                enum crmd_fsa_state cur_state,
 337                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 338 {
 339     /* This only pertains to local lrmd connections.  Remote connections are handled as
 340      * resources within the pengine.  Connecting and disconnecting from remote lrmd instances
 341      * handled differently than the local. */
 342 
 343     lrm_state_t *lrm_state = NULL;
 344 
 345     if(fsa_our_uname == NULL) {
 346         return; /* Nothing to do */
 347     }
 348     lrm_state = lrm_state_find_or_create(fsa_our_uname);
 349     if (lrm_state == NULL) {
 350         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 351         return;
 352     }
 353 
 354     if (action & A_LRM_DISCONNECT) {
 355         if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
 356             if (action == A_LRM_DISCONNECT) {
 357                 crmd_fsa_stall(FALSE);
 358                 return;
 359             }
 360         }
 361 
 362         clear_bit(fsa_input_register, R_LRM_CONNECTED);
 363         crm_info("Disconnecting from the LRM");
 364         lrm_state_disconnect(lrm_state);
 365         lrm_state_reset_tables(lrm_state, FALSE);
 366         crm_notice("Disconnected from the LRM");
 367     }
 368 
 369     if (action & A_LRM_CONNECT) {
 370         int ret = pcmk_ok;
 371 
 372         crm_debug("Connecting to the LRM");
 373         ret = lrm_state_ipc_connect(lrm_state);
 374 
 375         if (ret != pcmk_ok) {
 376             if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
 377                 crm_warn("Failed to connect to the LRM %d time%s (%d max)",
 378                          lrm_state->num_lrm_register_fails,
 379                          s_if_plural(lrm_state->num_lrm_register_fails),
 380                          MAX_LRM_REG_FAILS);
 381 
 382                 crm_timer_start(wait_timer);
 383                 crmd_fsa_stall(FALSE);
 384                 return;
 385             }
 386         }
 387 
 388         if (ret != pcmk_ok) {
 389             crm_err("Failed to connect to the LRM the max allowed %d time%s",
 390                     lrm_state->num_lrm_register_fails,
 391                     s_if_plural(lrm_state->num_lrm_register_fails));
 392             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 393             return;
 394         }
 395 
 396         set_bit(fsa_input_register, R_LRM_CONNECTED);
 397         crm_info("LRM connection established");
 398     }
 399 
 400     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
 401         crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__);
 402     }
 403 }
 404 
 405 static gboolean
 406 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
 407 {
 408     int counter = 0;
 409     gboolean rc = TRUE;
 410     const char *when = "lrm disconnect";
 411 
 412     GHashTableIter gIter;
 413     const char *key = NULL;
 414     rsc_history_t *entry = NULL;
 415     struct recurring_op_s *pending = NULL;
 416 
 417     crm_debug("Checking for active resources before exit");
 418 
 419     if (cur_state == S_TERMINATE) {
 420         log_level = LOG_ERR;
 421         when = "shutdown";
 422 
 423     } else if (is_set(fsa_input_register, R_SHUTDOWN)) {
 424         when = "shutdown... waiting";
 425     }
 426 
 427     if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) {
 428         guint removed = g_hash_table_foreach_remove(
 429             lrm_state->pending_ops, stop_recurring_actions, lrm_state);
 430         guint nremaining = g_hash_table_size(lrm_state->pending_ops);
 431 
 432         if (removed || nremaining) {
 433             crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
 434                        removed, s_if_plural(removed), when, nremaining);
 435         }
 436     }
 437 
 438     if (lrm_state->pending_ops) {
 439         g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
 440         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
 441             /* Ignore recurring actions in the shutdown calculations */
 442             if (pending->interval == 0) {
 443                 counter++;
 444             }
 445         }
 446     }
 447 
 448     if (counter > 0) {
 449         do_crm_log(log_level, "%d pending LRM operation%s at %s",
 450                    counter, s_if_plural(counter), when);
 451 
 452         if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) {
 453             g_hash_table_iter_init(&gIter, lrm_state->pending_ops);
 454             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
 455                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
 456             }
 457 
 458         } else {
 459             rc = FALSE;
 460         }
 461         return rc;
 462     }
 463 
 464     if (lrm_state->resource_history == NULL) {
 465         return rc;
 466     }
 467 
 468     if (is_set(fsa_input_register, R_SHUTDOWN)) {
 469         /* At this point we're not waiting, we're just shutting down */
 470         when = "shutdown";
 471     }
 472 
 473     counter = 0;
 474     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
 475     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
 476         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
 477             continue;
 478         }
 479 
 480         counter++;
 481         if (log_level == LOG_ERR) {
 482             crm_info("Found %s active at %s", entry->id, when);
 483         } else {
 484             crm_trace("Found %s active at %s", entry->id, when);
 485         }
 486         if (lrm_state->pending_ops) {
 487             GHashTableIter hIter;
 488 
 489             g_hash_table_iter_init(&hIter, lrm_state->pending_ops);
 490             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
 491                 if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) {
 492                     crm_notice("%sction %s (%s) incomplete at %s",
 493                                pending->interval == 0 ? "A" : "Recurring a",
 494                                key, pending->op_key, when);
 495                 }
 496             }
 497         }
 498     }
 499 
 500     if (counter) {
 501         crm_err("%d resource%s active at %s",
 502                 counter, (counter == 1)? " was" : "s were", when);
 503     }
 504 
 505     return rc;
 506 }
 507 
 508 static char *
 509 build_parameter_list(const lrmd_event_data_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
 510                      const struct ra_metadata_s *metadata,
 511                      xmlNode *result, enum ra_param_flags_e param_type,
 512                      bool invert_for_xml)
 513 {
 514     int len = 0;
 515     int max = 0;
 516     char *list = NULL;
 517     GList *iter = NULL;
 518 
 519     /* Newer resource agents support the "private" parameter attribute to
 520      * indicate sensitive parameters. For backward compatibility with older
 521      * agents, this list is used if the agent doesn't specify any as "private".
 522      */
 523     const char *secure_terms[] = {
 524         "password",
 525         "passwd",
 526         "user",
 527     };
 528 
 529     if (is_not_set(metadata->ra_flags, ra_uses_private)
 530         && (param_type == ra_param_private)) {
 531 
 532         max = DIMOF(secure_terms);
 533     }
 534 
 535     for (iter = metadata->ra_params; iter != NULL; iter = iter->next) {
 536         struct ra_param_s *param = (struct ra_param_s *) iter->data;
 537         bool accept = FALSE;
 538 
 539         if (is_set(param->rap_flags, param_type)) {
 540             accept = TRUE;
 541 
 542         } else if (max) {
 543             for (int lpc = 0; lpc < max; lpc++) {
 544                 if (safe_str_eq(secure_terms[lpc], param->rap_name)) {
 545                     accept = TRUE;
 546                     break;
 547                 }
 548             }
 549         }
 550 
 551         if (accept) {
 552             int start = len;
 553 
 554             crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type));
 555 
 556             len += strlen(param->rap_name) + 2; // include spaces around
 557             list = realloc_safe(list, len + 1); // include null terminator
 558 
 559             // spaces before and after make parsing simpler
 560             sprintf(list + start, " %s ", param->rap_name);
 561 
 562         } else {
 563             crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type));
 564         }
 565 
 566         if (result && (invert_for_xml? !accept : accept)) {
 567             const char *v = g_hash_table_lookup(op->params, param->rap_name);
 568 
 569             if (v != NULL) {
 570                 crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v);
 571                 crm_xml_add(result, param->rap_name, v);
 572             }
 573         }
 574     }
 575 
 576     return list;
 577 }
 578 
 579 static void
 580 append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
     /* [previous][next][first][last][top][bottom][index][help] */
 581                     xmlNode *update, const char *version)
 582 {
 583     char *list = NULL;
 584     char *digest = NULL;
 585     xmlNode *restart = NULL;
 586 
 587     CRM_LOG_ASSERT(op->params != NULL);
 588 
 589     if (op->interval > 0) {
 590         /* monitors are not reloadable */
 591         return;
 592     }
 593 
 594     if (is_set(metadata->ra_flags, ra_supports_reload)) {
 595         restart = create_xml_node(NULL, XML_TAG_PARAMS);
 596         /* Add any parameters with unique="1" to the "op-force-restart" list.
 597          *
 598          * (Currently, we abuse "unique=0" to indicate reloadability. This is
 599          * nonstandard and should eventually be replaced once the OCF standard
 600          * is updated with something better.)
 601          */
 602         list = build_parameter_list(op, metadata, restart, ra_param_unique,
 603                                     FALSE);
 604 
 605     } else {
 606         /* Resource does not support reloads */
 607         return;
 608     }
 609 
 610     digest = calculate_operation_digest(restart, version);
 611     /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload,
 612      * no matter if it actually supports any parameters with unique="1"). */
 613     crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: "");
 614     crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest);
 615 
 616     crm_trace("%s: %s, %s", op->rsc_id, digest, list);
 617     crm_log_xml_trace(restart, "restart digest source");
 618 
 619     free_xml(restart);
 620     free(digest);
 621     free(list);
 622 }
 623 
 624 static void
 625 append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata,
     /* [previous][next][first][last][top][bottom][index][help] */
 626                    xmlNode *update, const char *version)
 627 {
 628     char *list = NULL;
 629     char *digest = NULL;
 630     xmlNode *secure = NULL;
 631 
 632     CRM_LOG_ASSERT(op->params != NULL);
 633 
 634     /*
 635      * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the
 636      * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on
 637      * the insecure ones
 638      */
 639     secure = create_xml_node(NULL, XML_TAG_PARAMS);
 640     list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE);
 641 
 642     if (list != NULL) {
 643         digest = calculate_operation_digest(secure, version);
 644         crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list);
 645         crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest);
 646 
 647         crm_trace("%s: %s, %s", op->rsc_id, digest, list);
 648         crm_log_xml_trace(secure, "secure digest source");
 649     } else {
 650         crm_trace("%s: no secure parameters", op->rsc_id);
 651     }
 652 
 653     free_xml(secure);
 654     free(digest);
 655     free(list);
 656 }
 657 
 658 static gboolean
 659 build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op,
     /* [previous][next][first][last][top][bottom][index][help] */
 660                        const char *node_name, const char *src)
 661 {
 662     int target_rc = 0;
 663     xmlNode *xml_op = NULL;
 664     struct ra_metadata_s *metadata = NULL;
 665     const char *caller_version = NULL;
 666     lrm_state_t *lrm_state = NULL;
 667 
 668     if (op == NULL) {
 669         return FALSE;
 670     }
 671 
 672     target_rc = rsc_op_expected_rc(op);
 673 
 674     /* there is a small risk in formerly mixed clusters that it will
 675      * be sub-optimal.
 676      *
 677      * however with our upgrade policy, the update we send should
 678      * still be completely supported anyway
 679      */
 680     caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION);
 681     CRM_LOG_ASSERT(caller_version != NULL);
 682 
 683     if(caller_version == NULL) {
 684         caller_version = CRM_FEATURE_SET;
 685     }
 686 
 687     crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
 688     xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG);
 689     if (xml_op == NULL) {
 690         return TRUE;
 691     }
 692 
 693     if ((rsc == NULL) || (op == NULL) || (op->params == NULL)
 694         || !crm_op_needs_metadata(rsc->class, op->op_type)) {
 695 
 696         crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)",
 697                   op->op_type, op->rsc_id, op->params, rsc);
 698         return TRUE;
 699     }
 700 
 701     lrm_state = lrm_state_find(node_name);
 702     if (lrm_state == NULL) {
 703         crm_warn("Cannot calculate digests for operation %s_%s_%d because we have no LRM connection to %s",
 704                  op->rsc_id, op->op_type, op->interval, node_name);
 705         return TRUE;
 706     }
 707 
 708     metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
 709     if (metadata == NULL) {
 710         /* For now, we always collect resource agent meta-data via a local,
 711          * synchronous, direct execution of the agent. This has multiple issues:
 712          * the lrmd should execute agents, not the crmd; meta-data for
 713          * Pacemaker Remote nodes should be collected on those nodes, not
 714          * locally; and the meta-data call shouldn't eat into the timeout of the
 715          * real action being performed.
 716          *
 717          * These issues are planned to be addressed by having the PE schedule
 718          * a meta-data cache check at the beginning of each transition. Once
 719          * that is working, this block will only be a fallback in case the
 720          * initial collection fails.
 721          */
 722         char *metadata_str = NULL;
 723 
 724         int rc = lrm_state_get_metadata(lrm_state, rsc->class,
 725                                         rsc->provider, rsc->type,
 726                                         &metadata_str, 0);
 727 
 728         if (rc != pcmk_ok) {
 729             crm_warn("Failed to get metadata for %s (%s:%s:%s)",
 730                      rsc->id, rsc->class, rsc->provider, rsc->type);
 731             return TRUE;
 732         }
 733 
 734         metadata = metadata_cache_update(lrm_state->metadata_cache, rsc,
 735                                          metadata_str);
 736         free(metadata_str);
 737         if (metadata == NULL) {
 738             crm_warn("Failed to update metadata for %s (%s:%s:%s)",
 739                      rsc->id, rsc->class, rsc->provider, rsc->type);
 740             return TRUE;
 741         }
 742     }
 743 
 744 #if ENABLE_VERSIONED_ATTRS
 745     crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version);
 746 #endif
 747 
 748     crm_trace("Including additional digests for %s::%s:%s", rsc->class, rsc->provider, rsc->type);
 749     append_restart_list(op, metadata, xml_op, caller_version);
 750     append_secure_list(op, metadata, xml_op, caller_version);
 751 
 752     return TRUE;
 753 }
 754 
 755 static gboolean
 756 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 757 {
 758     rsc_history_t *entry = NULL;
 759 
 760     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 761     if (entry == NULL || entry->last == NULL) {
 762         return FALSE;
 763     }
 764 
 765     crm_trace("Processing %s: %s.%d=%d",
 766               rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc);
 767     if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) {
 768         return FALSE;
 769 
 770     } else if (entry->last->rc == PCMK_OCF_OK
 771                && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) {
 772         /* a stricter check is too complex...
 773          * leave that to the PE
 774          */
 775         return FALSE;
 776 
 777     } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
 778         return FALSE;
 779 
 780     } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) {
 781         /* Badly configured resources can't be reliably stopped */
 782         return FALSE;
 783     }
 784 
 785     return TRUE;
 786 }
 787 
 788 static gboolean
 789 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
     /* [previous][next][first][last][top][bottom][index][help] */
 790 {
 791     GHashTableIter iter;
 792     rsc_history_t *entry = NULL;
 793 
 794     g_hash_table_iter_init(&iter, lrm_state->resource_history);
 795     while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
 796 
 797         GList *gIter = NULL;
 798         xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
 799 
 800         crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
 801         crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
 802         crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class);
 803         crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
 804 
 805         if (entry->last && entry->last->params) {
 806             const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
 807             if (container) {
 808                 crm_trace("Resource %s is a part of container resource %s", entry->id, container);
 809                 crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
 810             }
 811         }
 812         build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__);
 813         build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__);
 814         for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
 815             build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__);
 816         }
 817     }
 818 
 819     return FALSE;
 820 }
 821 
 822 static xmlNode *
 823 do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 824 {
 825     xmlNode *xml_state = NULL;
 826     xmlNode *xml_data = NULL;
 827     xmlNode *rsc_list = NULL;
 828     crm_node_t *peer = NULL;
 829 
 830     peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
 831     CRM_CHECK(peer != NULL, return NULL);
 832 
 833     xml_state = create_node_state_update(peer, update_flags, NULL,
 834                                          __FUNCTION__);
 835     if (xml_state == NULL) {
 836         return NULL;
 837     }
 838 
 839     xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
 840     crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
 841     rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
 842 
 843     /* Build a list of active (not always running) resources */
 844     build_active_RAs(lrm_state, rsc_list);
 845 
 846     crm_log_xml_trace(xml_state, "Current state of the LRM");
 847 
 848     return xml_state;
 849 }
 850 
 851 xmlNode *
 852 do_lrm_query(gboolean is_replace, const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
 853 {
 854     lrm_state_t *lrm_state = lrm_state_find(node_name);
 855     xmlNode *xml_state;
 856 
 857     if (!lrm_state) {
 858         crm_err("Could not query lrm state for lrmd node %s", node_name);
 859         return NULL;
 860     }
 861     xml_state = do_lrm_query_internal(lrm_state,
 862                                       node_update_cluster|node_update_peer);
 863 
 864     if (xml_state) {
 865         /* @COMPAT DC <1.1.8
 866          * In case this function is called to generate a join confirmation to
 867          * send to the DC, force the current and expected join state to member.
 868          * This isn't necessary for newer DCs but is backward compatible.
 869          */
 870         crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
 871         crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
 872     }
 873 
 874     return xml_state;
 875 }
 876 
 877 static void
 878 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 879 {
 880     lrmd_event_data_t *op = NULL;
 881     const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
 882     const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
 883 
 884     crm_info("Notifying %s on %s that %s was%s deleted",
 885              from_sys, (from_host? from_host : "localhost"), rsc_id,
 886              ((rc == pcmk_ok)? "" : " not"));
 887 
 888     op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
 889     CRM_ASSERT(op != NULL);
 890 
 891     if (rc == pcmk_ok) {
 892         op->op_status = PCMK_LRM_OP_DONE;
 893         op->rc = PCMK_OCF_OK;
 894     } else {
 895         op->op_status = PCMK_LRM_OP_ERROR;
 896         op->rc = PCMK_OCF_UNKNOWN_ERROR;
 897     }
 898 
 899     send_direct_ack(from_host, from_sys, NULL, op, rsc_id);
 900     lrmd_free_event(op);
 901 
 902     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
 903         /* this isn't expected - trigger a new transition */
 904         time_t now = time(NULL);
 905         char *now_s = crm_itoa(now);
 906 
 907         crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id);
 908 
 909         update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
 910                              "last-lrm-refresh", now_s, FALSE, NULL, NULL);
 911 
 912         free(now_s);
 913     }
 914 }
 915 
 916 static gboolean
 917 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 918 {
 919     struct delete_event_s *event = user_data;
 920     struct pending_deletion_op_s *op = value;
 921 
 922     if (crm_str_eq(event->rsc, op->rsc, TRUE)) {
 923         notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
 924         return TRUE;
 925     }
 926     return FALSE;
 927 }
 928 
 929 static gboolean
 930 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 931 {
 932     const char *rsc = user_data;
 933     struct recurring_op_s *pending = value;
 934 
 935     if (crm_str_eq(rsc, pending->rsc_id, TRUE)) {
 936         crm_info("Removing op %s:%d for deleted resource %s",
 937                  pending->op_key, pending->call_id, rsc);
 938         return TRUE;
 939     }
 940     return FALSE;
 941 }
 942 
 943 /*
 944  * Remove the rsc from the CIB
 945  *
 946  * Avoids refreshing the entire LRM section of this host
 947  */
 948 #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
 949 
 950 static int
 951 delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options,
     /* [previous][next][first][last][top][bottom][index][help] */
 952                   const char *user_name)
 953 {
 954     char *rsc_xpath = NULL;
 955     int max = 0;
 956     int rc = pcmk_ok;
 957 
 958     CRM_CHECK(rsc_id != NULL, return -ENXIO);
 959 
 960     max = strlen(rsc_template) + strlen(lrm_state->node_name) + strlen(rsc_id) + 1;
 961     rsc_xpath = calloc(1, max);
 962     snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id);
 963 
 964     rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath,
 965                          NULL, NULL, call_options | cib_xpath, user_name);
 966 
 967     free(rsc_xpath);
 968     return rc;
 969 }
 970 
 971 static void
 972 delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id,
     /* [previous][next][first][last][top][bottom][index][help] */
 973                  GHashTableIter * rsc_gIter, int rc, const char *user_name)
 974 {
 975     struct delete_event_s event;
 976 
 977     CRM_CHECK(rsc_id != NULL, return);
 978 
 979     if (rc == pcmk_ok) {
 980         char *rsc_id_copy = strdup(rsc_id);
 981 
 982         if (rsc_gIter)
 983             g_hash_table_iter_remove(rsc_gIter);
 984         else
 985             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
 986         crm_debug("sync: Sending delete op for %s", rsc_id_copy);
 987         delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name);
 988 
 989         g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy);
 990         free(rsc_id_copy);
 991     }
 992 
 993     if (input) {
 994         notify_deleted(lrm_state, input, rsc_id, rc);
 995     }
 996 
 997     event.rc = rc;
 998     event.rsc = rsc_id;
 999     event.lrm_state = lrm_state;
1000     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
1001 }
1002 
1003 /*!
1004  * \internal
1005  * \brief Erase an LRM history entry from the CIB, given the operation data
1006  *
1007  * \param[in] lrm_state  LRM state of the desired node
1008  * \param[in] op         Operation whose history should be deleted
1009  */
1010 static void
1011 erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
1012 {
1013     xmlNode *xml_top = NULL;
1014 
1015     CRM_CHECK(op != NULL, return);
1016 
1017     xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP);
1018     crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id);
1019     crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data);
1020 
1021     if (op->interval > 0) {
1022         char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval);
1023 
1024         /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */
1025         crm_xml_add(xml_top, XML_ATTR_ID, op_id);
1026         free(op_id);
1027     }
1028 
1029     crm_debug("Erasing LRM resource history for %s_%s_%d (call=%d)",
1030               op->rsc_id, op->op_type, op->interval, op->call_id);
1031 
1032     fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top,
1033                                cib_quorum_override);
1034 
1035     crm_log_xml_trace(xml_top, "op:cancel");
1036     free_xml(xml_top);
1037 }
1038 
1039 /* Define xpath to find LRM resource history entry by node and resource */
1040 #define XPATH_HISTORY                                   \
1041     "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS              \
1042     "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']"  \
1043     "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES       \
1044     "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']"  \
1045     "/" XML_LRM_TAG_RSC_OP
1046 
1047 /* ... and also by operation key */
1048 #define XPATH_HISTORY_ID XPATH_HISTORY \
1049     "[@" XML_ATTR_ID "='%s']"
1050 
1051 /* ... and also by operation key and operation call ID */
1052 #define XPATH_HISTORY_CALL XPATH_HISTORY \
1053     "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']"
1054 
1055 /* ... and also by operation key and original operation key */
1056 #define XPATH_HISTORY_ORIG XPATH_HISTORY \
1057     "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']"
1058 
1059 /*!
1060  * \internal
1061  * \brief Erase an LRM history entry from the CIB, given operation identifiers
1062  *
1063  * \param[in] lrm_state  LRM state of the node to clear history for
1064  * \param[in] rsc_id     Name of resource to clear history for
1065  * \param[in] key        Operation key of operation to clear history for
1066  * \param[in] orig_op    If specified, delete only if it has this original op
1067  * \param[in] call_id    If specified, delete entry only if it has this call ID
1068  */
1069 static void
1070 erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1071                         const char *key, const char *orig_op, int call_id)
1072 {
1073     char *op_xpath = NULL;
1074 
1075     CRM_CHECK((rsc_id != NULL) && (key != NULL), return);
1076 
1077     if (call_id > 0) {
1078         op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL,
1079                                      lrm_state->node_name, rsc_id, key,
1080                                      call_id);
1081 
1082     } else if (orig_op) {
1083         op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG,
1084                                      lrm_state->node_name, rsc_id, key,
1085                                      orig_op);
1086     } else {
1087         op_xpath = crm_strdup_printf(XPATH_HISTORY_ID,
1088                                      lrm_state->node_name, rsc_id, key);
1089     }
1090 
1091     crm_debug("Erasing LRM resource history for %s on %s (call=%d)",
1092               key, rsc_id, call_id);
1093     fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL,
1094                                cib_quorum_override | cib_xpath);
1095     free(op_xpath);
1096 }
1097 
1098 static inline gboolean
1099 last_failed_matches_op(rsc_history_t *entry, const char *op, int interval)
     /* [previous][next][first][last][top][bottom][index][help] */
1100 {
1101     if (entry == NULL) {
1102         return FALSE;
1103     }
1104     if (op == NULL) {
1105         return TRUE;
1106     }
1107     return (safe_str_eq(op, entry->failed->op_type)
1108             && (interval == entry->failed->interval));
1109 }
1110 
1111 /*!
1112  * \internal
1113  * \brief Clear a resource's last failure
1114  *
1115  * Erase a resource's last failure on a particular node from both the
1116  * LRM resource history in the CIB, and the resource history remembered
1117  * for the LRM state.
1118  *
1119  * \param[in] rsc_id     Resource name
1120  * \param[in] node_name  Node name
1121  * \param[in] operation  If specified, only clear if matching this operation
1122  * \param[in] interval   If operation is specified, it has this interval in ms
1123  */
1124 void
1125 lrm_clear_last_failure(const char *rsc_id, const char *node_name,
     /* [previous][next][first][last][top][bottom][index][help] */
1126                        const char *operation, int interval)
1127 {
1128     char *op_key = NULL;
1129     char *orig_op_key = NULL;
1130     lrm_state_t *lrm_state = NULL;
1131 
1132     lrm_state = lrm_state_find(node_name);
1133     if (lrm_state == NULL) {
1134         return;
1135     }
1136 
1137     /* Erase from CIB */
1138     op_key = generate_op_key(rsc_id, "last_failure", 0);
1139     if (operation) {
1140         orig_op_key = generate_op_key(rsc_id, operation, interval);
1141     }
1142     erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0);
1143     free(op_key);
1144     free(orig_op_key);
1145 
1146     /* Remove from memory */
1147     if (lrm_state->resource_history) {
1148         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
1149                                                    rsc_id);
1150 
1151         if (last_failed_matches_op(entry, operation, interval)) {
1152             lrmd_free_event(entry->failed);
1153             entry->failed = NULL;
1154         }
1155     }
1156 }
1157 
1158 /* Returns: gboolean - cancellation is in progress */
1159 static gboolean
1160 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
1161 {
1162     int rc = pcmk_ok;
1163     char *local_key = NULL;
1164     struct recurring_op_s *pending = NULL;
1165 
1166     CRM_CHECK(op != 0, return FALSE);
1167     CRM_CHECK(rsc_id != NULL, return FALSE);
1168     if (key == NULL) {
1169         local_key = make_stop_id(rsc_id, op);
1170         key = local_key;
1171     }
1172     pending = g_hash_table_lookup(lrm_state->pending_ops, key);
1173 
1174     if (pending) {
1175         if (remove && pending->remove == FALSE) {
1176             pending->remove = TRUE;
1177             crm_debug("Scheduling %s for removal", key);
1178         }
1179 
1180         if (pending->cancelled) {
1181             crm_debug("Operation %s already cancelled", key);
1182             free(local_key);
1183             return FALSE;
1184         }
1185 
1186         pending->cancelled = TRUE;
1187 
1188     } else {
1189         crm_info("No pending op found for %s", key);
1190         free(local_key);
1191         return FALSE;
1192     }
1193 
1194     crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
1195     rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval);
1196     if (rc == pcmk_ok) {
1197         crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
1198         free(local_key);
1199         return TRUE;
1200     }
1201 
1202     crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
1203     /* The caller needs to make sure the entry is
1204      * removed from the pending_ops list
1205      *
1206      * Usually by returning TRUE inside the worker function
1207      * supplied to g_hash_table_foreach_remove()
1208      *
1209      * Not removing the entry from pending_ops will block
1210      * the node from shutting down
1211      */
1212     free(local_key);
1213     return FALSE;
1214 }
1215 
1216 struct cancel_data {          /* arguments and result shared between cancel_op_key() and cancel_action_by_key() */
1217     gboolean done;            /* set to TRUE once a pending op with a matching key has been seen */
1218     gboolean remove;          /* forwarded to cancel_op() as its "remove" argument */
1219     const char *key;          /* operation key compared against each pending op's op_key */
1220     lrmd_rsc_info_t *rsc;     /* resource whose id is forwarded to cancel_op() */
1221     lrm_state_t *lrm_state;   /* LRM state forwarded to cancel_op() */
1222 };
1223 
1224 static gboolean
1225 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1226 {
1227     gboolean remove = FALSE;
1228     struct cancel_data *data = user_data;
1229     struct recurring_op_s *op = (struct recurring_op_s *)value;
1230 
1231     if (crm_str_eq(op->op_key, data->key, TRUE)) {
1232         data->done = TRUE;
1233         remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
1234     }
1235     return remove;
1236 }
1237 
1238 static gboolean
1239 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
1240 {
1241     guint removed = 0;
1242     struct cancel_data data;
1243 
1244     CRM_CHECK(rsc != NULL, return FALSE);
1245     CRM_CHECK(key != NULL, return FALSE);
1246 
1247     data.key = key;
1248     data.rsc = rsc;
1249     data.done = FALSE;
1250     data.remove = remove;
1251     data.lrm_state = lrm_state;
1252 
1253     removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data);
1254     crm_trace("Removed %u op cache entries, new size: %u",
1255               removed, g_hash_table_size(lrm_state->pending_ops));
1256     return data.done;
1257 }
1258 
1259 static lrmd_rsc_info_t *
1260 get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create)
     /* [previous][next][first][last][top][bottom][index][help] */
1261 {
1262     lrmd_rsc_info_t *rsc = NULL;
1263     const char *id = ID(resource);
1264     const char *type = crm_element_value(resource, XML_ATTR_TYPE);
1265     const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS);
1266     const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER);
1267     const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG);
1268 
1269     crm_trace("Retrieving %s from the LRM.", id);
1270     CRM_CHECK(id != NULL, return NULL);
1271 
1272     rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
1273 
1274     if (!rsc && long_id) {
1275         rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0);
1276     }
1277 
1278     if (!rsc && do_create) {
1279         CRM_CHECK(class != NULL, return NULL);
1280         CRM_CHECK(type != NULL, return NULL);
1281 
1282         crm_trace("Adding rsc %s before operation", id);
1283 
1284         lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring);
1285 
1286         rsc = lrm_state_get_rsc_info(lrm_state, id, 0);
1287 
1288         if (!rsc) {
1289             fsa_data_t *msg_data = NULL;
1290 
1291             crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name);
1292             /* only register this as a internal error if this involves the local
1293              * lrmd. Otherwise we're likely dealing with an unresponsive remote-node
1294              * which is not a FSA failure. */
1295             if (lrm_state_is_local(lrm_state) == TRUE) {
1296                 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
1297             }
1298         }
1299     }
1300 
1301     return rsc;
1302 }
1303 
1304 static void
1305 delete_resource(lrm_state_t * lrm_state,
     /* [previous][next][first][last][top][bottom][index][help] */
1306                 const char *id,
1307                 lrmd_rsc_info_t * rsc,
1308                 GHashTableIter * gIter,
1309                 const char *sys,
1310                 const char *host,
1311                 const char *user,
1312                 ha_msg_input_t * request,
1313                 gboolean unregister)
1314 {
1315     int rc = pcmk_ok;
1316 
1317     crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host);
1318 
1319     if (rsc && unregister) {
1320         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
1321     }
1322 
1323     if (rc == pcmk_ok) {
1324         crm_trace("Resource '%s' deleted", id);
1325     } else if (rc == -EINPROGRESS) {
1326         crm_info("Deletion of resource '%s' pending", id);
1327         if (request) {
1328             struct pending_deletion_op_s *op = NULL;
1329             char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
1330 
1331             op = calloc(1, sizeof(struct pending_deletion_op_s));
1332             op->rsc = strdup(rsc->id);
1333             op->input = copy_ha_msg_input(request);
1334             g_hash_table_insert(lrm_state->deletion_ops, ref, op);
1335         }
1336         return;
1337     } else {
1338         crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d",
1339                  id, sys, user ? user : "internal", host, rc);
1340     }
1341 
1342     delete_rsc_entry(lrm_state, request, id, gIter, rc, user);
1343 }
1344 
1345 static int
1346 get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1347 {
1348     int call_id = 999999999;
1349     rsc_history_t *entry = NULL;
1350 
1351     if(lrm_state) {
1352         entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1353     }
1354 
1355     /* Make sure the call id is greater than the last successful operation,
1356      * otherwise the failure will not result in a possible recovery of the resource
1357      * as it could appear the failure occurred before the successful start */
1358     if (entry) {
1359         call_id = entry->last_callid + 1;
1360     }
1361 
1362     if (call_id < 0) {
1363         call_id = 1;
1364     }
1365     return call_id;
1366 }
1367 
1368 static void
1369 force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node)
     /* [previous][next][first][last][top][bottom][index][help] */
1370 {
1371         GHashTableIter gIter;
1372         rsc_history_t *entry = NULL;
1373 
1374 
1375         crm_info("clearing resource history on node %s", lrm_state->node_name);
1376         g_hash_table_iter_init(&gIter, lrm_state->resource_history);
1377         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1378             /* only unregister the resource during a reprobe if it is not a remote connection
1379              * resource. otherwise unregistering the connection will terminate remote-node
1380              * membership */
1381             gboolean unregister = TRUE;
1382 
1383             if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
1384                 lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
1385                 if (remote_lrm_state) {
1386                     /* when forcing a reprobe, make sure to clear remote node before
1387                      * clearing the remote node's connection resource */ 
1388                     force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE);
1389                 }
1390                 unregister = FALSE;
1391             }
1392 
1393             delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host,
1394                             user_name, NULL, unregister);
1395         }
1396 
1397         /* Now delete the copy in the CIB */
1398         erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local);
1399 
1400         /* And finally, _delete_ the value in attrd
1401          * Setting it to FALSE results in the PE sending us back here again
1402          */
1403         update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
1404 }
1405 
1406 static void
1407 synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) 
     /* [previous][next][first][last][top][bottom][index][help] */
1408 {
1409     lrmd_event_data_t *op = NULL;
1410     const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
1411     const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
1412     xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
1413 
1414     if(xml_rsc == NULL) {
1415         /* @TODO Should we do something else, like direct ack? */
1416         crm_info("Skipping %s=%d on %s (%p): no resource",
1417                  crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state);
1418         return;
1419 
1420     } else if(operation == NULL) {
1421         /* This probably came from crm_resource -C, nothing to do */
1422         crm_info("Skipping %s=%d on %s (%p): no operation",
1423                  crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state);
1424         return;
1425     }
1426 
1427     op = construct_op(lrm_state, action, ID(xml_rsc), operation);
1428     CRM_ASSERT(op != NULL);
1429 
1430     op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
1431     if(safe_str_eq(operation, RSC_NOTIFY)) {
1432         /* Notifications can't fail yet */
1433         op->op_status = PCMK_LRM_OP_DONE;
1434         op->rc = PCMK_OCF_OK;
1435 
1436     } else {
1437         op->op_status = PCMK_LRM_OP_ERROR;
1438         op->rc = rc;
1439     }
1440     op->t_run = time(NULL);
1441     op->t_rcchange = op->t_run;
1442 
1443     crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state);
1444 
1445     if(lrm_state) {
1446         process_lrm_event(lrm_state, op, NULL);
1447 
1448     } else {
1449         lrmd_rsc_info_t rsc;
1450 
1451         rsc.id = strdup(op->rsc_id);
1452         rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE);
1453         rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS);
1454         rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER);
1455 
1456         do_update_resource(target_node, &rsc, op);
1457 
1458         free(rsc.id);
1459         free(rsc.type);
1460         free(rsc.class);
1461         free(rsc.provider);
1462     }
1463     lrmd_free_event(op);
1464 }
1465 
1466 
1467 /*       A_LRM_INVOKE   */
1468 void
1469 do_lrm_invoke(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
1470               enum crmd_fsa_cause cause,
1471               enum crmd_fsa_state cur_state,
1472               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1473 {
1474     gboolean create_rsc = TRUE;
1475     lrm_state_t *lrm_state = NULL;
1476     const char *crm_op = NULL;
1477     const char *from_sys = NULL;
1478     const char *from_host = NULL;
1479     const char *operation = NULL;
1480     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
1481     const char *user_name = NULL;
1482     const char *target_node = NULL;
1483     gboolean is_remote_node = FALSE;
1484     gboolean crm_rsc_delete = FALSE;
1485 
1486     if (input->xml != NULL) {
1487         /* Remote node operations are routed here to their remote connections */
1488         target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
1489     }
1490     if (target_node == NULL) {
1491         target_node = fsa_our_uname;
1492     } else if (safe_str_neq(target_node, fsa_our_uname)) {
1493         is_remote_node = TRUE;
1494     }
1495 
1496     lrm_state = lrm_state_find(target_node);
1497 
1498     if (lrm_state == NULL && is_remote_node) {
1499         crm_err("Failing action because remote node %s has no connection to cluster node %s",
1500                 target_node, fsa_our_uname);
1501 
1502         /* The action must be recorded here and in the CIB as failed */
1503         synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED);
1504         return;
1505     }
1506 
1507     CRM_ASSERT(lrm_state != NULL);
1508 
1509 #if ENABLE_ACL
1510     user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL);
1511     crm_trace("LRM command from user '%s'", user_name);
1512 #endif
1513 
1514     crm_op = crm_element_value(input->msg, F_CRM_TASK);
1515     from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
1516     if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) {
1517         from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
1518     }
1519 
1520     crm_trace("LRM command from: %s", from_sys);
1521 
1522     if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) {
1523         /* remember this delete op came from crm_resource */
1524         crm_rsc_delete = TRUE;
1525         operation = CRMD_ACTION_DELETE;
1526 
1527     } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
1528         operation = CRM_OP_LRM_REFRESH;
1529 
1530     } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) {
1531         lrmd_event_data_t *op = NULL;
1532         lrmd_rsc_info_t *rsc = NULL;
1533         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
1534 
1535         CRM_CHECK(xml_rsc != NULL, return);
1536 
1537         /* The lrmd can not fail a resource, it does not understand the
1538          * concept of success or failure in relation to a resource, it simply
1539          * executes operations and reports the results. We determine what a failure is.
1540          * Because of this, if we want to fail a resource we have to fake what we
1541          * understand a failure to look like.
1542          *
1543          * To do this we create a fake lrmd operation event for the resource
1544          * we want to fail.  We then pass that event to the lrmd client callback
1545          * so it will be processed as if it actually came from the lrmd. */
1546         op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon");
1547         CRM_ASSERT(op != NULL);
1548 
1549         free((char *)op->user_data);
1550         op->user_data = NULL;
1551         op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
1552         op->interval = 0;
1553         op->op_status = PCMK_LRM_OP_DONE;
1554         op->rc = PCMK_OCF_UNKNOWN_ERROR;
1555         op->t_run = time(NULL);
1556         op->t_rcchange = op->t_run;
1557 
1558 #if ENABLE_ACL
1559         if (user_name && is_privileged(user_name) == FALSE) {
1560             crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
1561             send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
1562             lrmd_free_event(op);
1563             return;
1564         }
1565 #endif
1566 
1567         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
1568         if (rsc) {
1569             crm_info("Failing resource %s...", rsc->id);
1570             process_lrm_event(lrm_state, op, NULL);
1571             op->op_status = PCMK_LRM_OP_DONE;
1572             op->rc = PCMK_OCF_OK;
1573             lrmd_free_rsc_info(rsc);
1574         } else {
1575             crm_info("Cannot find/create resource in order to fail it...");
1576             crm_log_xml_warn(input->msg, "bad input");
1577         }
1578 
1579         send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc));
1580         lrmd_free_event(op);
1581         return;
1582 
1583     } else if (input->xml != NULL) {
1584         operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
1585     }
1586 
1587     if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) {
1588         int rc = pcmk_ok;
1589         xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all);
1590 
1591         fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name);
1592         crm_info("Forced a local LRM refresh: call=%d", rc);
1593 
1594         if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) {
1595             xmlNode *reply = create_request(
1596                 CRM_OP_INVOKE_LRM, fragment,
1597                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
1598 
1599             crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host);
1600 
1601             if (relay_message(reply, TRUE) == FALSE) {
1602                 crm_log_xml_err(reply, "Unable to route reply");
1603             }
1604             free_xml(reply);
1605         }
1606 
1607         free_xml(fragment);
1608 
1609     } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) {
1610         xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all);
1611         xmlNode *reply = create_reply(input->msg, data);
1612 
1613         if (relay_message(reply, TRUE) == FALSE) {
1614             crm_err("Unable to route reply");
1615             crm_log_xml_err(reply, "reply");
1616         }
1617         free_xml(reply);
1618         free_xml(data);
1619 
1620     } else if (safe_str_eq(operation, CRM_OP_PROBED)) {
1621         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node);
1622 
1623     } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) {
1624         crm_notice("Forcing the status of all resources to be redetected");
1625 
1626         force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node);
1627 
1628         if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys)
1629            && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) {
1630 
1631             xmlNode *reply = create_request(
1632                 CRM_OP_INVOKE_LRM, NULL,
1633                 from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid);
1634 
1635             crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
1636 
1637             if (relay_message(reply, TRUE) == FALSE) {
1638                 crm_log_xml_err(reply, "Unable to route reply");
1639             }
1640             free_xml(reply);
1641         }
1642 
1643     } else if (operation != NULL) {
1644         lrmd_rsc_info_t *rsc = NULL;
1645         xmlNode *params = NULL;
1646         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
1647 
1648         CRM_CHECK(xml_rsc != NULL, return);
1649 
1650         params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
1651 
1652         if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
1653             create_rsc = FALSE;
1654         }
1655 
1656         if(lrm_state_is_connected(lrm_state) == FALSE) {
1657             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED);
1658             return;
1659         }
1660 
1661         rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc);
1662         if (rsc == NULL && create_rsc) {
1663             crm_err("Invalid resource definition for %s", ID(xml_rsc));
1664             crm_log_xml_warn(input->msg, "bad input");
1665 
1666             /* if the operation couldn't complete because we can't register
1667              * the resource, return a generic error */
1668             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED);
1669 
1670         } else if (rsc == NULL) {
1671             crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml));
1672             delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name);
1673 
1674             /* Deleting something that does not exist is a success */
1675             send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation,
1676                              from_host, from_sys);
1677 
1678         } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) {
1679             char *op_key = NULL;
1680             char *meta_key = NULL;
1681             int call = 0;
1682             const char *call_id = NULL;
1683             const char *op_task = NULL;
1684             const char *op_interval = NULL;
1685             gboolean in_progress = FALSE;
1686 
1687             CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command");
1688                       lrmd_free_rsc_info(rsc); return);
1689 
1690             meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL);
1691             op_interval = crm_element_value(params, meta_key);
1692             free(meta_key);
1693 
1694             meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
1695             op_task = crm_element_value(params, meta_key);
1696             free(meta_key);
1697 
1698             meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
1699             call_id = crm_element_value(params, meta_key);
1700             free(meta_key);
1701 
1702             CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command");
1703                       lrmd_free_rsc_info(rsc); return);
1704             CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command");
1705                       lrmd_free_rsc_info(rsc); return);
1706 
1707             op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0"));
1708 
1709             crm_debug("PE requested op %s (call=%s) be cancelled",
1710                       op_key, call_id ? call_id : "NA");
1711             call = crm_parse_int(call_id, "0");
1712             if (call == 0) {
1713                 /* the normal case when the PE cancels a recurring op */
1714                 in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
1715 
1716             } else {
1717                 /* the normal case when the PE cancels an orphan op */
1718                 in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
1719             }
1720 
1721             /* Acknowledge the cancellation operation if it's for a remote connection resource */
1722             if (in_progress == FALSE || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1723                 char *op_id = make_stop_id(rsc->id, call);
1724 
1725                 if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
1726                     crm_info("Nothing known about operation %d for %s", call, op_key);
1727                 }
1728                 erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call);
1729                 send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
1730                                  from_host, from_sys);
1731 
1732                 /* needed at least for cancellation of a remote operation */
1733                 g_hash_table_remove(lrm_state->pending_ops, op_id);
1734                 free(op_id);
1735 
1736             } else {
1737                 /* No ack is needed since abcdaa8, but peers with older versions
1738                  * in a rolling upgrade need one. We didn't bump the feature set
1739                  * at that commit, so we can only compare against the previous
1740                  * CRM version (3.0.8). If any peers have feature set 3.0.9 but
1741                  * not abcdaa8, they will time out waiting for the ack (no
1742                  * released versions of Pacemaker are affected).
1743                  */
1744                 const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
1745 
1746                 if (compare_version(peer_version, "3.0.8") <= 0) {
1747                     crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
1748                              op_key, from_host, peer_version);
1749                     send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
1750                                      from_host, from_sys);
1751                 }
1752             }
1753 
1754             free(op_key);
1755 
1756         } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) {
1757             gboolean unregister = TRUE;
1758 
1759 #if ENABLE_ACL
1760             int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name);
1761             if (cib_rc != pcmk_ok) {
1762                 lrmd_event_data_t *op = NULL;
1763 
1764                 crm_err
1765                     ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s",
1766                      rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc,
1767                      pcmk_strerror(cib_rc));
1768 
1769                 op = construct_op(lrm_state, input->xml, rsc->id, operation);
1770                 op->op_status = PCMK_LRM_OP_ERROR;
1771 
1772                 if (cib_rc == -EACCES) {
1773                     op->rc = PCMK_OCF_INSUFFICIENT_PRIV;
1774                 } else {
1775                     op->rc = PCMK_OCF_UNKNOWN_ERROR;
1776                 }
1777                 send_direct_ack(from_host, from_sys, NULL, op, rsc->id);
1778                 lrmd_free_event(op);
1779                 lrmd_free_rsc_info(rsc);
1780                 return;
1781             }
1782 #endif
1783             if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1784                 unregister = FALSE;
1785             }
1786 
1787             delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister);
1788 
1789         } else {
1790             do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg);
1791         }
1792 
1793         lrmd_free_rsc_info(rsc);
1794 
1795     } else {
1796         crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op));
1797         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
1798     }
1799 }
1800 
1801 static lrmd_event_data_t *
1802 construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation)
     /* [previous][next][first][last][top][bottom][index][help] */
1803 {
1804     lrmd_event_data_t *op = NULL;
1805     const char *op_delay = NULL;
1806     const char *op_timeout = NULL;
1807     const char *op_interval = NULL;
1808     GHashTable *params = NULL;
1809 
1810     const char *transition = NULL;
1811 
1812     CRM_ASSERT(rsc_id != NULL);
1813 
1814     op = calloc(1, sizeof(lrmd_event_data_t));
1815     op->type = lrmd_event_exec_complete;
1816     op->op_type = strdup(operation);
1817     op->op_status = PCMK_LRM_OP_PENDING;
1818     op->rc = -1;
1819     op->rsc_id = strdup(rsc_id);
1820     op->interval = 0;
1821     op->timeout = 0;
1822     op->start_delay = 0;
1823 
1824     if (rsc_op == NULL) {
1825         CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation));
1826         op->user_data = NULL;
1827         /* the stop_all_resources() case
1828          * by definition there is no DC (or they'd be shutting
1829          *   us down).
1830          * So we should put our version here.
1831          */
1832         op->params = crm_str_table_new();
1833 
1834         g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
1835 
1836         crm_trace("Constructed %s op for %s", operation, rsc_id);
1837         return op;
1838     }
1839 
1840     params = xml2list(rsc_op);
1841     g_hash_table_remove(params, CRM_META "_op_target_rc");
1842 
1843     op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
1844     op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
1845     op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL);
1846 
1847     op->interval = crm_parse_int(op_interval, "0");
1848     op->timeout = crm_parse_int(op_timeout, "0");
1849     op->start_delay = crm_parse_int(op_delay, "0");
1850 
1851 #if ENABLE_VERSIONED_ATTRS
1852     // Resolve any versioned parameters
1853     if (safe_str_neq(op->op_type, RSC_METADATA)
1854         && safe_str_neq(op->op_type, CRMD_ACTION_DELETE)
1855         && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
1856 
1857         // Resource info *should* already be cached, so we don't get lrmd call
1858         lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0);
1859         struct ra_metadata_s *metadata;
1860 
1861         metadata = metadata_cache_get(lrm_state->metadata_cache, rsc);
1862         if (metadata) {
1863             xmlNode *versioned_attrs = NULL;
1864             GHashTable *hash = NULL;
1865             char *key = NULL;
1866             char *value = NULL;
1867             GHashTableIter iter;
1868 
1869             versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS);
1870             hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
1871             g_hash_table_iter_init(&iter, hash);
1872             while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
1873                 g_hash_table_iter_steal(&iter);
1874                 g_hash_table_replace(params, key, value);
1875                 // providing meta-names for instance_attributes is only for backward compatibility,
1876                 // and will be removed in a future release
1877                 g_hash_table_replace(params, crm_meta_name(key), strdup(value));
1878             }
1879             g_hash_table_destroy(hash);
1880 
1881             versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META);
1882             hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
1883             g_hash_table_iter_init(&iter, hash);
1884             while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
1885                 g_hash_table_replace(params, crm_meta_name(key), strdup(value));
1886 
1887                 if (safe_str_eq(key, XML_ATTR_TIMEOUT)) {
1888                     op->timeout = crm_parse_int(value, "0");
1889                 } else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) {
1890                     op->start_delay = crm_parse_int(value, "0");
1891                 }
1892             }
1893             g_hash_table_destroy(hash);
1894 
1895             versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS);
1896             hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version);
1897             g_hash_table_iter_init(&iter, hash);
1898             while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
1899                 g_hash_table_iter_steal(&iter);
1900                 g_hash_table_replace(params, key, value);
1901             }
1902             g_hash_table_destroy(hash);
1903         }
1904 
1905         lrmd_free_rsc_info(rsc);
1906     }
1907 #endif
1908 
1909     if (safe_str_neq(operation, RSC_STOP)) {
1910         op->params = params;
1911 
1912     } else {
1913         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1914 
1915         /* If we do not have stop parameters cached, use
1916          * whatever we are given */
1917         if (!entry || !entry->stop_params) {
1918             op->params = params;
1919         } else {
1920             /* Copy the cached parameter list so that we stop the resource
1921              * with the old attributes, not the new ones */
1922             op->params = crm_str_table_new();
1923 
1924             g_hash_table_foreach(params, copy_meta_keys, op->params);
1925             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
1926             g_hash_table_destroy(params);
1927             params = NULL;
1928         }
1929     }
1930 
1931     /* sanity */
1932     if (op->interval < 0) {
1933         op->interval = 0;
1934     }
1935     if (op->timeout <= 0) {
1936         op->timeout = op->interval;
1937     }
1938     if (op->start_delay < 0) {
1939         op->start_delay = 0;
1940     }
1941 
1942     transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
1943     CRM_CHECK(transition != NULL, return op);
1944 
1945     op->user_data = strdup(transition);
1946 
1947     if (op->interval != 0) {
1948         if (safe_str_eq(operation, CRMD_ACTION_START)
1949             || safe_str_eq(operation, CRMD_ACTION_STOP)) {
1950             crm_err("Start and Stop actions cannot have an interval: %d", op->interval);
1951             op->interval = 0;
1952         }
1953     }
1954 
1955     crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval);
1956 
1957     return op;
1958 }
1959 
1960 void
1961 send_direct_ack(const char *to_host, const char *to_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1962                 lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id)
1963 {
1964     xmlNode *reply = NULL;
1965     xmlNode *update, *iter;
1966     crm_node_t *peer = NULL;
1967 
1968     CRM_CHECK(op != NULL, return);
1969     if (op->rsc_id == NULL) {
1970         CRM_ASSERT(rsc_id != NULL);
1971         op->rsc_id = strdup(rsc_id);
1972     }
1973     if (to_sys == NULL) {
1974         to_sys = CRM_SYSTEM_TENGINE;
1975     }
1976 
1977     peer = crm_get_peer(0, fsa_our_uname);
1978     update = create_node_state_update(peer, node_update_none, NULL,
1979                                       __FUNCTION__);
1980 
1981     iter = create_xml_node(update, XML_CIB_TAG_LRM);
1982     crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid);
1983     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
1984     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
1985 
1986     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
1987 
1988     build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__);
1989     reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
1990 
1991     crm_log_xml_trace(update, "ACK Update");
1992 
1993     crm_debug("ACK'ing resource op %s_%s_%d from %s: %s",
1994               op->rsc_id, op->op_type, op->interval, op->user_data,
1995               crm_element_value(reply, XML_ATTR_REFERENCE));
1996 
1997     if (relay_message(reply, TRUE) == FALSE) {
1998         crm_log_xml_err(reply, "Unable to route reply");
1999     }
2000 
2001     free_xml(update);
2002     free_xml(reply);
2003 }
2004 
2005 gboolean
2006 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
2007 {
2008     gboolean res = TRUE;
2009     GList *lrm_state_list = lrm_state_get_list();
2010     GList *state_entry;
2011 
2012     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
2013         lrm_state_t *lrm_state = state_entry->data;
2014 
2015         if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
2016             /* keep iterating through all even when false is returned */
2017             res = FALSE;
2018         }
2019     }
2020 
2021     set_bit(fsa_input_register, R_SENT_RSC_STOP);
2022     g_list_free(lrm_state_list); lrm_state_list = NULL;
2023     return res;
2024 }
2025 
/* Context handed as user_data to stop_recurring_action_by_rsc() when
 * do_lrm_rsc_op() sweeps the pending-operations table */
2026 struct stop_recurring_action_s {
2027     lrmd_rsc_info_t *rsc;      /* resource whose recurring operations are to be cancelled */
2028     lrm_state_t *lrm_state;    /* connection state passed on to cancel_op() */
2029 };
2030 
2031 static gboolean
2032 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2033 {
2034     gboolean remove = FALSE;
2035     struct stop_recurring_action_s *event = user_data;
2036     struct recurring_op_s *op = (struct recurring_op_s *)value;
2037 
2038     if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) {
2039         crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
2040         remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
2041     }
2042 
2043     return remove;
2044 }
2045 
2046 static gboolean
2047 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2048 {
2049     gboolean remove = FALSE;
2050     lrm_state_t *lrm_state = user_data;
2051     struct recurring_op_s *op = (struct recurring_op_s *)value;
2052 
2053     if (op->interval != 0) {
2054         crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key);
2055         remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
2056     }
2057 
2058     return remove;
2059 }
2060 
2061 static void
2062 record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
2063 {
2064     CRM_CHECK(node_name != NULL, return);
2065     CRM_CHECK(rsc != NULL, return);
2066     CRM_CHECK(op != NULL, return);
2067 
2068     if (op->op_type == NULL
2069         || safe_str_eq(op->op_type, CRMD_ACTION_CANCEL)
2070         || safe_str_eq(op->op_type, CRMD_ACTION_DELETE)) {
2071         return;
2072     }
2073 
2074     if (op->params == NULL) {
2075         return;
2076 
2077     } else {
2078         const char *record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING);
2079 
2080         if (record_pending == NULL || crm_is_true(record_pending) == FALSE) {
2081             return;
2082          }
2083     }
2084 
2085     op->call_id = -1;
2086     op->op_status = PCMK_LRM_OP_PENDING;
2087     op->rc = PCMK_OCF_UNKNOWN;
2088 
2089     op->t_run = time(NULL);
2090     op->t_rcchange = op->t_run;
2091 
2092     /* write a "pending" entry to the CIB, inhibit notification */
2093     crm_debug("Recording pending op %s_%s_%d on %s in the CIB",
2094               op->rsc_id, op->op_type, op->interval, node_name);
2095 
2096     do_update_resource(node_name, rsc, op);
2097 }
2098 
2099 static void
2100 do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg,
     /* [previous][next][first][last][top][bottom][index][help] */
2101               xmlNode * request)
2102 {
2103     int call_id = 0;
2104     char *op_id = NULL;
2105     lrmd_event_data_t *op = NULL;
2106     lrmd_key_value_t *params = NULL;
2107     fsa_data_t *msg_data = NULL;
2108     const char *transition = NULL;
2109     gboolean stop_recurring = FALSE;
2110     bool send_nack = FALSE;
2111 
2112     CRM_CHECK(rsc != NULL, return);
2113     CRM_CHECK(operation != NULL, return);
2114 
2115     if (msg != NULL) {
2116         transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
2117         if (transition == NULL) {
2118             crm_log_xml_err(msg, "Missing transition number");
2119         }
2120     }
2121 
2122     op = construct_op(lrm_state, msg, rsc->id, operation);
2123     CRM_CHECK(op != NULL, return);
2124 
2125     if (is_remote_lrmd_ra(NULL, NULL, rsc->id)
2126         && op->interval == 0
2127         && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) {
2128 
2129         /* pcmk remote connections are a special use case.
2130          * We never ever want to stop monitoring a connection resource until
2131          * the entire migration has completed. If the connection is unexpectedly
2132          * severed, even during a migration, this is an event we must detect.*/
2133         stop_recurring = FALSE;
2134 
2135     } else if (op->interval == 0
2136         && strcmp(operation, CRMD_ACTION_STATUS) != 0
2137         && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) {
2138 
2139         /* stop any previous monitor operations before changing the resource state */
2140         stop_recurring = TRUE;
2141     }
2142 
2143     if (stop_recurring == TRUE) {
2144         guint removed = 0;
2145         struct stop_recurring_action_s data;
2146 
2147         data.rsc = rsc;
2148         data.lrm_state = lrm_state;
2149         removed = g_hash_table_foreach_remove(
2150             lrm_state->pending_ops, stop_recurring_action_by_rsc, &data);
2151 
2152         if (removed) {
2153             crm_debug("Stopped %u recurring operation%s in preparation for %s_%s_%d",
2154                       removed, s_if_plural(removed), rsc->id, operation, op->interval);
2155         }
2156     }
2157 
2158     /* now do the op */
2159     crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval);
2160 
2161     if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) {
2162         register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
2163         send_nack = TRUE;
2164 
2165     } else if (fsa_state != S_NOT_DC
2166                && fsa_state != S_POLICY_ENGINE /* Recalculating */
2167                && fsa_state != S_TRANSITION_ENGINE
2168                && safe_str_neq(operation, "fail")
2169                && safe_str_neq(operation, CRMD_ACTION_STOP)) {
2170         send_nack = TRUE;
2171     }
2172 
2173     if(send_nack) {
2174         crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)",
2175                    operation, rsc->id, fsa_state2string(fsa_state),
2176                    is_set(fsa_input_register, R_SHUTDOWN)?"true":"false");
2177 
2178         op->rc = CRM_DIRECT_NACK_RC;
2179         op->op_status = PCMK_LRM_OP_ERROR;
2180         send_direct_ack(NULL, NULL, rsc, op, rsc->id);
2181         lrmd_free_event(op);
2182         free(op_id);
2183         return;
2184     }
2185 
2186     record_pending_op(lrm_state->node_name, rsc, op);
2187 
2188     op_id = generate_op_key(rsc->id, op->op_type, op->interval);
2189 
2190     if (op->interval > 0) {
2191         /* cancel it so we can then restart it without conflict */
2192         cancel_op_key(lrm_state, rsc, op_id, FALSE);
2193     }
2194 
2195     if (op->params) {
2196         char *key = NULL;
2197         char *value = NULL;
2198         GHashTableIter iter;
2199 
2200         g_hash_table_iter_init(&iter, op->params);
2201         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
2202             params = lrmd_key_value_add(params, key, value);
2203         }
2204     }
2205 
2206     call_id = lrm_state_exec(lrm_state,
2207                              rsc->id,
2208                              op->op_type,
2209                              op->user_data, op->interval, op->timeout, op->start_delay, params);
2210 
2211     if (call_id <= 0 && lrm_state_is_local(lrm_state)) {
2212         crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id);
2213         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
2214 
2215     } else if (call_id <= 0) {
2216 
2217         crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id);
2218         op->call_id = get_fake_call_id(lrm_state, rsc->id);
2219         op->op_status = PCMK_LRM_OP_DONE;
2220         op->rc = PCMK_OCF_UNKNOWN_ERROR;
2221         op->t_run = time(NULL);
2222         op->t_rcchange = op->t_run;
2223         process_lrm_event(lrm_state, op, NULL);
2224 
2225     } else {
2226         /* record all operations so we can wait
2227          * for them to complete during shutdown
2228          */
2229         char *call_id_s = make_stop_id(rsc->id, call_id);
2230         struct recurring_op_s *pending = NULL;
2231 
2232         pending = calloc(1, sizeof(struct recurring_op_s));
2233         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
2234 
2235         pending->call_id = call_id;
2236         pending->interval = op->interval;
2237         pending->op_type = strdup(operation);
2238         pending->op_key = strdup(op_id);
2239         pending->rsc_id = strdup(rsc->id);
2240         pending->start_time = time(NULL);
2241         pending->user_data = strdup(op->user_data);
2242         g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending);
2243 
2244         if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) {
2245             char *uuid = NULL;
2246             int dummy = 0, target_rc = 0;
2247 
2248             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
2249 
2250             decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
2251             free(uuid);
2252 
2253             op->rc = target_rc;
2254             op->op_status = PCMK_LRM_OP_DONE;
2255             send_direct_ack(NULL, NULL, rsc, op, rsc->id);
2256         }
2257 
2258         pending->params = op->params;
2259         op->params = NULL;
2260     }
2261 
2262     free(op_id);
2263     lrmd_free_event(op);
2264     return;
2265 }
2266 
/* CIB call number of the most recent resource update sent by
 * do_update_resource() (stored only when positive); cib_rsc_callback()
 * resets it to 0 once the callback for that call number arrives */
2267 int last_resource_update = 0;
2268 
2269 static void
2270 cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2271 {
2272     switch (rc) {
2273         case pcmk_ok:
2274         case -pcmk_err_diff_failed:
2275         case -pcmk_err_diff_resync:
2276             crm_trace("Resource update %d complete: rc=%d", call_id, rc);
2277             break;
2278         default:
2279             crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
2280     }
2281 
2282     if (call_id == last_resource_update) {
2283         last_resource_update = 0;
2284         trigger_fsa(fsa_source);
2285     }
2286 }
2287 
2288 static int
2289 do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
2290 {
2291 /*
2292   <status>
2293   <nodes_status id=uname>
2294   <lrm>
2295   <lrm_resources>
2296   <lrm_resource id=...>
2297   </...>
2298 */
2299     int rc = pcmk_ok;
2300     xmlNode *update, *iter = NULL;
2301     int call_opt = crmd_cib_smart_opt();
2302     const char *uuid = NULL;
2303 
2304     CRM_CHECK(op != NULL, return 0);
2305 
2306     iter = create_xml_node(iter, XML_CIB_TAG_STATUS);
2307     update = iter;
2308     iter = create_xml_node(iter, XML_CIB_TAG_STATE);
2309 
2310     if (safe_str_eq(node_name, fsa_our_uname)) {
2311         uuid = fsa_our_uuid;
2312 
2313     } else {
2314         /* remote nodes uuid and uname are equal */
2315         uuid = node_name;
2316         crm_xml_add(iter, XML_NODE_IS_REMOTE, "true");
2317     }
2318 
2319     CRM_LOG_ASSERT(uuid != NULL);
2320     if(uuid == NULL) {
2321         rc = -EINVAL;
2322         goto done;
2323     }
2324 
2325     crm_xml_add(iter, XML_ATTR_UUID,  uuid);
2326     crm_xml_add(iter, XML_ATTR_UNAME, node_name);
2327     crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__);
2328 
2329     iter = create_xml_node(iter, XML_CIB_TAG_LRM);
2330     crm_xml_add(iter, XML_ATTR_ID, uuid);
2331 
2332     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
2333     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
2334     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
2335 
2336     build_operation_update(iter, rsc, op, node_name, __FUNCTION__);
2337 
2338     if (rsc) {
2339         const char *container = NULL;
2340 
2341         crm_xml_add(iter, XML_ATTR_TYPE, rsc->type);
2342         crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class);
2343         crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider);
2344 
2345         if (op->params) {
2346             container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
2347         }
2348         if (container) {
2349             crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container);
2350             crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container);
2351         }
2352 
2353     } else {
2354         crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id);
2355         send_direct_ack(NULL, NULL, rsc, op, op->rsc_id);
2356         goto cleanup;
2357     }
2358 
2359     crm_log_xml_trace(update, __FUNCTION__);
2360 
2361     /* make it an asynchronous call and be done with it
2362      *
2363      * Best case:
2364      *   the resource state will be discovered during
2365      *   the next signup or election.
2366      *
2367      * Bad case:
2368      *   we are shutting down and there is no DC at the time,
2369      *   but then why were we shutting down then anyway?
2370      *   (probably because of an internal error)
2371      *
2372      * Worst case:
2373      *   we get shot for having resources "running" when the really weren't
2374      *
2375      * the alternative however means blocking here for too long, which
2376      * isn't acceptable
2377      */
2378     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL);
2379 
2380     if (rc > 0) {
2381         last_resource_update = rc;
2382     }
2383   done:
2384     /* the return code is a call number, not an error code */
2385     crm_trace("Sent resource state update message: %d for %s=%d on %s", rc,
2386               op->op_type, op->interval, op->rsc_id);
2387     fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback);
2388 
2389   cleanup:
2390     free_xml(update);
2391     return rc;
2392 }
2393 
2394 void
2395 do_lrm_event(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
2396              enum crmd_fsa_cause cause,
2397              enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
2398 {
2399     CRM_CHECK(FALSE, return);
2400 }
2401 
/*!
 * \internal
 * \brief Copy a string, turning escaped newlines into real ones
 *
 * Every two-character sequence consisting of a backslash followed by 'n' is
 * overwritten with a newline character followed by a space. Writing two
 * characters for two keeps the length unchanged, so the remainder of the
 * buffer never has to be shifted.
 *
 * \param[in] string  NUL-terminated string to convert (may be NULL)
 *
 * \return Newly allocated converted copy, or NULL if \p string is NULL or
 *         memory could not be allocated
 * \note The caller is responsible for freeing the result with free().
 */
static char *
unescape_newlines(const char *string)
{
    static const char escaped_newline[] = "\\n";
    const size_t escaped_len = sizeof(escaped_newline) - 1;
    char *pch = NULL;
    char *ret = NULL;
    size_t size = 0;

    if (string == NULL) {
        return NULL;
    }

    /* Work on a private copy so the caller's string is left untouched */
    size = strlen(string) + 1;
    ret = malloc(size);
    if (ret == NULL) {
        return NULL;
    }
    memcpy(ret, string, size);

    /* The rewritten pair can never match again, so resume right after it */
    for (pch = strstr(ret, escaped_newline); pch != NULL;
         pch = strstr(pch + escaped_len, escaped_newline)) {
        pch[0] = '\n';
        pch[1] = ' ';
    }

    return ret;
}
2422 
2423 gboolean
2424 process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending)
     /* [previous][next][first][last][top][bottom][index][help] */
2425 {
2426     char *op_id = NULL;
2427     char *op_key = NULL;
2428 
2429     int update_id = 0;
2430     gboolean remove = FALSE;
2431     gboolean removed = FALSE;
2432     lrmd_rsc_info_t *rsc = NULL;
2433 
2434     CRM_CHECK(op != NULL, return FALSE);
2435     CRM_CHECK(op->rsc_id != NULL, return FALSE);
2436 
2437     op_id = make_stop_id(op->rsc_id, op->call_id);
2438     op_key = generate_op_key(op->rsc_id, op->op_type, op->interval);
2439     rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
2440     if(pending == NULL) {
2441         remove = TRUE;
2442         pending = g_hash_table_lookup(lrm_state->pending_ops, op_id);
2443     }
2444 
2445     if (op->op_status == PCMK_LRM_OP_ERROR) {
2446         switch(op->rc) {
2447             case PCMK_OCF_NOT_RUNNING:
2448             case PCMK_OCF_RUNNING_MASTER:
2449             case PCMK_OCF_DEGRADED:
2450             case PCMK_OCF_DEGRADED_MASTER:
2451                 /* Leave it up to the TE/PE to decide if this is an error */
2452                 op->op_status = PCMK_LRM_OP_DONE;
2453                 break;
2454             default:
2455                 /* Nothing to do */
2456                 break;
2457         }
2458     }
2459 
2460     if (op->op_status != PCMK_LRM_OP_CANCELLED) {
2461         if (safe_str_eq(op->op_type, RSC_NOTIFY) || safe_str_eq(op->op_type, RSC_METADATA)) {
2462             /* Keep notify and meta-data ops out of the CIB */
2463             send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
2464         } else {
2465             update_id = do_update_resource(lrm_state->node_name, rsc, op);
2466         }
2467     } else if (op->interval == 0) {
2468         /* This will occur when "crm resource cleanup" is called while actions are in-flight */
2469         crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id);
2470         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
2471 
2472     } else if (pending == NULL) {
2473         /* We don't need to do anything for cancelled ops
2474          * that are not in our pending op list. There are no
2475          * transition actions waiting on these operations. */
2476 
2477     } else if (op->user_data == NULL) {
2478         /* At this point we have a pending entry, but no transition
2479          * key present in the user_data field. report this */
2480         crm_err("Op %s (call=%d): No user data", op_key, op->call_id);
2481 
2482     } else if (pending->remove) {
2483         /* The tengine canceled this op, we have been waiting for the cancel to finish. */
2484         erase_lrm_history_by_op(lrm_state, op);
2485 
2486     } else if (pending && op->rsc_deleted) {
2487         /* The tengine initiated this op, but it was cancelled outside of the
2488          * tengine's control during a resource cleanup/re-probe request. The tengine
2489          * must be alerted that this operation completed, otherwise the tengine
2490          * will continue waiting for this update to occur until it is timed out.
2491          * We don't want this update going to the cib though, so use a direct ack. */
2492         crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id);
2493         send_direct_ack(NULL, NULL, NULL, op, op->rsc_id);
2494 
2495     } else {
2496         /* Before a stop is called, no need to direct ack */
2497         crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id);
2498     }
2499 
2500     if(remove == FALSE) {
2501         /* The caller will do this afterwards, but keep the logging consistent */
2502         removed = TRUE;
2503 
2504     } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) {
2505         removed = TRUE;
2506         crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
2507                   op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops));
2508 
2509     } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) {
2510         removed = TRUE;
2511         g_hash_table_remove(lrm_state->pending_ops, op_id);
2512     }
2513 
2514     switch (op->op_status) {
2515         case PCMK_LRM_OP_CANCELLED:
2516             crm_info("Result of %s operation for %s on %s: %s "
2517                      CRM_XS " call=%d key=%s confirmed=%s",
2518                      crm_action_str(op->op_type, op->interval),
2519                      op->rsc_id, lrm_state->node_name,
2520                      services_lrm_status_str(op->op_status),
2521                      op->call_id, op_key, (removed? "true" : "false"));
2522             break;
2523 
2524         case PCMK_LRM_OP_DONE:
2525             do_crm_log(op->interval?LOG_INFO:LOG_NOTICE,
2526                        "Result of %s operation for %s on %s: %d (%s) "
2527                        CRM_XS " call=%d key=%s confirmed=%s cib-update=%d",
2528                        crm_action_str(op->op_type, op->interval),
2529                        op->rsc_id, lrm_state->node_name,
2530                        op->rc, services_ocf_exitcode_str(op->rc),
2531                        op->call_id, op_key, (removed? "true" : "false"),
2532                        update_id);
2533             break;
2534 
2535         case PCMK_LRM_OP_TIMEOUT:
2536             crm_err("Result of %s operation for %s on %s: %s "
2537                     CRM_XS " call=%d key=%s timeout=%dms",
2538                     crm_action_str(op->op_type, op->interval),
2539                     op->rsc_id, lrm_state->node_name,
2540                     services_lrm_status_str(op->op_status),
2541                     op->call_id, op_key, op->timeout);
2542             break;
2543 
2544         default:
2545             crm_err("Result of %s operation for %s on %s: %s "
2546                     CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d",
2547                     crm_action_str(op->op_type, op->interval),
2548                     op->rsc_id, lrm_state->node_name,
2549                     services_lrm_status_str(op->op_status), op->call_id, op_key,
2550                     (removed? "true" : "false"), op->op_status, update_id);
2551     }
2552 
2553     if (op->output) {
2554         char *prefix =
2555             crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id);
2556 
2557         if (op->rc) {
2558             crm_log_output(LOG_NOTICE, prefix, op->output);
2559         } else {
2560             crm_log_output(LOG_DEBUG, prefix, op->output);
2561         }
2562         free(prefix);
2563     }
2564 
2565     if (safe_str_neq(op->op_type, RSC_METADATA)) {
2566         crmd_alert_resource_op(lrm_state->node_name, op);
2567     } else if (op->rc == PCMK_OCF_OK) {
2568         char *metadata = unescape_newlines(op->output);
2569 
2570         metadata_cache_update(lrm_state->metadata_cache, rsc, metadata);
2571         free(metadata);
2572     }
2573 
2574     if (op->rsc_deleted) {
2575         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
2576         delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL);
2577     }
2578 
2579     /* If a shutdown was escalated while operations were pending,
2580      * then the FSA will be stalled right now... allow it to continue
2581      */
2582     mainloop_set_trigger(fsa_source);
2583     update_history_cache(lrm_state, rsc, op);
2584 
2585     lrmd_free_rsc_info(rsc);
2586     free(op_key);
2587     free(op_id);
2588 
2589     return TRUE;
2590 }

/* [previous][next][first][last][top][bottom][index][help] */