root/daemons/execd/execd_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. time_is_set
  2. get_current_time
  3. time_diff_ms
  4. cmd_original_times
  5. action_matches
  6. log_finished
  7. log_execute
  8. normalize_action_name
  9. build_rsc_from_xml
  10. create_lrmd_cmd
  11. stop_recurring_timer
  12. free_lrmd_cmd
  13. stonith_recurring_op_helper
  14. start_recurring_timer
  15. start_delay_helper
  16. find_duplicate_action
  17. merge_recurring_duplicate
  18. schedule_lrmd_cmd
  19. create_lrmd_reply
  20. send_client_notify
  21. send_cmd_complete_notify
  22. send_generic_notify
  23. cmd_reset
  24. cmd_finalize
  25. notify_one_client
  26. notify_of_new_client
  27. client_disconnect_cleanup
  28. action_complete
  29. stonith_action_complete
  30. lrmd_stonith_callback
  31. stonith_connection_failed
  32. execd_stonith_start
  33. execd_stonith_stop
  34. execd_stonith_monitor
  35. lrmd_rsc_execute_stonith
  36. lrmd_rsc_execute_service_lib
  37. lrmd_rsc_execute
  38. lrmd_rsc_dispatch
  39. free_rsc
  40. process_lrmd_signon
  41. process_lrmd_rsc_register
  42. process_lrmd_get_rsc_info
  43. process_lrmd_rsc_unregister
  44. process_lrmd_rsc_exec
  45. cancel_op
  46. cancel_all_recurring
  47. process_lrmd_rsc_cancel
  48. add_recurring_op_xml
  49. process_lrmd_get_recurring
  50. process_lrmd_message

   1 /*
   2  * Copyright 2012-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/fencing/internal.h>
  12 
  13 #include <glib.h>
  14 
  15 // Check whether we have a high-resolution monotonic clock
  16 #undef PCMK__TIME_USE_CGT
  17 #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
  18 #  define PCMK__TIME_USE_CGT
  19 #  include <time.h>  /* clock_gettime */
  20 #endif
  21 
  22 #include <unistd.h>
  23 
  24 #include <crm/crm.h>
  25 #include <crm/fencing/internal.h>
  26 #include <crm/services.h>
  27 #include <crm/services_internal.h>
  28 #include <crm/common/mainloop.h>
  29 #include <crm/common/ipc.h>
  30 #include <crm/common/ipc_internal.h>
  31 #include <crm/msg_xml.h>
  32 
  33 #include "pacemaker-execd.h"
  34 
  35 GHashTable *rsc_list = NULL;
  36 
  37 typedef struct lrmd_cmd_s {
  38     int timeout;
  39     guint interval_ms;
  40     int start_delay;
  41     int timeout_orig;
  42 
  43     int call_id;
  44 
  45     int call_opts;
  46     /* Timer ids, must be removed on cmd destruction. */
  47     int delay_id;
  48     int stonith_recurring_id;
  49 
  50     int rsc_deleted;
  51 
  52     int service_flags;
  53 
  54     char *client_id;
  55     char *origin;
  56     char *rsc_id;
  57     char *action;
  58     char *real_action;
  59     char *userdata_str;
  60 
  61     pcmk__action_result_t result;
  62 
  63     /* We can track operation queue time and run time, to be saved with the CIB
  64      * resource history (and displayed in cluster status). We need
  65      * high-resolution monotonic time for this purpose, so we use
  66      * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
  67      * is disabled).
  68      *
  69      * However, we also need epoch timestamps for recording the time the command
  70      * last ran and the time its return value last changed, for use in time
  71      * displays (as opposed to interval calculations). We keep time_t values for
  72      * this purpose.
  73      *
  74      * The last run time is used for both purposes, so we keep redundant
  75      * monotonic and epoch values for this. Technically the two could represent
  76      * different times, but since time_t has only second resolution and the
  77      * values are used for distinct purposes, that is not significant.
  78      */
  79 #ifdef PCMK__TIME_USE_CGT
  80     /* Recurring and systemd operations may involve more than one executor
  81      * command per operation, so they need info about the original and the most
  82      * recent.
  83      */
  84     struct timespec t_first_run;    // When op first ran
  85     struct timespec t_run;          // When op most recently ran
  86     struct timespec t_first_queue;  // When op was first queued
  87     struct timespec t_queue;        // When op was most recently queued
  88 #endif
  89     time_t epoch_last_run;          // Epoch timestamp of when op last ran
  90     time_t epoch_rcchange;          // Epoch timestamp of when rc last changed
  91 
  92     bool first_notify_sent;
  93     int last_notify_rc;
  94     int last_notify_op_status;
  95     int last_pid;
  96 
  97     GHashTable *params;
  98 } lrmd_cmd_t;
  99 
 100 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
 101 static gboolean lrmd_rsc_dispatch(gpointer user_data);
 102 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
 103 
 104 #ifdef PCMK__TIME_USE_CGT
 105 
 106 /*!
 107  * \internal
 108  * \brief Check whether a struct timespec has been set
 109  *
 110  * \param[in] timespec  Time to check
 111  *
 112  * \return true if timespec has been set (i.e. is nonzero), false otherwise
 113  */
 114 static inline bool
 115 time_is_set(struct timespec *timespec)
     /* [previous][next][first][last][top][bottom][index][help] */
 116 {
 117     return (timespec != NULL) &&
 118            ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
 119 }
 120 
 121 /*
 122  * \internal
 123  * \brief Set a timespec (and its original if unset) to the current time
 124  *
 125  * \param[out] t_current  Where to store current time
 126  * \param[out] t_orig     Where to copy t_current if unset
 127  */
 128 static void
 129 get_current_time(struct timespec *t_current, struct timespec *t_orig)
     /* [previous][next][first][last][top][bottom][index][help] */
 130 {
 131     clock_gettime(CLOCK_MONOTONIC, t_current);
 132     if ((t_orig != NULL) && !time_is_set(t_orig)) {
 133         *t_orig = *t_current;
 134     }
 135 }
 136 
 137 /*!
 138  * \internal
 139  * \brief Return difference between two times in milliseconds
 140  *
 141  * \param[in] now  More recent time (or NULL to use current time)
 142  * \param[in] old  Earlier time
 143  *
 144  * \return milliseconds difference (or 0 if old is NULL or unset)
 145  *
 146  * \note Can overflow on 32bit machines when the differences is around
 147  *       24 days or more.
 148  */
 149 static int
 150 time_diff_ms(struct timespec *now, struct timespec *old)
     /* [previous][next][first][last][top][bottom][index][help] */
 151 {
 152     int diff_ms = 0;
 153 
 154     if (time_is_set(old)) {
 155         struct timespec local_now = { 0, };
 156 
 157         if (now == NULL) {
 158             clock_gettime(CLOCK_MONOTONIC, &local_now);
 159             now = &local_now;
 160         }
 161         diff_ms = (now->tv_sec - old->tv_sec) * 1000
 162                   + (now->tv_nsec - old->tv_nsec) / 1000000;
 163     }
 164     return diff_ms;
 165 }
 166 
 167 /*!
 168  * \internal
 169  * \brief Reset a command's operation times to their original values.
 170  *
 171  * Reset a command's run and queued timestamps to the timestamps of the original
 172  * command, so we report the entire time since then and not just the time since
 173  * the most recent command (for recurring and systemd operations).
 174  *
 175  * \param[in] cmd  Executor command object to reset
 176  *
 177  * \note It's not obvious what the queued time should be for a systemd
 178  *       start/stop operation, which might go like this:
 179  *         initial command queued 5ms, runs 3s
 180  *         monitor command queued 10ms, runs 10s
 181  *         monitor command queued 10ms, runs 10s
 182  *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 183  *       implementation will report 5ms. If it's 25ms, then we need to
 184  *       subtract 20ms from the total exec time so as not to count it twice.
 185  *       We can implement that later if it matters to anyone ...
 186  */
 187 static void
 188 cmd_original_times(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 189 {
 190     cmd->t_run = cmd->t_first_run;
 191     cmd->t_queue = cmd->t_first_queue;
 192 }
 193 #endif
 194 
 195 static inline bool
 196 action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 197 {
 198     return (cmd->interval_ms == interval_ms)
 199            && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
 200 }
 201 
 202 /*!
 203  * \internal
 204  * \brief Log the result of an asynchronous command
 205  *
 206  * \param[in] cmd            Command to log result for
 207  * \param[in] exec_time_ms   Execution time in milliseconds, if known
 208  * \param[in] queue_time_ms  Queue time in milliseconds, if known
 209  */
 210 static void
 211 log_finished(lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 212 {
 213     int log_level = LOG_INFO;
 214     GString *str = g_string_sized_new(100); // reasonable starting size
 215 
 216     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 217         log_level = LOG_DEBUG;
 218     }
 219 
 220     g_string_printf(str, "%s %s (call %d",
 221                     cmd->rsc_id, cmd->action, cmd->call_id);
 222     if (cmd->last_pid != 0) {
 223         g_string_append_printf(str, ", PID %d", cmd->last_pid);
 224     }
 225     if (cmd->result.execution_status == PCMK_EXEC_DONE) {
 226         g_string_append_printf(str, ") exited with status %d",
 227                                cmd->result.exit_status);
 228     } else {
 229         g_string_append_printf(str, ") could not be executed: %s",
 230                                pcmk_exec_status_str(cmd->result.execution_status));
 231     }
 232     if (cmd->result.exit_reason != NULL) {
 233         g_string_append_printf(str, " (%s)", cmd->result.exit_reason);
 234     }
 235 
 236 #ifdef PCMK__TIME_USE_CGT
 237     g_string_append_printf(str, " (execution time %s",
 238                            pcmk__readable_interval(exec_time_ms));
 239     if (queue_time_ms > 0) {
 240         g_string_append_printf(str, " after being queued %s",
 241                                pcmk__readable_interval(queue_time_ms));
 242     }
 243     g_string_append(str, ")");
 244 #endif
 245 
 246     do_crm_log(log_level, "%s", str->str);
 247     g_string_free(str, TRUE);
 248 }
 249 
 250 static void
 251 log_execute(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 252 {
 253     int log_level = LOG_INFO;
 254 
 255     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 256         log_level = LOG_DEBUG;
 257     }
 258 
 259     do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
 260                cmd->rsc_id, cmd->action, cmd->call_id);
 261 }
 262 
 263 static const char *
 264 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 265 {
 266     if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
 267         pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
 268         return "status";
 269     }
 270     return action;
 271 }
 272 
 273 static lrmd_rsc_t *
 274 build_rsc_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 275 {
 276     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 277     lrmd_rsc_t *rsc = NULL;
 278 
 279     rsc = calloc(1, sizeof(lrmd_rsc_t));
 280 
 281     crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
 282 
 283     rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 284     rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
 285     rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
 286     rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
 287     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
 288 
 289     // Initialize fence device probes (to return "not running")
 290     pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
 291                      PCMK_EXEC_NO_FENCE_DEVICE, NULL);
 292     return rsc;
 293 }
 294 
 295 static lrmd_cmd_t *
 296 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
     /* [previous][next][first][last][top][bottom][index][help] */
 297 {
 298     int call_options = 0;
 299     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 300     lrmd_cmd_t *cmd = NULL;
 301 
 302     cmd = calloc(1, sizeof(lrmd_cmd_t));
 303 
 304     crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
 305     cmd->call_opts = call_options;
 306     cmd->client_id = strdup(client->id);
 307 
 308     crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
 309     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
 310     crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
 311     crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
 312     cmd->timeout_orig = cmd->timeout;
 313 
 314     cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
 315     cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
 316     cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
 317     cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 318 
 319     cmd->params = xml2list(rsc_xml);
 320 
 321     if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
 322         crm_debug("Setting flag to leave pid group on timeout and "
 323                   "only kill action pid for " PCMK__OP_FMT,
 324                   cmd->rsc_id, cmd->action, cmd->interval_ms);
 325         cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
 326                                                 LOG_TRACE, "Action",
 327                                                 cmd->action, 0,
 328                                                 SVC_ACTION_LEAVE_GROUP,
 329                                                 "SVC_ACTION_LEAVE_GROUP");
 330     }
 331     return cmd;
 332 }
 333 
 334 static void
 335 stop_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 336 {
 337     if (cmd) {
 338         if (cmd->stonith_recurring_id) {
 339             g_source_remove(cmd->stonith_recurring_id);
 340         }
 341         cmd->stonith_recurring_id = 0;
 342     }
 343 }
 344 
 345 static void
 346 free_lrmd_cmd(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 347 {
 348     stop_recurring_timer(cmd);
 349     if (cmd->delay_id) {
 350         g_source_remove(cmd->delay_id);
 351     }
 352     if (cmd->params) {
 353         g_hash_table_destroy(cmd->params);
 354     }
 355     pcmk__reset_result(&(cmd->result));
 356     free(cmd->origin);
 357     free(cmd->action);
 358     free(cmd->real_action);
 359     free(cmd->userdata_str);
 360     free(cmd->rsc_id);
 361     free(cmd->client_id);
 362     free(cmd);
 363 }
 364 
 365 static gboolean
 366 stonith_recurring_op_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 367 {
 368     lrmd_cmd_t *cmd = data;
 369     lrmd_rsc_t *rsc;
 370 
 371     cmd->stonith_recurring_id = 0;
 372 
 373     if (!cmd->rsc_id) {
 374         return FALSE;
 375     }
 376 
 377     rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 378 
 379     CRM_ASSERT(rsc != NULL);
 380     /* take it out of recurring_ops list, and put it in the pending ops
 381      * to be executed */
 382     rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 383     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 384 #ifdef PCMK__TIME_USE_CGT
 385     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 386 #endif
 387     mainloop_set_trigger(rsc->work);
 388 
 389     return FALSE;
 390 }
 391 
 392 static inline void
 393 start_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 394 {
 395     if (cmd && (cmd->interval_ms > 0)) {
 396         cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
 397                                                   stonith_recurring_op_helper,
 398                                                   cmd);
 399     }
 400 }
 401 
 402 static gboolean
 403 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 404 {
 405     lrmd_cmd_t *cmd = data;
 406     lrmd_rsc_t *rsc = NULL;
 407 
 408     cmd->delay_id = 0;
 409     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 410 
 411     if (rsc) {
 412         mainloop_set_trigger(rsc->work);
 413     }
 414 
 415     return FALSE;
 416 }
 417 
 418 /*!
 419  * \internal
 420  * \brief Check whether a list already contains the equivalent of a given action
 421  */
 422 static lrmd_cmd_t *
 423 find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 424 {
 425     for (GList *item = action_list; item != NULL; item = item->next) {
 426         lrmd_cmd_t *dup = item->data;
 427 
 428         if (action_matches(cmd, dup->action, dup->interval_ms)) {
 429             return dup;
 430         }
 431     }
 432     return NULL;
 433 }
 434 
 435 static bool
 436 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 437 {
 438     lrmd_cmd_t * dup = NULL;
 439     bool dup_pending = true;
 440 
 441     if (cmd->interval_ms == 0) {
 442         return false;
 443     }
 444 
 445     // Search for a duplicate of this action (in-flight or not)
 446     dup = find_duplicate_action(rsc->pending_ops, cmd);
 447     if (dup == NULL) {
 448         dup_pending = false;
 449         dup = find_duplicate_action(rsc->recurring_ops, cmd);
 450         if (dup == NULL) {
 451             return false;
 452         }
 453     }
 454 
 455     /* Do not merge fencing monitors marked for cancellation, so we can reply to
 456      * the cancellation separately.
 457      */
 458     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 459                      pcmk__str_casei)
 460         && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
 461         return false;
 462     }
 463 
 464     /* This should not occur. If it does, we need to investigate how something
 465      * like this is possible in the controller.
 466      */
 467     crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
 468              "), merging with previous op entry",
 469              rsc->rsc_id, normalize_action_name(rsc, dup->action),
 470              dup->interval_ms);
 471 
 472     // Merge new action's call ID and user data into existing action
 473     dup->first_notify_sent = false;
 474     free(dup->userdata_str);
 475     dup->userdata_str = cmd->userdata_str;
 476     cmd->userdata_str = NULL;
 477     dup->call_id = cmd->call_id;
 478     free_lrmd_cmd(cmd);
 479     cmd = NULL;
 480 
 481     /* If dup is not pending, that means it has already executed at least once
 482      * and is waiting in the interval. In that case, stop waiting and initiate
 483      * a new instance now.
 484      */
 485     if (!dup_pending) {
 486         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 487                          pcmk__str_casei)) {
 488             stop_recurring_timer(dup);
 489             stonith_recurring_op_helper(dup);
 490         } else {
 491             services_action_kick(rsc->rsc_id,
 492                                  normalize_action_name(rsc, dup->action),
 493                                  dup->interval_ms);
 494         }
 495     }
 496     return true;
 497 }
 498 
 499 static void
 500 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 501 {
 502     CRM_CHECK(cmd != NULL, return);
 503     CRM_CHECK(rsc != NULL, return);
 504 
 505     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
 506 
 507     if (merge_recurring_duplicate(rsc, cmd)) {
 508         // Equivalent of cmd has already been scheduled
 509         return;
 510     }
 511 
 512     /* The controller expects the executor to automatically cancel
 513      * recurring operations before a resource stops.
 514      */
 515     if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 516         cancel_all_recurring(rsc, NULL);
 517     }
 518 
 519     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 520 #ifdef PCMK__TIME_USE_CGT
 521     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 522 #endif
 523     mainloop_set_trigger(rsc->work);
 524 
 525     if (cmd->start_delay) {
 526         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
 527     }
 528 }
 529 
 530 static xmlNode *
 531 create_lrmd_reply(const char *origin, int rc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 532 {
 533     xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
 534 
 535     crm_xml_add(reply, F_LRMD_ORIGIN, origin);
 536     crm_xml_add_int(reply, F_LRMD_RC, rc);
 537     crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
 538     return reply;
 539 }
 540 
 541 static void
 542 send_client_notify(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 543 {
 544     xmlNode *update_msg = user_data;
 545     pcmk__client_t *client = value;
 546     int rc;
 547     int log_level = LOG_WARNING;
 548     const char *msg = NULL;
 549 
 550     CRM_CHECK(client != NULL, return);
 551     if (client->name == NULL) {
 552         crm_trace("Skipping notification to client without name");
 553         return;
 554     }
 555     if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
 556         /* We only want to notify clients of the executor IPC API. If we are
 557          * running as Pacemaker Remote, we may have clients proxied to other
 558          * IPC services in the cluster, so skip those.
 559          */
 560         crm_trace("Skipping executor API notification to client %s",
 561                   pcmk__client_name(client));
 562         return;
 563     }
 564 
 565     rc = lrmd_server_send_notify(client, update_msg);
 566     if (rc == pcmk_rc_ok) {
 567         return;
 568     }
 569 
 570     switch (rc) {
 571         case ENOTCONN:
 572         case EPIPE: // Client exited without waiting for notification
 573             log_level = LOG_INFO;
 574             msg = "Disconnected";
 575             break;
 576 
 577         default:
 578             msg = pcmk_rc_str(rc);
 579             break;
 580     }
 581     do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
 582                pcmk__client_name(client), msg, rc);
 583 }
 584 
 585 static void
 586 send_cmd_complete_notify(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 587 {
 588     xmlNode *notify = NULL;
 589     int exec_time = 0;
 590     int queue_time = 0;
 591 
 592 #ifdef PCMK__TIME_USE_CGT
 593     exec_time = time_diff_ms(NULL, &(cmd->t_run));
 594     queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
 595 #endif
 596     log_finished(cmd, exec_time, queue_time);
 597 
 598     /* if the first notify result for a cmd has already been sent earlier, and the
 599      * the option to only send notifies on result changes is set. Check to see
 600      * if the last result is the same as the new one. If so, suppress this update */
 601     if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
 602         if ((cmd->last_notify_rc == cmd->result.exit_status) &&
 603             (cmd->last_notify_op_status == cmd->result.execution_status)) {
 604 
 605             /* only send changes */
 606             return;
 607         }
 608 
 609     }
 610 
 611     cmd->first_notify_sent = true;
 612     cmd->last_notify_rc = cmd->result.exit_status;
 613     cmd->last_notify_op_status = cmd->result.execution_status;
 614 
 615     notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 616 
 617     crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 618     crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
 619     crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
 620     crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
 621     crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status);
 622     crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status);
 623     crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
 624     crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
 625 
 626     crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
 627                    (long long) cmd->epoch_last_run);
 628     crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
 629                    (long long) cmd->epoch_rcchange);
 630 #ifdef PCMK__TIME_USE_CGT
 631     crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
 632     crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
 633 #endif
 634 
 635     crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
 636     crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
 637     if(cmd->real_action) {
 638         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
 639     } else {
 640         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
 641     }
 642     crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
 643     crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
 644 
 645     if (cmd->result.action_stderr != NULL) {
 646         crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr);
 647 
 648     } else if (cmd->result.action_stdout != NULL) {
 649         crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout);
 650     }
 651 
 652     if (cmd->params) {
 653         char *key = NULL;
 654         char *value = NULL;
 655         GHashTableIter iter;
 656 
 657         xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
 658 
 659         g_hash_table_iter_init(&iter, cmd->params);
 660         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 661             hash2smartfield((gpointer) key, (gpointer) value, args);
 662         }
 663     }
 664     if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
 665         pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
 666 
 667         if (client) {
 668             send_client_notify(client->id, client, notify);
 669         }
 670     } else {
 671         pcmk__foreach_ipc_client(send_client_notify, notify);
 672     }
 673 
 674     free_xml(notify);
 675 }
 676 
 677 static void
 678 send_generic_notify(int rc, xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
 679 {
 680     if (pcmk__ipc_client_count() != 0) {
 681         int call_id = 0;
 682         xmlNode *notify = NULL;
 683         xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
 684         const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
 685         const char *op = crm_element_value(request, F_LRMD_OPERATION);
 686 
 687         crm_element_value_int(request, F_LRMD_CALLID, &call_id);
 688 
 689         notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 690         crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 691         crm_xml_add_int(notify, F_LRMD_RC, rc);
 692         crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
 693         crm_xml_add(notify, F_LRMD_OPERATION, op);
 694         crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
 695 
 696         pcmk__foreach_ipc_client(send_client_notify, notify);
 697 
 698         free_xml(notify);
 699     }
 700 }
 701 
 702 static void
 703 cmd_reset(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 704 {
 705     cmd->last_pid = 0;
 706 #ifdef PCMK__TIME_USE_CGT
 707     memset(&cmd->t_run, 0, sizeof(cmd->t_run));
 708     memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
 709 #endif
 710     cmd->epoch_last_run = 0;
 711 
 712     pcmk__reset_result(&(cmd->result));
 713     cmd->result.execution_status = PCMK_EXEC_DONE;
 714 }
 715 
 716 static void
 717 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 718 {
 719     crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
 720               rsc ? rsc->active : NULL, cmd);
 721 
 722     if (rsc && (rsc->active == cmd)) {
 723         rsc->active = NULL;
 724         mainloop_set_trigger(rsc->work);
 725     }
 726 
 727     if (!rsc) {
 728         cmd->rsc_deleted = 1;
 729     }
 730 
 731     /* reset original timeout so client notification has correct information */
 732     cmd->timeout = cmd->timeout_orig;
 733 
 734     send_cmd_complete_notify(cmd);
 735 
 736     if ((cmd->interval_ms != 0)
 737         && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
 738 
 739         if (rsc) {
 740             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 741             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 742         }
 743         free_lrmd_cmd(cmd);
 744     } else if (cmd->interval_ms == 0) {
 745         if (rsc) {
 746             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 747         }
 748         free_lrmd_cmd(cmd);
 749     } else {
 750         /* Clear all the values pertaining just to the last iteration of a recurring op. */
 751         cmd_reset(cmd);
 752     }
 753 }
 754 
 755 struct notify_new_client_data {
 756     xmlNode *notify;
 757     pcmk__client_t *new_client;
 758 };
 759 
 760 static void
 761 notify_one_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 762 {
 763     pcmk__client_t *client = value;
 764     struct notify_new_client_data *data = user_data;
 765 
 766     if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
 767         send_client_notify(key, (gpointer) client, (gpointer) data->notify);
 768     }
 769 }
 770 
 771 void
 772 notify_of_new_client(pcmk__client_t *new_client)
     /* [previous][next][first][last][top][bottom][index][help] */
 773 {
 774     struct notify_new_client_data data;
 775 
 776     data.new_client = new_client;
 777     data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 778     crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
 779     crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
 780     pcmk__foreach_ipc_client(notify_one_client, &data);
 781     free_xml(data.notify);
 782 }
 783 
 784 void
 785 client_disconnect_cleanup(const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 786 {
 787     GHashTableIter iter;
 788     lrmd_rsc_t *rsc = NULL;
 789     char *key = NULL;
 790 
 791     g_hash_table_iter_init(&iter, rsc_list);
 792     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
 793         if (rsc->call_opts & lrmd_opt_drop_recurring) {
 794             /* This client is disconnecting, drop any recurring operations
 795              * it may have initiated on the resource */
 796             cancel_all_recurring(rsc, client_id);
 797         }
 798     }
 799 }
 800 
 801 static void
 802 action_complete(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 803 {
 804     lrmd_rsc_t *rsc;
 805     lrmd_cmd_t *cmd = action->cb_data;
 806     enum ocf_exitcode code;
 807 
 808 #ifdef PCMK__TIME_USE_CGT
 809     const char *rclass = NULL;
 810     bool goagain = false;
 811 #endif
 812 
 813     if (!cmd) {
 814         crm_err("Completed executor action (%s) does not match any known operations",
 815                 action->id);
 816         return;
 817     }
 818 
 819 #ifdef PCMK__TIME_USE_CGT
 820     if (cmd->result.exit_status != action->rc) {
 821         cmd->epoch_rcchange = time(NULL);
 822     }
 823 #endif
 824 
 825     cmd->last_pid = action->pid;
 826 
 827     // Cast variable instead of function return to keep compilers happy
 828     code = services_result2ocf(action->standard, cmd->action, action->rc);
 829     pcmk__set_result(&(cmd->result), (int) code,
 830                      action->status, services__exit_reason(action));
 831 
 832     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 833 
 834 #ifdef PCMK__TIME_USE_CGT
 835     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
 836         rclass = resources_find_service_class(rsc->type);
 837     } else if(rsc) {
 838         rclass = rsc->class;
 839     }
 840 
 841     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 842         if (pcmk__result_ok(&(cmd->result))
 843             && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
 844             /* systemd returns from start and stop actions after the action
 845              * begins, not after it completes. We have to jump through a few
 846              * hoops so that we don't report 'complete' to the rest of pacemaker
 847              * until it's actually done.
 848              */
 849             goagain = true;
 850             cmd->real_action = cmd->action;
 851             cmd->action = strdup("monitor");
 852 
 853         } else if (cmd->real_action != NULL) {
 854             // This is follow-up monitor to check whether start/stop completed
 855             if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 856                 goagain = true;
 857 
 858             } else if (pcmk__result_ok(&(cmd->result))
 859                        && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 860                 goagain = true;
 861 
 862             } else {
 863                 int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 864                 int timeout_left = cmd->timeout_orig - time_sum;
 865 
 866                 crm_debug("%s systemd %s is now complete (elapsed=%dms, "
 867                           "remaining=%dms): %s (%d)",
 868                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
 869                           services_ocf_exitcode_str(cmd->result.exit_status),
 870                           cmd->result.exit_status);
 871                 cmd_original_times(cmd);
 872 
 873                 // Monitors may return "not running", but start/stop shouldn't
 874                 if ((cmd->result.execution_status == PCMK_EXEC_DONE)
 875                     && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
 876 
 877                     if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
 878                         cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
 879                     } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 880                         cmd->result.exit_status = PCMK_OCF_OK;
 881                     }
 882                 }
 883             }
 884         }
 885     }
 886 #endif
 887 
 888 #if SUPPORT_NAGIOS
 889     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
 890         if (action_matches(cmd, "monitor", 0)
 891             && pcmk__result_ok(&(cmd->result))) {
 892             /* Successfully executed --version for the nagios plugin */
 893             cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
 894 
 895         } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
 896                    && !pcmk__result_ok(&(cmd->result))) {
 897 #ifdef PCMK__TIME_USE_CGT
 898             goagain = true;
 899 #endif
 900         }
 901     }
 902 #endif
 903 
 904 #ifdef PCMK__TIME_USE_CGT
 905     if (goagain) {
 906         int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 907         int timeout_left = cmd->timeout_orig - time_sum;
 908         int delay = cmd->timeout_orig / 10;
 909 
 910         if(delay >= timeout_left && timeout_left > 20) {
 911             delay = timeout_left/2;
 912         }
 913 
 914         delay = QB_MIN(2000, delay);
 915         if (delay < timeout_left) {
 916             cmd->start_delay = delay;
 917             cmd->timeout = timeout_left;
 918 
 919             if (pcmk__result_ok(&(cmd->result))) {
 920                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 921                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
 922 
 923             } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 924                 crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 925                          cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
 926 
 927             } else {
 928                 crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 929                            cmd->rsc_id, cmd->action,
 930                            services_ocf_exitcode_str(cmd->result.exit_status),
 931                            cmd->result.exit_status, time_sum, timeout_left,
 932                            delay);
 933             }
 934 
 935             cmd_reset(cmd);
 936             if(rsc) {
 937                 rsc->active = NULL;
 938             }
 939             schedule_lrmd_cmd(rsc, cmd);
 940 
 941             /* Don't finalize cmd, we're not done with it yet */
 942             return;
 943 
 944         } else {
 945             crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
 946                        cmd->rsc_id,
 947                        (cmd->real_action? cmd->real_action : cmd->action),
 948                        cmd->result.exit_status, time_sum, timeout_left);
 949             pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 950                              PCMK_EXEC_TIMEOUT,
 951                              "Investigate reason for timeout, and adjust "
 952                              "configured operation timeout if necessary");
 953             cmd_original_times(cmd);
 954         }
 955     }
 956 #endif
 957 
 958     pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
 959                             services__grab_stderr(action));
 960     cmd_finalize(cmd, rsc);
 961 }
 962 
 963 /*!
 964  * \internal
 965  * \brief Process the result of a fence device action (start, stop, or monitor)
 966  *
 967  * \param[in] cmd               Fence device action that completed
 968  * \param[in] exit_status       Fencer API exit status for action
 969  * \param[in] execution_status  Fencer API execution status for action
 970  * \param[in] exit_reason       Human-friendly detail, if action failed
 971  */
 972 static void
 973 stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
     /* [previous][next][first][last][top][bottom][index][help] */
 974                         enum pcmk_exec_status execution_status,
 975                         const char *exit_reason)
 976 {
 977     // This can be NULL if resource was removed before command completed
 978     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 979 
 980     // Simplify fencer exit status to uniform exit status
 981     if (exit_status != CRM_EX_OK) {
 982         exit_status = PCMK_OCF_UNKNOWN_ERROR;
 983     }
 984 
 985     if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
 986         /* An in-flight fence action was cancelled. The execution status is
 987          * already correct, so don't overwrite it.
 988          */
 989         execution_status = PCMK_EXEC_CANCELLED;
 990 
 991     } else {
 992         /* Some execution status codes have specific meanings for the fencer
 993          * that executor clients may not expect, so map them to a simple error
 994          * status.
 995          */
 996         switch (execution_status) {
 997             case PCMK_EXEC_NOT_CONNECTED:
 998             case PCMK_EXEC_INVALID:
 999                 execution_status = PCMK_EXEC_ERROR;
1000                 break;
1001 
1002             case PCMK_EXEC_NO_FENCE_DEVICE:
1003                 /* This should be possible only for probes in practice, but
1004                  * interpret for all actions to be safe.
1005                  */
1006                 if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS,
1007                                  pcmk__str_none)) {
1008                     exit_status = PCMK_OCF_NOT_RUNNING;
1009 
1010                 } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP,
1011                                         pcmk__str_none)) {
1012                     exit_status = PCMK_OCF_OK;
1013 
1014                 } else {
1015                     exit_status = PCMK_OCF_NOT_INSTALLED;
1016                 }
1017                 execution_status = PCMK_EXEC_ERROR;
1018                 break;
1019 
1020             case PCMK_EXEC_NOT_SUPPORTED:
1021                 exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
1022                 break;
1023 
1024             default:
1025                 break;
1026         }
1027     }
1028 
1029     pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
1030 
1031     // Certain successful actions change the known state of the resource
1032     if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
1033 
1034         if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1035             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
1036                              PCMK_EXEC_DONE, NULL); // "running"
1037 
1038         } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1039             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1040                              PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
1041         }
1042     }
1043 
1044     /* The recurring timer should not be running at this point in any case, but
1045      * as a failsafe, stop it if it is.
1046      */
1047     stop_recurring_timer(cmd);
1048 
1049     /* Reschedule this command if appropriate. If a recurring command is *not*
1050      * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
1051      * not be removed from recurring_ops by cmd_finalize().
1052      */
1053     if (rsc && (cmd->interval_ms > 0)
1054         && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
1055         start_recurring_timer(cmd);
1056     }
1057 
1058     cmd_finalize(cmd, rsc);
1059 }
1060 
1061 static void
1062 lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
1063 {
1064     if ((data == NULL) || (data->userdata == NULL)) {
1065         crm_err("Ignoring fence action result: "
1066                 "Invalid callback arguments (bug?)");
1067     } else {
1068         stonith_action_complete((lrmd_cmd_t *) data->userdata,
1069                                 stonith__exit_status(data),
1070                                 stonith__execution_status(data),
1071                                 stonith__exit_reason(data));
1072     }
1073 }
1074 
1075 void
1076 stonith_connection_failed(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1077 {
1078     GHashTableIter iter;
1079     GList *cmd_list = NULL;
1080     GList *cmd_iter = NULL;
1081     lrmd_rsc_t *rsc = NULL;
1082     char *key = NULL;
1083 
1084     g_hash_table_iter_init(&iter, rsc_list);
1085     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
1086         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1087             /* If we registered this fence device, we don't know whether the
1088              * fencer still has the registration or not. Cause future probes to
1089              * return an error until the resource is stopped or started
1090              * successfully. This is especially important if the controller also
1091              * went away (possibly due to a cluster layer restart) and won't
1092              * receive our client notification of any monitors finalized below.
1093              */
1094             if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
1095                 pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1096                                  PCMK_EXEC_NOT_CONNECTED,
1097                                  "Lost connection to fencer");
1098             }
1099 
1100             if (rsc->active) {
1101                 cmd_list = g_list_append(cmd_list, rsc->active);
1102             }
1103             if (rsc->recurring_ops) {
1104                 cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
1105             }
1106             if (rsc->pending_ops) {
1107                 cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
1108             }
1109             rsc->pending_ops = rsc->recurring_ops = NULL;
1110         }
1111     }
1112 
1113     if (!cmd_list) {
1114         return;
1115     }
1116 
1117     crm_err("Connection to fencer failed, finalizing %d pending operations",
1118             g_list_length(cmd_list));
1119     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1120         stonith_action_complete((lrmd_cmd_t *) cmd_iter->data,
1121                                 CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1122                                 "Lost connection to fencer");
1123     }
1124     g_list_free(cmd_list);
1125 }
1126 
1127 /*!
1128  * \internal
1129  * \brief Execute a stonith resource "start" action
1130  *
1131  * Start a stonith resource by registering it with the fencer.
1132  * (Stonith agents don't have a start command.)
1133  *
1134  * \param[in] stonith_api  Connection to fencer
1135  * \param[in] rsc          Stonith resource to start
1136  * \param[in] cmd          Start command to execute
1137  *
1138  * \return pcmk_ok on success, -errno otherwise
1139  */
1140 static int
1141 execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1142 {
1143     char *key = NULL;
1144     char *value = NULL;
1145     stonith_key_value_t *device_params = NULL;
1146     int rc = pcmk_ok;
1147 
1148     // Convert command parameters to stonith API key/values
1149     if (cmd->params) {
1150         GHashTableIter iter;
1151 
1152         g_hash_table_iter_init(&iter, cmd->params);
1153         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1154             device_params = stonith_key_value_add(device_params, key, value);
1155         }
1156     }
1157 
1158     /* The fencer will automatically register devices via CIB notifications
1159      * when the CIB changes, but to avoid a possible race condition between
1160      * the fencer receiving the notification and the executor requesting that
1161      * resource, the executor registers the device as well. The fencer knows how
1162      * to handle duplicate registrations.
1163      */
1164     rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
1165                                             cmd->rsc_id, rsc->provider,
1166                                             rsc->type, device_params);
1167 
1168     stonith_key_value_freeall(device_params, 1, 1);
1169     return rc;
1170 }
1171 
1172 /*!
1173  * \internal
1174  * \brief Execute a stonith resource "stop" action
1175  *
1176  * Stop a stonith resource by unregistering it with the fencer.
1177  * (Stonith agents don't have a stop command.)
1178  *
1179  * \param[in] stonith_api  Connection to fencer
1180  * \param[in] rsc          Stonith resource to stop
1181  *
1182  * \return pcmk_ok on success, -errno otherwise
1183  */
1184 static inline int
1185 execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1186 {
1187     /* @TODO Failure would indicate a problem communicating with fencer;
1188      * perhaps we should try reconnecting and retrying a few times?
1189      */
1190     return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
1191                                             rsc->rsc_id);
1192 }
1193 
1194 /*!
1195  * \internal
1196  * \brief Initiate a stonith resource agent recurring "monitor" action
1197  *
1198  * \param[in] stonith_api  Connection to fencer
1199  * \param[in] rsc          Stonith resource to monitor
1200  * \param[in] cmd          Monitor command being executed
1201  *
1202  * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1203  */
1204 static inline int
1205 execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1206 {
1207     int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
1208                                         cmd->timeout / 1000);
1209 
1210     rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
1211                                               "lrmd_stonith_callback",
1212                                               lrmd_stonith_callback);
1213     if (rc == TRUE) {
1214         rsc->active = cmd;
1215         rc = pcmk_ok;
1216     } else {
1217         rc = -pcmk_err_generic;
1218     }
1219     return rc;
1220 }
1221 
1222 static void
1223 lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1224 {
1225     int rc = 0;
1226     bool do_monitor = FALSE;
1227 
1228     stonith_t *stonith_api = get_stonith_connection();
1229 
1230     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)
1231         && (cmd->interval_ms == 0)) {
1232         // Probes don't require a fencer connection
1233         stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
1234                                 rsc->fence_probe_result.execution_status,
1235                                 rsc->fence_probe_result.exit_reason);
1236         return;
1237 
1238     } else if (stonith_api == NULL) {
1239         stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
1240                                 PCMK_EXEC_NOT_CONNECTED,
1241                                 "No connection to fencer");
1242         return;
1243 
1244     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1245         rc = execd_stonith_start(stonith_api, rsc, cmd);
1246         if (rc == pcmk_ok) {
1247             do_monitor = TRUE;
1248         }
1249 
1250     } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1251         rc = execd_stonith_stop(stonith_api, rsc);
1252 
1253     } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1254         do_monitor = TRUE;
1255 
1256     } else {
1257         stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
1258                                 PCMK_EXEC_ERROR,
1259                                 "Invalid fence device action (bug?)");
1260         return;
1261     }
1262 
1263     if (do_monitor) {
1264         rc = execd_stonith_monitor(stonith_api, rsc, cmd);
1265         if (rc == pcmk_ok) {
1266             // Don't clean up yet, we will find out result of the monitor later
1267             return;
1268         }
1269     }
1270 
1271     stonith_action_complete(cmd,
1272                             ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
1273                             stonith__legacy2status(rc),
1274                             ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
1275 }
1276 
1277 static int
1278 lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1279 {
1280     svc_action_t *action = NULL;
1281     GHashTable *params_copy = NULL;
1282 
1283     CRM_ASSERT(rsc);
1284     CRM_ASSERT(cmd);
1285 
1286     crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
1287               rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1288 
1289 #if SUPPORT_NAGIOS
1290     /* Recurring operations are cancelled anyway for a stop operation */
1291     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
1292         && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1293 
1294         cmd->result.exit_status = PCMK_OCF_OK;
1295         goto exec_done;
1296     }
1297 #endif
1298 
1299     params_copy = pcmk__str_table_dup(cmd->params);
1300 
1301     action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1302                                      rsc->type,
1303                                      normalize_action_name(rsc, cmd->action),
1304                                      cmd->interval_ms, cmd->timeout,
1305                                      params_copy, cmd->service_flags);
1306 
1307     if (action == NULL) {
1308         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1309                          PCMK_EXEC_ERROR, strerror(ENOMEM));
1310         goto exec_done;
1311     }
1312 
1313     if (action->rc != PCMK_OCF_UNKNOWN) {
1314         pcmk__set_result(&(cmd->result), action->rc, action->status,
1315                          services__exit_reason(action));
1316         services_action_free(action);
1317         goto exec_done;
1318     }
1319 
1320     action->cb_data = cmd;
1321 
1322     if (services_action_async(action, action_complete)) {
1323         /* When services_action_async() returns TRUE, the callback might have
1324          * been called -- in this case action_complete(), which might free cmd,
1325          * so cmd cannot be used here.
1326          */
1327         return TRUE;
1328     }
1329 
1330     pcmk__set_result(&(cmd->result), action->rc, action->status,
1331                      services__exit_reason(action));
1332     services_action_free(action);
1333     action = NULL;
1334 
1335   exec_done:
1336     cmd_finalize(cmd, rsc);
1337     return TRUE;
1338 }
1339 
1340 static gboolean
1341 lrmd_rsc_execute(lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1342 {
1343     lrmd_cmd_t *cmd = NULL;
1344 
1345     CRM_CHECK(rsc != NULL, return FALSE);
1346 
1347     if (rsc->active) {
1348         crm_trace("%s is still active", rsc->rsc_id);
1349         return TRUE;
1350     }
1351 
1352     if (rsc->pending_ops) {
1353         GList *first = rsc->pending_ops;
1354 
1355         cmd = first->data;
1356         if (cmd->delay_id) {
1357             crm_trace
1358                 ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
1359                  cmd->rsc_id, cmd->action, cmd->start_delay);
1360             return TRUE;
1361         }
1362         rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1363         g_list_free_1(first);
1364 
1365 #ifdef PCMK__TIME_USE_CGT
1366         get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1367 #endif
1368         cmd->epoch_last_run = time(NULL);
1369     }
1370 
1371     if (!cmd) {
1372         crm_trace("Nothing further to do for %s", rsc->rsc_id);
1373         return TRUE;
1374     }
1375 
1376     rsc->active = cmd;          /* only one op at a time for a rsc */
1377     if (cmd->interval_ms) {
1378         rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1379     }
1380 
1381     log_execute(cmd);
1382 
1383     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1384         lrmd_rsc_execute_stonith(rsc, cmd);
1385     } else {
1386         lrmd_rsc_execute_service_lib(rsc, cmd);
1387     }
1388 
1389     return TRUE;
1390 }
1391 
1392 static gboolean
1393 lrmd_rsc_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1394 {
1395     return lrmd_rsc_execute(user_data);
1396 }
1397 
1398 void
1399 free_rsc(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1400 {
1401     GList *gIter = NULL;
1402     lrmd_rsc_t *rsc = data;
1403     int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1404                                   pcmk__str_casei);
1405 
1406     gIter = rsc->pending_ops;
1407     while (gIter != NULL) {
1408         GList *next = gIter->next;
1409         lrmd_cmd_t *cmd = gIter->data;
1410 
1411         /* command was never executed */
1412         cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1413         cmd_finalize(cmd, NULL);
1414 
1415         gIter = next;
1416     }
1417     /* frees list, but not list elements. */
1418     g_list_free(rsc->pending_ops);
1419 
1420     gIter = rsc->recurring_ops;
1421     while (gIter != NULL) {
1422         GList *next = gIter->next;
1423         lrmd_cmd_t *cmd = gIter->data;
1424 
1425         if (is_stonith) {
1426             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1427             /* If a stonith command is in-flight, just mark it as cancelled;
1428              * it is not safe to finalize/free the cmd until the stonith api
1429              * says it has either completed or timed out.
1430              */
1431             if (rsc->active != cmd) {
1432                 cmd_finalize(cmd, NULL);
1433             }
1434         } else {
1435             /* This command is already handed off to service library,
1436              * let service library cancel it and tell us via the callback
1437              * when it is cancelled. The rsc can be safely destroyed
1438              * even if we are waiting for the cancel result */
1439             services_action_cancel(rsc->rsc_id,
1440                                    normalize_action_name(rsc, cmd->action),
1441                                    cmd->interval_ms);
1442         }
1443 
1444         gIter = next;
1445     }
1446     /* frees list, but not list elements. */
1447     g_list_free(rsc->recurring_ops);
1448 
1449     free(rsc->rsc_id);
1450     free(rsc->class);
1451     free(rsc->provider);
1452     free(rsc->type);
1453     mainloop_destroy_trigger(rsc->work);
1454 
1455     free(rsc);
1456 }
1457 
1458 static int
1459 process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1460                     xmlNode **reply)
1461 {
1462     int rc = pcmk_ok;
1463     const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
1464 
1465     if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
1466         crm_err("Cluster API version must be greater than or equal to %s, not %s",
1467                 LRMD_MIN_PROTOCOL_VERSION, protocol_version);
1468         rc = -EPROTO;
1469     }
1470 
1471     if (pcmk__xe_attr_is_true(request, F_LRMD_IS_IPC_PROVIDER)) {
1472 #ifdef PCMK__COMPILE_REMOTE
1473         if ((client->remote != NULL) && client->remote->tls_handshake_complete) {
1474             // This is a remote connection from a cluster node's controller
1475             ipc_proxy_add_provider(client);
1476         } else {
1477             rc = -EACCES;
1478         }
1479 #else
1480         rc = -EPROTONOSUPPORT;
1481 #endif
1482     }
1483 
1484     *reply = create_lrmd_reply(__func__, rc, call_id);
1485     crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
1486     crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
1487     crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1488 
1489     return rc;
1490 }
1491 
1492 static int
1493 process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1494 {
1495     int rc = pcmk_ok;
1496     lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1497     lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1498 
1499     if (dup &&
1500         pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1501         pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1502 
1503         crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1504         free_rsc(rsc);
1505         return rc;
1506     }
1507 
1508     g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1509     crm_info("Cached agent information for '%s'", rsc->rsc_id);
1510     return rc;
1511 }
1512 
1513 static xmlNode *
1514 process_lrmd_get_rsc_info(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1515 {
1516     int rc = pcmk_ok;
1517     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1518     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1519     xmlNode *reply = NULL;
1520     lrmd_rsc_t *rsc = NULL;
1521 
1522     if (rsc_id == NULL) {
1523         rc = -ENODEV;
1524     } else {
1525         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1526         if (rsc == NULL) {
1527             crm_info("Agent information for '%s' not in cache", rsc_id);
1528             rc = -ENODEV;
1529         }
1530     }
1531 
1532     reply = create_lrmd_reply(__func__, rc, call_id);
1533     if (rsc) {
1534         crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
1535         crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
1536         crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
1537         crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
1538     }
1539     return reply;
1540 }
1541 
1542 static int
1543 process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
     /* [previous][next][first][last][top][bottom][index][help] */
1544                             xmlNode *request)
1545 {
1546     int rc = pcmk_ok;
1547     lrmd_rsc_t *rsc = NULL;
1548     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1549     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1550 
1551     if (!rsc_id) {
1552         return -ENODEV;
1553     }
1554 
1555     rsc = g_hash_table_lookup(rsc_list, rsc_id);
1556     if (rsc == NULL) {
1557         crm_info("Ignoring unregistration of resource '%s', which is not registered",
1558                  rsc_id);
1559         return pcmk_ok;
1560     }
1561 
1562     if (rsc->active) {
1563         /* let the caller know there are still active ops on this rsc to watch for */
1564         crm_trace("Operation (%p) still in progress for unregistered resource %s",
1565                   rsc->active, rsc_id);
1566         rc = -EINPROGRESS;
1567     }
1568 
1569     g_hash_table_remove(rsc_list, rsc_id);
1570 
1571     return rc;
1572 }
1573 
1574 static int
1575 process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1576 {
1577     lrmd_rsc_t *rsc = NULL;
1578     lrmd_cmd_t *cmd = NULL;
1579     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1580     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1581     int call_id;
1582 
1583     if (!rsc_id) {
1584         return -EINVAL;
1585     }
1586     if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1587         crm_info("Resource '%s' not found (%d active resources)",
1588                  rsc_id, g_hash_table_size(rsc_list));
1589         return -ENODEV;
1590     }
1591 
1592     cmd = create_lrmd_cmd(request, client);
1593     call_id = cmd->call_id;
1594 
1595     /* Don't reference cmd after handing it off to be scheduled.
1596      * The cmd could get merged and freed. */
1597     schedule_lrmd_cmd(rsc, cmd);
1598 
1599     return call_id;
1600 }
1601 
1602 static int
1603 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1604 {
1605     GList *gIter = NULL;
1606     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1607 
1608     /* How to cancel an action.
1609      * 1. Check pending ops list, if it hasn't been handed off
1610      *    to the service library or stonith recurring list remove
1611      *    it there and that will stop it.
1612      * 2. If it isn't in the pending ops list, then it's either a
1613      *    recurring op in the stonith recurring list, or the service
1614      *    library's recurring list.  Stop it there
1615      * 3. If not found in any lists, then this operation has either
1616      *    been executed already and is not a recurring operation, or
1617      *    never existed.
1618      */
1619     if (!rsc) {
1620         return -ENODEV;
1621     }
1622 
1623     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1624         lrmd_cmd_t *cmd = gIter->data;
1625 
1626         if (action_matches(cmd, action, interval_ms)) {
1627             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1628             cmd_finalize(cmd, rsc);
1629             return pcmk_ok;
1630         }
1631     }
1632 
1633     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1634         /* The service library does not handle stonith operations.
1635          * We have to handle recurring stonith operations ourselves. */
1636         for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1637             lrmd_cmd_t *cmd = gIter->data;
1638 
1639             if (action_matches(cmd, action, interval_ms)) {
1640                 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1641                 if (rsc->active != cmd) {
1642                     cmd_finalize(cmd, rsc);
1643                 }
1644                 return pcmk_ok;
1645             }
1646         }
1647     } else if (services_action_cancel(rsc_id,
1648                                       normalize_action_name(rsc, action),
1649                                       interval_ms) == TRUE) {
1650         /* The service library will tell the action_complete callback function
1651          * this action was cancelled, which will destroy the cmd and remove
1652          * it from the recurring_op list. Do not do that in this function
1653          * if the service library says it cancelled it. */
1654         return pcmk_ok;
1655     }
1656 
1657     return -EOPNOTSUPP;
1658 }
1659 
1660 static void
1661 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1662 {
1663     GList *cmd_list = NULL;
1664     GList *cmd_iter = NULL;
1665 
1666     /* Notice a copy of each list is created when concat is called.
1667      * This prevents odd behavior from occurring when the cmd_list
1668      * is iterated through later on.  It is possible the cancel_op
1669      * function may end up modifying the recurring_ops and pending_ops
1670      * lists.  If we did not copy those lists, our cmd_list iteration
1671      * could get messed up.*/
1672     if (rsc->recurring_ops) {
1673         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1674     }
1675     if (rsc->pending_ops) {
1676         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1677     }
1678     if (!cmd_list) {
1679         return;
1680     }
1681 
1682     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1683         lrmd_cmd_t *cmd = cmd_iter->data;
1684 
1685         if (cmd->interval_ms == 0) {
1686             continue;
1687         }
1688 
1689         if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1690             continue;
1691         }
1692 
1693         cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1694     }
1695     /* frees only the copied list data, not the cmds */
1696     g_list_free(cmd_list);
1697 }
1698 
1699 static int
1700 process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1701 {
1702     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1703     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1704     const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
1705     guint interval_ms = 0;
1706 
1707     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
1708 
1709     if (!rsc_id || !action) {
1710         return -EINVAL;
1711     }
1712 
1713     return cancel_op(rsc_id, action, interval_ms);
1714 }
1715 
1716 static void
1717 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1718 {
1719     xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
1720 
1721     crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
1722     for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1723         lrmd_cmd_t *cmd = item->data;
1724         xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
1725 
1726         crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
1727                     (cmd->real_action? cmd->real_action : cmd->action));
1728         crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
1729         crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
1730     }
1731 }
1732 
1733 static xmlNode *
1734 process_lrmd_get_recurring(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1735 {
1736     int rc = pcmk_ok;
1737     const char *rsc_id = NULL;
1738     lrmd_rsc_t *rsc = NULL;
1739     xmlNode *reply = NULL;
1740     xmlNode *rsc_xml = NULL;
1741 
1742     // Resource ID is optional
1743     rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
1744     if (rsc_xml) {
1745         rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
1746     }
1747     if (rsc_xml) {
1748         rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1749     }
1750 
1751     // If resource ID is specified, resource must exist
1752     if (rsc_id != NULL) {
1753         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1754         if (rsc == NULL) {
1755             crm_info("Resource '%s' not found (%d active resources)",
1756                      rsc_id, g_hash_table_size(rsc_list));
1757             rc = -ENODEV;
1758         }
1759     }
1760 
1761     reply = create_lrmd_reply(__func__, rc, call_id);
1762 
1763     // If resource ID is not specified, check all resources
1764     if (rsc_id == NULL) {
1765         GHashTableIter iter;
1766         char *key = NULL;
1767 
1768         g_hash_table_iter_init(&iter, rsc_list);
1769         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1770                                       (gpointer *) &rsc)) {
1771             add_recurring_op_xml(reply, rsc);
1772         }
1773     } else if (rsc) {
1774         add_recurring_op_xml(reply, rsc);
1775     }
1776     return reply;
1777 }
1778 
1779 void
1780 process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1781 {
1782     int rc = pcmk_ok;
1783     int call_id = 0;
1784     const char *op = crm_element_value(request, F_LRMD_OPERATION);
1785     int do_reply = 0;
1786     int do_notify = 0;
1787     xmlNode *reply = NULL;
1788 
1789     /* Certain IPC commands may be done only by privileged users (i.e. root or
1790      * hacluster), because they would otherwise provide a means of bypassing
1791      * ACLs.
1792      */
1793     bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
1794 
1795     crm_trace("Processing %s operation from %s", op, client->id);
1796     crm_element_value_int(request, F_LRMD_CALLID, &call_id);
1797 
1798     if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
1799 #ifdef PCMK__COMPILE_REMOTE
1800         if (allowed) {
1801             ipc_proxy_forward_client(client, request);
1802         } else {
1803             rc = -EACCES;
1804         }
1805 #else
1806         rc = -EPROTONOSUPPORT;
1807 #endif
1808         do_reply = 1;
1809     } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
1810         rc = process_lrmd_signon(client, request, call_id, &reply);
1811         do_reply = 1;
1812     } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
1813         if (allowed) {
1814             rc = process_lrmd_rsc_register(client, id, request);
1815             do_notify = 1;
1816         } else {
1817             rc = -EACCES;
1818         }
1819         do_reply = 1;
1820     } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
1821         if (allowed) {
1822             reply = process_lrmd_get_rsc_info(request, call_id);
1823         } else {
1824             rc = -EACCES;
1825         }
1826         do_reply = 1;
1827     } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
1828         if (allowed) {
1829             rc = process_lrmd_rsc_unregister(client, id, request);
1830             /* don't notify anyone about failed un-registers */
1831             if (rc == pcmk_ok || rc == -EINPROGRESS) {
1832                 do_notify = 1;
1833             }
1834         } else {
1835             rc = -EACCES;
1836         }
1837         do_reply = 1;
1838     } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
1839         if (allowed) {
1840             rc = process_lrmd_rsc_exec(client, id, request);
1841         } else {
1842             rc = -EACCES;
1843         }
1844         do_reply = 1;
1845     } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
1846         if (allowed) {
1847             rc = process_lrmd_rsc_cancel(client, id, request);
1848         } else {
1849             rc = -EACCES;
1850         }
1851         do_reply = 1;
1852     } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
1853         do_notify = 1;
1854         do_reply = 1;
1855     } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
1856         if (allowed) {
1857             xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
1858 
1859             CRM_LOG_ASSERT(data != NULL);
1860             pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
1861         } else {
1862             rc = -EACCES;
1863         }
1864     } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
1865         if (allowed) {
1866             rc = process_lrmd_alert_exec(client, id, request);
1867         } else {
1868             rc = -EACCES;
1869         }
1870         do_reply = 1;
1871     } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
1872         if (allowed) {
1873             reply = process_lrmd_get_recurring(request, call_id);
1874         } else {
1875             rc = -EACCES;
1876         }
1877         do_reply = 1;
1878     } else {
1879         rc = -EOPNOTSUPP;
1880         do_reply = 1;
1881         crm_err("Unknown IPC request '%s' from client %s",
1882                 op, pcmk__client_name(client));
1883     }
1884 
1885     if (rc == -EACCES) {
1886         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
1887                  op, pcmk__client_name(client));
1888     }
1889 
1890     crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
1891               op, client->id, rc, do_reply, do_notify);
1892 
1893     if (do_reply) {
1894         int send_rc = pcmk_rc_ok;
1895 
1896         if (reply == NULL) {
1897             reply = create_lrmd_reply(__func__, rc, call_id);
1898         }
1899         send_rc = lrmd_server_send_reply(client, id, reply);
1900         free_xml(reply);
1901         if (send_rc != pcmk_rc_ok) {
1902             crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
1903                      pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
1904         }
1905     }
1906 
1907     if (do_notify) {
1908         send_generic_notify(rc, request);
1909     }
1910 }

/* [previous][next][first][last][top][bottom][index][help] */