root/daemons/execd/execd_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. time_is_set
  2. get_current_time
  3. time_diff_ms
  4. cmd_original_times
  5. action_matches
  6. log_finished
  7. log_execute
  8. normalize_action_name
  9. build_rsc_from_xml
  10. create_lrmd_cmd
  11. stop_recurring_timer
  12. free_lrmd_cmd
  13. stonith_recurring_op_helper
  14. start_recurring_timer
  15. start_delay_helper
  16. find_duplicate_action
  17. merge_recurring_duplicate
  18. schedule_lrmd_cmd
  19. create_lrmd_reply
  20. send_client_notify
  21. send_cmd_complete_notify
  22. send_generic_notify
  23. cmd_reset
  24. cmd_finalize
  25. notify_one_client
  26. notify_of_new_client
  27. client_disconnect_cleanup
  28. action_complete
  29. stonith_action_complete
  30. lrmd_stonith_callback
  31. stonith_connection_failed
  32. execd_stonith_start
  33. execd_stonith_stop
  34. execd_stonith_monitor
  35. execute_stonith_action
  36. execute_nonstonith_action
  37. execute_resource_action
  38. free_rsc
  39. process_lrmd_signon
  40. process_lrmd_rsc_register
  41. process_lrmd_get_rsc_info
  42. process_lrmd_rsc_unregister
  43. process_lrmd_rsc_exec
  44. cancel_op
  45. cancel_all_recurring
  46. process_lrmd_rsc_cancel
  47. add_recurring_op_xml
  48. process_lrmd_get_recurring
  49. process_lrmd_message

   1 /*
   2  * Copyright 2012-2025 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
#include <crm_internal.h>
#include <crm/fencing/internal.h>

#include <limits.h>                     // INT_MAX, INT_MIN

#include <glib.h>
#include <libxml/tree.h>                // xmlNode
  15 
  16 // Check whether we have a high-resolution monotonic clock
  17 #undef PCMK__TIME_USE_CGT
  18 #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
  19 #  define PCMK__TIME_USE_CGT
  20 #  include <time.h>  /* clock_gettime */
  21 #endif
  22 
  23 #include <unistd.h>
  24 
  25 #include <crm/crm.h>
  26 #include <crm/fencing/internal.h>
  27 #include <crm/services.h>
  28 #include <crm/services_internal.h>
  29 #include <crm/common/mainloop.h>
  30 #include <crm/common/ipc.h>
  31 #include <crm/common/ipc_internal.h>
  32 #include <crm/common/xml.h>
  33 
  34 #include "pacemaker-execd.h"
  35 
/* Table of resource objects (lrmd_rsc_t), looked up by resource ID throughout
 * this file
 */
GHashTable *rsc_list = NULL;
  37 
  38 typedef struct lrmd_cmd_s {
  39     int timeout;
  40     guint interval_ms;
  41     int start_delay;
  42     int timeout_orig;
  43 
  44     int call_id;
  45 
  46     int call_opts;
  47     /* Timer ids, must be removed on cmd destruction. */
  48     int delay_id;
  49     int stonith_recurring_id;
  50 
  51     int rsc_deleted;
  52 
  53     int service_flags;
  54 
  55     char *client_id;
  56     char *origin;
  57     char *rsc_id;
  58     char *action;
  59     char *real_action;
  60     char *userdata_str;
  61 
  62     pcmk__action_result_t result;
  63 
  64     /* We can track operation queue time and run time, to be saved with the CIB
  65      * resource history (and displayed in cluster status). We need
  66      * high-resolution monotonic time for this purpose, so we use
  67      * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
  68      * is disabled).
  69      *
  70      * However, we also need epoch timestamps for recording the time the command
  71      * last ran and the time its return value last changed, for use in time
  72      * displays (as opposed to interval calculations). We keep time_t values for
  73      * this purpose.
  74      *
  75      * The last run time is used for both purposes, so we keep redundant
  76      * monotonic and epoch values for this. Technically the two could represent
  77      * different times, but since time_t has only second resolution and the
  78      * values are used for distinct purposes, that is not significant.
  79      */
  80 #ifdef PCMK__TIME_USE_CGT
  81     /* Recurring and systemd operations may involve more than one executor
  82      * command per operation, so they need info about the original and the most
  83      * recent.
  84      */
  85     struct timespec t_first_run;    // When op first ran
  86     struct timespec t_run;          // When op most recently ran
  87     struct timespec t_first_queue;  // When op was first queued
  88     struct timespec t_queue;        // When op was most recently queued
  89 #endif
  90     time_t epoch_last_run;          // Epoch timestamp of when op last ran
  91     time_t epoch_rcchange;          // Epoch timestamp of when rc last changed
  92 
  93     bool first_notify_sent;
  94     int last_notify_rc;
  95     int last_notify_op_status;
  96     int last_pid;
  97 
  98     GHashTable *params;
  99 } lrmd_cmd_t;
 100 
 101 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
 102 static gboolean execute_resource_action(gpointer user_data);
 103 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
 104 
 105 #ifdef PCMK__TIME_USE_CGT
 106 
 107 /*!
 108  * \internal
 109  * \brief Check whether a struct timespec has been set
 110  *
 111  * \param[in] timespec  Time to check
 112  *
 113  * \return true if timespec has been set (i.e. is nonzero), false otherwise
 114  */
 115 static inline bool
 116 time_is_set(const struct timespec *timespec)
     /* [previous][next][first][last][top][bottom][index][help] */
 117 {
 118     return (timespec != NULL) &&
 119            ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
 120 }
 121 
 122 /*
 123  * \internal
 124  * \brief Set a timespec (and its original if unset) to the current time
 125  *
 126  * \param[out] t_current  Where to store current time
 127  * \param[out] t_orig     Where to copy t_current if unset
 128  */
 129 static void
 130 get_current_time(struct timespec *t_current, struct timespec *t_orig)
     /* [previous][next][first][last][top][bottom][index][help] */
 131 {
 132     clock_gettime(CLOCK_MONOTONIC, t_current);
 133     if ((t_orig != NULL) && !time_is_set(t_orig)) {
 134         *t_orig = *t_current;
 135     }
 136 }
 137 
 138 /*!
 139  * \internal
 140  * \brief Return difference between two times in milliseconds
 141  *
 142  * \param[in] now  More recent time (or NULL to use current time)
 143  * \param[in] old  Earlier time
 144  *
 145  * \return milliseconds difference (or 0 if old is NULL or unset)
 146  *
 147  * \note Can overflow on 32bit machines when the differences is around
 148  *       24 days or more.
 149  */
 150 static int
 151 time_diff_ms(const struct timespec *now, const struct timespec *old)
     /* [previous][next][first][last][top][bottom][index][help] */
 152 {
 153     int diff_ms = 0;
 154 
 155     if (time_is_set(old)) {
 156         struct timespec local_now = { 0, };
 157 
 158         if (now == NULL) {
 159             clock_gettime(CLOCK_MONOTONIC, &local_now);
 160             now = &local_now;
 161         }
 162         diff_ms = (now->tv_sec - old->tv_sec) * 1000
 163                   + (now->tv_nsec - old->tv_nsec) / 1000000;
 164     }
 165     return diff_ms;
 166 }
 167 
 168 /*!
 169  * \internal
 170  * \brief Reset a command's operation times to their original values.
 171  *
 172  * Reset a command's run and queued timestamps to the timestamps of the original
 173  * command, so we report the entire time since then and not just the time since
 174  * the most recent command (for recurring and systemd operations).
 175  *
 176  * \param[in,out] cmd  Executor command object to reset
 177  *
 178  * \note It's not obvious what the queued time should be for a systemd
 179  *       start/stop operation, which might go like this:
 180  *         initial command queued 5ms, runs 3s
 181  *         monitor command queued 10ms, runs 10s
 182  *         monitor command queued 10ms, runs 10s
 183  *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 184  *       implementation will report 5ms. If it's 25ms, then we need to
 185  *       subtract 20ms from the total exec time so as not to count it twice.
 186  *       We can implement that later if it matters to anyone ...
 187  */
 188 static void
 189 cmd_original_times(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 190 {
 191     cmd->t_run = cmd->t_first_run;
 192     cmd->t_queue = cmd->t_first_queue;
 193 }
 194 #endif
 195 
 196 static inline bool
 197 action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 198 {
 199     return (cmd->interval_ms == interval_ms)
 200            && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
 201 }
 202 
 203 /*!
 204  * \internal
 205  * \brief Log the result of an asynchronous command
 206  *
 207  * \param[in] cmd            Command to log result for
 208  * \param[in] exec_time_ms   Execution time in milliseconds, if known
 209  * \param[in] queue_time_ms  Queue time in milliseconds, if known
 210  */
 211 static void
 212 log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 213 {
 214     int log_level = LOG_INFO;
 215     GString *str = g_string_sized_new(100); // reasonable starting size
 216 
 217     if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
 218         log_level = LOG_DEBUG;
 219     }
 220 
 221     g_string_append_printf(str, "%s %s (call %d",
 222                            cmd->rsc_id, cmd->action, cmd->call_id);
 223     if (cmd->last_pid != 0) {
 224         g_string_append_printf(str, ", PID %d", cmd->last_pid);
 225     }
 226     switch (cmd->result.execution_status) {
 227         case PCMK_EXEC_DONE:
 228             g_string_append_printf(str, ") exited with status %d",
 229                                    cmd->result.exit_status);
 230             break;
 231         case PCMK_EXEC_CANCELLED:
 232             g_string_append_printf(str, ") cancelled");
 233             break;
 234         default:
 235             pcmk__g_strcat(str, ") could not be executed: ",
 236                            pcmk_exec_status_str(cmd->result.execution_status),
 237                            NULL);
 238             break;
 239     }
 240     if (cmd->result.exit_reason != NULL) {
 241         pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
 242     }
 243 
 244 #ifdef PCMK__TIME_USE_CGT
 245     pcmk__g_strcat(str, " (execution time ",
 246                    pcmk__readable_interval(exec_time_ms), NULL);
 247     if (queue_time_ms > 0) {
 248         pcmk__g_strcat(str, " after being queued ",
 249                        pcmk__readable_interval(queue_time_ms), NULL);
 250     }
 251     g_string_append_c(str, ')');
 252 #endif
 253 
 254     do_crm_log(log_level, "%s", str->str);
 255     g_string_free(str, TRUE);
 256 }
 257 
 258 static void
 259 log_execute(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 260 {
 261     int log_level = LOG_INFO;
 262 
 263     if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
 264         log_level = LOG_DEBUG;
 265     }
 266 
 267     do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
 268                cmd->rsc_id, cmd->action, cmd->call_id);
 269 }
 270 
 271 static const char *
 272 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 273 {
 274     if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) &&
 275         pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
 276         return PCMK_ACTION_STATUS;
 277     }
 278     return action;
 279 }
 280 
 281 static lrmd_rsc_t *
 282 build_rsc_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 283 {
 284     xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
 285                                             LOG_ERR);
 286     lrmd_rsc_t *rsc = NULL;
 287 
 288     rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t));
 289 
 290     crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &rsc->call_opts);
 291 
 292     rsc->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
 293     rsc->class = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_CLASS);
 294     rsc->provider = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_PROVIDER);
 295     rsc->type = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_TYPE);
 296     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
 297                                      rsc);
 298 
 299     // Initialize fence device probes (to return "not running")
 300     pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
 301                      PCMK_EXEC_NO_FENCE_DEVICE, NULL);
 302     return rsc;
 303 }
 304 
 305 static lrmd_cmd_t *
 306 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
     /* [previous][next][first][last][top][bottom][index][help] */
 307 {
 308     int call_options = 0;
 309     xmlNode *rsc_xml = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_LRMD_RSC,
 310                                             LOG_ERR);
 311     lrmd_cmd_t *cmd = NULL;
 312 
 313     cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t));
 314 
 315     crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &call_options);
 316     cmd->call_opts = call_options;
 317     cmd->client_id = pcmk__str_copy(client->id);
 318 
 319     crm_element_value_int(msg, PCMK__XA_LRMD_CALLID, &cmd->call_id);
 320     crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL,
 321                          &cmd->interval_ms);
 322     crm_element_value_int(rsc_xml, PCMK__XA_LRMD_TIMEOUT, &cmd->timeout);
 323     crm_element_value_int(rsc_xml, PCMK__XA_LRMD_RSC_START_DELAY,
 324                           &cmd->start_delay);
 325     cmd->timeout_orig = cmd->timeout;
 326 
 327     cmd->origin = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_ORIGIN);
 328     cmd->action = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
 329     cmd->userdata_str = crm_element_value_copy(rsc_xml,
 330                                                PCMK__XA_LRMD_RSC_USERDATA_STR);
 331     cmd->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
 332 
 333     cmd->params = xml2list(rsc_xml);
 334 
 335     if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"),
 336                      PCMK_VALUE_BLOCK, pcmk__str_casei)) {
 337         crm_debug("Setting flag to leave pid group on timeout and "
 338                   "only kill action pid for " PCMK__OP_FMT,
 339                   cmd->rsc_id, cmd->action, cmd->interval_ms);
 340         cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
 341                                                 LOG_TRACE, "Action",
 342                                                 cmd->action, 0,
 343                                                 SVC_ACTION_LEAVE_GROUP,
 344                                                 "SVC_ACTION_LEAVE_GROUP");
 345     }
 346     return cmd;
 347 }
 348 
 349 static void
 350 stop_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 351 {
 352     if (cmd) {
 353         if (cmd->stonith_recurring_id) {
 354             g_source_remove(cmd->stonith_recurring_id);
 355         }
 356         cmd->stonith_recurring_id = 0;
 357     }
 358 }
 359 
 360 static void
 361 free_lrmd_cmd(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 362 {
 363     stop_recurring_timer(cmd);
 364     if (cmd->delay_id) {
 365         g_source_remove(cmd->delay_id);
 366     }
 367     if (cmd->params) {
 368         g_hash_table_destroy(cmd->params);
 369     }
 370     pcmk__reset_result(&(cmd->result));
 371     free(cmd->origin);
 372     free(cmd->action);
 373     free(cmd->real_action);
 374     free(cmd->userdata_str);
 375     free(cmd->rsc_id);
 376     free(cmd->client_id);
 377     free(cmd);
 378 }
 379 
 380 static gboolean
 381 stonith_recurring_op_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 382 {
 383     lrmd_cmd_t *cmd = data;
 384     lrmd_rsc_t *rsc;
 385 
 386     cmd->stonith_recurring_id = 0;
 387 
 388     if (!cmd->rsc_id) {
 389         return FALSE;
 390     }
 391 
 392     rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 393 
 394     pcmk__assert(rsc != NULL);
 395     /* take it out of recurring_ops list, and put it in the pending ops
 396      * to be executed */
 397     rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 398     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 399 #ifdef PCMK__TIME_USE_CGT
 400     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 401 #endif
 402     mainloop_set_trigger(rsc->work);
 403 
 404     return FALSE;
 405 }
 406 
 407 static inline void
 408 start_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 409 {
 410     if (!cmd || (cmd->interval_ms <= 0)) {
 411         return;
 412     }
 413 
 414     cmd->stonith_recurring_id = pcmk__create_timer(cmd->interval_ms,
 415                                                    stonith_recurring_op_helper,
 416                                                    cmd);
 417 }
 418 
 419 static gboolean
 420 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 421 {
 422     lrmd_cmd_t *cmd = data;
 423     lrmd_rsc_t *rsc = NULL;
 424 
 425     cmd->delay_id = 0;
 426     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 427 
 428     if (rsc) {
 429         mainloop_set_trigger(rsc->work);
 430     }
 431 
 432     return FALSE;
 433 }
 434 
 435 /*!
 436  * \internal
 437  * \brief Check whether a list already contains the equivalent of a given action
 438  *
 439  * \param[in] action_list  List to search
 440  * \param[in] cmd          Action to search for
 441  */
 442 static lrmd_cmd_t *
 443 find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 444 {
 445     for (const GList *item = action_list; item != NULL; item = item->next) {
 446         lrmd_cmd_t *dup = item->data;
 447 
 448         if (action_matches(cmd, dup->action, dup->interval_ms)) {
 449             return dup;
 450         }
 451     }
 452     return NULL;
 453 }
 454 
 455 static bool
 456 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 457 {
 458     lrmd_cmd_t * dup = NULL;
 459     bool dup_pending = true;
 460 
 461     if (cmd->interval_ms == 0) {
 462         return false;
 463     }
 464 
 465     // Search for a duplicate of this action (in-flight or not)
 466     dup = find_duplicate_action(rsc->pending_ops, cmd);
 467     if (dup == NULL) {
 468         dup_pending = false;
 469         dup = find_duplicate_action(rsc->recurring_ops, cmd);
 470         if (dup == NULL) {
 471             return false;
 472         }
 473     }
 474 
 475     /* Do not merge fencing monitors marked for cancellation, so we can reply to
 476      * the cancellation separately.
 477      */
 478     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 479                      pcmk__str_casei)
 480         && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
 481         return false;
 482     }
 483 
 484     /* This should not occur. If it does, we need to investigate how something
 485      * like this is possible in the controller.
 486      */
 487     crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
 488              "), merging with previous op entry",
 489              rsc->rsc_id, normalize_action_name(rsc, dup->action),
 490              dup->interval_ms);
 491 
 492     // Merge new action's call ID and user data into existing action
 493     dup->first_notify_sent = false;
 494     free(dup->userdata_str);
 495     dup->userdata_str = cmd->userdata_str;
 496     cmd->userdata_str = NULL;
 497     dup->call_id = cmd->call_id;
 498     free_lrmd_cmd(cmd);
 499     cmd = NULL;
 500 
 501     /* If dup is not pending, that means it has already executed at least once
 502      * and is waiting in the interval. In that case, stop waiting and initiate
 503      * a new instance now.
 504      */
 505     if (!dup_pending) {
 506         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 507                          pcmk__str_casei)) {
 508             stop_recurring_timer(dup);
 509             stonith_recurring_op_helper(dup);
 510         } else {
 511             services_action_kick(rsc->rsc_id,
 512                                  normalize_action_name(rsc, dup->action),
 513                                  dup->interval_ms);
 514         }
 515     }
 516     return true;
 517 }
 518 
 519 static void
 520 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 521 {
 522     CRM_CHECK(cmd != NULL, return);
 523     CRM_CHECK(rsc != NULL, return);
 524 
 525     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
 526 
 527     if (merge_recurring_duplicate(rsc, cmd)) {
 528         // Equivalent of cmd has already been scheduled
 529         return;
 530     }
 531 
 532     /* The controller expects the executor to automatically cancel
 533      * recurring operations before a resource stops.
 534      */
 535     if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
 536         cancel_all_recurring(rsc, NULL);
 537     }
 538 
 539     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 540 #ifdef PCMK__TIME_USE_CGT
 541     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 542 #endif
 543     mainloop_set_trigger(rsc->work);
 544 
 545     if (cmd->start_delay) {
 546         cmd->delay_id = pcmk__create_timer(cmd->start_delay, start_delay_helper, cmd);
 547     }
 548 }
 549 
 550 static xmlNode *
 551 create_lrmd_reply(const char *origin, int rc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 552 {
 553     xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY);
 554 
 555     crm_xml_add(reply, PCMK__XA_LRMD_ORIGIN, origin);
 556     crm_xml_add_int(reply, PCMK__XA_LRMD_RC, rc);
 557     crm_xml_add_int(reply, PCMK__XA_LRMD_CALLID, call_id);
 558     return reply;
 559 }
 560 
 561 static void
 562 send_client_notify(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 563 {
 564     xmlNode *update_msg = user_data;
 565     pcmk__client_t *client = value;
 566     int rc;
 567     int log_level = LOG_WARNING;
 568     const char *msg = NULL;
 569 
 570     CRM_CHECK(client != NULL, return);
 571     if (client->name == NULL) {
 572         crm_trace("Skipping notification to client without name");
 573         return;
 574     }
 575     if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
 576         /* We only want to notify clients of the executor IPC API. If we are
 577          * running as Pacemaker Remote, we may have clients proxied to other
 578          * IPC services in the cluster, so skip those.
 579          */
 580         crm_trace("Skipping executor API notification to client %s",
 581                   pcmk__client_name(client));
 582         return;
 583     }
 584 
 585     rc = lrmd_server_send_notify(client, update_msg);
 586     if (rc == pcmk_rc_ok) {
 587         return;
 588     }
 589 
 590     switch (rc) {
 591         case ENOTCONN:
 592         case EPIPE: // Client exited without waiting for notification
 593             log_level = LOG_INFO;
 594             msg = "Disconnected";
 595             break;
 596 
 597         default:
 598             msg = pcmk_rc_str(rc);
 599             break;
 600     }
 601     do_crm_log(log_level, "Could not notify client %s: %s " QB_XS " rc=%d",
 602                pcmk__client_name(client), msg, rc);
 603 }
 604 
 605 static void
 606 send_cmd_complete_notify(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 607 {
 608     xmlNode *notify = NULL;
 609     int exec_time = 0;
 610     int queue_time = 0;
 611 
 612 #ifdef PCMK__TIME_USE_CGT
 613     exec_time = time_diff_ms(NULL, &(cmd->t_run));
 614     queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
 615 #endif
 616     log_finished(cmd, exec_time, queue_time);
 617 
 618     /* If the originator requested to be notified only for changes in recurring
 619      * operation results, skip the notification if the result hasn't changed.
 620      */
 621     if (cmd->first_notify_sent
 622         && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
 623         && (cmd->last_notify_rc == cmd->result.exit_status)
 624         && (cmd->last_notify_op_status == cmd->result.execution_status)) {
 625         return;
 626     }
 627 
 628     cmd->first_notify_sent = true;
 629     cmd->last_notify_rc = cmd->result.exit_status;
 630     cmd->last_notify_op_status = cmd->result.execution_status;
 631 
 632     notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
 633 
 634     crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__);
 635     crm_xml_add_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout);
 636     crm_xml_add_ms(notify, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
 637     crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay);
 638     crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status);
 639     crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS,
 640                     cmd->result.execution_status);
 641     crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id);
 642     crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted);
 643 
 644     crm_xml_add_ll(notify, PCMK__XA_LRMD_RUN_TIME,
 645                    (long long) cmd->epoch_last_run);
 646     crm_xml_add_ll(notify, PCMK__XA_LRMD_RCCHANGE_TIME,
 647                    (long long) cmd->epoch_rcchange);
 648 #ifdef PCMK__TIME_USE_CGT
 649     crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time);
 650     crm_xml_add_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time);
 651 #endif
 652 
 653     crm_xml_add(notify, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC);
 654     crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id);
 655     if(cmd->real_action) {
 656         crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->real_action);
 657     } else {
 658         crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->action);
 659     }
 660     crm_xml_add(notify, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
 661     crm_xml_add(notify, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
 662 
 663     if (cmd->result.action_stderr != NULL) {
 664         crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT,
 665                     cmd->result.action_stderr);
 666 
 667     } else if (cmd->result.action_stdout != NULL) {
 668         crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT,
 669                     cmd->result.action_stdout);
 670     }
 671 
 672     if (cmd->params) {
 673         char *key = NULL;
 674         char *value = NULL;
 675         GHashTableIter iter;
 676 
 677         xmlNode *args = pcmk__xe_create(notify, PCMK__XE_ATTRIBUTES);
 678 
 679         g_hash_table_iter_init(&iter, cmd->params);
 680         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 681             hash2smartfield((gpointer) key, (gpointer) value, args);
 682         }
 683     }
 684     if ((cmd->client_id != NULL)
 685         && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
 686 
 687         pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
 688 
 689         if (client != NULL) {
 690             send_client_notify(client->id, client, notify);
 691         }
 692     } else {
 693         pcmk__foreach_ipc_client(send_client_notify, notify);
 694     }
 695 
 696     pcmk__xml_free(notify);
 697 }
 698 
 699 static void
 700 send_generic_notify(int rc, xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
 701 {
 702     if (pcmk__ipc_client_count() != 0) {
 703         int call_id = 0;
 704         xmlNode *notify = NULL;
 705         xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
 706                                                 "//" PCMK__XE_LRMD_RSC,
 707                                                 LOG_ERR);
 708         const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
 709         const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
 710 
 711         crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);
 712 
 713         notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
 714         crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__);
 715         crm_xml_add_int(notify, PCMK__XA_LRMD_RC, rc);
 716         crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, call_id);
 717         crm_xml_add(notify, PCMK__XA_LRMD_OP, op);
 718         crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, rsc_id);
 719 
 720         pcmk__foreach_ipc_client(send_client_notify, notify);
 721 
 722         pcmk__xml_free(notify);
 723     }
 724 }
 725 
 726 static void
 727 cmd_reset(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 728 {
 729     cmd->last_pid = 0;
 730 #ifdef PCMK__TIME_USE_CGT
 731     memset(&cmd->t_run, 0, sizeof(cmd->t_run));
 732     memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
 733 #endif
 734     cmd->epoch_last_run = 0;
 735 
 736     pcmk__reset_result(&(cmd->result));
 737     cmd->result.execution_status = PCMK_EXEC_DONE;
 738 }
 739 
 740 static void
 741 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 742 {
 743     crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
 744               rsc ? rsc->active : NULL, cmd);
 745 
 746     if (rsc && (rsc->active == cmd)) {
 747         rsc->active = NULL;
 748         mainloop_set_trigger(rsc->work);
 749     }
 750 
 751     if (!rsc) {
 752         cmd->rsc_deleted = 1;
 753     }
 754 
 755     /* reset original timeout so client notification has correct information */
 756     cmd->timeout = cmd->timeout_orig;
 757 
 758     send_cmd_complete_notify(cmd);
 759 
 760     if ((cmd->interval_ms != 0)
 761         && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
 762 
 763         if (rsc) {
 764             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 765             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 766         }
 767         free_lrmd_cmd(cmd);
 768     } else if (cmd->interval_ms == 0) {
 769         if (rsc) {
 770             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 771         }
 772         free_lrmd_cmd(cmd);
 773     } else {
 774         /* Clear all the values pertaining just to the last iteration of a recurring op. */
 775         cmd_reset(cmd);
 776     }
 777 }
 778 
 779 struct notify_new_client_data {
 780     xmlNode *notify;
 781     pcmk__client_t *new_client;
 782 };
 783 
 784 static void
 785 notify_one_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 786 {
 787     pcmk__client_t *client = value;
 788     struct notify_new_client_data *data = user_data;
 789 
 790     if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
 791         send_client_notify(key, (gpointer) client, (gpointer) data->notify);
 792     }
 793 }
 794 
 795 void
 796 notify_of_new_client(pcmk__client_t *new_client)
     /* [previous][next][first][last][top][bottom][index][help] */
 797 {
 798     struct notify_new_client_data data;
 799 
 800     data.new_client = new_client;
 801     data.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
 802     crm_xml_add(data.notify, PCMK__XA_LRMD_ORIGIN, __func__);
 803     crm_xml_add(data.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT);
 804     pcmk__foreach_ipc_client(notify_one_client, &data);
 805     pcmk__xml_free(data.notify);
 806 }
 807 
 808 void
 809 client_disconnect_cleanup(const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 810 {
 811     GHashTableIter iter;
 812     lrmd_rsc_t *rsc = NULL;
 813     char *key = NULL;
 814 
 815     g_hash_table_iter_init(&iter, rsc_list);
 816     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
 817         if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
 818             /* This client is disconnecting, drop any recurring operations
 819              * it may have initiated on the resource */
 820             cancel_all_recurring(rsc, client_id);
 821         }
 822     }
 823 }
 824 
 825 static void
 826 action_complete(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 827 {
 828     lrmd_rsc_t *rsc;
 829     lrmd_cmd_t *cmd = action->cb_data;
 830     enum ocf_exitcode code;
 831 
 832 #ifdef PCMK__TIME_USE_CGT
 833     const char *rclass = NULL;
 834     bool goagain = false;
 835     int time_sum = 0;
 836     int timeout_left = 0;
 837     int delay = 0;
 838 #endif
 839 
 840     if (!cmd) {
 841         crm_err("Completed executor action (%s) does not match any known operations",
 842                 action->id);
 843         return;
 844     }
 845 
 846 #ifdef PCMK__TIME_USE_CGT
 847     if (cmd->result.exit_status != action->rc) {
 848         cmd->epoch_rcchange = time(NULL);
 849     }
 850 #endif
 851 
 852     cmd->last_pid = action->pid;
 853 
 854     // Cast variable instead of function return to keep compilers happy
 855     code = services_result2ocf(action->standard, cmd->action, action->rc);
 856     pcmk__set_result(&(cmd->result), (int) code,
 857                      action->status, services__exit_reason(action));
 858 
 859     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 860 
 861 #ifdef PCMK__TIME_USE_CGT
 862     if (rsc != NULL) {
 863         rclass = rsc->class;
 864 #if PCMK__ENABLE_SERVICE
 865         if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE,
 866                          pcmk__str_casei)) {
 867             rclass = resources_find_service_class(rsc->type);
 868         }
 869 #endif
 870     }
 871 
 872     if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 873         goto finalize;
 874     }
 875 
 876     if (pcmk__result_ok(&(cmd->result))
 877         && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
 878                                 PCMK_ACTION_STOP, NULL)) {
 879         /* Getting results for when a start or stop action completes is now
 880          * handled by watching for JobRemoved() signals from systemd and
 881          * reacting to them. So, we can bypass the rest of the code in this
 882          * function for those actions, and simply finalize cmd.
 883          *
 884          * @TODO When monitors are handled in the same way, this function
 885          * can either be drastically simplified or done away with entirely.
 886          */
 887         services__copy_result(action, &(cmd->result));
 888         goto finalize;
 889 
 890     } else if (cmd->result.execution_status == PCMK_EXEC_PENDING &&
 891                pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL) &&
 892                cmd->interval_ms == 0 &&
 893                cmd->real_action == NULL) {
 894         /* If the state is Pending at the time of probe, execute follow-up monitor. */
 895         goagain = true;
 896         cmd->real_action = cmd->action;
 897         cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
 898     } else if (cmd->real_action != NULL) {
 899         // This is follow-up monitor to check whether start/stop/probe(monitor) completed
 900         if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 901             goagain = true;
 902 
 903         } else if (pcmk__result_ok(&(cmd->result))
 904                    && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
 905                                    pcmk__str_casei)) {
 906             goagain = true;
 907 
 908         } else {
 909             int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 910             int timeout_left = cmd->timeout_orig - time_sum;
 911 
 912             crm_debug("%s systemd %s is now complete (elapsed=%dms, "
 913                       "remaining=%dms): %s (%d)",
 914                       cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
 915                       crm_exit_str(cmd->result.exit_status),
 916                       cmd->result.exit_status);
 917             cmd_original_times(cmd);
 918 
 919             // Monitors may return "not running", but start/stop shouldn't
 920             if ((cmd->result.execution_status == PCMK_EXEC_DONE)
 921                 && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
 922 
 923                 if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
 924                                  pcmk__str_casei)) {
 925                     cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
 926                 } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
 927                                         pcmk__str_casei)) {
 928                     cmd->result.exit_status = PCMK_OCF_OK;
 929                 }
 930             }
 931         }
 932     } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)
 933                && (cmd->interval_ms > 0)) {
 934         /* For monitors, excluding follow-up monitors,                                  */
 935         /* if the pending state persists from the first notification until its timeout, */
 936         /* it will be treated as a timeout.                                             */
 937 
 938         if ((cmd->result.execution_status == PCMK_EXEC_PENDING) &&
 939             (cmd->last_notify_op_status == PCMK_EXEC_PENDING)) {
 940             int time_left = time(NULL) - (cmd->epoch_rcchange + (cmd->timeout_orig/1000));
 941 
 942             if (time_left >= 0) {
 943                 crm_notice("Giving up on %s %s (rc=%d): monitor pending timeout "
 944                            "(first pending notification=%s timeout=%ds)",
 945                            cmd->rsc_id, cmd->action, cmd->result.exit_status,
 946                            pcmk__trim(ctime(&cmd->epoch_rcchange)), cmd->timeout_orig);
 947                 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 948                                  PCMK_EXEC_TIMEOUT,
 949                                  "Investigate reason for timeout, and adjust "
 950                                  "configured operation timeout if necessary");
 951                 cmd_original_times(cmd);
 952             }
 953         }
 954     }
 955 
 956     if (!goagain) {
 957         goto finalize;
 958     }
 959 
 960     time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 961     timeout_left = cmd->timeout_orig - time_sum;
 962     delay = cmd->timeout_orig / 10;
 963 
 964     if (delay >= timeout_left && timeout_left > 20) {
 965         delay = timeout_left/2;
 966     }
 967 
 968     delay = QB_MIN(2000, delay);
 969     if (delay < timeout_left) {
 970         cmd->start_delay = delay;
 971         cmd->timeout = timeout_left;
 972 
 973         if (pcmk__result_ok(&(cmd->result))) {
 974             crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 975                       cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
 976 
 977         } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 978             crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 979                      cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
 980 
 981         } else {
 982             crm_notice("%s %s failed: %s: Re-scheduling (remaining "
 983                        "timeout %s) " QB_XS
 984                        " exitstatus=%d elapsed=%dms start_delay=%dms)",
 985                        cmd->rsc_id, cmd->action,
 986                        crm_exit_str(cmd->result.exit_status),
 987                        pcmk__readable_interval(timeout_left),
 988                        cmd->result.exit_status, time_sum, delay);
 989         }
 990 
 991         cmd_reset(cmd);
 992         if (rsc) {
 993             rsc->active = NULL;
 994         }
 995         schedule_lrmd_cmd(rsc, cmd);
 996 
 997         /* Don't finalize cmd, we're not done with it yet */
 998         return;
 999 
1000     } else {
1001         crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
1002                    cmd->rsc_id,
1003                    (cmd->real_action? cmd->real_action : cmd->action),
1004                    cmd->result.exit_status, time_sum, timeout_left);
1005         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1006                          PCMK_EXEC_TIMEOUT,
1007                          "Investigate reason for timeout, and adjust "
1008                          "configured operation timeout if necessary");
1009         cmd_original_times(cmd);
1010     }
1011 #endif
1012 
1013 finalize:
1014     pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
1015                             services__grab_stderr(action));
1016     cmd_finalize(cmd, rsc);
1017 }
1018 
1019 /*!
1020  * \internal
1021  * \brief Process the result of a fence device action (start, stop, or monitor)
1022  *
1023  * \param[in,out] cmd               Fence device action that completed
1024  * \param[in]     exit_status       Fencer API exit status for action
1025  * \param[in]     execution_status  Fencer API execution status for action
1026  * \param[in]     exit_reason       Human-friendly detail, if action failed
1027  */
1028 static void
1029 stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
     /* [previous][next][first][last][top][bottom][index][help] */
1030                         enum pcmk_exec_status execution_status,
1031                         const char *exit_reason)
1032 {
1033     // This can be NULL if resource was removed before command completed
1034     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
1035 
1036     // Simplify fencer exit status to uniform exit status
1037     if (exit_status != CRM_EX_OK) {
1038         exit_status = PCMK_OCF_UNKNOWN_ERROR;
1039     }
1040 
1041     if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
1042         /* An in-flight fence action was cancelled. The execution status is
1043          * already correct, so don't overwrite it.
1044          */
1045         execution_status = PCMK_EXEC_CANCELLED;
1046 
1047     } else {
1048         /* Some execution status codes have specific meanings for the fencer
1049          * that executor clients may not expect, so map them to a simple error
1050          * status.
1051          */
1052         switch (execution_status) {
1053             case PCMK_EXEC_NOT_CONNECTED:
1054             case PCMK_EXEC_INVALID:
1055                 execution_status = PCMK_EXEC_ERROR;
1056                 break;
1057 
1058             case PCMK_EXEC_NO_FENCE_DEVICE:
1059                 /* This should be possible only for probes in practice, but
1060                  * interpret for all actions to be safe.
1061                  */
1062                 if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1063                                  pcmk__str_none)) {
1064                     exit_status = PCMK_OCF_NOT_RUNNING;
1065 
1066                 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1067                                         pcmk__str_none)) {
1068                     exit_status = PCMK_OCF_OK;
1069 
1070                 } else {
1071                     exit_status = PCMK_OCF_NOT_INSTALLED;
1072                 }
1073                 execution_status = PCMK_EXEC_ERROR;
1074                 break;
1075 
1076             case PCMK_EXEC_NOT_SUPPORTED:
1077                 exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
1078                 break;
1079 
1080             default:
1081                 break;
1082         }
1083     }
1084 
1085     pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
1086 
1087     // Certain successful actions change the known state of the resource
1088     if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
1089 
1090         if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1091             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
1092                              PCMK_EXEC_DONE, NULL); // "running"
1093 
1094         } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1095                                 pcmk__str_casei)) {
1096             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1097                              PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
1098         }
1099     }
1100 
1101     /* The recurring timer should not be running at this point in any case, but
1102      * as a failsafe, stop it if it is.
1103      */
1104     stop_recurring_timer(cmd);
1105 
1106     /* Reschedule this command if appropriate. If a recurring command is *not*
1107      * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
1108      * not be removed from recurring_ops by cmd_finalize().
1109      */
1110     if (rsc && (cmd->interval_ms > 0)
1111         && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
1112         start_recurring_timer(cmd);
1113     }
1114 
1115     cmd_finalize(cmd, rsc);
1116 }
1117 
1118 static void
1119 lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
1120 {
1121     if ((data == NULL) || (data->userdata == NULL)) {
1122         crm_err("Ignoring fence action result: "
1123                 "Invalid callback arguments (bug?)");
1124     } else {
1125         stonith_action_complete((lrmd_cmd_t *) data->userdata,
1126                                 stonith__exit_status(data),
1127                                 stonith__execution_status(data),
1128                                 stonith__exit_reason(data));
1129     }
1130 }
1131 
1132 void
1133 stonith_connection_failed(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1134 {
1135     GHashTableIter iter;
1136     lrmd_rsc_t *rsc = NULL;
1137 
1138     crm_warn("Connection to fencer lost (any pending operations for "
1139              "fence devices will be considered failed)");
1140 
1141     g_hash_table_iter_init(&iter, rsc_list);
1142     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
1143         if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1144                           pcmk__str_none)) {
1145             continue;
1146         }
1147 
1148         /* If we registered this fence device, we don't know whether the
1149          * fencer still has the registration or not. Cause future probes to
1150          * return an error until the resource is stopped or started
1151          * successfully. This is especially important if the controller also
1152          * went away (possibly due to a cluster layer restart) and won't
1153          * receive our client notification of any monitors finalized below.
1154          */
1155         if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
1156             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1157                              PCMK_EXEC_NOT_CONNECTED,
1158                              "Lost connection to fencer");
1159         }
1160 
1161         // Consider any active, pending, or recurring operations as failed
1162 
1163         for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
1164             lrmd_cmd_t *cmd = op->data;
1165 
1166             /* This won't free a recurring op but instead restart its timer.
1167              * If cmd is rsc->active, this will set rsc->active to NULL, so we
1168              * don't have to worry about finalizing it a second time below.
1169              */
1170             stonith_action_complete(cmd,
1171                                     CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1172                                     "Lost connection to fencer");
1173         }
1174 
1175         if (rsc->active != NULL) {
1176             rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
1177         }
1178         while (rsc->pending_ops != NULL) {
1179             // This will free the op and remove it from rsc->pending_ops
1180             stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
1181                                     CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1182                                     "Lost connection to fencer");
1183         }
1184     }
1185 }
1186 
1187 /*!
1188  * \internal
1189  * \brief Execute a stonith resource "start" action
1190  *
1191  * Start a stonith resource by registering it with the fencer.
1192  * (Stonith agents don't have a start command.)
1193  *
1194  * \param[in,out] stonith_api  Connection to fencer
1195  * \param[in]     rsc          Stonith resource to start
1196  * \param[in]     cmd          Start command to execute
1197  *
1198  * \return pcmk_ok on success, -errno otherwise
1199  */
1200 static int
1201 execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1202                     const lrmd_cmd_t *cmd)
1203 {
1204     char *key = NULL;
1205     char *value = NULL;
1206     stonith_key_value_t *device_params = NULL;
1207     int rc = pcmk_ok;
1208 
1209     // Convert command parameters to stonith API key/values
1210     if (cmd->params) {
1211         GHashTableIter iter;
1212 
1213         g_hash_table_iter_init(&iter, cmd->params);
1214         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1215             device_params = stonith__key_value_add(device_params, key, value);
1216         }
1217     }
1218 
1219     /* The fencer will automatically register devices via CIB notifications
1220      * when the CIB changes, but to avoid a possible race condition between
1221      * the fencer receiving the notification and the executor requesting that
1222      * resource, the executor registers the device as well. The fencer knows how
1223      * to handle duplicate registrations.
1224      */
1225     rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
1226                                             cmd->rsc_id, rsc->provider,
1227                                             rsc->type, device_params);
1228 
1229     stonith__key_value_freeall(device_params, true, true);
1230     return rc;
1231 }
1232 
1233 /*!
1234  * \internal
1235  * \brief Execute a stonith resource "stop" action
1236  *
1237  * Stop a stonith resource by unregistering it with the fencer.
1238  * (Stonith agents don't have a stop command.)
1239  *
1240  * \param[in,out] stonith_api  Connection to fencer
1241  * \param[in]     rsc          Stonith resource to stop
1242  *
1243  * \return pcmk_ok on success, -errno otherwise
1244  */
1245 static inline int
1246 execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1247 {
1248     /* @TODO Failure would indicate a problem communicating with fencer;
1249      * perhaps we should try reconnecting and retrying a few times?
1250      */
1251     return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
1252                                             rsc->rsc_id);
1253 }
1254 
1255 /*!
1256  * \internal
1257  * \brief Initiate a stonith resource agent recurring "monitor" action
1258  *
1259  * \param[in,out] stonith_api  Connection to fencer
1260  * \param[in,out] rsc          Stonith resource to monitor
1261  * \param[in]     cmd          Monitor command being executed
1262  *
1263  * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1264  */
1265 static inline int
1266 execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1267 {
1268     int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
1269                                         pcmk__timeout_ms2s(cmd->timeout));
1270 
1271     rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
1272                                               "lrmd_stonith_callback",
1273                                               lrmd_stonith_callback);
1274     if (rc == TRUE) {
1275         rsc->active = cmd;
1276         rc = pcmk_ok;
1277     } else {
1278         rc = -pcmk_err_generic;
1279     }
1280     return rc;
1281 }
1282 
1283 static void
1284 execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1285 {
1286     int rc = pcmk_ok;
1287     const char *rc_s = NULL;
1288     bool do_monitor = false;
1289 
1290     // Don't free; belongs to pacemaker-execd.c
1291     stonith_t *stonith_api = get_stonith_connection();
1292 
1293     if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)
1294         && (cmd->interval_ms == 0)) {
1295         // Probes don't require a fencer connection
1296         stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
1297                                 rsc->fence_probe_result.execution_status,
1298                                 rsc->fence_probe_result.exit_reason);
1299         return;
1300     }
1301 
1302     if (stonith_api == NULL) {
1303         stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
1304                                 PCMK_EXEC_NOT_CONNECTED,
1305                                 "No connection to fencer");
1306         return;
1307     }
1308 
1309     if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1310         rc = execd_stonith_start(stonith_api, rsc, cmd);
1311         if (rc == pcmk_ok) {
1312             do_monitor = true;
1313         }
1314 
1315     } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
1316         rc = execd_stonith_stop(stonith_api, rsc);
1317 
1318     } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1319                             pcmk__str_casei)) {
1320         do_monitor = true;
1321 
1322     } else {
1323         stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
1324                                 PCMK_EXEC_ERROR,
1325                                 "Invalid fence device action (bug?)");
1326         return;
1327     }
1328 
1329     if (do_monitor) {
1330         rc = execd_stonith_monitor(stonith_api, rsc, cmd);
1331         if (rc == pcmk_ok) {
1332             // Don't clean up yet. We will get the result of the monitor later.
1333             return;
1334         }
1335     }
1336 
1337     if (rc != -pcmk_err_generic) {
1338         rc_s = pcmk_strerror(rc);
1339     }
1340     stonith_action_complete(cmd,
1341                             ((rc == pcmk_rc_ok)? CRM_EX_OK : CRM_EX_ERROR),
1342                             stonith__legacy2status(rc), rc_s);
1343 }
1344 
1345 static void
1346 execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1347 {
1348     svc_action_t *action = NULL;
1349     GHashTable *params_copy = NULL;
1350 
1351     pcmk__assert((rsc != NULL) && (cmd != NULL));
1352 
1353     crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
1354               rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1355 
1356     params_copy = pcmk__str_table_dup(cmd->params);
1357 
1358     action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1359                                      rsc->type,
1360                                      normalize_action_name(rsc, cmd->action),
1361                                      cmd->interval_ms, cmd->timeout,
1362                                      params_copy, cmd->service_flags);
1363 
1364     if (action == NULL) {
1365         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1366                          PCMK_EXEC_ERROR, strerror(ENOMEM));
1367         cmd_finalize(cmd, rsc);
1368         return;
1369     }
1370 
1371     if (action->rc != PCMK_OCF_UNKNOWN) {
1372         services__copy_result(action, &(cmd->result));
1373         services_action_free(action);
1374         cmd_finalize(cmd, rsc);
1375         return;
1376     }
1377 
1378     action->cb_data = cmd;
1379 
1380     if (services_action_async(action, action_complete)) {
1381         /* The services library has taken responsibility for the action. It
1382          * could be pending, blocked, or merged into a duplicate recurring
1383          * action, in which case the action callback (action_complete())
1384          * will be called when the action completes, otherwise the callback has
1385          * already been called.
1386          *
1387          * action_complete() calls cmd_finalize() which can free cmd, so cmd
1388          * cannot be used here.
1389          */
1390     } else {
1391         /* This is a recurring action that is not being cancelled and could not
1392          * be initiated. It has been rescheduled, and the action callback
1393          * (action_complete()) has been called, which in this case has already
1394          * called cmd_finalize(), which in this case should only reset (not
1395          * free) cmd.
1396          */
1397         services__copy_result(action, &(cmd->result));
1398         services_action_free(action);
1399     }
1400 }
1401 
1402 static gboolean
1403 execute_resource_action(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1404 {
1405     lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
1406     lrmd_cmd_t *cmd = NULL;
1407 
1408     CRM_CHECK(rsc != NULL, return FALSE);
1409 
1410     if (rsc->active) {
1411         crm_trace("%s is still active", rsc->rsc_id);
1412         return TRUE;
1413     }
1414 
1415     if (rsc->pending_ops) {
1416         GList *first = rsc->pending_ops;
1417 
1418         cmd = first->data;
1419         if (cmd->delay_id) {
1420             crm_trace
1421                 ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
1422                  cmd->rsc_id, cmd->action, cmd->start_delay);
1423             return TRUE;
1424         }
1425         rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1426         g_list_free_1(first);
1427 
1428 #ifdef PCMK__TIME_USE_CGT
1429         get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1430 #endif
1431         cmd->epoch_last_run = time(NULL);
1432     }
1433 
1434     if (!cmd) {
1435         crm_trace("Nothing further to do for %s", rsc->rsc_id);
1436         return TRUE;
1437     }
1438 
1439     rsc->active = cmd;          /* only one op at a time for a rsc */
1440     if (cmd->interval_ms) {
1441         rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1442     }
1443 
1444     log_execute(cmd);
1445 
1446     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1447         execute_stonith_action(rsc, cmd);
1448     } else {
1449         execute_nonstonith_action(rsc, cmd);
1450     }
1451 
1452     return TRUE;
1453 }
1454 
1455 void
1456 free_rsc(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1457 {
1458     GList *gIter = NULL;
1459     lrmd_rsc_t *rsc = data;
1460     int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1461                                   pcmk__str_casei);
1462 
1463     gIter = rsc->pending_ops;
1464     while (gIter != NULL) {
1465         GList *next = gIter->next;
1466         lrmd_cmd_t *cmd = gIter->data;
1467 
1468         /* command was never executed */
1469         cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1470         cmd_finalize(cmd, NULL);
1471 
1472         gIter = next;
1473     }
1474     /* frees list, but not list elements. */
1475     g_list_free(rsc->pending_ops);
1476 
1477     gIter = rsc->recurring_ops;
1478     while (gIter != NULL) {
1479         GList *next = gIter->next;
1480         lrmd_cmd_t *cmd = gIter->data;
1481 
1482         if (is_stonith) {
1483             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1484             /* If a stonith command is in-flight, just mark it as cancelled;
1485              * it is not safe to finalize/free the cmd until the stonith api
1486              * says it has either completed or timed out.
1487              */
1488             if (rsc->active != cmd) {
1489                 cmd_finalize(cmd, NULL);
1490             }
1491         } else {
1492             /* This command is already handed off to service library,
1493              * let service library cancel it and tell us via the callback
1494              * when it is cancelled. The rsc can be safely destroyed
1495              * even if we are waiting for the cancel result */
1496             services_action_cancel(rsc->rsc_id,
1497                                    normalize_action_name(rsc, cmd->action),
1498                                    cmd->interval_ms);
1499         }
1500 
1501         gIter = next;
1502     }
1503     /* frees list, but not list elements. */
1504     g_list_free(rsc->recurring_ops);
1505 
1506     free(rsc->rsc_id);
1507     free(rsc->class);
1508     free(rsc->provider);
1509     free(rsc->type);
1510     mainloop_destroy_trigger(rsc->work);
1511 
1512     free(rsc);
1513 }
1514 
1515 static int
1516 process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1517                     xmlNode **reply)
1518 {
1519     int rc = pcmk_ok;
1520     time_t now = time(NULL);
1521     const char *protocol_version =
1522         crm_element_value(request, PCMK__XA_LRMD_PROTOCOL_VERSION);
1523     const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
1524 
1525     if (compare_version(protocol_version, LRMD_COMPATIBLE_PROTOCOL) < 0) {
1526         crm_err("Cluster API version must be greater than or equal to %s, not %s",
1527                 LRMD_COMPATIBLE_PROTOCOL, protocol_version);
1528         rc = -EPROTO;
1529     }
1530 
1531     if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) {
1532 #ifdef PCMK__COMPILE_REMOTE
1533         if ((client->remote != NULL)
1534             && pcmk_is_set(client->flags,
1535                            pcmk__client_tls_handshake_complete)) {
1536             const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
1537 
1538             // This is a remote connection from a cluster node's controller
1539             ipc_proxy_add_provider(client);
1540 
1541             /* @TODO Allowing multiple proxies makes no sense given that clients
1542              * have no way to choose between them. Maybe always use the most
1543              * recent one and switch any existing IPC connections to use it,
1544              * by iterating over ipc_clients here, and if client->id doesn't
1545              * match the client's userdata, replace the userdata with the new
1546              * ID. After the iteration, call lrmd_remote_client_destroy() on any
1547              * of the replaced values in ipc_providers.
1548              */
1549 
1550             /* If this was a register operation, also ask for new schema files but
1551              * only if it's supported by the protocol version.
1552              */
1553             if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) &&
1554                 LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) {
1555                 remoted_request_cib_schema_files();
1556             }
1557         } else {
1558             rc = -EACCES;
1559         }
1560 #else
1561         rc = -EPROTONOSUPPORT;
1562 #endif
1563     }
1564 
1565     *reply = create_lrmd_reply(__func__, rc, call_id);
1566     crm_xml_add(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER);
1567     crm_xml_add(*reply, PCMK__XA_LRMD_CLIENTID, client->id);
1568     crm_xml_add(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1569     crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time);
1570 
1571     if (start_state) {
1572         crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state);
1573     }
1574 
1575     return rc;
1576 }
1577 
1578 static int
1579 process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1580 {
1581     int rc = pcmk_ok;
1582     lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1583     lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1584 
1585     if (dup &&
1586         pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1587         pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1588 
1589         crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1590         free_rsc(rsc);
1591         return rc;
1592     }
1593 
1594     g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1595     crm_info("Cached agent information for '%s'", rsc->rsc_id);
1596     return rc;
1597 }
1598 
1599 static xmlNode *
1600 process_lrmd_get_rsc_info(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1601 {
1602     int rc = pcmk_ok;
1603     xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1604                                             "//" PCMK__XE_LRMD_RSC,
1605                                             LOG_ERR);
1606     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1607     xmlNode *reply = NULL;
1608     lrmd_rsc_t *rsc = NULL;
1609 
1610     if (rsc_id == NULL) {
1611         rc = -ENODEV;
1612     } else {
1613         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1614         if (rsc == NULL) {
1615             crm_info("Agent information for '%s' not in cache", rsc_id);
1616             rc = -ENODEV;
1617         }
1618     }
1619 
1620     reply = create_lrmd_reply(__func__, rc, call_id);
1621     if (rsc) {
1622         crm_xml_add(reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1623         crm_xml_add(reply, PCMK__XA_LRMD_CLASS, rsc->class);
1624         crm_xml_add(reply, PCMK__XA_LRMD_PROVIDER, rsc->provider);
1625         crm_xml_add(reply, PCMK__XA_LRMD_TYPE, rsc->type);
1626     }
1627     return reply;
1628 }
1629 
1630 static int
1631 process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
     /* [previous][next][first][last][top][bottom][index][help] */
1632                             xmlNode *request)
1633 {
1634     int rc = pcmk_ok;
1635     lrmd_rsc_t *rsc = NULL;
1636     xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1637                                             "//" PCMK__XE_LRMD_RSC,
1638                                             LOG_ERR);
1639     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1640 
1641     if (!rsc_id) {
1642         return -ENODEV;
1643     }
1644 
1645     rsc = g_hash_table_lookup(rsc_list, rsc_id);
1646     if (rsc == NULL) {
1647         crm_info("Ignoring unregistration of resource '%s', which is not registered",
1648                  rsc_id);
1649         return pcmk_ok;
1650     }
1651 
1652     if (rsc->active) {
1653         /* let the caller know there are still active ops on this rsc to watch for */
1654         crm_trace("Operation (%p) still in progress for unregistered resource %s",
1655                   rsc->active, rsc_id);
1656         rc = -EINPROGRESS;
1657     }
1658 
1659     g_hash_table_remove(rsc_list, rsc_id);
1660 
1661     return rc;
1662 }
1663 
1664 static int
1665 process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1666 {
1667     lrmd_rsc_t *rsc = NULL;
1668     lrmd_cmd_t *cmd = NULL;
1669     xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1670                                             "//" PCMK__XE_LRMD_RSC,
1671                                             LOG_ERR);
1672     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1673     int call_id;
1674 
1675     if (!rsc_id) {
1676         return -EINVAL;
1677     }
1678     if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1679         crm_info("Resource '%s' not found (%d active resources)",
1680                  rsc_id, g_hash_table_size(rsc_list));
1681         return -ENODEV;
1682     }
1683 
1684     cmd = create_lrmd_cmd(request, client);
1685     call_id = cmd->call_id;
1686 
1687     /* Don't reference cmd after handing it off to be scheduled.
1688      * The cmd could get merged and freed. */
1689     schedule_lrmd_cmd(rsc, cmd);
1690 
1691     return call_id;
1692 }
1693 
1694 static int
1695 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1696 {
1697     GList *gIter = NULL;
1698     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1699 
1700     /* How to cancel an action.
1701      * 1. Check pending ops list, if it hasn't been handed off
1702      *    to the service library or stonith recurring list remove
1703      *    it there and that will stop it.
1704      * 2. If it isn't in the pending ops list, then it's either a
1705      *    recurring op in the stonith recurring list, or the service
1706      *    library's recurring list.  Stop it there
1707      * 3. If not found in any lists, then this operation has either
1708      *    been executed already and is not a recurring operation, or
1709      *    never existed.
1710      */
1711     if (!rsc) {
1712         return -ENODEV;
1713     }
1714 
1715     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1716         lrmd_cmd_t *cmd = gIter->data;
1717 
1718         if (action_matches(cmd, action, interval_ms)) {
1719             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1720             cmd_finalize(cmd, rsc);
1721             return pcmk_ok;
1722         }
1723     }
1724 
1725     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1726         /* The service library does not handle stonith operations.
1727          * We have to handle recurring stonith operations ourselves. */
1728         for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1729             lrmd_cmd_t *cmd = gIter->data;
1730 
1731             if (action_matches(cmd, action, interval_ms)) {
1732                 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1733                 if (rsc->active != cmd) {
1734                     cmd_finalize(cmd, rsc);
1735                 }
1736                 return pcmk_ok;
1737             }
1738         }
1739     } else if (services_action_cancel(rsc_id,
1740                                       normalize_action_name(rsc, action),
1741                                       interval_ms) == TRUE) {
1742         /* The service library will tell the action_complete callback function
1743          * this action was cancelled, which will destroy the cmd and remove
1744          * it from the recurring_op list. Do not do that in this function
1745          * if the service library says it cancelled it. */
1746         return pcmk_ok;
1747     }
1748 
1749     return -EOPNOTSUPP;
1750 }
1751 
1752 static void
1753 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1754 {
1755     GList *cmd_list = NULL;
1756     GList *cmd_iter = NULL;
1757 
1758     /* Notice a copy of each list is created when concat is called.
1759      * This prevents odd behavior from occurring when the cmd_list
1760      * is iterated through later on.  It is possible the cancel_op
1761      * function may end up modifying the recurring_ops and pending_ops
1762      * lists.  If we did not copy those lists, our cmd_list iteration
1763      * could get messed up.*/
1764     if (rsc->recurring_ops) {
1765         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1766     }
1767     if (rsc->pending_ops) {
1768         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1769     }
1770     if (!cmd_list) {
1771         return;
1772     }
1773 
1774     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1775         lrmd_cmd_t *cmd = cmd_iter->data;
1776 
1777         if (cmd->interval_ms == 0) {
1778             continue;
1779         }
1780 
1781         if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1782             continue;
1783         }
1784 
1785         cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1786     }
1787     /* frees only the copied list data, not the cmds */
1788     g_list_free(cmd_list);
1789 }
1790 
1791 static int
1792 process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1793 {
1794     xmlNode *rsc_xml = pcmk__xpath_find_one(request->doc,
1795                                             "//" PCMK__XE_LRMD_RSC,
1796                                             LOG_ERR);
1797     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1798     const char *action = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
1799     guint interval_ms = 0;
1800 
1801     crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms);
1802 
1803     if (!rsc_id || !action) {
1804         return -EINVAL;
1805     }
1806 
1807     return cancel_op(rsc_id, action, interval_ms);
1808 }
1809 
1810 static void
1811 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1812 {
1813     xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC);
1814 
1815     crm_xml_add(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1816     for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1817         lrmd_cmd_t *cmd = item->data;
1818         xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP);
1819 
1820         crm_xml_add(op_xml, PCMK__XA_LRMD_RSC_ACTION,
1821                     pcmk__s(cmd->real_action, cmd->action));
1822         crm_xml_add_ms(op_xml, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
1823         crm_xml_add_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig);
1824     }
1825 }
1826 
1827 static xmlNode *
1828 process_lrmd_get_recurring(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1829 {
1830     int rc = pcmk_ok;
1831     const char *rsc_id = NULL;
1832     lrmd_rsc_t *rsc = NULL;
1833     xmlNode *reply = NULL;
1834     xmlNode *rsc_xml = NULL;
1835 
1836     // Resource ID is optional
1837     rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL);
1838     if (rsc_xml) {
1839         rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL);
1840     }
1841     if (rsc_xml) {
1842         rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1843     }
1844 
1845     // If resource ID is specified, resource must exist
1846     if (rsc_id != NULL) {
1847         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1848         if (rsc == NULL) {
1849             crm_info("Resource '%s' not found (%d active resources)",
1850                      rsc_id, g_hash_table_size(rsc_list));
1851             rc = -ENODEV;
1852         }
1853     }
1854 
1855     reply = create_lrmd_reply(__func__, rc, call_id);
1856 
1857     // If resource ID is not specified, check all resources
1858     if (rsc_id == NULL) {
1859         GHashTableIter iter;
1860         char *key = NULL;
1861 
1862         g_hash_table_iter_init(&iter, rsc_list);
1863         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1864                                       (gpointer *) &rsc)) {
1865             add_recurring_op_xml(reply, rsc);
1866         }
1867     } else if (rsc) {
1868         add_recurring_op_xml(reply, rsc);
1869     }
1870     return reply;
1871 }
1872 
1873 void
1874 process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1875 {
1876     int rc = pcmk_ok;
1877     int call_id = 0;
1878     const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
1879     int do_reply = 0;
1880     int do_notify = 0;
1881     xmlNode *reply = NULL;
1882 
1883     /* Certain IPC commands may be done only by privileged users (i.e. root or
1884      * hacluster), because they would otherwise provide a means of bypassing
1885      * ACLs.
1886      */
1887     bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
1888 
1889     crm_trace("Processing %s operation from %s", op, client->id);
1890     crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);
1891 
1892     if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
1893 #ifdef PCMK__COMPILE_REMOTE
1894         if (allowed) {
1895             ipc_proxy_forward_client(client, request);
1896         } else {
1897             rc = -EACCES;
1898         }
1899 #else
1900         rc = -EPROTONOSUPPORT;
1901 #endif
1902         do_reply = 1;
1903     } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
1904         rc = process_lrmd_signon(client, request, call_id, &reply);
1905         do_reply = 1;
1906     } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
1907         if (allowed) {
1908             rc = process_lrmd_rsc_register(client, id, request);
1909             do_notify = 1;
1910         } else {
1911             rc = -EACCES;
1912         }
1913         do_reply = 1;
1914     } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
1915         if (allowed) {
1916             reply = process_lrmd_get_rsc_info(request, call_id);
1917         } else {
1918             rc = -EACCES;
1919         }
1920         do_reply = 1;
1921     } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
1922         if (allowed) {
1923             rc = process_lrmd_rsc_unregister(client, id, request);
1924             /* don't notify anyone about failed un-registers */
1925             if (rc == pcmk_ok || rc == -EINPROGRESS) {
1926                 do_notify = 1;
1927             }
1928         } else {
1929             rc = -EACCES;
1930         }
1931         do_reply = 1;
1932     } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
1933         if (allowed) {
1934             rc = process_lrmd_rsc_exec(client, id, request);
1935         } else {
1936             rc = -EACCES;
1937         }
1938         do_reply = 1;
1939     } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
1940         if (allowed) {
1941             rc = process_lrmd_rsc_cancel(client, id, request);
1942         } else {
1943             rc = -EACCES;
1944         }
1945         do_reply = 1;
1946     } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
1947         do_notify = 1;
1948         do_reply = 1;
1949     } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
1950         if (allowed) {
1951             xmlNode *wrapper = pcmk__xe_first_child(request,
1952                                                     PCMK__XE_LRMD_CALLDATA,
1953                                                     NULL, NULL);
1954             xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
1955 
1956             const char *timeout = NULL;
1957 
1958             CRM_LOG_ASSERT(data != NULL);
1959             timeout = crm_element_value(data, PCMK__XA_LRMD_WATCHDOG);
1960             pcmk__valid_stonith_watchdog_timeout(timeout);
1961         } else {
1962             rc = -EACCES;
1963         }
1964     } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
1965         if (allowed) {
1966             rc = process_lrmd_alert_exec(client, id, request);
1967         } else {
1968             rc = -EACCES;
1969         }
1970         do_reply = 1;
1971     } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
1972         if (allowed) {
1973             reply = process_lrmd_get_recurring(request, call_id);
1974         } else {
1975             rc = -EACCES;
1976         }
1977         do_reply = 1;
1978     } else {
1979         rc = -EOPNOTSUPP;
1980         do_reply = 1;
1981         crm_err("Unknown IPC request '%s' from client %s",
1982                 op, pcmk__client_name(client));
1983     }
1984 
1985     if (rc == -EACCES) {
1986         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
1987                  op, pcmk__client_name(client));
1988     }
1989 
1990     crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
1991               op, client->id, rc, do_reply, do_notify);
1992 
1993     if (do_reply) {
1994         int send_rc = pcmk_rc_ok;
1995 
1996         if (reply == NULL) {
1997             reply = create_lrmd_reply(__func__, rc, call_id);
1998         }
1999         send_rc = lrmd_server_send_reply(client, id, reply);
2000         pcmk__xml_free(reply);
2001         if (send_rc != pcmk_rc_ok) {
2002             crm_warn("Reply to client %s failed: %s " QB_XS " rc=%d",
2003                      pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
2004         }
2005     }
2006 
2007     if (do_notify) {
2008         send_generic_notify(rc, request);
2009     }
2010 }

/* [previous][next][first][last][top][bottom][index][help] */