root/daemons/execd/execd_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. time_is_set
  2. get_current_time
  3. time_diff_ms
  4. cmd_original_times
  5. action_matches
  6. log_finished
  7. log_execute
  8. normalize_action_name
  9. build_rsc_from_xml
  10. create_lrmd_cmd
  11. stop_recurring_timer
  12. free_lrmd_cmd
  13. stonith_recurring_op_helper
  14. start_recurring_timer
  15. start_delay_helper
  16. find_duplicate_action
  17. merge_recurring_duplicate
  18. schedule_lrmd_cmd
  19. create_lrmd_reply
  20. send_client_notify
  21. send_cmd_complete_notify
  22. send_generic_notify
  23. cmd_reset
  24. cmd_finalize
  25. notify_one_client
  26. notify_of_new_client
  27. client_disconnect_cleanup
  28. action_complete
  29. stonith_action_complete
  30. lrmd_stonith_callback
  31. stonith_connection_failed
  32. execd_stonith_start
  33. execd_stonith_stop
  34. execd_stonith_monitor
  35. execute_stonith_action
  36. execute_nonstonith_action
  37. execute_resource_action
  38. free_rsc
  39. process_lrmd_signon
  40. process_lrmd_rsc_register
  41. process_lrmd_get_rsc_info
  42. process_lrmd_rsc_unregister
  43. process_lrmd_rsc_exec
  44. cancel_op
  45. cancel_all_recurring
  46. process_lrmd_rsc_cancel
  47. add_recurring_op_xml
  48. process_lrmd_get_recurring
  49. process_lrmd_message

   1 /*
   2  * Copyright 2012-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/fencing/internal.h>
  12 
  13 #include <glib.h>
  14 
  15 // Check whether we have a high-resolution monotonic clock
  16 #undef PCMK__TIME_USE_CGT
  17 #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
  18 #  define PCMK__TIME_USE_CGT
  19 #  include <time.h>  /* clock_gettime */
  20 #endif
  21 
   22 #include <unistd.h>
#include <limits.h>  /* INT_MAX, INT_MIN */
  23 
  24 #include <crm/crm.h>
  25 #include <crm/fencing/internal.h>
  26 #include <crm/services.h>
  27 #include <crm/services_internal.h>
  28 #include <crm/common/mainloop.h>
  29 #include <crm/common/ipc.h>
  30 #include <crm/common/ipc_internal.h>
  31 #include <crm/common/xml.h>
  32 
  33 #include "pacemaker-execd.h"
  34 
   35 GHashTable *rsc_list = NULL;  // Resource objects (lrmd_rsc_t *), looked up in this file by resource ID
  36 
   37 typedef struct lrmd_cmd_s {     // One resource action requested by a client
   38     int timeout;                // Action timeout from the request (restored from timeout_orig before clients are notified)
   39     guint interval_ms;          // Recurrence interval from the request; 0 means the action is not recurring
   40     int start_delay;            // If nonzero, used as the g_timeout_add() interval when the command is scheduled
   41     int timeout_orig;           // Copy of timeout taken when the command was created
   42 
   43     int call_id;                // Call ID from the request (replaced when a duplicate recurring request is merged in)
   44 
   45     int call_opts;              // Call options from the request (tested against lrmd_opt_* flags)
   46     /* Timer ids, must be removed on cmd destruction. */
   47     int delay_id;               // Source ID of the start-delay timer, or 0 if none is registered
   48     int stonith_recurring_id;   // Source ID of the fencing recurring-op timer, or 0 if none is registered
   49 
   50     int rsc_deleted;            // Set to 1 when the command is finalized without a resource object
   51 
   52     int service_flags;          // SVC_ACTION_LEAVE_GROUP is set here when CRM_meta_on_fail is PCMK_VALUE_BLOCK
   53 
   54     char *client_id;            // Copy of the ID of the client that requested the command
   55     char *origin;               // Copy of the request's PCMK__XA_LRMD_ORIGIN value
   56     char *rsc_id;               // Copy of the request's PCMK__XA_LRMD_RSC_ID value
   57     char *action;               // Copy of the request's PCMK__XA_LRMD_RSC_ACTION value
   58     char *real_action;          // If set, reported to clients as the action name instead of action
   59     char *userdata_str;         // Copy of the request's PCMK__XA_LRMD_RSC_USERDATA_STR value
   60 
   61     pcmk__action_result_t result;   // Exit status, execution status and exit reason reported to clients
   62 
   63     /* We can track operation queue time and run time, to be saved with the CIB
   64      * resource history (and displayed in cluster status). We need
   65      * high-resolution monotonic time for this purpose, so we use
   66      * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
   67      * is disabled).
   68      *
   69      * However, we also need epoch timestamps for recording the time the command
   70      * last ran and the time its return value last changed, for use in time
   71      * displays (as opposed to interval calculations). We keep time_t values for
   72      * this purpose.
   73      *
   74      * The last run time is used for both purposes, so we keep redundant
   75      * monotonic and epoch values for this. Technically the two could represent
   76      * different times, but since time_t has only second resolution and the
   77      * values are used for distinct purposes, that is not significant.
   78      */
   79 #ifdef PCMK__TIME_USE_CGT
   80     /* Recurring and systemd operations may involve more than one executor
   81      * command per operation, so they need info about the original and the most
   82      * recent.
   83      */
   84     struct timespec t_first_run;    // When op first ran
   85     struct timespec t_run;          // When op most recently ran
   86     struct timespec t_first_queue;  // When op was first queued
   87     struct timespec t_queue;        // When op was most recently queued
   88 #endif
   89     time_t epoch_last_run;          // Epoch timestamp of when op last ran
   90     time_t epoch_rcchange;          // Epoch timestamp of when rc last changed
   91 
   92     bool first_notify_sent;         // Whether a completion notification has been sent for this command
   93     int last_notify_rc;             // Exit status recorded when the last notification was sent
   94     int last_notify_op_status;      // Execution status recorded when the last notification was sent
   95     int last_pid;                   // PID included in the completion log message when nonzero
   96 
   97     GHashTable *params;             // Table built from the request by xml2list(); destroyed with the command
   98 } lrmd_cmd_t;
  99 
 100 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
 101 static gboolean execute_resource_action(gpointer user_data);
 102 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
 103 
 104 #ifdef PCMK__TIME_USE_CGT
 105 
 106 /*!
 107  * \internal
 108  * \brief Check whether a struct timespec has been set
 109  *
 110  * \param[in] timespec  Time to check
 111  *
 112  * \return true if timespec has been set (i.e. is nonzero), false otherwise
 113  */
 114 static inline bool
 115 time_is_set(const struct timespec *timespec)
     /* [previous][next][first][last][top][bottom][index][help] */
 116 {
 117     return (timespec != NULL) &&
 118            ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
 119 }
 120 
 121 /*
 122  * \internal
 123  * \brief Set a timespec (and its original if unset) to the current time
 124  *
 125  * \param[out] t_current  Where to store current time
 126  * \param[out] t_orig     Where to copy t_current if unset
 127  */
 128 static void
 129 get_current_time(struct timespec *t_current, struct timespec *t_orig)
     /* [previous][next][first][last][top][bottom][index][help] */
 130 {
 131     clock_gettime(CLOCK_MONOTONIC, t_current);
 132     if ((t_orig != NULL) && !time_is_set(t_orig)) {
 133         *t_orig = *t_current;
 134     }
 135 }
 136 
 137 /*!
 138  * \internal
 139  * \brief Return difference between two times in milliseconds
 140  *
 141  * \param[in] now  More recent time (or NULL to use current time)
 142  * \param[in] old  Earlier time
 143  *
 144  * \return milliseconds difference (or 0 if old is NULL or unset)
 145  *
 146  * \note Can overflow on 32bit machines when the differences is around
 147  *       24 days or more.
 148  */
 149 static int
 150 time_diff_ms(const struct timespec *now, const struct timespec *old)
     /* [previous][next][first][last][top][bottom][index][help] */
 151 {
 152     int diff_ms = 0;
 153 
 154     if (time_is_set(old)) {
 155         struct timespec local_now = { 0, };
 156 
 157         if (now == NULL) {
 158             clock_gettime(CLOCK_MONOTONIC, &local_now);
 159             now = &local_now;
 160         }
 161         diff_ms = (now->tv_sec - old->tv_sec) * 1000
 162                   + (now->tv_nsec - old->tv_nsec) / 1000000;
 163     }
 164     return diff_ms;
 165 }
 166 
 167 /*!
 168  * \internal
 169  * \brief Reset a command's operation times to their original values.
 170  *
 171  * Reset a command's run and queued timestamps to the timestamps of the original
 172  * command, so we report the entire time since then and not just the time since
 173  * the most recent command (for recurring and systemd operations).
 174  *
 175  * \param[in,out] cmd  Executor command object to reset
 176  *
 177  * \note It's not obvious what the queued time should be for a systemd
 178  *       start/stop operation, which might go like this:
 179  *         initial command queued 5ms, runs 3s
 180  *         monitor command queued 10ms, runs 10s
 181  *         monitor command queued 10ms, runs 10s
 182  *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 183  *       implementation will report 5ms. If it's 25ms, then we need to
 184  *       subtract 20ms from the total exec time so as not to count it twice.
 185  *       We can implement that later if it matters to anyone ...
 186  */
 187 static void
 188 cmd_original_times(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 189 {
 190     cmd->t_run = cmd->t_first_run;
 191     cmd->t_queue = cmd->t_first_queue;
 192 }
 193 #endif
 194 
 195 static inline bool
 196 action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 197 {
 198     return (cmd->interval_ms == interval_ms)
 199            && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
 200 }
 201 
 202 /*!
 203  * \internal
 204  * \brief Log the result of an asynchronous command
 205  *
 206  * \param[in] cmd            Command to log result for
 207  * \param[in] exec_time_ms   Execution time in milliseconds, if known
 208  * \param[in] queue_time_ms  Queue time in milliseconds, if known
 209  */
 210 static void
 211 log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 212 {
 213     int log_level = LOG_INFO;
 214     GString *str = g_string_sized_new(100); // reasonable starting size
 215 
 216     if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
 217         log_level = LOG_DEBUG;
 218     }
 219 
 220     g_string_append_printf(str, "%s %s (call %d",
 221                            cmd->rsc_id, cmd->action, cmd->call_id);
 222     if (cmd->last_pid != 0) {
 223         g_string_append_printf(str, ", PID %d", cmd->last_pid);
 224     }
 225     if (cmd->result.execution_status == PCMK_EXEC_DONE) {
 226         g_string_append_printf(str, ") exited with status %d",
 227                                cmd->result.exit_status);
 228     } else {
 229         pcmk__g_strcat(str, ") could not be executed: ",
 230                        pcmk_exec_status_str(cmd->result.execution_status),
 231                        NULL);
 232     }
 233     if (cmd->result.exit_reason != NULL) {
 234         pcmk__g_strcat(str, " (", cmd->result.exit_reason, ")", NULL);
 235     }
 236 
 237 #ifdef PCMK__TIME_USE_CGT
 238     pcmk__g_strcat(str, " (execution time ",
 239                    pcmk__readable_interval(exec_time_ms), NULL);
 240     if (queue_time_ms > 0) {
 241         pcmk__g_strcat(str, " after being queued ",
 242                        pcmk__readable_interval(queue_time_ms), NULL);
 243     }
 244     g_string_append_c(str, ')');
 245 #endif
 246 
 247     do_crm_log(log_level, "%s", str->str);
 248     g_string_free(str, TRUE);
 249 }
 250 
 251 static void
 252 log_execute(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 253 {
 254     int log_level = LOG_INFO;
 255 
 256     if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
 257         log_level = LOG_DEBUG;
 258     }
 259 
 260     do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
 261                cmd->rsc_id, cmd->action, cmd->call_id);
 262 }
 263 
 264 static const char *
 265 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 266 {
 267     if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_casei) &&
 268         pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
 269         return PCMK_ACTION_STATUS;
 270     }
 271     return action;
 272 }
 273 
 274 static lrmd_rsc_t *
 275 build_rsc_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 276 {
 277     xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, msg, LOG_ERR);
 278     lrmd_rsc_t *rsc = NULL;
 279 
 280     rsc = pcmk__assert_alloc(1, sizeof(lrmd_rsc_t));
 281 
 282     crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &rsc->call_opts);
 283 
 284     rsc->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
 285     rsc->class = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_CLASS);
 286     rsc->provider = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_PROVIDER);
 287     rsc->type = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_TYPE);
 288     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action,
 289                                      rsc);
 290 
 291     // Initialize fence device probes (to return "not running")
 292     pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
 293                      PCMK_EXEC_NO_FENCE_DEVICE, NULL);
 294     return rsc;
 295 }
 296 
 297 static lrmd_cmd_t *
 298 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
     /* [previous][next][first][last][top][bottom][index][help] */
 299 {
 300     int call_options = 0;
 301     xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, msg, LOG_ERR);
 302     lrmd_cmd_t *cmd = NULL;
 303 
 304     cmd = pcmk__assert_alloc(1, sizeof(lrmd_cmd_t));
 305 
 306     crm_element_value_int(msg, PCMK__XA_LRMD_CALLOPT, &call_options);
 307     cmd->call_opts = call_options;
 308     cmd->client_id = pcmk__str_copy(client->id);
 309 
 310     crm_element_value_int(msg, PCMK__XA_LRMD_CALLID, &cmd->call_id);
 311     crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL,
 312                          &cmd->interval_ms);
 313     crm_element_value_int(rsc_xml, PCMK__XA_LRMD_TIMEOUT, &cmd->timeout);
 314     crm_element_value_int(rsc_xml, PCMK__XA_LRMD_RSC_START_DELAY,
 315                           &cmd->start_delay);
 316     cmd->timeout_orig = cmd->timeout;
 317 
 318     cmd->origin = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_ORIGIN);
 319     cmd->action = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
 320     cmd->userdata_str = crm_element_value_copy(rsc_xml,
 321                                                PCMK__XA_LRMD_RSC_USERDATA_STR);
 322     cmd->rsc_id = crm_element_value_copy(rsc_xml, PCMK__XA_LRMD_RSC_ID);
 323 
 324     cmd->params = xml2list(rsc_xml);
 325 
 326     if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"),
 327                      PCMK_VALUE_BLOCK, pcmk__str_casei)) {
 328         crm_debug("Setting flag to leave pid group on timeout and "
 329                   "only kill action pid for " PCMK__OP_FMT,
 330                   cmd->rsc_id, cmd->action, cmd->interval_ms);
 331         cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
 332                                                 LOG_TRACE, "Action",
 333                                                 cmd->action, 0,
 334                                                 SVC_ACTION_LEAVE_GROUP,
 335                                                 "SVC_ACTION_LEAVE_GROUP");
 336     }
 337     return cmd;
 338 }
 339 
 340 static void
 341 stop_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 342 {
 343     if (cmd) {
 344         if (cmd->stonith_recurring_id) {
 345             g_source_remove(cmd->stonith_recurring_id);
 346         }
 347         cmd->stonith_recurring_id = 0;
 348     }
 349 }
 350 
 351 static void
 352 free_lrmd_cmd(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 353 {
 354     stop_recurring_timer(cmd);
 355     if (cmd->delay_id) {
 356         g_source_remove(cmd->delay_id);
 357     }
 358     if (cmd->params) {
 359         g_hash_table_destroy(cmd->params);
 360     }
 361     pcmk__reset_result(&(cmd->result));
 362     free(cmd->origin);
 363     free(cmd->action);
 364     free(cmd->real_action);
 365     free(cmd->userdata_str);
 366     free(cmd->rsc_id);
 367     free(cmd->client_id);
 368     free(cmd);
 369 }
 370 
 371 static gboolean
 372 stonith_recurring_op_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 373 {
 374     lrmd_cmd_t *cmd = data;
 375     lrmd_rsc_t *rsc;
 376 
 377     cmd->stonith_recurring_id = 0;
 378 
 379     if (!cmd->rsc_id) {
 380         return FALSE;
 381     }
 382 
 383     rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 384 
 385     pcmk__assert(rsc != NULL);
 386     /* take it out of recurring_ops list, and put it in the pending ops
 387      * to be executed */
 388     rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 389     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 390 #ifdef PCMK__TIME_USE_CGT
 391     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 392 #endif
 393     mainloop_set_trigger(rsc->work);
 394 
 395     return FALSE;
 396 }
 397 
 398 static inline void
 399 start_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 400 {
 401     if (cmd && (cmd->interval_ms > 0)) {
 402         cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
 403                                                   stonith_recurring_op_helper,
 404                                                   cmd);
 405     }
 406 }
 407 
 408 static gboolean
 409 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 410 {
 411     lrmd_cmd_t *cmd = data;
 412     lrmd_rsc_t *rsc = NULL;
 413 
 414     cmd->delay_id = 0;
 415     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 416 
 417     if (rsc) {
 418         mainloop_set_trigger(rsc->work);
 419     }
 420 
 421     return FALSE;
 422 }
 423 
 424 /*!
 425  * \internal
 426  * \brief Check whether a list already contains the equivalent of a given action
 427  *
 428  * \param[in] action_list  List to search
 429  * \param[in] cmd          Action to search for
 430  */
 431 static lrmd_cmd_t *
 432 find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 433 {
 434     for (const GList *item = action_list; item != NULL; item = item->next) {
 435         lrmd_cmd_t *dup = item->data;
 436 
 437         if (action_matches(cmd, dup->action, dup->interval_ms)) {
 438             return dup;
 439         }
 440     }
 441     return NULL;
 442 }
 443 
 444 static bool
 445 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 446 {
 447     lrmd_cmd_t * dup = NULL;
 448     bool dup_pending = true;
 449 
 450     if (cmd->interval_ms == 0) {
 451         return false;
 452     }
 453 
 454     // Search for a duplicate of this action (in-flight or not)
 455     dup = find_duplicate_action(rsc->pending_ops, cmd);
 456     if (dup == NULL) {
 457         dup_pending = false;
 458         dup = find_duplicate_action(rsc->recurring_ops, cmd);
 459         if (dup == NULL) {
 460             return false;
 461         }
 462     }
 463 
 464     /* Do not merge fencing monitors marked for cancellation, so we can reply to
 465      * the cancellation separately.
 466      */
 467     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 468                      pcmk__str_casei)
 469         && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
 470         return false;
 471     }
 472 
 473     /* This should not occur. If it does, we need to investigate how something
 474      * like this is possible in the controller.
 475      */
 476     crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
 477              "), merging with previous op entry",
 478              rsc->rsc_id, normalize_action_name(rsc, dup->action),
 479              dup->interval_ms);
 480 
 481     // Merge new action's call ID and user data into existing action
 482     dup->first_notify_sent = false;
 483     free(dup->userdata_str);
 484     dup->userdata_str = cmd->userdata_str;
 485     cmd->userdata_str = NULL;
 486     dup->call_id = cmd->call_id;
 487     free_lrmd_cmd(cmd);
 488     cmd = NULL;
 489 
 490     /* If dup is not pending, that means it has already executed at least once
 491      * and is waiting in the interval. In that case, stop waiting and initiate
 492      * a new instance now.
 493      */
 494     if (!dup_pending) {
 495         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 496                          pcmk__str_casei)) {
 497             stop_recurring_timer(dup);
 498             stonith_recurring_op_helper(dup);
 499         } else {
 500             services_action_kick(rsc->rsc_id,
 501                                  normalize_action_name(rsc, dup->action),
 502                                  dup->interval_ms);
 503         }
 504     }
 505     return true;
 506 }
 507 
 508 static void
 509 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 510 {
 511     CRM_CHECK(cmd != NULL, return);
 512     CRM_CHECK(rsc != NULL, return);
 513 
 514     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
 515 
 516     if (merge_recurring_duplicate(rsc, cmd)) {
 517         // Equivalent of cmd has already been scheduled
 518         return;
 519     }
 520 
 521     /* The controller expects the executor to automatically cancel
 522      * recurring operations before a resource stops.
 523      */
 524     if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
 525         cancel_all_recurring(rsc, NULL);
 526     }
 527 
 528     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 529 #ifdef PCMK__TIME_USE_CGT
 530     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 531 #endif
 532     mainloop_set_trigger(rsc->work);
 533 
 534     if (cmd->start_delay) {
 535         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
 536     }
 537 }
 538 
 539 static xmlNode *
 540 create_lrmd_reply(const char *origin, int rc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 541 {
 542     xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_LRMD_REPLY);
 543 
 544     crm_xml_add(reply, PCMK__XA_LRMD_ORIGIN, origin);
 545     crm_xml_add_int(reply, PCMK__XA_LRMD_RC, rc);
 546     crm_xml_add_int(reply, PCMK__XA_LRMD_CALLID, call_id);
 547     return reply;
 548 }
 549 
 550 static void
 551 send_client_notify(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 552 {
 553     xmlNode *update_msg = user_data;
 554     pcmk__client_t *client = value;
 555     int rc;
 556     int log_level = LOG_WARNING;
 557     const char *msg = NULL;
 558 
 559     CRM_CHECK(client != NULL, return);
 560     if (client->name == NULL) {
 561         crm_trace("Skipping notification to client without name");
 562         return;
 563     }
 564     if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
 565         /* We only want to notify clients of the executor IPC API. If we are
 566          * running as Pacemaker Remote, we may have clients proxied to other
 567          * IPC services in the cluster, so skip those.
 568          */
 569         crm_trace("Skipping executor API notification to client %s",
 570                   pcmk__client_name(client));
 571         return;
 572     }
 573 
 574     rc = lrmd_server_send_notify(client, update_msg);
 575     if (rc == pcmk_rc_ok) {
 576         return;
 577     }
 578 
 579     switch (rc) {
 580         case ENOTCONN:
 581         case EPIPE: // Client exited without waiting for notification
 582             log_level = LOG_INFO;
 583             msg = "Disconnected";
 584             break;
 585 
 586         default:
 587             msg = pcmk_rc_str(rc);
 588             break;
 589     }
 590     do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
 591                pcmk__client_name(client), msg, rc);
 592 }
 593 
 594 static void
 595 send_cmd_complete_notify(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 596 {
 597     xmlNode *notify = NULL;
 598     int exec_time = 0;
 599     int queue_time = 0;
 600 
 601 #ifdef PCMK__TIME_USE_CGT
 602     exec_time = time_diff_ms(NULL, &(cmd->t_run));
 603     queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
 604 #endif
 605     log_finished(cmd, exec_time, queue_time);
 606 
 607     /* If the originator requested to be notified only for changes in recurring
 608      * operation results, skip the notification if the result hasn't changed.
 609      */
 610     if (cmd->first_notify_sent
 611         && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_changes_only)
 612         && (cmd->last_notify_rc == cmd->result.exit_status)
 613         && (cmd->last_notify_op_status == cmd->result.execution_status)) {
 614         return;
 615     }
 616 
 617     cmd->first_notify_sent = true;
 618     cmd->last_notify_rc = cmd->result.exit_status;
 619     cmd->last_notify_op_status = cmd->result.execution_status;
 620 
 621     notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
 622 
 623     crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__);
 624     crm_xml_add_int(notify, PCMK__XA_LRMD_TIMEOUT, cmd->timeout);
 625     crm_xml_add_ms(notify, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
 626     crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_START_DELAY, cmd->start_delay);
 627     crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_RC, cmd->result.exit_status);
 628     crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_OP_STATUS,
 629                     cmd->result.execution_status);
 630     crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, cmd->call_id);
 631     crm_xml_add_int(notify, PCMK__XA_LRMD_RSC_DELETED, cmd->rsc_deleted);
 632 
 633     crm_xml_add_ll(notify, PCMK__XA_LRMD_RUN_TIME,
 634                    (long long) cmd->epoch_last_run);
 635     crm_xml_add_ll(notify, PCMK__XA_LRMD_RCCHANGE_TIME,
 636                    (long long) cmd->epoch_rcchange);
 637 #ifdef PCMK__TIME_USE_CGT
 638     crm_xml_add_int(notify, PCMK__XA_LRMD_EXEC_TIME, exec_time);
 639     crm_xml_add_int(notify, PCMK__XA_LRMD_QUEUE_TIME, queue_time);
 640 #endif
 641 
 642     crm_xml_add(notify, PCMK__XA_LRMD_OP, LRMD_OP_RSC_EXEC);
 643     crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, cmd->rsc_id);
 644     if(cmd->real_action) {
 645         crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->real_action);
 646     } else {
 647         crm_xml_add(notify, PCMK__XA_LRMD_RSC_ACTION, cmd->action);
 648     }
 649     crm_xml_add(notify, PCMK__XA_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
 650     crm_xml_add(notify, PCMK__XA_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
 651 
 652     if (cmd->result.action_stderr != NULL) {
 653         crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT,
 654                     cmd->result.action_stderr);
 655 
 656     } else if (cmd->result.action_stdout != NULL) {
 657         crm_xml_add(notify, PCMK__XA_LRMD_RSC_OUTPUT,
 658                     cmd->result.action_stdout);
 659     }
 660 
 661     if (cmd->params) {
 662         char *key = NULL;
 663         char *value = NULL;
 664         GHashTableIter iter;
 665 
 666         xmlNode *args = pcmk__xe_create(notify, PCMK__XE_ATTRIBUTES);
 667 
 668         g_hash_table_iter_init(&iter, cmd->params);
 669         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 670             hash2smartfield((gpointer) key, (gpointer) value, args);
 671         }
 672     }
 673     if ((cmd->client_id != NULL)
 674         && pcmk_is_set(cmd->call_opts, lrmd_opt_notify_orig_only)) {
 675 
 676         pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
 677 
 678         if (client != NULL) {
 679             send_client_notify(client->id, client, notify);
 680         }
 681     } else {
 682         pcmk__foreach_ipc_client(send_client_notify, notify);
 683     }
 684 
 685     free_xml(notify);
 686 }
 687 
 688 static void
 689 send_generic_notify(int rc, xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
 690 {
 691     if (pcmk__ipc_client_count() != 0) {
 692         int call_id = 0;
 693         xmlNode *notify = NULL;
 694         xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
 695                                             LOG_ERR);
 696         const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
 697         const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
 698 
 699         crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);
 700 
 701         notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
 702         crm_xml_add(notify, PCMK__XA_LRMD_ORIGIN, __func__);
 703         crm_xml_add_int(notify, PCMK__XA_LRMD_RC, rc);
 704         crm_xml_add_int(notify, PCMK__XA_LRMD_CALLID, call_id);
 705         crm_xml_add(notify, PCMK__XA_LRMD_OP, op);
 706         crm_xml_add(notify, PCMK__XA_LRMD_RSC_ID, rsc_id);
 707 
 708         pcmk__foreach_ipc_client(send_client_notify, notify);
 709 
 710         free_xml(notify);
 711     }
 712 }
 713 
 714 static void
 715 cmd_reset(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 716 {
 717     cmd->last_pid = 0;
 718 #ifdef PCMK__TIME_USE_CGT
 719     memset(&cmd->t_run, 0, sizeof(cmd->t_run));
 720     memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
 721 #endif
 722     cmd->epoch_last_run = 0;
 723 
 724     pcmk__reset_result(&(cmd->result));
 725     cmd->result.execution_status = PCMK_EXEC_DONE;
 726 }
 727 
 728 static void
 729 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 730 {
 731     crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
 732               rsc ? rsc->active : NULL, cmd);
 733 
 734     if (rsc && (rsc->active == cmd)) {
 735         rsc->active = NULL;
 736         mainloop_set_trigger(rsc->work);
 737     }
 738 
 739     if (!rsc) {
 740         cmd->rsc_deleted = 1;
 741     }
 742 
 743     /* reset original timeout so client notification has correct information */
 744     cmd->timeout = cmd->timeout_orig;
 745 
 746     send_cmd_complete_notify(cmd);
 747 
 748     if ((cmd->interval_ms != 0)
 749         && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
 750 
 751         if (rsc) {
 752             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 753             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 754         }
 755         free_lrmd_cmd(cmd);
 756     } else if (cmd->interval_ms == 0) {
 757         if (rsc) {
 758             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 759         }
 760         free_lrmd_cmd(cmd);
 761     } else {
 762         /* Clear all the values pertaining just to the last iteration of a recurring op. */
 763         cmd_reset(cmd);
 764     }
 765 }
 766 
  767 struct notify_new_client_data {  // Iteration data handed to notify_one_client()
  768     xmlNode *notify;                // Notification message to send to each existing client
  769     pcmk__client_t *new_client;     // Newly connected client, which is skipped when notifying
  770 };
 771 
 772 static void
 773 notify_one_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 774 {
 775     pcmk__client_t *client = value;
 776     struct notify_new_client_data *data = user_data;
 777 
 778     if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
 779         send_client_notify(key, (gpointer) client, (gpointer) data->notify);
 780     }
 781 }
 782 
 783 void
 784 notify_of_new_client(pcmk__client_t *new_client)
     /* [previous][next][first][last][top][bottom][index][help] */
 785 {
 786     struct notify_new_client_data data;
 787 
 788     data.new_client = new_client;
 789     data.notify = pcmk__xe_create(NULL, PCMK__XE_LRMD_NOTIFY);
 790     crm_xml_add(data.notify, PCMK__XA_LRMD_ORIGIN, __func__);
 791     crm_xml_add(data.notify, PCMK__XA_LRMD_OP, LRMD_OP_NEW_CLIENT);
 792     pcmk__foreach_ipc_client(notify_one_client, &data);
 793     free_xml(data.notify);
 794 }
 795 
 796 void
 797 client_disconnect_cleanup(const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 798 {
 799     GHashTableIter iter;
 800     lrmd_rsc_t *rsc = NULL;
 801     char *key = NULL;
 802 
 803     g_hash_table_iter_init(&iter, rsc_list);
 804     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
 805         if (pcmk_all_flags_set(rsc->call_opts, lrmd_opt_drop_recurring)) {
 806             /* This client is disconnecting, drop any recurring operations
 807              * it may have initiated on the resource */
 808             cancel_all_recurring(rsc, client_id);
 809         }
 810     }
 811 }
 812 
 813 static void
 814 action_complete(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 815 {
 816     lrmd_rsc_t *rsc;
 817     lrmd_cmd_t *cmd = action->cb_data;
 818     enum ocf_exitcode code;
 819 
 820 #ifdef PCMK__TIME_USE_CGT
 821     const char *rclass = NULL;
 822     bool goagain = false;
 823 #endif
 824 
 825     if (!cmd) {
 826         crm_err("Completed executor action (%s) does not match any known operations",
 827                 action->id);
 828         return;
 829     }
 830 
 831 #ifdef PCMK__TIME_USE_CGT
 832     if (cmd->result.exit_status != action->rc) {
 833         cmd->epoch_rcchange = time(NULL);
 834     }
 835 #endif
 836 
 837     cmd->last_pid = action->pid;
 838 
 839     // Cast variable instead of function return to keep compilers happy
 840     code = services_result2ocf(action->standard, cmd->action, action->rc);
 841     pcmk__set_result(&(cmd->result), (int) code,
 842                      action->status, services__exit_reason(action));
 843 
 844     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 845 
 846 #ifdef PCMK__TIME_USE_CGT
 847     if (rsc != NULL) {
 848         rclass = rsc->class;
 849 #if PCMK__ENABLE_SERVICE
 850         if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SERVICE,
 851                          pcmk__str_casei)) {
 852             rclass = resources_find_service_class(rsc->type);
 853         }
 854 #endif
 855     }
 856 
 857     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 858         if (pcmk__result_ok(&(cmd->result))
 859             && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
 860                                     PCMK_ACTION_STOP, NULL)) {
 861             /* systemd returns from start and stop actions after the action
 862              * begins, not after it completes. We have to jump through a few
 863              * hoops so that we don't report 'complete' to the rest of pacemaker
 864              * until it's actually done.
 865              */
 866             goagain = true;
 867             cmd->real_action = cmd->action;
 868             cmd->action = pcmk__str_copy(PCMK_ACTION_MONITOR);
 869 
 870         } else if (cmd->real_action != NULL) {
 871             // This is follow-up monitor to check whether start/stop completed
 872             if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 873                 goagain = true;
 874 
 875             } else if (pcmk__result_ok(&(cmd->result))
 876                        && pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
 877                                        pcmk__str_casei)) {
 878                 goagain = true;
 879 
 880             } else {
 881                 int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 882                 int timeout_left = cmd->timeout_orig - time_sum;
 883 
 884                 crm_debug("%s systemd %s is now complete (elapsed=%dms, "
 885                           "remaining=%dms): %s (%d)",
 886                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
 887                           services_ocf_exitcode_str(cmd->result.exit_status),
 888                           cmd->result.exit_status);
 889                 cmd_original_times(cmd);
 890 
 891                 // Monitors may return "not running", but start/stop shouldn't
 892                 if ((cmd->result.execution_status == PCMK_EXEC_DONE)
 893                     && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
 894 
 895                     if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_START,
 896                                      pcmk__str_casei)) {
 897                         cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
 898                     } else if (pcmk__str_eq(cmd->real_action, PCMK_ACTION_STOP,
 899                                             pcmk__str_casei)) {
 900                         cmd->result.exit_status = PCMK_OCF_OK;
 901                     }
 902                 }
 903             }
 904         }
 905     }
 906 #endif
 907 
 908 #if SUPPORT_NAGIOS
 909     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
 910         if (action_matches(cmd, PCMK_ACTION_MONITOR, 0)
 911             && pcmk__result_ok(&(cmd->result))) {
 912             /* Successfully executed --version for the nagios plugin */
 913             cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
 914 
 915         } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)
 916                    && !pcmk__result_ok(&(cmd->result))) {
 917 #ifdef PCMK__TIME_USE_CGT
 918             goagain = true;
 919 #endif
 920         }
 921     }
 922 #endif
 923 
 924 #ifdef PCMK__TIME_USE_CGT
 925     if (goagain) {
 926         int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 927         int timeout_left = cmd->timeout_orig - time_sum;
 928         int delay = cmd->timeout_orig / 10;
 929 
 930         if(delay >= timeout_left && timeout_left > 20) {
 931             delay = timeout_left/2;
 932         }
 933 
 934         delay = QB_MIN(2000, delay);
 935         if (delay < timeout_left) {
 936             cmd->start_delay = delay;
 937             cmd->timeout = timeout_left;
 938 
 939             if (pcmk__result_ok(&(cmd->result))) {
 940                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 941                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
 942 
 943             } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 944                 crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 945                          cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
 946 
 947             } else {
 948                 crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 949                            cmd->rsc_id, cmd->action,
 950                            services_ocf_exitcode_str(cmd->result.exit_status),
 951                            cmd->result.exit_status, time_sum, timeout_left,
 952                            delay);
 953             }
 954 
 955             cmd_reset(cmd);
 956             if(rsc) {
 957                 rsc->active = NULL;
 958             }
 959             schedule_lrmd_cmd(rsc, cmd);
 960 
 961             /* Don't finalize cmd, we're not done with it yet */
 962             return;
 963 
 964         } else {
 965             crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
 966                        cmd->rsc_id,
 967                        (cmd->real_action? cmd->real_action : cmd->action),
 968                        cmd->result.exit_status, time_sum, timeout_left);
 969             pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 970                              PCMK_EXEC_TIMEOUT,
 971                              "Investigate reason for timeout, and adjust "
 972                              "configured operation timeout if necessary");
 973             cmd_original_times(cmd);
 974         }
 975     }
 976 #endif
 977 
 978     pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
 979                             services__grab_stderr(action));
 980     cmd_finalize(cmd, rsc);
 981 }
 982 
 983 /*!
 984  * \internal
 985  * \brief Process the result of a fence device action (start, stop, or monitor)
 986  *
 987  * \param[in,out] cmd               Fence device action that completed
 988  * \param[in]     exit_status       Fencer API exit status for action
 989  * \param[in]     execution_status  Fencer API execution status for action
 990  * \param[in]     exit_reason       Human-friendly detail, if action failed
 991  */
 992 static void
 993 stonith_action_complete(lrmd_cmd_t *cmd, int exit_status,
     /* [previous][next][first][last][top][bottom][index][help] */
 994                         enum pcmk_exec_status execution_status,
 995                         const char *exit_reason)
 996 {
 997     // This can be NULL if resource was removed before command completed
 998     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 999 
1000     // Simplify fencer exit status to uniform exit status
1001     if (exit_status != CRM_EX_OK) {
1002         exit_status = PCMK_OCF_UNKNOWN_ERROR;
1003     }
1004 
1005     if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) {
1006         /* An in-flight fence action was cancelled. The execution status is
1007          * already correct, so don't overwrite it.
1008          */
1009         execution_status = PCMK_EXEC_CANCELLED;
1010 
1011     } else {
1012         /* Some execution status codes have specific meanings for the fencer
1013          * that executor clients may not expect, so map them to a simple error
1014          * status.
1015          */
1016         switch (execution_status) {
1017             case PCMK_EXEC_NOT_CONNECTED:
1018             case PCMK_EXEC_INVALID:
1019                 execution_status = PCMK_EXEC_ERROR;
1020                 break;
1021 
1022             case PCMK_EXEC_NO_FENCE_DEVICE:
1023                 /* This should be possible only for probes in practice, but
1024                  * interpret for all actions to be safe.
1025                  */
1026                 if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1027                                  pcmk__str_none)) {
1028                     exit_status = PCMK_OCF_NOT_RUNNING;
1029 
1030                 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1031                                         pcmk__str_none)) {
1032                     exit_status = PCMK_OCF_OK;
1033 
1034                 } else {
1035                     exit_status = PCMK_OCF_NOT_INSTALLED;
1036                 }
1037                 execution_status = PCMK_EXEC_ERROR;
1038                 break;
1039 
1040             case PCMK_EXEC_NOT_SUPPORTED:
1041                 exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE;
1042                 break;
1043 
1044             default:
1045                 break;
1046         }
1047     }
1048 
1049     pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason);
1050 
1051     // Certain successful actions change the known state of the resource
1052     if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) {
1053 
1054         if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1055             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK,
1056                              PCMK_EXEC_DONE, NULL); // "running"
1057 
1058         } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
1059                                 pcmk__str_casei)) {
1060             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1061                              PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running"
1062         }
1063     }
1064 
1065     /* The recurring timer should not be running at this point in any case, but
1066      * as a failsafe, stop it if it is.
1067      */
1068     stop_recurring_timer(cmd);
1069 
1070     /* Reschedule this command if appropriate. If a recurring command is *not*
1071      * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
1072      * not be removed from recurring_ops by cmd_finalize().
1073      */
1074     if (rsc && (cmd->interval_ms > 0)
1075         && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
1076         start_recurring_timer(cmd);
1077     }
1078 
1079     cmd_finalize(cmd, rsc);
1080 }
1081 
1082 static void
1083 lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
1084 {
1085     if ((data == NULL) || (data->userdata == NULL)) {
1086         crm_err("Ignoring fence action result: "
1087                 "Invalid callback arguments (bug?)");
1088     } else {
1089         stonith_action_complete((lrmd_cmd_t *) data->userdata,
1090                                 stonith__exit_status(data),
1091                                 stonith__execution_status(data),
1092                                 stonith__exit_reason(data));
1093     }
1094 }
1095 
1096 void
1097 stonith_connection_failed(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1098 {
1099     GHashTableIter iter;
1100     lrmd_rsc_t *rsc = NULL;
1101 
1102     crm_warn("Connection to fencer lost (any pending operations for "
1103              "fence devices will be considered failed)");
1104 
1105     g_hash_table_iter_init(&iter, rsc_list);
1106     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) {
1107         if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1108                           pcmk__str_none)) {
1109             continue;
1110         }
1111 
1112         /* If we registered this fence device, we don't know whether the
1113          * fencer still has the registration or not. Cause future probes to
1114          * return an error until the resource is stopped or started
1115          * successfully. This is especially important if the controller also
1116          * went away (possibly due to a cluster layer restart) and won't
1117          * receive our client notification of any monitors finalized below.
1118          */
1119         if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) {
1120             pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR,
1121                              PCMK_EXEC_NOT_CONNECTED,
1122                              "Lost connection to fencer");
1123         }
1124 
1125         // Consider any active, pending, or recurring operations as failed
1126 
1127         for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) {
1128             lrmd_cmd_t *cmd = op->data;
1129 
1130             /* This won't free a recurring op but instead restart its timer.
1131              * If cmd is rsc->active, this will set rsc->active to NULL, so we
1132              * don't have to worry about finalizing it a second time below.
1133              */
1134             stonith_action_complete(cmd,
1135                                     CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1136                                     "Lost connection to fencer");
1137         }
1138 
1139         if (rsc->active != NULL) {
1140             rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active);
1141         }
1142         while (rsc->pending_ops != NULL) {
1143             // This will free the op and remove it from rsc->pending_ops
1144             stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data,
1145                                     CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED,
1146                                     "Lost connection to fencer");
1147         }
1148     }
1149 }
1150 
1151 /*!
1152  * \internal
1153  * \brief Execute a stonith resource "start" action
1154  *
1155  * Start a stonith resource by registering it with the fencer.
1156  * (Stonith agents don't have a start command.)
1157  *
1158  * \param[in,out] stonith_api  Connection to fencer
1159  * \param[in]     rsc          Stonith resource to start
1160  * \param[in]     cmd          Start command to execute
1161  *
1162  * \return pcmk_ok on success, -errno otherwise
1163  */
1164 static int
1165 execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1166                     const lrmd_cmd_t *cmd)
1167 {
1168     char *key = NULL;
1169     char *value = NULL;
1170     stonith_key_value_t *device_params = NULL;
1171     int rc = pcmk_ok;
1172 
1173     // Convert command parameters to stonith API key/values
1174     if (cmd->params) {
1175         GHashTableIter iter;
1176 
1177         g_hash_table_iter_init(&iter, cmd->params);
1178         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1179             device_params = stonith_key_value_add(device_params, key, value);
1180         }
1181     }
1182 
1183     /* The fencer will automatically register devices via CIB notifications
1184      * when the CIB changes, but to avoid a possible race condition between
1185      * the fencer receiving the notification and the executor requesting that
1186      * resource, the executor registers the device as well. The fencer knows how
1187      * to handle duplicate registrations.
1188      */
1189     rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
1190                                             cmd->rsc_id, rsc->provider,
1191                                             rsc->type, device_params);
1192 
1193     stonith_key_value_freeall(device_params, 1, 1);
1194     return rc;
1195 }
1196 
1197 /*!
1198  * \internal
1199  * \brief Execute a stonith resource "stop" action
1200  *
1201  * Stop a stonith resource by unregistering it with the fencer.
1202  * (Stonith agents don't have a stop command.)
1203  *
1204  * \param[in,out] stonith_api  Connection to fencer
1205  * \param[in]     rsc          Stonith resource to stop
1206  *
1207  * \return pcmk_ok on success, -errno otherwise
1208  */
1209 static inline int
1210 execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1211 {
1212     /* @TODO Failure would indicate a problem communicating with fencer;
1213      * perhaps we should try reconnecting and retrying a few times?
1214      */
1215     return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
1216                                             rsc->rsc_id);
1217 }
1218 
1219 /*!
1220  * \internal
1221  * \brief Initiate a stonith resource agent recurring "monitor" action
1222  *
1223  * \param[in,out] stonith_api  Connection to fencer
1224  * \param[in,out] rsc          Stonith resource to monitor
1225  * \param[in]     cmd          Monitor command being executed
1226  *
1227  * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1228  */
1229 static inline int
1230 execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1231 {
1232     int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
1233                                         cmd->timeout / 1000);
1234 
1235     rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
1236                                               "lrmd_stonith_callback",
1237                                               lrmd_stonith_callback);
1238     if (rc == TRUE) {
1239         rsc->active = cmd;
1240         rc = pcmk_ok;
1241     } else {
1242         rc = -pcmk_err_generic;
1243     }
1244     return rc;
1245 }
1246 
1247 static void
1248 execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1249 {
1250     int rc = 0;
1251     bool do_monitor = FALSE;
1252 
1253     stonith_t *stonith_api = get_stonith_connection();
1254 
1255     if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR, pcmk__str_casei)
1256         && (cmd->interval_ms == 0)) {
1257         // Probes don't require a fencer connection
1258         stonith_action_complete(cmd, rsc->fence_probe_result.exit_status,
1259                                 rsc->fence_probe_result.execution_status,
1260                                 rsc->fence_probe_result.exit_reason);
1261         return;
1262 
1263     } else if (stonith_api == NULL) {
1264         stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR,
1265                                 PCMK_EXEC_NOT_CONNECTED,
1266                                 "No connection to fencer");
1267         return;
1268 
1269     } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
1270         rc = execd_stonith_start(stonith_api, rsc, cmd);
1271         if (rc == pcmk_ok) {
1272             do_monitor = TRUE;
1273         }
1274 
1275     } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
1276         rc = execd_stonith_stop(stonith_api, rsc);
1277 
1278     } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1279                             pcmk__str_casei)) {
1280         do_monitor = TRUE;
1281 
1282     } else {
1283         stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE,
1284                                 PCMK_EXEC_ERROR,
1285                                 "Invalid fence device action (bug?)");
1286         return;
1287     }
1288 
1289     if (do_monitor) {
1290         rc = execd_stonith_monitor(stonith_api, rsc, cmd);
1291         if (rc == pcmk_ok) {
1292             // Don't clean up yet, we will find out result of the monitor later
1293             return;
1294         }
1295     }
1296 
1297     stonith_action_complete(cmd,
1298                             ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR),
1299                             stonith__legacy2status(rc),
1300                             ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc)));
1301 }
1302 
1303 static void
1304 execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1305 {
1306     svc_action_t *action = NULL;
1307     GHashTable *params_copy = NULL;
1308 
1309     pcmk__assert((rsc != NULL) && (cmd != NULL));
1310 
1311     crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
1312               rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1313 
1314 #if SUPPORT_NAGIOS
1315     /* Recurring operations are cancelled anyway for a stop operation */
1316     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
1317         && pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
1318 
1319         cmd->result.exit_status = PCMK_OCF_OK;
1320         cmd_finalize(cmd, rsc);
1321         return;
1322     }
1323 #endif
1324 
1325     params_copy = pcmk__str_table_dup(cmd->params);
1326 
1327     action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1328                                      rsc->type,
1329                                      normalize_action_name(rsc, cmd->action),
1330                                      cmd->interval_ms, cmd->timeout,
1331                                      params_copy, cmd->service_flags);
1332 
1333     if (action == NULL) {
1334         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1335                          PCMK_EXEC_ERROR, strerror(ENOMEM));
1336         cmd_finalize(cmd, rsc);
1337         return;
1338     }
1339 
1340     if (action->rc != PCMK_OCF_UNKNOWN) {
1341         pcmk__set_result(&(cmd->result), action->rc, action->status,
1342                          services__exit_reason(action));
1343         services_action_free(action);
1344         cmd_finalize(cmd, rsc);
1345         return;
1346     }
1347 
1348     action->cb_data = cmd;
1349 
1350     if (services_action_async(action, action_complete)) {
1351         /* The services library has taken responsibility for the action. It
1352          * could be pending, blocked, or merged into a duplicate recurring
1353          * action, in which case the action callback (action_complete())
1354          * will be called when the action completes, otherwise the callback has
1355          * already been called.
1356          *
1357          * action_complete() calls cmd_finalize() which can free cmd, so cmd
1358          * cannot be used here.
1359          */
1360     } else {
1361         /* This is a recurring action that is not being cancelled and could not
1362          * be initiated. It has been rescheduled, and the action callback
1363          * (action_complete()) has been called, which in this case has already
1364          * called cmd_finalize(), which in this case should only reset (not
1365          * free) cmd.
1366          */
1367 
1368         pcmk__set_result(&(cmd->result), action->rc, action->status,
1369                          services__exit_reason(action));
1370         services_action_free(action);
1371     }
1372 }
1373 
1374 static gboolean
1375 execute_resource_action(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1376 {
1377     lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data;
1378     lrmd_cmd_t *cmd = NULL;
1379 
1380     CRM_CHECK(rsc != NULL, return FALSE);
1381 
1382     if (rsc->active) {
1383         crm_trace("%s is still active", rsc->rsc_id);
1384         return TRUE;
1385     }
1386 
1387     if (rsc->pending_ops) {
1388         GList *first = rsc->pending_ops;
1389 
1390         cmd = first->data;
1391         if (cmd->delay_id) {
1392             crm_trace
1393                 ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
1394                  cmd->rsc_id, cmd->action, cmd->start_delay);
1395             return TRUE;
1396         }
1397         rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1398         g_list_free_1(first);
1399 
1400 #ifdef PCMK__TIME_USE_CGT
1401         get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1402 #endif
1403         cmd->epoch_last_run = time(NULL);
1404     }
1405 
1406     if (!cmd) {
1407         crm_trace("Nothing further to do for %s", rsc->rsc_id);
1408         return TRUE;
1409     }
1410 
1411     rsc->active = cmd;          /* only one op at a time for a rsc */
1412     if (cmd->interval_ms) {
1413         rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1414     }
1415 
1416     log_execute(cmd);
1417 
1418     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1419         execute_stonith_action(rsc, cmd);
1420     } else {
1421         execute_nonstonith_action(rsc, cmd);
1422     }
1423 
1424     return TRUE;
1425 }
1426 
1427 void
1428 free_rsc(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1429 {
1430     GList *gIter = NULL;
1431     lrmd_rsc_t *rsc = data;
1432     int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1433                                   pcmk__str_casei);
1434 
1435     gIter = rsc->pending_ops;
1436     while (gIter != NULL) {
1437         GList *next = gIter->next;
1438         lrmd_cmd_t *cmd = gIter->data;
1439 
1440         /* command was never executed */
1441         cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1442         cmd_finalize(cmd, NULL);
1443 
1444         gIter = next;
1445     }
1446     /* frees list, but not list elements. */
1447     g_list_free(rsc->pending_ops);
1448 
1449     gIter = rsc->recurring_ops;
1450     while (gIter != NULL) {
1451         GList *next = gIter->next;
1452         lrmd_cmd_t *cmd = gIter->data;
1453 
1454         if (is_stonith) {
1455             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1456             /* If a stonith command is in-flight, just mark it as cancelled;
1457              * it is not safe to finalize/free the cmd until the stonith api
1458              * says it has either completed or timed out.
1459              */
1460             if (rsc->active != cmd) {
1461                 cmd_finalize(cmd, NULL);
1462             }
1463         } else {
1464             /* This command is already handed off to service library,
1465              * let service library cancel it and tell us via the callback
1466              * when it is cancelled. The rsc can be safely destroyed
1467              * even if we are waiting for the cancel result */
1468             services_action_cancel(rsc->rsc_id,
1469                                    normalize_action_name(rsc, cmd->action),
1470                                    cmd->interval_ms);
1471         }
1472 
1473         gIter = next;
1474     }
1475     /* frees list, but not list elements. */
1476     g_list_free(rsc->recurring_ops);
1477 
1478     free(rsc->rsc_id);
1479     free(rsc->class);
1480     free(rsc->provider);
1481     free(rsc->type);
1482     mainloop_destroy_trigger(rsc->work);
1483 
1484     free(rsc);
1485 }
1486 
1487 static int
1488 process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1489                     xmlNode **reply)
1490 {
1491     int rc = pcmk_ok;
1492     time_t now = time(NULL);
1493     const char *protocol_version =
1494         crm_element_value(request, PCMK__XA_LRMD_PROTOCOL_VERSION);
1495     const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
1496 
1497     if (compare_version(protocol_version, LRMD_COMPATIBLE_PROTOCOL) < 0) {
1498         crm_err("Cluster API version must be greater than or equal to %s, not %s",
1499                 LRMD_COMPATIBLE_PROTOCOL, protocol_version);
1500         rc = -EPROTO;
1501     }
1502 
1503     if (pcmk__xe_attr_is_true(request, PCMK__XA_LRMD_IS_IPC_PROVIDER)) {
1504 #ifdef PCMK__COMPILE_REMOTE
1505         if ((client->remote != NULL)
1506             && pcmk_is_set(client->flags,
1507                            pcmk__client_tls_handshake_complete)) {
1508             const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
1509 
1510             // This is a remote connection from a cluster node's controller
1511             ipc_proxy_add_provider(client);
1512 
1513             /* If this was a register operation, also ask for new schema files but
1514              * only if it's supported by the protocol version.
1515              */
1516             if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none) &&
1517                 LRMD_SUPPORTS_SCHEMA_XFER(protocol_version)) {
1518                 remoted_request_cib_schema_files();
1519             }
1520         } else {
1521             rc = -EACCES;
1522         }
1523 #else
1524         rc = -EPROTONOSUPPORT;
1525 #endif
1526     }
1527 
1528     *reply = create_lrmd_reply(__func__, rc, call_id);
1529     crm_xml_add(*reply, PCMK__XA_LRMD_OP, CRM_OP_REGISTER);
1530     crm_xml_add(*reply, PCMK__XA_LRMD_CLIENTID, client->id);
1531     crm_xml_add(*reply, PCMK__XA_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1532     crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time);
1533 
1534     if (start_state) {
1535         crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state);
1536     }
1537 
1538     return rc;
1539 }
1540 
1541 static int
1542 process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1543 {
1544     int rc = pcmk_ok;
1545     lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1546     lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1547 
1548     if (dup &&
1549         pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1550         pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1551 
1552         crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1553         free_rsc(rsc);
1554         return rc;
1555     }
1556 
1557     g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1558     crm_info("Cached agent information for '%s'", rsc->rsc_id);
1559     return rc;
1560 }
1561 
1562 static xmlNode *
1563 process_lrmd_get_rsc_info(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1564 {
1565     int rc = pcmk_ok;
1566     xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
1567                                         LOG_ERR);
1568     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1569     xmlNode *reply = NULL;
1570     lrmd_rsc_t *rsc = NULL;
1571 
1572     if (rsc_id == NULL) {
1573         rc = -ENODEV;
1574     } else {
1575         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1576         if (rsc == NULL) {
1577             crm_info("Agent information for '%s' not in cache", rsc_id);
1578             rc = -ENODEV;
1579         }
1580     }
1581 
1582     reply = create_lrmd_reply(__func__, rc, call_id);
1583     if (rsc) {
1584         crm_xml_add(reply, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1585         crm_xml_add(reply, PCMK__XA_LRMD_CLASS, rsc->class);
1586         crm_xml_add(reply, PCMK__XA_LRMD_PROVIDER, rsc->provider);
1587         crm_xml_add(reply, PCMK__XA_LRMD_TYPE, rsc->type);
1588     }
1589     return reply;
1590 }
1591 
1592 static int
1593 process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
     /* [previous][next][first][last][top][bottom][index][help] */
1594                             xmlNode *request)
1595 {
1596     int rc = pcmk_ok;
1597     lrmd_rsc_t *rsc = NULL;
1598     xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
1599                                         LOG_ERR);
1600     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1601 
1602     if (!rsc_id) {
1603         return -ENODEV;
1604     }
1605 
1606     rsc = g_hash_table_lookup(rsc_list, rsc_id);
1607     if (rsc == NULL) {
1608         crm_info("Ignoring unregistration of resource '%s', which is not registered",
1609                  rsc_id);
1610         return pcmk_ok;
1611     }
1612 
1613     if (rsc->active) {
1614         /* let the caller know there are still active ops on this rsc to watch for */
1615         crm_trace("Operation (%p) still in progress for unregistered resource %s",
1616                   rsc->active, rsc_id);
1617         rc = -EINPROGRESS;
1618     }
1619 
1620     g_hash_table_remove(rsc_list, rsc_id);
1621 
1622     return rc;
1623 }
1624 
1625 static int
1626 process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1627 {
1628     lrmd_rsc_t *rsc = NULL;
1629     lrmd_cmd_t *cmd = NULL;
1630     xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
1631                                         LOG_ERR);
1632     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1633     int call_id;
1634 
1635     if (!rsc_id) {
1636         return -EINVAL;
1637     }
1638     if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1639         crm_info("Resource '%s' not found (%d active resources)",
1640                  rsc_id, g_hash_table_size(rsc_list));
1641         return -ENODEV;
1642     }
1643 
1644     cmd = create_lrmd_cmd(request, client);
1645     call_id = cmd->call_id;
1646 
1647     /* Don't reference cmd after handing it off to be scheduled.
1648      * The cmd could get merged and freed. */
1649     schedule_lrmd_cmd(rsc, cmd);
1650 
1651     return call_id;
1652 }
1653 
1654 static int
1655 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1656 {
1657     GList *gIter = NULL;
1658     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1659 
1660     /* How to cancel an action.
1661      * 1. Check pending ops list, if it hasn't been handed off
1662      *    to the service library or stonith recurring list remove
1663      *    it there and that will stop it.
1664      * 2. If it isn't in the pending ops list, then it's either a
1665      *    recurring op in the stonith recurring list, or the service
1666      *    library's recurring list.  Stop it there
1667      * 3. If not found in any lists, then this operation has either
1668      *    been executed already and is not a recurring operation, or
1669      *    never existed.
1670      */
1671     if (!rsc) {
1672         return -ENODEV;
1673     }
1674 
1675     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1676         lrmd_cmd_t *cmd = gIter->data;
1677 
1678         if (action_matches(cmd, action, interval_ms)) {
1679             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1680             cmd_finalize(cmd, rsc);
1681             return pcmk_ok;
1682         }
1683     }
1684 
1685     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1686         /* The service library does not handle stonith operations.
1687          * We have to handle recurring stonith operations ourselves. */
1688         for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1689             lrmd_cmd_t *cmd = gIter->data;
1690 
1691             if (action_matches(cmd, action, interval_ms)) {
1692                 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1693                 if (rsc->active != cmd) {
1694                     cmd_finalize(cmd, rsc);
1695                 }
1696                 return pcmk_ok;
1697             }
1698         }
1699     } else if (services_action_cancel(rsc_id,
1700                                       normalize_action_name(rsc, action),
1701                                       interval_ms) == TRUE) {
1702         /* The service library will tell the action_complete callback function
1703          * this action was cancelled, which will destroy the cmd and remove
1704          * it from the recurring_op list. Do not do that in this function
1705          * if the service library says it cancelled it. */
1706         return pcmk_ok;
1707     }
1708 
1709     return -EOPNOTSUPP;
1710 }
1711 
1712 static void
1713 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1714 {
1715     GList *cmd_list = NULL;
1716     GList *cmd_iter = NULL;
1717 
1718     /* Notice a copy of each list is created when concat is called.
1719      * This prevents odd behavior from occurring when the cmd_list
1720      * is iterated through later on.  It is possible the cancel_op
1721      * function may end up modifying the recurring_ops and pending_ops
1722      * lists.  If we did not copy those lists, our cmd_list iteration
1723      * could get messed up.*/
1724     if (rsc->recurring_ops) {
1725         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1726     }
1727     if (rsc->pending_ops) {
1728         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1729     }
1730     if (!cmd_list) {
1731         return;
1732     }
1733 
1734     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1735         lrmd_cmd_t *cmd = cmd_iter->data;
1736 
1737         if (cmd->interval_ms == 0) {
1738             continue;
1739         }
1740 
1741         if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1742             continue;
1743         }
1744 
1745         cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1746     }
1747     /* frees only the copied list data, not the cmds */
1748     g_list_free(cmd_list);
1749 }
1750 
1751 static int
1752 process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1753 {
1754     xmlNode *rsc_xml = get_xpath_object("//" PCMK__XE_LRMD_RSC, request,
1755                                         LOG_ERR);
1756     const char *rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1757     const char *action = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ACTION);
1758     guint interval_ms = 0;
1759 
1760     crm_element_value_ms(rsc_xml, PCMK__XA_LRMD_RSC_INTERVAL, &interval_ms);
1761 
1762     if (!rsc_id || !action) {
1763         return -EINVAL;
1764     }
1765 
1766     return cancel_op(rsc_id, action, interval_ms);
1767 }
1768 
1769 static void
1770 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1771 {
1772     xmlNode *rsc_xml = pcmk__xe_create(reply, PCMK__XE_LRMD_RSC);
1773 
1774     crm_xml_add(rsc_xml, PCMK__XA_LRMD_RSC_ID, rsc->rsc_id);
1775     for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1776         lrmd_cmd_t *cmd = item->data;
1777         xmlNode *op_xml = pcmk__xe_create(rsc_xml, PCMK__XE_LRMD_RSC_OP);
1778 
1779         crm_xml_add(op_xml, PCMK__XA_LRMD_RSC_ACTION,
1780                     pcmk__s(cmd->real_action, cmd->action));
1781         crm_xml_add_ms(op_xml, PCMK__XA_LRMD_RSC_INTERVAL, cmd->interval_ms);
1782         crm_xml_add_int(op_xml, PCMK__XA_LRMD_TIMEOUT, cmd->timeout_orig);
1783     }
1784 }
1785 
1786 static xmlNode *
1787 process_lrmd_get_recurring(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1788 {
1789     int rc = pcmk_ok;
1790     const char *rsc_id = NULL;
1791     lrmd_rsc_t *rsc = NULL;
1792     xmlNode *reply = NULL;
1793     xmlNode *rsc_xml = NULL;
1794 
1795     // Resource ID is optional
1796     rsc_xml = pcmk__xe_first_child(request, PCMK__XE_LRMD_CALLDATA, NULL, NULL);
1797     if (rsc_xml) {
1798         rsc_xml = pcmk__xe_first_child(rsc_xml, PCMK__XE_LRMD_RSC, NULL, NULL);
1799     }
1800     if (rsc_xml) {
1801         rsc_id = crm_element_value(rsc_xml, PCMK__XA_LRMD_RSC_ID);
1802     }
1803 
1804     // If resource ID is specified, resource must exist
1805     if (rsc_id != NULL) {
1806         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1807         if (rsc == NULL) {
1808             crm_info("Resource '%s' not found (%d active resources)",
1809                      rsc_id, g_hash_table_size(rsc_list));
1810             rc = -ENODEV;
1811         }
1812     }
1813 
1814     reply = create_lrmd_reply(__func__, rc, call_id);
1815 
1816     // If resource ID is not specified, check all resources
1817     if (rsc_id == NULL) {
1818         GHashTableIter iter;
1819         char *key = NULL;
1820 
1821         g_hash_table_iter_init(&iter, rsc_list);
1822         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1823                                       (gpointer *) &rsc)) {
1824             add_recurring_op_xml(reply, rsc);
1825         }
1826     } else if (rsc) {
1827         add_recurring_op_xml(reply, rsc);
1828     }
1829     return reply;
1830 }
1831 
1832 void
1833 process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1834 {
1835     int rc = pcmk_ok;
1836     int call_id = 0;
1837     const char *op = crm_element_value(request, PCMK__XA_LRMD_OP);
1838     int do_reply = 0;
1839     int do_notify = 0;
1840     xmlNode *reply = NULL;
1841 
1842     /* Certain IPC commands may be done only by privileged users (i.e. root or
1843      * hacluster), because they would otherwise provide a means of bypassing
1844      * ACLs.
1845      */
1846     bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
1847 
1848     crm_trace("Processing %s operation from %s", op, client->id);
1849     crm_element_value_int(request, PCMK__XA_LRMD_CALLID, &call_id);
1850 
1851     if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
1852 #ifdef PCMK__COMPILE_REMOTE
1853         if (allowed) {
1854             ipc_proxy_forward_client(client, request);
1855         } else {
1856             rc = -EACCES;
1857         }
1858 #else
1859         rc = -EPROTONOSUPPORT;
1860 #endif
1861         do_reply = 1;
1862     } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
1863         rc = process_lrmd_signon(client, request, call_id, &reply);
1864         do_reply = 1;
1865     } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
1866         if (allowed) {
1867             rc = process_lrmd_rsc_register(client, id, request);
1868             do_notify = 1;
1869         } else {
1870             rc = -EACCES;
1871         }
1872         do_reply = 1;
1873     } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
1874         if (allowed) {
1875             reply = process_lrmd_get_rsc_info(request, call_id);
1876         } else {
1877             rc = -EACCES;
1878         }
1879         do_reply = 1;
1880     } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
1881         if (allowed) {
1882             rc = process_lrmd_rsc_unregister(client, id, request);
1883             /* don't notify anyone about failed un-registers */
1884             if (rc == pcmk_ok || rc == -EINPROGRESS) {
1885                 do_notify = 1;
1886             }
1887         } else {
1888             rc = -EACCES;
1889         }
1890         do_reply = 1;
1891     } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
1892         if (allowed) {
1893             rc = process_lrmd_rsc_exec(client, id, request);
1894         } else {
1895             rc = -EACCES;
1896         }
1897         do_reply = 1;
1898     } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
1899         if (allowed) {
1900             rc = process_lrmd_rsc_cancel(client, id, request);
1901         } else {
1902             rc = -EACCES;
1903         }
1904         do_reply = 1;
1905     } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
1906         do_notify = 1;
1907         do_reply = 1;
1908     } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
1909         if (allowed) {
1910             xmlNode *wrapper = pcmk__xe_first_child(request,
1911                                                     PCMK__XE_LRMD_CALLDATA,
1912                                                     NULL, NULL);
1913             xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
1914 
1915             const char *timeout = NULL;
1916 
1917             CRM_LOG_ASSERT(data != NULL);
1918             timeout = crm_element_value(data, PCMK__XA_LRMD_WATCHDOG);
1919             pcmk__valid_stonith_watchdog_timeout(timeout);
1920         } else {
1921             rc = -EACCES;
1922         }
1923     } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
1924         if (allowed) {
1925             rc = process_lrmd_alert_exec(client, id, request);
1926         } else {
1927             rc = -EACCES;
1928         }
1929         do_reply = 1;
1930     } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
1931         if (allowed) {
1932             reply = process_lrmd_get_recurring(request, call_id);
1933         } else {
1934             rc = -EACCES;
1935         }
1936         do_reply = 1;
1937     } else {
1938         rc = -EOPNOTSUPP;
1939         do_reply = 1;
1940         crm_err("Unknown IPC request '%s' from client %s",
1941                 op, pcmk__client_name(client));
1942     }
1943 
1944     if (rc == -EACCES) {
1945         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
1946                  op, pcmk__client_name(client));
1947     }
1948 
1949     crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
1950               op, client->id, rc, do_reply, do_notify);
1951 
1952     if (do_reply) {
1953         int send_rc = pcmk_rc_ok;
1954 
1955         if (reply == NULL) {
1956             reply = create_lrmd_reply(__func__, rc, call_id);
1957         }
1958         send_rc = lrmd_server_send_reply(client, id, reply);
1959         free_xml(reply);
1960         if (send_rc != pcmk_rc_ok) {
1961             crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
1962                      pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
1963         }
1964     }
1965 
1966     if (do_notify) {
1967         send_generic_notify(rc, request);
1968     }
1969 }

/* [previous][next][first][last][top][bottom][index][help] */