root/daemons/execd/execd_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. time_is_set
  2. get_current_time
  3. time_diff_ms
  4. cmd_original_times
  5. action_matches
  6. log_finished
  7. log_execute
  8. normalize_action_name
  9. build_rsc_from_xml
  10. create_lrmd_cmd
  11. stop_recurring_timer
  12. free_lrmd_cmd
  13. stonith_recurring_op_helper
  14. start_recurring_timer
  15. start_delay_helper
  16. find_duplicate_action
  17. merge_recurring_duplicate
  18. schedule_lrmd_cmd
  19. create_lrmd_reply
  20. send_client_notify
  21. send_cmd_complete_notify
  22. send_generic_notify
  23. cmd_reset
  24. cmd_finalize
  25. stonith2uniform_rc
  26. action_get_uniform_rc
  27. notify_one_client
  28. notify_of_new_client
  29. client_disconnect_cleanup
  30. action_complete
  31. stonith_rc2status
  32. stonith_action_complete
  33. lrmd_stonith_callback
  34. stonith_connection_failed
  35. execd_stonith_start
  36. execd_stonith_stop
  37. execd_stonith_monitor
  38. lrmd_rsc_execute_stonith
  39. lrmd_rsc_execute_service_lib
  40. lrmd_rsc_execute
  41. lrmd_rsc_dispatch
  42. free_rsc
  43. process_lrmd_signon
  44. process_lrmd_rsc_register
  45. process_lrmd_get_rsc_info
  46. process_lrmd_rsc_unregister
  47. process_lrmd_rsc_exec
  48. cancel_op
  49. cancel_all_recurring
  50. process_lrmd_rsc_cancel
  51. add_recurring_op_xml
  52. process_lrmd_get_recurring
  53. process_lrmd_message

   1 /*
   2  * Copyright 2012-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
#include <crm_internal.h>

#include <glib.h>

// Check whether we have a high-resolution monotonic clock
#undef PCMK__TIME_USE_CGT
#if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
#  define PCMK__TIME_USE_CGT
#  include <time.h>  /* clock_gettime */
#endif

#include <limits.h>  /* INT_MAX, INT_MIN */
#include <unistd.h>

#include <crm/crm.h>
#include <crm/services.h>
#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/msg_xml.h>

#include "pacemaker-execd.h"
  32 
  33 GHashTable *rsc_list = NULL;
  34 
  35 typedef struct lrmd_cmd_s {
  36     int timeout;
  37     guint interval_ms;
  38     int start_delay;
  39     int timeout_orig;
  40 
  41     int call_id;
  42 
  43     int call_opts;
  44     /* Timer ids, must be removed on cmd destruction. */
  45     int delay_id;
  46     int stonith_recurring_id;
  47 
  48     int rsc_deleted;
  49 
  50     int service_flags;
  51 
  52     char *client_id;
  53     char *origin;
  54     char *rsc_id;
  55     char *action;
  56     char *real_action;
  57     char *userdata_str;
  58 
  59     pcmk__action_result_t result;
  60 
  61     /* We can track operation queue time and run time, to be saved with the CIB
  62      * resource history (and displayed in cluster status). We need
  63      * high-resolution monotonic time for this purpose, so we use
  64      * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
  65      * is disabled).
  66      *
  67      * However, we also need epoch timestamps for recording the time the command
  68      * last ran and the time its return value last changed, for use in time
  69      * displays (as opposed to interval calculations). We keep time_t values for
  70      * this purpose.
  71      *
  72      * The last run time is used for both purposes, so we keep redundant
  73      * monotonic and epoch values for this. Technically the two could represent
  74      * different times, but since time_t has only second resolution and the
  75      * values are used for distinct purposes, that is not significant.
  76      */
  77 #ifdef PCMK__TIME_USE_CGT
  78     /* Recurring and systemd operations may involve more than one executor
  79      * command per operation, so they need info about the original and the most
  80      * recent.
  81      */
  82     struct timespec t_first_run;    // When op first ran
  83     struct timespec t_run;          // When op most recently ran
  84     struct timespec t_first_queue;  // When op was first queued
  85     struct timespec t_queue;        // When op was most recently queued
  86 #endif
  87     time_t epoch_last_run;          // Epoch timestamp of when op last ran
  88     time_t epoch_rcchange;          // Epoch timestamp of when rc last changed
  89 
  90     bool first_notify_sent;
  91     int last_notify_rc;
  92     int last_notify_op_status;
  93     int last_pid;
  94 
  95     GHashTable *params;
  96 } lrmd_cmd_t;
  97 
  98 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
  99 static gboolean lrmd_rsc_dispatch(gpointer user_data);
 100 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
 101 
 102 #ifdef PCMK__TIME_USE_CGT
 103 
 104 /*!
 105  * \internal
 106  * \brief Check whether a struct timespec has been set
 107  *
 108  * \param[in] timespec  Time to check
 109  *
 110  * \return true if timespec has been set (i.e. is nonzero), false otherwise
 111  */
 112 static inline bool
 113 time_is_set(struct timespec *timespec)
     /* [previous][next][first][last][top][bottom][index][help] */
 114 {
 115     return (timespec != NULL) &&
 116            ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
 117 }
 118 
 119 /*
 120  * \internal
 121  * \brief Set a timespec (and its original if unset) to the current time
 122  *
 123  * \param[out] t_current  Where to store current time
 124  * \param[out] t_orig     Where to copy t_current if unset
 125  */
 126 static void
 127 get_current_time(struct timespec *t_current, struct timespec *t_orig)
     /* [previous][next][first][last][top][bottom][index][help] */
 128 {
 129     clock_gettime(CLOCK_MONOTONIC, t_current);
 130     if ((t_orig != NULL) && !time_is_set(t_orig)) {
 131         *t_orig = *t_current;
 132     }
 133 }
 134 
 135 /*!
 136  * \internal
 137  * \brief Return difference between two times in milliseconds
 138  *
 139  * \param[in] now  More recent time (or NULL to use current time)
 140  * \param[in] old  Earlier time
 141  *
 142  * \return milliseconds difference (or 0 if old is NULL or unset)
 143  *
 144  * \note Can overflow on 32bit machines when the differences is around
 145  *       24 days or more.
 146  */
 147 static int
 148 time_diff_ms(struct timespec *now, struct timespec *old)
     /* [previous][next][first][last][top][bottom][index][help] */
 149 {
 150     int diff_ms = 0;
 151 
 152     if (time_is_set(old)) {
 153         struct timespec local_now = { 0, };
 154 
 155         if (now == NULL) {
 156             clock_gettime(CLOCK_MONOTONIC, &local_now);
 157             now = &local_now;
 158         }
 159         diff_ms = (now->tv_sec - old->tv_sec) * 1000
 160                   + (now->tv_nsec - old->tv_nsec) / 1000000;
 161     }
 162     return diff_ms;
 163 }
 164 
 165 /*!
 166  * \internal
 167  * \brief Reset a command's operation times to their original values.
 168  *
 169  * Reset a command's run and queued timestamps to the timestamps of the original
 170  * command, so we report the entire time since then and not just the time since
 171  * the most recent command (for recurring and systemd operations).
 172  *
 173  * \param[in] cmd  Executor command object to reset
 174  *
 175  * \note It's not obvious what the queued time should be for a systemd
 176  *       start/stop operation, which might go like this:
 177  *         initial command queued 5ms, runs 3s
 178  *         monitor command queued 10ms, runs 10s
 179  *         monitor command queued 10ms, runs 10s
 180  *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 181  *       implementation will report 5ms. If it's 25ms, then we need to
 182  *       subtract 20ms from the total exec time so as not to count it twice.
 183  *       We can implement that later if it matters to anyone ...
 184  */
 185 static void
 186 cmd_original_times(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 187 {
 188     cmd->t_run = cmd->t_first_run;
 189     cmd->t_queue = cmd->t_first_queue;
 190 }
 191 #endif
 192 
 193 static inline bool
 194 action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 195 {
 196     return (cmd->interval_ms == interval_ms)
 197            && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
 198 }
 199 
 200 /*!
 201  * \internal
 202  * \brief Log the result of an asynchronous command
 203  *
 204  * \param[in] cmd            Command to log result for
 205  * \param[in] exec_time_ms   Execution time in milliseconds, if known
 206  * \param[in] queue_time_ms  Queue time in milliseconds, if known
 207  */
 208 static void
 209 log_finished(lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 210 {
 211     int log_level = LOG_INFO;
 212     GString *str = g_string_sized_new(100); // reasonable starting size
 213 
 214     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 215         log_level = LOG_DEBUG;
 216     }
 217 
 218     g_string_printf(str, "%s %s (call %d",
 219                     cmd->rsc_id, cmd->action, cmd->call_id);
 220     if (cmd->last_pid != 0) {
 221         g_string_append_printf(str, ", PID %d", cmd->last_pid);
 222     }
 223     if (cmd->result.execution_status == PCMK_EXEC_DONE) {
 224         g_string_append_printf(str, ") exited with status %d",
 225                                cmd->result.exit_status);
 226     } else {
 227         g_string_append_printf(str, ") could not be executed: %s",
 228                                pcmk_exec_status_str(cmd->result.execution_status));
 229     }
 230     if (cmd->result.exit_reason != NULL) {
 231         g_string_append_printf(str, " (%s)", cmd->result.exit_reason);
 232     }
 233 
 234 #ifdef PCMK__TIME_USE_CGT
 235     g_string_append_printf(str, " (execution time %s",
 236                            pcmk__readable_interval(exec_time_ms));
 237     if (queue_time_ms > 0) {
 238         g_string_append_printf(str, " after being queued %s",
 239                                pcmk__readable_interval(queue_time_ms));
 240     }
 241     g_string_append(str, ")");
 242 #endif
 243 
 244     do_crm_log(log_level, "%s", str->str);
 245     g_string_free(str, TRUE);
 246 }
 247 
 248 static void
 249 log_execute(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 250 {
 251     int log_level = LOG_INFO;
 252 
 253     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 254         log_level = LOG_DEBUG;
 255     }
 256 
 257     do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
 258                cmd->rsc_id, cmd->action, cmd->call_id);
 259 }
 260 
 261 static const char *
 262 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 263 {
 264     if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
 265         pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
 266         return "status";
 267     }
 268     return action;
 269 }
 270 
 271 static lrmd_rsc_t *
 272 build_rsc_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 273 {
 274     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 275     lrmd_rsc_t *rsc = NULL;
 276 
 277     rsc = calloc(1, sizeof(lrmd_rsc_t));
 278 
 279     crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
 280 
 281     rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 282     rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
 283     rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
 284     rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
 285     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
 286     rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running"
 287     return rsc;
 288 }
 289 
 290 static lrmd_cmd_t *
 291 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
     /* [previous][next][first][last][top][bottom][index][help] */
 292 {
 293     int call_options = 0;
 294     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 295     lrmd_cmd_t *cmd = NULL;
 296 
 297     cmd = calloc(1, sizeof(lrmd_cmd_t));
 298 
 299     crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
 300     cmd->call_opts = call_options;
 301     cmd->client_id = strdup(client->id);
 302 
 303     crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
 304     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
 305     crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
 306     crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
 307     cmd->timeout_orig = cmd->timeout;
 308 
 309     cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
 310     cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
 311     cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
 312     cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 313 
 314     cmd->params = xml2list(rsc_xml);
 315 
 316     if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
 317         crm_debug("Setting flag to leave pid group on timeout and "
 318                   "only kill action pid for " PCMK__OP_FMT,
 319                   cmd->rsc_id, cmd->action, cmd->interval_ms);
 320         cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
 321                                                 LOG_TRACE, "Action",
 322                                                 cmd->action, 0,
 323                                                 SVC_ACTION_LEAVE_GROUP,
 324                                                 "SVC_ACTION_LEAVE_GROUP");
 325     }
 326     return cmd;
 327 }
 328 
 329 static void
 330 stop_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 331 {
 332     if (cmd) {
 333         if (cmd->stonith_recurring_id) {
 334             g_source_remove(cmd->stonith_recurring_id);
 335         }
 336         cmd->stonith_recurring_id = 0;
 337     }
 338 }
 339 
 340 static void
 341 free_lrmd_cmd(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 342 {
 343     stop_recurring_timer(cmd);
 344     if (cmd->delay_id) {
 345         g_source_remove(cmd->delay_id);
 346     }
 347     if (cmd->params) {
 348         g_hash_table_destroy(cmd->params);
 349     }
 350     pcmk__reset_result(&(cmd->result));
 351     free(cmd->origin);
 352     free(cmd->action);
 353     free(cmd->real_action);
 354     free(cmd->userdata_str);
 355     free(cmd->rsc_id);
 356     free(cmd->client_id);
 357     free(cmd);
 358 }
 359 
 360 static gboolean
 361 stonith_recurring_op_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 362 {
 363     lrmd_cmd_t *cmd = data;
 364     lrmd_rsc_t *rsc;
 365 
 366     cmd->stonith_recurring_id = 0;
 367 
 368     if (!cmd->rsc_id) {
 369         return FALSE;
 370     }
 371 
 372     rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 373 
 374     CRM_ASSERT(rsc != NULL);
 375     /* take it out of recurring_ops list, and put it in the pending ops
 376      * to be executed */
 377     rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 378     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 379 #ifdef PCMK__TIME_USE_CGT
 380     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 381 #endif
 382     mainloop_set_trigger(rsc->work);
 383 
 384     return FALSE;
 385 }
 386 
 387 static inline void
 388 start_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 389 {
 390     if (cmd && (cmd->interval_ms > 0)) {
 391         cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
 392                                                   stonith_recurring_op_helper,
 393                                                   cmd);
 394     }
 395 }
 396 
 397 static gboolean
 398 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 399 {
 400     lrmd_cmd_t *cmd = data;
 401     lrmd_rsc_t *rsc = NULL;
 402 
 403     cmd->delay_id = 0;
 404     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 405 
 406     if (rsc) {
 407         mainloop_set_trigger(rsc->work);
 408     }
 409 
 410     return FALSE;
 411 }
 412 
 413 /*!
 414  * \internal
 415  * \brief Check whether a list already contains the equivalent of a given action
 416  */
 417 static lrmd_cmd_t *
 418 find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 419 {
 420     for (GList *item = action_list; item != NULL; item = item->next) {
 421         lrmd_cmd_t *dup = item->data;
 422 
 423         if (action_matches(cmd, dup->action, dup->interval_ms)) {
 424             return dup;
 425         }
 426     }
 427     return NULL;
 428 }
 429 
 430 static bool
 431 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 432 {
 433     lrmd_cmd_t * dup = NULL;
 434     bool dup_pending = true;
 435 
 436     if (cmd->interval_ms == 0) {
 437         return false;
 438     }
 439 
 440     // Search for a duplicate of this action (in-flight or not)
 441     dup = find_duplicate_action(rsc->pending_ops, cmd);
 442     if (dup == NULL) {
 443         dup_pending = false;
 444         dup = find_duplicate_action(rsc->recurring_ops, cmd);
 445         if (dup == NULL) {
 446             return false;
 447         }
 448     }
 449 
 450     /* Do not merge fencing monitors marked for cancellation, so we can reply to
 451      * the cancellation separately.
 452      */
 453     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 454                      pcmk__str_casei)
 455         && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) {
 456         return false;
 457     }
 458 
 459     /* This should not occur. If it does, we need to investigate how something
 460      * like this is possible in the controller.
 461      */
 462     crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
 463              "), merging with previous op entry",
 464              rsc->rsc_id, normalize_action_name(rsc, dup->action),
 465              dup->interval_ms);
 466 
 467     // Merge new action's call ID and user data into existing action
 468     dup->first_notify_sent = false;
 469     free(dup->userdata_str);
 470     dup->userdata_str = cmd->userdata_str;
 471     cmd->userdata_str = NULL;
 472     dup->call_id = cmd->call_id;
 473     free_lrmd_cmd(cmd);
 474     cmd = NULL;
 475 
 476     /* If dup is not pending, that means it has already executed at least once
 477      * and is waiting in the interval. In that case, stop waiting and initiate
 478      * a new instance now.
 479      */
 480     if (!dup_pending) {
 481         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 482                          pcmk__str_casei)) {
 483             stop_recurring_timer(dup);
 484             stonith_recurring_op_helper(dup);
 485         } else {
 486             services_action_kick(rsc->rsc_id,
 487                                  normalize_action_name(rsc, dup->action),
 488                                  dup->interval_ms);
 489         }
 490     }
 491     return true;
 492 }
 493 
 494 static void
 495 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 496 {
 497     CRM_CHECK(cmd != NULL, return);
 498     CRM_CHECK(rsc != NULL, return);
 499 
 500     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
 501 
 502     if (merge_recurring_duplicate(rsc, cmd)) {
 503         // Equivalent of cmd has already been scheduled
 504         return;
 505     }
 506 
 507     /* The controller expects the executor to automatically cancel
 508      * recurring operations before a resource stops.
 509      */
 510     if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 511         cancel_all_recurring(rsc, NULL);
 512     }
 513 
 514     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 515 #ifdef PCMK__TIME_USE_CGT
 516     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 517 #endif
 518     mainloop_set_trigger(rsc->work);
 519 
 520     if (cmd->start_delay) {
 521         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
 522     }
 523 }
 524 
 525 static xmlNode *
 526 create_lrmd_reply(const char *origin, int rc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 527 {
 528     xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
 529 
 530     crm_xml_add(reply, F_LRMD_ORIGIN, origin);
 531     crm_xml_add_int(reply, F_LRMD_RC, rc);
 532     crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
 533     return reply;
 534 }
 535 
 536 static void
 537 send_client_notify(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 538 {
 539     xmlNode *update_msg = user_data;
 540     pcmk__client_t *client = value;
 541     int rc;
 542     int log_level = LOG_WARNING;
 543     const char *msg = NULL;
 544 
 545     CRM_CHECK(client != NULL, return);
 546     if (client->name == NULL) {
 547         crm_trace("Skipping notification to client without name");
 548         return;
 549     }
 550     if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
 551         /* We only want to notify clients of the executor IPC API. If we are
 552          * running as Pacemaker Remote, we may have clients proxied to other
 553          * IPC services in the cluster, so skip those.
 554          */
 555         crm_trace("Skipping executor API notification to client %s",
 556                   pcmk__client_name(client));
 557         return;
 558     }
 559 
 560     rc = lrmd_server_send_notify(client, update_msg);
 561     if (rc == pcmk_rc_ok) {
 562         return;
 563     }
 564 
 565     switch (rc) {
 566         case ENOTCONN:
 567         case EPIPE: // Client exited without waiting for notification
 568             log_level = LOG_INFO;
 569             msg = "Disconnected";
 570             break;
 571 
 572         default:
 573             msg = pcmk_rc_str(rc);
 574             break;
 575     }
 576     do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
 577                pcmk__client_name(client), msg, rc);
 578 }
 579 
 580 static void
 581 send_cmd_complete_notify(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 582 {
 583     xmlNode *notify = NULL;
 584     int exec_time = 0;
 585     int queue_time = 0;
 586 
 587 #ifdef PCMK__TIME_USE_CGT
 588     exec_time = time_diff_ms(NULL, &(cmd->t_run));
 589     queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
 590 #endif
 591     log_finished(cmd, exec_time, queue_time);
 592 
 593     /* if the first notify result for a cmd has already been sent earlier, and the
 594      * the option to only send notifies on result changes is set. Check to see
 595      * if the last result is the same as the new one. If so, suppress this update */
 596     if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
 597         if ((cmd->last_notify_rc == cmd->result.exit_status) &&
 598             (cmd->last_notify_op_status == cmd->result.execution_status)) {
 599 
 600             /* only send changes */
 601             return;
 602         }
 603 
 604     }
 605 
 606     cmd->first_notify_sent = true;
 607     cmd->last_notify_rc = cmd->result.exit_status;
 608     cmd->last_notify_op_status = cmd->result.execution_status;
 609 
 610     notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 611 
 612     crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 613     crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
 614     crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
 615     crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
 616     crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status);
 617     crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status);
 618     crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
 619     crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
 620 
 621     crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
 622                    (long long) cmd->epoch_last_run);
 623     crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
 624                    (long long) cmd->epoch_rcchange);
 625 #ifdef PCMK__TIME_USE_CGT
 626     crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
 627     crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
 628 #endif
 629 
 630     crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
 631     crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
 632     if(cmd->real_action) {
 633         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
 634     } else {
 635         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
 636     }
 637     crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
 638     crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason);
 639 
 640     if (cmd->result.action_stderr != NULL) {
 641         crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr);
 642 
 643     } else if (cmd->result.action_stdout != NULL) {
 644         crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout);
 645     }
 646 
 647     if (cmd->params) {
 648         char *key = NULL;
 649         char *value = NULL;
 650         GHashTableIter iter;
 651 
 652         xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
 653 
 654         g_hash_table_iter_init(&iter, cmd->params);
 655         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 656             hash2smartfield((gpointer) key, (gpointer) value, args);
 657         }
 658     }
 659     if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
 660         pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
 661 
 662         if (client) {
 663             send_client_notify(client->id, client, notify);
 664         }
 665     } else {
 666         pcmk__foreach_ipc_client(send_client_notify, notify);
 667     }
 668 
 669     free_xml(notify);
 670 }
 671 
 672 static void
 673 send_generic_notify(int rc, xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
 674 {
 675     if (pcmk__ipc_client_count() != 0) {
 676         int call_id = 0;
 677         xmlNode *notify = NULL;
 678         xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
 679         const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
 680         const char *op = crm_element_value(request, F_LRMD_OPERATION);
 681 
 682         crm_element_value_int(request, F_LRMD_CALLID, &call_id);
 683 
 684         notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 685         crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 686         crm_xml_add_int(notify, F_LRMD_RC, rc);
 687         crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
 688         crm_xml_add(notify, F_LRMD_OPERATION, op);
 689         crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
 690 
 691         pcmk__foreach_ipc_client(send_client_notify, notify);
 692 
 693         free_xml(notify);
 694     }
 695 }
 696 
 697 static void
 698 cmd_reset(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 699 {
 700     cmd->last_pid = 0;
 701 #ifdef PCMK__TIME_USE_CGT
 702     memset(&cmd->t_run, 0, sizeof(cmd->t_run));
 703     memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
 704 #endif
 705     cmd->epoch_last_run = 0;
 706 
 707     pcmk__reset_result(&(cmd->result));
 708     cmd->result.execution_status = PCMK_EXEC_DONE;
 709 }
 710 
 711 static void
 712 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 713 {
 714     crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
 715               rsc ? rsc->active : NULL, cmd);
 716 
 717     if (rsc && (rsc->active == cmd)) {
 718         rsc->active = NULL;
 719         mainloop_set_trigger(rsc->work);
 720     }
 721 
 722     if (!rsc) {
 723         cmd->rsc_deleted = 1;
 724     }
 725 
 726     /* reset original timeout so client notification has correct information */
 727     cmd->timeout = cmd->timeout_orig;
 728 
 729     send_cmd_complete_notify(cmd);
 730 
 731     if ((cmd->interval_ms != 0)
 732         && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) {
 733 
 734         if (rsc) {
 735             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 736             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 737         }
 738         free_lrmd_cmd(cmd);
 739     } else if (cmd->interval_ms == 0) {
 740         if (rsc) {
 741             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 742         }
 743         free_lrmd_cmd(cmd);
 744     } else {
 745         /* Clear all the values pertaining just to the last iteration of a recurring op. */
 746         cmd_reset(cmd);
 747     }
 748 }
 749 
 750 static int
 751 stonith2uniform_rc(const char *action, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 752 {
 753     switch (rc) {
 754         case pcmk_ok:
 755             rc = PCMK_OCF_OK;
 756             break;
 757 
 758         case -ENODEV:
 759             /* This should be possible only for probes in practice, but
 760              * interpret for all actions to be safe.
 761              */
 762             if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
 763                 rc = PCMK_OCF_NOT_RUNNING;
 764             } else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) {
 765                 rc = PCMK_OCF_OK;
 766             } else {
 767                 rc = PCMK_OCF_NOT_INSTALLED;
 768             }
 769             break;
 770 
 771         case -EOPNOTSUPP:
 772             rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
 773             break;
 774 
 775         default:
 776             rc = PCMK_OCF_UNKNOWN_ERROR;
 777             break;
 778     }
 779     return rc;
 780 }
 781 
 782 static int
 783 action_get_uniform_rc(svc_action_t *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 784 {
 785     lrmd_cmd_t *cmd = action->cb_data;
 786 
 787     if (pcmk__str_eq(action->standard, PCMK_RESOURCE_CLASS_STONITH,
 788                             pcmk__str_casei)) {
 789         return stonith2uniform_rc(cmd->action, action->rc);
 790     } else {
 791         enum ocf_exitcode code = services_result2ocf(action->standard,
 792                                                      cmd->action, action->rc);
 793 
 794         // Cast variable instead of function return to keep compilers happy
 795         return (int) code;
 796     }
 797 }
 798 
 799 struct notify_new_client_data {
 800     xmlNode *notify;
 801     pcmk__client_t *new_client;
 802 };
 803 
 804 static void
 805 notify_one_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 806 {
 807     pcmk__client_t *client = value;
 808     struct notify_new_client_data *data = user_data;
 809 
 810     if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
 811         send_client_notify(key, (gpointer) client, (gpointer) data->notify);
 812     }
 813 }
 814 
 815 void
 816 notify_of_new_client(pcmk__client_t *new_client)
     /* [previous][next][first][last][top][bottom][index][help] */
 817 {
 818     struct notify_new_client_data data;
 819 
 820     data.new_client = new_client;
 821     data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 822     crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
 823     crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
 824     pcmk__foreach_ipc_client(notify_one_client, &data);
 825     free_xml(data.notify);
 826 }
 827 
 828 void
 829 client_disconnect_cleanup(const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 830 {
 831     GHashTableIter iter;
 832     lrmd_rsc_t *rsc = NULL;
 833     char *key = NULL;
 834 
 835     g_hash_table_iter_init(&iter, rsc_list);
 836     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
 837         if (rsc->call_opts & lrmd_opt_drop_recurring) {
 838             /* This client is disconnecting, drop any recurring operations
 839              * it may have initiated on the resource */
 840             cancel_all_recurring(rsc, client_id);
 841         }
 842     }
 843 }
 844 
 845 static void
 846 action_complete(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 847 {
 848     lrmd_rsc_t *rsc;
 849     lrmd_cmd_t *cmd = action->cb_data;
 850 
 851 #ifdef PCMK__TIME_USE_CGT
 852     const char *rclass = NULL;
 853     bool goagain = false;
 854 #endif
 855 
 856     if (!cmd) {
 857         crm_err("Completed executor action (%s) does not match any known operations",
 858                 action->id);
 859         return;
 860     }
 861 
 862 #ifdef PCMK__TIME_USE_CGT
 863     if (cmd->result.exit_status != action->rc) {
 864         cmd->epoch_rcchange = time(NULL);
 865     }
 866 #endif
 867 
 868     cmd->last_pid = action->pid;
 869     pcmk__set_result(&(cmd->result), action_get_uniform_rc(action),
 870                      action->status, services__exit_reason(action));
 871     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 872 
 873 #ifdef PCMK__TIME_USE_CGT
 874     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
 875         rclass = resources_find_service_class(rsc->type);
 876     } else if(rsc) {
 877         rclass = rsc->class;
 878     }
 879 
 880     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 881         if ((cmd->result.exit_status == PCMK_OCF_OK)
 882             && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
 883             /* systemd returns from start and stop actions after the action
 884              * begins, not after it completes. We have to jump through a few
 885              * hoops so that we don't report 'complete' to the rest of pacemaker
 886              * until it's actually done.
 887              */
 888             goagain = true;
 889             cmd->real_action = cmd->action;
 890             cmd->action = strdup("monitor");
 891 
 892         } else if (cmd->real_action != NULL) {
 893             // This is follow-up monitor to check whether start/stop completed
 894             if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 895                 goagain = true;
 896 
 897             } else if ((cmd->result.exit_status == PCMK_OCF_OK)
 898                        && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 899                 goagain = true;
 900 
 901             } else {
 902                 int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 903                 int timeout_left = cmd->timeout_orig - time_sum;
 904 
 905                 crm_debug("%s systemd %s is now complete (elapsed=%dms, "
 906                           "remaining=%dms): %s (%d)",
 907                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
 908                           services_ocf_exitcode_str(cmd->result.exit_status),
 909                           cmd->result.exit_status);
 910                 cmd_original_times(cmd);
 911 
 912                 // Monitors may return "not running", but start/stop shouldn't
 913                 if ((cmd->result.execution_status == PCMK_EXEC_DONE)
 914                     && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) {
 915 
 916                     if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
 917                         cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR;
 918                     } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 919                         cmd->result.exit_status = PCMK_OCF_OK;
 920                     }
 921                 }
 922             }
 923         }
 924     }
 925 #endif
 926 
 927 #if SUPPORT_NAGIOS
 928     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
 929         if (action_matches(cmd, "monitor", 0)
 930             && (cmd->result.exit_status == PCMK_OCF_OK)) {
 931             /* Successfully executed --version for the nagios plugin */
 932             cmd->result.exit_status = PCMK_OCF_NOT_RUNNING;
 933 
 934         } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)
 935                    && (cmd->result.exit_status != PCMK_OCF_OK)) {
 936 #ifdef PCMK__TIME_USE_CGT
 937             goagain = true;
 938 #endif
 939         }
 940     }
 941 #endif
 942 
 943 #ifdef PCMK__TIME_USE_CGT
 944     if (goagain) {
 945         int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 946         int timeout_left = cmd->timeout_orig - time_sum;
 947         int delay = cmd->timeout_orig / 10;
 948 
 949         if(delay >= timeout_left && timeout_left > 20) {
 950             delay = timeout_left/2;
 951         }
 952 
 953         delay = QB_MIN(2000, delay);
 954         if (delay < timeout_left) {
 955             cmd->start_delay = delay;
 956             cmd->timeout = timeout_left;
 957 
 958             if (cmd->result.exit_status == PCMK_OCF_OK) {
 959                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 960                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
 961 
 962             } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) {
 963                 crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 964                          cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
 965 
 966             } else {
 967                 crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
 968                            cmd->rsc_id, cmd->action,
 969                            services_ocf_exitcode_str(cmd->result.exit_status),
 970                            cmd->result.exit_status, time_sum, timeout_left,
 971                            delay);
 972             }
 973 
 974             cmd_reset(cmd);
 975             if(rsc) {
 976                 rsc->active = NULL;
 977             }
 978             schedule_lrmd_cmd(rsc, cmd);
 979 
 980             /* Don't finalize cmd, we're not done with it yet */
 981             return;
 982 
 983         } else {
 984             crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
 985                        cmd->rsc_id,
 986                        (cmd->real_action? cmd->real_action : cmd->action),
 987                        cmd->result.exit_status, time_sum, timeout_left);
 988             pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 989                              PCMK_EXEC_TIMEOUT,
 990                              "Investigate reason for timeout, and adjust "
 991                              "configured operation timeout if necessary");
 992             cmd_original_times(cmd);
 993         }
 994     }
 995 #endif
 996 
 997     pcmk__set_result_output(&(cmd->result), services__grab_stdout(action),
 998                             services__grab_stderr(action));
 999     cmd_finalize(cmd, rsc);
1000 }
1001 
1002 /*!
1003  * \internal
1004  * \brief Determine operation status of a stonith operation
1005  *
1006  * Non-stonith resource operations get their operation status directly from the
1007  * service library, but the fencer does not have an equivalent, so we must infer
1008  * an operation status from the fencer API's return code.
1009  *
1010  * \param[in] action       Name of action performed on stonith resource
1011  * \param[in] interval_ms  Action interval
1012  * \param[in] rc           Action result from fencer
1013  *
1014  * \return Operation status corresponding to fencer API return code
1015  */
1016 static int
1017 stonith_rc2status(const char *action, guint interval_ms, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
1018 {
1019     int status = PCMK_EXEC_DONE;
1020 
1021     switch (rc) {
1022         case pcmk_ok:
1023             break;
1024 
1025         case -EOPNOTSUPP:
1026         case -EPROTONOSUPPORT:
1027             status = PCMK_EXEC_NOT_SUPPORTED;
1028             break;
1029 
1030         case -ETIME:
1031         case -ETIMEDOUT:
1032             status = PCMK_EXEC_TIMEOUT;
1033             break;
1034 
1035         case -ENOTCONN:
1036         case -ECOMM:
1037             // Couldn't talk to fencer
1038             status = PCMK_EXEC_ERROR;
1039             break;
1040 
1041         case -ENODEV:
1042             // The device is not registered with the fencer
1043             status = PCMK_EXEC_ERROR;
1044             break;
1045 
1046         default:
1047             break;
1048     }
1049     return status;
1050 }
1051 
1052 static void
1053 stonith_action_complete(lrmd_cmd_t * cmd, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
1054 {
1055     // This can be NULL if resource was removed before command completed
1056     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
1057 
1058     cmd->result.exit_status = stonith2uniform_rc(cmd->action, rc);
1059 
1060     /* This function may be called with status already set to cancelled, if a
1061      * pending action was aborted. Otherwise, we need to determine status from
1062      * the fencer return code.
1063      */
1064     if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) {
1065         cmd->result.execution_status = stonith_rc2status(cmd->action,
1066                                                          cmd->interval_ms, rc);
1067 
1068         // Certain successful actions change the known state of the resource
1069         if ((rsc != NULL) && (cmd->result.exit_status == PCMK_OCF_OK)) {
1070             if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1071                 rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
1072             } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1073                 rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
1074             }
1075         }
1076     }
1077 
1078     // Give the user more detail than an OCF code
1079     if (rc != -pcmk_err_generic) {
1080         cmd->result.exit_reason = strdup(pcmk_strerror(rc));
1081     }
1082 
1083     /* The recurring timer should not be running at this point in any case, but
1084      * as a failsafe, stop it if it is.
1085      */
1086     stop_recurring_timer(cmd);
1087 
1088     /* Reschedule this command if appropriate. If a recurring command is *not*
1089      * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will
1090      * not be removed from recurring_ops by cmd_finalize().
1091      */
1092     if (rsc && (cmd->interval_ms > 0)
1093         && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) {
1094         start_recurring_timer(cmd);
1095     }
1096 
1097     cmd_finalize(cmd, rsc);
1098 }
1099 
1100 static void
1101 lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
1102 {
1103     stonith_action_complete(data->userdata, data->rc);
1104 }
1105 
1106 void
1107 stonith_connection_failed(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1108 {
1109     GHashTableIter iter;
1110     GList *cmd_list = NULL;
1111     GList *cmd_iter = NULL;
1112     lrmd_rsc_t *rsc = NULL;
1113     char *key = NULL;
1114 
1115     g_hash_table_iter_init(&iter, rsc_list);
1116     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
1117         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1118             /* If we registered this fence device, we don't know whether the
1119              * fencer still has the registration or not. Cause future probes to
1120              * return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or
1121              * started successfully. This is especially important if the
1122              * controller also went away (possibly due to a cluster layer
1123              * restart) and won't receive our client notification of any
1124              * monitors finalized below.
1125              */
1126             if (rsc->st_probe_rc == pcmk_ok) {
1127                 rsc->st_probe_rc = pcmk_err_generic;
1128             }
1129 
1130             if (rsc->active) {
1131                 cmd_list = g_list_append(cmd_list, rsc->active);
1132             }
1133             if (rsc->recurring_ops) {
1134                 cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
1135             }
1136             if (rsc->pending_ops) {
1137                 cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
1138             }
1139             rsc->pending_ops = rsc->recurring_ops = NULL;
1140         }
1141     }
1142 
1143     if (!cmd_list) {
1144         return;
1145     }
1146 
1147     crm_err("Connection to fencer failed, finalizing %d pending operations",
1148             g_list_length(cmd_list));
1149     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1150         stonith_action_complete(cmd_iter->data, -ENOTCONN);
1151     }
1152     g_list_free(cmd_list);
1153 }
1154 
1155 /*!
1156  * \internal
1157  * \brief Execute a stonith resource "start" action
1158  *
1159  * Start a stonith resource by registering it with the fencer.
1160  * (Stonith agents don't have a start command.)
1161  *
1162  * \param[in] stonith_api  Connection to fencer
1163  * \param[in] rsc          Stonith resource to start
1164  * \param[in] cmd          Start command to execute
1165  *
1166  * \return pcmk_ok on success, -errno otherwise
1167  */
1168 static int
1169 execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1170 {
1171     char *key = NULL;
1172     char *value = NULL;
1173     stonith_key_value_t *device_params = NULL;
1174     int rc = pcmk_ok;
1175 
1176     // Convert command parameters to stonith API key/values
1177     if (cmd->params) {
1178         GHashTableIter iter;
1179 
1180         g_hash_table_iter_init(&iter, cmd->params);
1181         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1182             device_params = stonith_key_value_add(device_params, key, value);
1183         }
1184     }
1185 
1186     /* The fencer will automatically register devices via CIB notifications
1187      * when the CIB changes, but to avoid a possible race condition between
1188      * the fencer receiving the notification and the executor requesting that
1189      * resource, the executor registers the device as well. The fencer knows how
1190      * to handle duplicate registrations.
1191      */
1192     rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
1193                                             cmd->rsc_id, rsc->provider,
1194                                             rsc->type, device_params);
1195 
1196     stonith_key_value_freeall(device_params, 1, 1);
1197     return rc;
1198 }
1199 
1200 /*!
1201  * \internal
1202  * \brief Execute a stonith resource "stop" action
1203  *
1204  * Stop a stonith resource by unregistering it with the fencer.
1205  * (Stonith agents don't have a stop command.)
1206  *
1207  * \param[in] stonith_api  Connection to fencer
1208  * \param[in] rsc          Stonith resource to stop
1209  *
1210  * \return pcmk_ok on success, -errno otherwise
1211  */
1212 static inline int
1213 execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1214 {
1215     /* @TODO Failure would indicate a problem communicating with fencer;
1216      * perhaps we should try reconnecting and retrying a few times?
1217      */
1218     return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
1219                                             rsc->rsc_id);
1220 }
1221 
1222 /*!
1223  * \internal
1224  * \brief Initiate a stonith resource agent recurring "monitor" action
1225  *
1226  * \param[in] stonith_api  Connection to fencer
1227  * \param[in] rsc          Stonith resource to monitor
1228  * \param[in] cmd          Monitor command being executed
1229  *
1230  * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1231  */
1232 static inline int
1233 execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1234 {
1235     int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
1236                                         cmd->timeout / 1000);
1237 
1238     rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
1239                                               "lrmd_stonith_callback",
1240                                               lrmd_stonith_callback);
1241     if (rc == TRUE) {
1242         rsc->active = cmd;
1243         rc = pcmk_ok;
1244     } else {
1245         rc = -pcmk_err_generic;
1246     }
1247     return rc;
1248 }
1249 
1250 static void
1251 lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1252 {
1253     int rc = 0;
1254     bool do_monitor = FALSE;
1255 
1256     stonith_t *stonith_api = get_stonith_connection();
1257 
1258     if (!stonith_api) {
1259         rc = -ENOTCONN;
1260 
1261     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1262         rc = execd_stonith_start(stonith_api, rsc, cmd);
1263         if (rc == 0) {
1264             do_monitor = TRUE;
1265         }
1266 
1267     } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1268         rc = execd_stonith_stop(stonith_api, rsc);
1269 
1270     } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1271         if (cmd->interval_ms > 0) {
1272             do_monitor = TRUE;
1273         } else {
1274             rc = rsc->st_probe_rc;
1275         }
1276     }
1277 
1278     if (do_monitor) {
1279         rc = execd_stonith_monitor(stonith_api, rsc, cmd);
1280         if (rc == pcmk_ok) {
1281             // Don't clean up yet, we will find out result of the monitor later
1282             return;
1283         }
1284     }
1285 
1286     stonith_action_complete(cmd, rc);
1287 }
1288 
1289 static int
1290 lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1291 {
1292     svc_action_t *action = NULL;
1293     GHashTable *params_copy = NULL;
1294 
1295     CRM_ASSERT(rsc);
1296     CRM_ASSERT(cmd);
1297 
1298     crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
1299               rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1300 
1301 #if SUPPORT_NAGIOS
1302     /* Recurring operations are cancelled anyway for a stop operation */
1303     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
1304         && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1305 
1306         cmd->result.exit_status = PCMK_OCF_OK;
1307         goto exec_done;
1308     }
1309 #endif
1310 
1311     params_copy = pcmk__str_table_dup(cmd->params);
1312 
1313     action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1314                                      rsc->type,
1315                                      normalize_action_name(rsc, cmd->action),
1316                                      cmd->interval_ms, cmd->timeout,
1317                                      params_copy, cmd->service_flags);
1318 
1319     if (action == NULL) {
1320         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1321                          PCMK_EXEC_ERROR, strerror(ENOMEM));
1322         goto exec_done;
1323     }
1324 
1325     if (action->rc != PCMK_OCF_UNKNOWN) {
1326         pcmk__set_result(&(cmd->result), action->rc, action->status,
1327                          services__exit_reason(action));
1328         services_action_free(action);
1329         goto exec_done;
1330     }
1331 
1332     action->cb_data = cmd;
1333 
1334     if (services_action_async(action, action_complete)) {
1335         /* When services_action_async() returns TRUE, the callback might have
1336          * been called -- in this case action_complete(), which might free cmd,
1337          * so cmd cannot be used here.
1338          */
1339         return TRUE;
1340     }
1341 
1342     pcmk__set_result(&(cmd->result), action->rc, action->status,
1343                      services__exit_reason(action));
1344     services_action_free(action);
1345     action = NULL;
1346 
1347   exec_done:
1348     cmd_finalize(cmd, rsc);
1349     return TRUE;
1350 }
1351 
1352 static gboolean
1353 lrmd_rsc_execute(lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1354 {
1355     lrmd_cmd_t *cmd = NULL;
1356 
1357     CRM_CHECK(rsc != NULL, return FALSE);
1358 
1359     if (rsc->active) {
1360         crm_trace("%s is still active", rsc->rsc_id);
1361         return TRUE;
1362     }
1363 
1364     if (rsc->pending_ops) {
1365         GList *first = rsc->pending_ops;
1366 
1367         cmd = first->data;
1368         if (cmd->delay_id) {
1369             crm_trace
1370                 ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
1371                  cmd->rsc_id, cmd->action, cmd->start_delay);
1372             return TRUE;
1373         }
1374         rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1375         g_list_free_1(first);
1376 
1377 #ifdef PCMK__TIME_USE_CGT
1378         get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1379 #endif
1380         cmd->epoch_last_run = time(NULL);
1381     }
1382 
1383     if (!cmd) {
1384         crm_trace("Nothing further to do for %s", rsc->rsc_id);
1385         return TRUE;
1386     }
1387 
1388     rsc->active = cmd;          /* only one op at a time for a rsc */
1389     if (cmd->interval_ms) {
1390         rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1391     }
1392 
1393     log_execute(cmd);
1394 
1395     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1396         lrmd_rsc_execute_stonith(rsc, cmd);
1397     } else {
1398         lrmd_rsc_execute_service_lib(rsc, cmd);
1399     }
1400 
1401     return TRUE;
1402 }
1403 
1404 static gboolean
1405 lrmd_rsc_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1406 {
1407     return lrmd_rsc_execute(user_data);
1408 }
1409 
1410 void
1411 free_rsc(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1412 {
1413     GList *gIter = NULL;
1414     lrmd_rsc_t *rsc = data;
1415     int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1416                                   pcmk__str_casei);
1417 
1418     gIter = rsc->pending_ops;
1419     while (gIter != NULL) {
1420         GList *next = gIter->next;
1421         lrmd_cmd_t *cmd = gIter->data;
1422 
1423         /* command was never executed */
1424         cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1425         cmd_finalize(cmd, NULL);
1426 
1427         gIter = next;
1428     }
1429     /* frees list, but not list elements. */
1430     g_list_free(rsc->pending_ops);
1431 
1432     gIter = rsc->recurring_ops;
1433     while (gIter != NULL) {
1434         GList *next = gIter->next;
1435         lrmd_cmd_t *cmd = gIter->data;
1436 
1437         if (is_stonith) {
1438             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1439             /* If a stonith command is in-flight, just mark it as cancelled;
1440              * it is not safe to finalize/free the cmd until the stonith api
1441              * says it has either completed or timed out.
1442              */
1443             if (rsc->active != cmd) {
1444                 cmd_finalize(cmd, NULL);
1445             }
1446         } else {
1447             /* This command is already handed off to service library,
1448              * let service library cancel it and tell us via the callback
1449              * when it is cancelled. The rsc can be safely destroyed
1450              * even if we are waiting for the cancel result */
1451             services_action_cancel(rsc->rsc_id,
1452                                    normalize_action_name(rsc, cmd->action),
1453                                    cmd->interval_ms);
1454         }
1455 
1456         gIter = next;
1457     }
1458     /* frees list, but not list elements. */
1459     g_list_free(rsc->recurring_ops);
1460 
1461     free(rsc->rsc_id);
1462     free(rsc->class);
1463     free(rsc->provider);
1464     free(rsc->type);
1465     mainloop_destroy_trigger(rsc->work);
1466 
1467     free(rsc);
1468 }
1469 
1470 static int
1471 process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1472                     xmlNode **reply)
1473 {
1474     int rc = pcmk_ok;
1475     const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
1476     const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
1477 
1478     if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
1479         crm_err("Cluster API version must be greater than or equal to %s, not %s",
1480                 LRMD_MIN_PROTOCOL_VERSION, protocol_version);
1481         rc = -EPROTO;
1482     }
1483 
1484     if (crm_is_true(is_ipc_provider)) {
1485 #ifdef PCMK__COMPILE_REMOTE
1486         if ((client->remote != NULL) && client->remote->tls_handshake_complete) {
1487             // This is a remote connection from a cluster node's controller
1488             ipc_proxy_add_provider(client);
1489         } else {
1490             rc = -EACCES;
1491         }
1492 #else
1493         rc = -EPROTONOSUPPORT;
1494 #endif
1495     }
1496 
1497     *reply = create_lrmd_reply(__func__, rc, call_id);
1498     crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
1499     crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
1500     crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1501 
1502     return rc;
1503 }
1504 
1505 static int
1506 process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1507 {
1508     int rc = pcmk_ok;
1509     lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1510     lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1511 
1512     if (dup &&
1513         pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1514         pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1515 
1516         crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1517         free_rsc(rsc);
1518         return rc;
1519     }
1520 
1521     g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1522     crm_info("Cached agent information for '%s'", rsc->rsc_id);
1523     return rc;
1524 }
1525 
1526 static xmlNode *
1527 process_lrmd_get_rsc_info(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1528 {
1529     int rc = pcmk_ok;
1530     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1531     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1532     xmlNode *reply = NULL;
1533     lrmd_rsc_t *rsc = NULL;
1534 
1535     if (rsc_id == NULL) {
1536         rc = -ENODEV;
1537     } else {
1538         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1539         if (rsc == NULL) {
1540             crm_info("Agent information for '%s' not in cache", rsc_id);
1541             rc = -ENODEV;
1542         }
1543     }
1544 
1545     reply = create_lrmd_reply(__func__, rc, call_id);
1546     if (rsc) {
1547         crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
1548         crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
1549         crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
1550         crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
1551     }
1552     return reply;
1553 }
1554 
1555 static int
1556 process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
     /* [previous][next][first][last][top][bottom][index][help] */
1557                             xmlNode *request)
1558 {
1559     int rc = pcmk_ok;
1560     lrmd_rsc_t *rsc = NULL;
1561     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1562     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1563 
1564     if (!rsc_id) {
1565         return -ENODEV;
1566     }
1567 
1568     rsc = g_hash_table_lookup(rsc_list, rsc_id);
1569     if (rsc == NULL) {
1570         crm_info("Ignoring unregistration of resource '%s', which is not registered",
1571                  rsc_id);
1572         return pcmk_ok;
1573     }
1574 
1575     if (rsc->active) {
1576         /* let the caller know there are still active ops on this rsc to watch for */
1577         crm_trace("Operation (0x%p) still in progress for unregistered resource %s",
1578                   rsc->active, rsc_id);
1579         rc = -EINPROGRESS;
1580     }
1581 
1582     g_hash_table_remove(rsc_list, rsc_id);
1583 
1584     return rc;
1585 }
1586 
1587 static int
1588 process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1589 {
1590     lrmd_rsc_t *rsc = NULL;
1591     lrmd_cmd_t *cmd = NULL;
1592     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1593     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1594     int call_id;
1595 
1596     if (!rsc_id) {
1597         return -EINVAL;
1598     }
1599     if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1600         crm_info("Resource '%s' not found (%d active resources)",
1601                  rsc_id, g_hash_table_size(rsc_list));
1602         return -ENODEV;
1603     }
1604 
1605     cmd = create_lrmd_cmd(request, client);
1606     call_id = cmd->call_id;
1607 
1608     /* Don't reference cmd after handing it off to be scheduled.
1609      * The cmd could get merged and freed. */
1610     schedule_lrmd_cmd(rsc, cmd);
1611 
1612     return call_id;
1613 }
1614 
1615 static int
1616 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1617 {
1618     GList *gIter = NULL;
1619     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1620 
1621     /* How to cancel an action.
1622      * 1. Check pending ops list, if it hasn't been handed off
1623      *    to the service library or stonith recurring list remove
1624      *    it there and that will stop it.
1625      * 2. If it isn't in the pending ops list, then it's either a
1626      *    recurring op in the stonith recurring list, or the service
1627      *    library's recurring list.  Stop it there
1628      * 3. If not found in any lists, then this operation has either
1629      *    been executed already and is not a recurring operation, or
1630      *    never existed.
1631      */
1632     if (!rsc) {
1633         return -ENODEV;
1634     }
1635 
1636     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1637         lrmd_cmd_t *cmd = gIter->data;
1638 
1639         if (action_matches(cmd, action, interval_ms)) {
1640             cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1641             cmd_finalize(cmd, rsc);
1642             return pcmk_ok;
1643         }
1644     }
1645 
1646     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1647         /* The service library does not handle stonith operations.
1648          * We have to handle recurring stonith operations ourselves. */
1649         for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1650             lrmd_cmd_t *cmd = gIter->data;
1651 
1652             if (action_matches(cmd, action, interval_ms)) {
1653                 cmd->result.execution_status = PCMK_EXEC_CANCELLED;
1654                 if (rsc->active != cmd) {
1655                     cmd_finalize(cmd, rsc);
1656                 }
1657                 return pcmk_ok;
1658             }
1659         }
1660     } else if (services_action_cancel(rsc_id,
1661                                       normalize_action_name(rsc, action),
1662                                       interval_ms) == TRUE) {
1663         /* The service library will tell the action_complete callback function
1664          * this action was cancelled, which will destroy the cmd and remove
1665          * it from the recurring_op list. Do not do that in this function
1666          * if the service library says it cancelled it. */
1667         return pcmk_ok;
1668     }
1669 
1670     return -EOPNOTSUPP;
1671 }
1672 
1673 static void
1674 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1675 {
1676     GList *cmd_list = NULL;
1677     GList *cmd_iter = NULL;
1678 
1679     /* Notice a copy of each list is created when concat is called.
1680      * This prevents odd behavior from occurring when the cmd_list
1681      * is iterated through later on.  It is possible the cancel_op
1682      * function may end up modifying the recurring_ops and pending_ops
1683      * lists.  If we did not copy those lists, our cmd_list iteration
1684      * could get messed up.*/
1685     if (rsc->recurring_ops) {
1686         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1687     }
1688     if (rsc->pending_ops) {
1689         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1690     }
1691     if (!cmd_list) {
1692         return;
1693     }
1694 
1695     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1696         lrmd_cmd_t *cmd = cmd_iter->data;
1697 
1698         if (cmd->interval_ms == 0) {
1699             continue;
1700         }
1701 
1702         if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1703             continue;
1704         }
1705 
1706         cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1707     }
1708     /* frees only the copied list data, not the cmds */
1709     g_list_free(cmd_list);
1710 }
1711 
1712 static int
1713 process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1714 {
1715     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1716     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1717     const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
1718     guint interval_ms = 0;
1719 
1720     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
1721 
1722     if (!rsc_id || !action) {
1723         return -EINVAL;
1724     }
1725 
1726     return cancel_op(rsc_id, action, interval_ms);
1727 }
1728 
1729 static void
1730 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1731 {
1732     xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
1733 
1734     crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
1735     for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1736         lrmd_cmd_t *cmd = item->data;
1737         xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
1738 
1739         crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
1740                     (cmd->real_action? cmd->real_action : cmd->action));
1741         crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
1742         crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
1743     }
1744 }
1745 
1746 static xmlNode *
1747 process_lrmd_get_recurring(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1748 {
1749     int rc = pcmk_ok;
1750     const char *rsc_id = NULL;
1751     lrmd_rsc_t *rsc = NULL;
1752     xmlNode *reply = NULL;
1753     xmlNode *rsc_xml = NULL;
1754 
1755     // Resource ID is optional
1756     rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
1757     if (rsc_xml) {
1758         rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
1759     }
1760     if (rsc_xml) {
1761         rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1762     }
1763 
1764     // If resource ID is specified, resource must exist
1765     if (rsc_id != NULL) {
1766         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1767         if (rsc == NULL) {
1768             crm_info("Resource '%s' not found (%d active resources)",
1769                      rsc_id, g_hash_table_size(rsc_list));
1770             rc = -ENODEV;
1771         }
1772     }
1773 
1774     reply = create_lrmd_reply(__func__, rc, call_id);
1775 
1776     // If resource ID is not specified, check all resources
1777     if (rsc_id == NULL) {
1778         GHashTableIter iter;
1779         char *key = NULL;
1780 
1781         g_hash_table_iter_init(&iter, rsc_list);
1782         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1783                                       (gpointer *) &rsc)) {
1784             add_recurring_op_xml(reply, rsc);
1785         }
1786     } else if (rsc) {
1787         add_recurring_op_xml(reply, rsc);
1788     }
1789     return reply;
1790 }
1791 
1792 void
1793 process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1794 {
1795     int rc = pcmk_ok;
1796     int call_id = 0;
1797     const char *op = crm_element_value(request, F_LRMD_OPERATION);
1798     int do_reply = 0;
1799     int do_notify = 0;
1800     xmlNode *reply = NULL;
1801 
1802     /* Certain IPC commands may be done only by privileged users (i.e. root or
1803      * hacluster), because they would otherwise provide a means of bypassing
1804      * ACLs.
1805      */
1806     bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
1807 
1808     crm_trace("Processing %s operation from %s", op, client->id);
1809     crm_element_value_int(request, F_LRMD_CALLID, &call_id);
1810 
1811     if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
1812 #ifdef PCMK__COMPILE_REMOTE
1813         if (allowed) {
1814             ipc_proxy_forward_client(client, request);
1815         } else {
1816             rc = -EACCES;
1817         }
1818 #else
1819         rc = -EPROTONOSUPPORT;
1820 #endif
1821         do_reply = 1;
1822     } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
1823         rc = process_lrmd_signon(client, request, call_id, &reply);
1824         do_reply = 1;
1825     } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
1826         if (allowed) {
1827             rc = process_lrmd_rsc_register(client, id, request);
1828             do_notify = 1;
1829         } else {
1830             rc = -EACCES;
1831         }
1832         do_reply = 1;
1833     } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
1834         if (allowed) {
1835             reply = process_lrmd_get_rsc_info(request, call_id);
1836         } else {
1837             rc = -EACCES;
1838         }
1839         do_reply = 1;
1840     } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
1841         if (allowed) {
1842             rc = process_lrmd_rsc_unregister(client, id, request);
1843             /* don't notify anyone about failed un-registers */
1844             if (rc == pcmk_ok || rc == -EINPROGRESS) {
1845                 do_notify = 1;
1846             }
1847         } else {
1848             rc = -EACCES;
1849         }
1850         do_reply = 1;
1851     } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
1852         if (allowed) {
1853             rc = process_lrmd_rsc_exec(client, id, request);
1854         } else {
1855             rc = -EACCES;
1856         }
1857         do_reply = 1;
1858     } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
1859         if (allowed) {
1860             rc = process_lrmd_rsc_cancel(client, id, request);
1861         } else {
1862             rc = -EACCES;
1863         }
1864         do_reply = 1;
1865     } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
1866         do_notify = 1;
1867         do_reply = 1;
1868     } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
1869         if (allowed) {
1870             xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
1871 
1872             CRM_LOG_ASSERT(data != NULL);
1873             pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
1874         } else {
1875             rc = -EACCES;
1876         }
1877     } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
1878         if (allowed) {
1879             rc = process_lrmd_alert_exec(client, id, request);
1880         } else {
1881             rc = -EACCES;
1882         }
1883         do_reply = 1;
1884     } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
1885         if (allowed) {
1886             reply = process_lrmd_get_recurring(request, call_id);
1887         } else {
1888             rc = -EACCES;
1889         }
1890         do_reply = 1;
1891     } else {
1892         rc = -EOPNOTSUPP;
1893         do_reply = 1;
1894         crm_err("Unknown IPC request '%s' from client %s",
1895                 op, pcmk__client_name(client));
1896     }
1897 
1898     if (rc == -EACCES) {
1899         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
1900                  op, pcmk__client_name(client));
1901     }
1902 
1903     crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
1904               op, client->id, rc, do_reply, do_notify);
1905 
1906     if (do_reply) {
1907         int send_rc = pcmk_rc_ok;
1908 
1909         if (reply == NULL) {
1910             reply = create_lrmd_reply(__func__, rc, call_id);
1911         }
1912         send_rc = lrmd_server_send_reply(client, id, reply);
1913         free_xml(reply);
1914         if (send_rc != pcmk_rc_ok) {
1915             crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
1916                      pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
1917         }
1918     }
1919 
1920     if (do_notify) {
1921         send_generic_notify(rc, request);
1922     }
1923 }

/* [previous][next][first][last][top][bottom][index][help] */