root/daemons/execd/execd_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. time_is_set
  2. get_current_time
  3. time_diff_ms
  4. cmd_original_times
  5. action_matches
  6. log_finished
  7. log_execute
  8. normalize_action_name
  9. build_rsc_from_xml
  10. create_lrmd_cmd
  11. stop_recurring_timer
  12. free_lrmd_cmd
  13. stonith_recurring_op_helper
  14. start_recurring_timer
  15. start_delay_helper
  16. find_duplicate_action
  17. merge_recurring_duplicate
  18. schedule_lrmd_cmd
  19. create_lrmd_reply
  20. send_client_notify
  21. send_cmd_complete_notify
  22. send_generic_notify
  23. cmd_reset
  24. cmd_finalize
  25. ocf2uniform_rc
  26. stonith2uniform_rc
  27. nagios2uniform_rc
  28. get_uniform_rc
  29. action_get_uniform_rc
  30. notify_one_client
  31. notify_of_new_client
  32. parse_exit_reason
  33. client_disconnect_cleanup
  34. action_complete
  35. stonith_rc2status
  36. stonith_action_complete
  37. lrmd_stonith_callback
  38. stonith_connection_failed
  39. execd_stonith_start
  40. execd_stonith_stop
  41. execd_stonith_monitor
  42. lrmd_rsc_execute_stonith
  43. lrmd_rsc_execute_service_lib
  44. lrmd_rsc_execute
  45. lrmd_rsc_dispatch
  46. free_rsc
  47. process_lrmd_signon
  48. process_lrmd_rsc_register
  49. process_lrmd_get_rsc_info
  50. process_lrmd_rsc_unregister
  51. process_lrmd_rsc_exec
  52. cancel_op
  53. cancel_all_recurring
  54. process_lrmd_rsc_cancel
  55. add_recurring_op_xml
  56. process_lrmd_get_recurring
  57. process_lrmd_message

   1 /*
   2  * Copyright 2012-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <glib.h>
  13 
  14 // Check whether we have a high-resolution monotonic clock
  15 #undef PCMK__TIME_USE_CGT
  16 #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
  17 #  define PCMK__TIME_USE_CGT
  18 #  include <time.h>  /* clock_gettime */
  19 #endif
  20 
  21 #include <unistd.h>
  22 
  23 #include <crm/crm.h>
  24 #include <crm/services.h>
  25 #include <crm/services_internal.h>
  26 #include <crm/common/mainloop.h>
  27 #include <crm/common/ipc.h>
  28 #include <crm/common/ipc_internal.h>
  29 #include <crm/msg_xml.h>
  30 
  31 #include "pacemaker-execd.h"
  32 
  33 #define EXIT_REASON_MAX_LEN 128 /* NOTE(review): presumably the maximum length of a parsed exit reason; confirm at its use site */
  34 
  35 GHashTable *rsc_list = NULL; /* Known resources; looked up by resource ID to obtain an lrmd_rsc_t */
  36 
  37 typedef struct lrmd_cmd_s {
  38     int timeout;
  39     guint interval_ms;
  40     int start_delay;
  41     int timeout_orig;
  42 
  43     int call_id;
  44     int exec_rc;
  45     int lrmd_op_status;
  46 
  47     int call_opts;
  48     /* Timer ids, must be removed on cmd destruction. */
  49     int delay_id;
  50     int stonith_recurring_id;
  51 
  52     int rsc_deleted;
  53 
  54     int service_flags;
  55 
  56     char *client_id;
  57     char *origin;
  58     char *rsc_id;
  59     char *action;
  60     char *real_action;
  61     char *exit_reason;
  62     char *output;
  63     char *userdata_str;
  64 
  65     /* We can track operation queue time and run time, to be saved with the CIB
  66      * resource history (and displayed in cluster status). We need
  67      * high-resolution monotonic time for this purpose, so we use
  68      * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
  69      * is disabled).
  70      *
  71      * However, we also need epoch timestamps for recording the time the command
  72      * last ran and the time its return value last changed, for use in time
  73      * displays (as opposed to interval calculations). We keep time_t values for
  74      * this purpose.
  75      *
  76      * The last run time is used for both purposes, so we keep redundant
  77      * monotonic and epoch values for this. Technically the two could represent
  78      * different times, but since time_t has only second resolution and the
  79      * values are used for distinct purposes, that is not significant.
  80      */
  81 #ifdef PCMK__TIME_USE_CGT
  82     /* Recurring and systemd operations may involve more than one executor
  83      * command per operation, so they need info about the original and the most
  84      * recent.
  85      */
  86     struct timespec t_first_run;    // When op first ran
  87     struct timespec t_run;          // When op most recently ran
  88     struct timespec t_first_queue;  // When op was first queued
  89     struct timespec t_queue;        // When op was most recently queued
  90 #endif
  91     time_t epoch_last_run;          // Epoch timestamp of when op last ran
  92     time_t epoch_rcchange;          // Epoch timestamp of when rc last changed
  93 
  94     bool first_notify_sent;
  95     int last_notify_rc;
  96     int last_notify_op_status;
  97     int last_pid;
  98 
  99     GHashTable *params;
 100 } lrmd_cmd_t;
 101 
 102 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
 103 static gboolean lrmd_rsc_dispatch(gpointer user_data); // registered as the trigger callback in build_rsc_from_xml()
 104 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); // called by schedule_lrmd_cmd() ahead of its definition
 105 
 106 #ifdef PCMK__TIME_USE_CGT
 107 
 /*!
  * \internal
  * \brief Check whether a struct timespec has been set
  *
  * \param[in] timespec  Time to check (may be NULL)
  *
  * \return true if \p timespec is non-NULL and either its seconds or its
  *         nanoseconds are nonzero, otherwise false
  */
 static inline bool
 time_is_set(const struct timespec *timespec)
 {
     if (timespec == NULL) {
         return false;
     }
     return (timespec->tv_sec != 0) || (timespec->tv_nsec != 0);
 }
 122 
 123 /*
 124  * \internal
 125  * \brief Set a timespec (and its original if unset) to the current time
 126  *
 127  * \param[out] t_current  Where to store current time
 128  * \param[out] t_orig     Where to copy t_current if unset
 129  */
 130 static void
 131 get_current_time(struct timespec *t_current, struct timespec *t_orig)
     /* [previous][next][first][last][top][bottom][index][help] */
 132 {
 133     clock_gettime(CLOCK_MONOTONIC, t_current);
 134     if ((t_orig != NULL) && !time_is_set(t_orig)) {
 135         *t_orig = *t_current;
 136     }
 137 }
 138 
 139 /*!
 140  * \internal
 141  * \brief Return difference between two times in milliseconds
 142  *
 143  * \param[in] now  More recent time (or NULL to use current time)
 144  * \param[in] old  Earlier time
 145  *
 146  * \return milliseconds difference (or 0 if old is NULL or unset)
 147  *
 148  * \note Can overflow on 32bit machines when the differences is around
 149  *       24 days or more.
 150  */
 151 static int
 152 time_diff_ms(struct timespec *now, struct timespec *old)
     /* [previous][next][first][last][top][bottom][index][help] */
 153 {
 154     int diff_ms = 0;
 155 
 156     if (time_is_set(old)) {
 157         struct timespec local_now = { 0, };
 158 
 159         if (now == NULL) {
 160             clock_gettime(CLOCK_MONOTONIC, &local_now);
 161             now = &local_now;
 162         }
 163         diff_ms = (now->tv_sec - old->tv_sec) * 1000
 164                   + (now->tv_nsec - old->tv_nsec) / 1000000;
 165     }
 166     return diff_ms;
 167 }
 168 
 169 /*!
 170  * \internal
 171  * \brief Reset a command's operation times to their original values.
 172  *
 173  * Reset a command's run and queued timestamps to the timestamps of the original
 174  * command, so we report the entire time since then and not just the time since
 175  * the most recent command (for recurring and systemd operations).
 176  *
 177  * \param[in] cmd  Executor command object to reset
 178  *
 179  * \note It's not obvious what the queued time should be for a systemd
 180  *       start/stop operation, which might go like this:
 181  *         initial command queued 5ms, runs 3s
 182  *         monitor command queued 10ms, runs 10s
 183  *         monitor command queued 10ms, runs 10s
 184  *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 185  *       implementation will report 5ms. If it's 25ms, then we need to
 186  *       subtract 20ms from the total exec time so as not to count it twice.
 187  *       We can implement that later if it matters to anyone ...
 188  */
 189 static void
 190 cmd_original_times(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 191 {
 192     cmd->t_run = cmd->t_first_run;
 193     cmd->t_queue = cmd->t_first_queue;
 194 }
 195 #endif
 196 
 197 static inline bool
 198 action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 199 {
 200     return (cmd->interval_ms == interval_ms)
 201            && pcmk__str_eq(cmd->action, action, pcmk__str_casei);
 202 }
 203 
 204 static void
 205 log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
     /* [previous][next][first][last][top][bottom][index][help] */
 206 {
 207     char pid_str[32] = { 0, };
 208     int log_level = LOG_INFO;
 209 
 210     if (cmd->last_pid) {
 211         snprintf(pid_str, 32, "%d", cmd->last_pid);
 212     }
 213 
 214     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 215         log_level = LOG_DEBUG;
 216     }
 217 #ifdef PCMK__TIME_USE_CGT
 218     do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d"
 219                " (execution time %dms, queue time %dms)",
 220                cmd->rsc_id, cmd->action, cmd->call_id,
 221                (cmd->last_pid? ", PID " : ""), pid_str, cmd->exec_rc,
 222                exec_time, queue_time);
 223 #else
 224     do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d"
 225                cmd->rsc_id, cmd->action, cmd->call_id,
 226                (cmd->last_pid? ", PID " : ""), pid_str, cmd->exec_rc);
 227 #endif
 228 }
 229 
 230 static void
 231 log_execute(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 232 {
 233     int log_level = LOG_INFO;
 234 
 235     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 236         log_level = LOG_DEBUG;
 237     }
 238 
 239     do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
 240                cmd->rsc_id, cmd->action, cmd->call_id);
 241 }
 242 
 243 static const char *
 244 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 245 {
 246     if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
 247         pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
 248         return "status";
 249     }
 250     return action;
 251 }
 252 
 253 static lrmd_rsc_t *
 254 build_rsc_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 255 {
 256     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 257     lrmd_rsc_t *rsc = NULL;
 258 
 259     rsc = calloc(1, sizeof(lrmd_rsc_t));
 260 
 261     crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
 262 
 263     rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 264     rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
 265     rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
 266     rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
 267     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
 268     rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running"
 269     return rsc;
 270 }
 271 
 272 static lrmd_cmd_t *
 273 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
     /* [previous][next][first][last][top][bottom][index][help] */
 274 {
 275     int call_options = 0;
 276     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 277     lrmd_cmd_t *cmd = NULL;
 278 
 279     cmd = calloc(1, sizeof(lrmd_cmd_t));
 280 
 281     crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
 282     cmd->call_opts = call_options;
 283     cmd->client_id = strdup(client->id);
 284 
 285     crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
 286     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
 287     crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
 288     crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
 289     cmd->timeout_orig = cmd->timeout;
 290 
 291     cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
 292     cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
 293     cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
 294     cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 295 
 296     cmd->params = xml2list(rsc_xml);
 297 
 298     if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
 299         crm_debug("Setting flag to leave pid group on timeout and "
 300                   "only kill action pid for " PCMK__OP_FMT,
 301                   cmd->rsc_id, cmd->action, cmd->interval_ms);
 302         cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
 303                                                 LOG_TRACE, "Action",
 304                                                 cmd->action, 0,
 305                                                 SVC_ACTION_LEAVE_GROUP,
 306                                                 "SVC_ACTION_LEAVE_GROUP");
 307     }
 308     return cmd;
 309 }
 310 
 311 static void
 312 stop_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 313 {
 314     if (cmd) {
 315         if (cmd->stonith_recurring_id) {
 316             g_source_remove(cmd->stonith_recurring_id);
 317         }
 318         cmd->stonith_recurring_id = 0;
 319     }
 320 }
 321 
 322 static void
 323 free_lrmd_cmd(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 324 {
 325     stop_recurring_timer(cmd);
 326     if (cmd->delay_id) {
 327         g_source_remove(cmd->delay_id);
 328     }
 329     if (cmd->params) {
 330         g_hash_table_destroy(cmd->params);
 331     }
 332     free(cmd->origin);
 333     free(cmd->action);
 334     free(cmd->real_action);
 335     free(cmd->userdata_str);
 336     free(cmd->rsc_id);
 337     free(cmd->output);
 338     free(cmd->exit_reason);
 339     free(cmd->client_id);
 340     free(cmd);
 341 }
 342 
 343 static gboolean
 344 stonith_recurring_op_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 345 {
 346     lrmd_cmd_t *cmd = data;
 347     lrmd_rsc_t *rsc;
 348 
 349     cmd->stonith_recurring_id = 0;
 350 
 351     if (!cmd->rsc_id) {
 352         return FALSE;
 353     }
 354 
 355     rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 356 
 357     CRM_ASSERT(rsc != NULL);
 358     /* take it out of recurring_ops list, and put it in the pending ops
 359      * to be executed */
 360     rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 361     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 362 #ifdef PCMK__TIME_USE_CGT
 363     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 364 #endif
 365     mainloop_set_trigger(rsc->work);
 366 
 367     return FALSE;
 368 }
 369 
 370 static inline void
 371 start_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 372 {
 373     if (cmd && (cmd->interval_ms > 0)) {
 374         cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
 375                                                   stonith_recurring_op_helper,
 376                                                   cmd);
 377     }
 378 }
 379 
 380 static gboolean
 381 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 382 {
 383     lrmd_cmd_t *cmd = data;
 384     lrmd_rsc_t *rsc = NULL;
 385 
 386     cmd->delay_id = 0;
 387     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 388 
 389     if (rsc) {
 390         mainloop_set_trigger(rsc->work);
 391     }
 392 
 393     return FALSE;
 394 }
 395 
 396 /*!
 397  * \internal
 398  * \brief Check whether a list already contains the equivalent of a given action
 399  */
 400 static lrmd_cmd_t *
 401 find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 402 {
 403     for (GList *item = action_list; item != NULL; item = item->next) {
 404         lrmd_cmd_t *dup = item->data;
 405 
 406         if (action_matches(cmd, dup->action, dup->interval_ms)) {
 407             return dup;
 408         }
 409     }
 410     return NULL;
 411 }
 412 
 413 static bool
 414 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 415 {
 416     lrmd_cmd_t * dup = NULL;
 417     bool dup_pending = true;
 418 
 419     if (cmd->interval_ms == 0) {
 420         return false;
 421     }
 422 
 423     // Search for a duplicate of this action (in-flight or not)
 424     dup = find_duplicate_action(rsc->pending_ops, cmd);
 425     if (dup == NULL) {
 426         dup_pending = false;
 427         dup = find_duplicate_action(rsc->recurring_ops, cmd);
 428         if (dup == NULL) {
 429             return false;
 430         }
 431     }
 432 
 433     /* Do not merge fencing monitors marked for cancellation, so we can reply to
 434      * the cancellation separately.
 435      */
 436     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 437                      pcmk__str_casei)
 438         && (dup->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
 439         return false;
 440     }
 441 
 442     /* This should not occur. If it does, we need to investigate how something
 443      * like this is possible in the controller.
 444      */
 445     crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
 446              "), merging with previous op entry",
 447              rsc->rsc_id, normalize_action_name(rsc, dup->action),
 448              dup->interval_ms);
 449 
 450     // Merge new action's call ID and user data into existing action
 451     dup->first_notify_sent = false;
 452     free(dup->userdata_str);
 453     dup->userdata_str = cmd->userdata_str;
 454     cmd->userdata_str = NULL;
 455     dup->call_id = cmd->call_id;
 456     free_lrmd_cmd(cmd);
 457     cmd = NULL;
 458 
 459     /* If dup is not pending, that means it has already executed at least once
 460      * and is waiting in the interval. In that case, stop waiting and initiate
 461      * a new instance now.
 462      */
 463     if (!dup_pending) {
 464         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
 465                          pcmk__str_casei)) {
 466             stop_recurring_timer(dup);
 467             stonith_recurring_op_helper(dup);
 468         } else {
 469             services_action_kick(rsc->rsc_id,
 470                                  normalize_action_name(rsc, dup->action),
 471                                  dup->interval_ms);
 472         }
 473     }
 474     return true;
 475 }
 476 
 477 static void
 478 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 479 {
 480     CRM_CHECK(cmd != NULL, return);
 481     CRM_CHECK(rsc != NULL, return);
 482 
 483     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
 484 
 485     if (merge_recurring_duplicate(rsc, cmd)) {
 486         // Equivalent of cmd has already been scheduled
 487         return;
 488     }
 489 
 490     /* The controller expects the executor to automatically cancel
 491      * recurring operations before a resource stops.
 492      */
 493     if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 494         cancel_all_recurring(rsc, NULL);
 495     }
 496 
 497     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 498 #ifdef PCMK__TIME_USE_CGT
 499     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 500 #endif
 501     mainloop_set_trigger(rsc->work);
 502 
 503     if (cmd->start_delay) {
 504         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
 505     }
 506 }
 507 
 508 static xmlNode *
 509 create_lrmd_reply(const char *origin, int rc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 510 {
 511     xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
 512 
 513     crm_xml_add(reply, F_LRMD_ORIGIN, origin);
 514     crm_xml_add_int(reply, F_LRMD_RC, rc);
 515     crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
 516     return reply;
 517 }
 518 
 519 static void
 520 send_client_notify(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 521 {
 522     xmlNode *update_msg = user_data;
 523     pcmk__client_t *client = value;
 524     int rc;
 525     int log_level = LOG_WARNING;
 526     const char *msg = NULL;
 527 
 528     CRM_CHECK(client != NULL, return);
 529     if (client->name == NULL) {
 530         crm_trace("Skipping notification to client without name");
 531         return;
 532     }
 533     if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
 534         /* We only want to notify clients of the executor IPC API. If we are
 535          * running as Pacemaker Remote, we may have clients proxied to other
 536          * IPC services in the cluster, so skip those.
 537          */
 538         crm_trace("Skipping executor API notification to client %s",
 539                   pcmk__client_name(client));
 540         return;
 541     }
 542 
 543     rc = lrmd_server_send_notify(client, update_msg);
 544     if (rc == pcmk_rc_ok) {
 545         return;
 546     }
 547 
 548     switch (rc) {
 549         case ENOTCONN:
 550         case EPIPE: // Client exited without waiting for notification
 551             log_level = LOG_INFO;
 552             msg = "Disconnected";
 553             break;
 554 
 555         default:
 556             msg = pcmk_rc_str(rc);
 557             break;
 558     }
 559     do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d",
 560                pcmk__client_name(client), msg, rc);
 561 }
 562 
 563 static void
 564 send_cmd_complete_notify(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 565 {
 566     xmlNode *notify = NULL;
 567 
 568 #ifdef PCMK__TIME_USE_CGT
 569     int exec_time = time_diff_ms(NULL, &(cmd->t_run));
 570     int queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
 571 
 572     log_finished(cmd, exec_time, queue_time);
 573 #else
 574     log_finished(cmd, 0, 0);
 575 #endif
 576 
 577     /* if the first notify result for a cmd has already been sent earlier, and the
 578      * the option to only send notifies on result changes is set. Check to see
 579      * if the last result is the same as the new one. If so, suppress this update */
 580     if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
 581         if (cmd->last_notify_rc == cmd->exec_rc &&
 582             cmd->last_notify_op_status == cmd->lrmd_op_status) {
 583 
 584             /* only send changes */
 585             return;
 586         }
 587 
 588     }
 589 
 590     cmd->first_notify_sent = true;
 591     cmd->last_notify_rc = cmd->exec_rc;
 592     cmd->last_notify_op_status = cmd->lrmd_op_status;
 593 
 594     notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 595 
 596     crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 597     crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
 598     crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
 599     crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
 600     crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
 601     crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
 602     crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
 603     crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
 604 
 605     crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
 606                    (long long) cmd->epoch_last_run);
 607     crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
 608                    (long long) cmd->epoch_rcchange);
 609 #ifdef PCMK__TIME_USE_CGT
 610     crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
 611     crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
 612 #endif
 613 
 614     crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
 615     crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
 616     if(cmd->real_action) {
 617         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
 618     } else {
 619         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
 620     }
 621     crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
 622     crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
 623     crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason);
 624 
 625     if (cmd->params) {
 626         char *key = NULL;
 627         char *value = NULL;
 628         GHashTableIter iter;
 629 
 630         xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
 631 
 632         g_hash_table_iter_init(&iter, cmd->params);
 633         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 634             hash2smartfield((gpointer) key, (gpointer) value, args);
 635         }
 636     }
 637     if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
 638         pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
 639 
 640         if (client) {
 641             send_client_notify(client->id, client, notify);
 642         }
 643     } else {
 644         pcmk__foreach_ipc_client(send_client_notify, notify);
 645     }
 646 
 647     free_xml(notify);
 648 }
 649 
 650 static void
 651 send_generic_notify(int rc, xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
 652 {
 653     if (pcmk__ipc_client_count() != 0) {
 654         int call_id = 0;
 655         xmlNode *notify = NULL;
 656         xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
 657         const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
 658         const char *op = crm_element_value(request, F_LRMD_OPERATION);
 659 
 660         crm_element_value_int(request, F_LRMD_CALLID, &call_id);
 661 
 662         notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 663         crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 664         crm_xml_add_int(notify, F_LRMD_RC, rc);
 665         crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
 666         crm_xml_add(notify, F_LRMD_OPERATION, op);
 667         crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
 668 
 669         pcmk__foreach_ipc_client(send_client_notify, notify);
 670 
 671         free_xml(notify);
 672     }
 673 }
 674 
 675 static void
 676 cmd_reset(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 677 {
 678     cmd->lrmd_op_status = 0;
 679     cmd->last_pid = 0;
 680 #ifdef PCMK__TIME_USE_CGT
 681     memset(&cmd->t_run, 0, sizeof(cmd->t_run));
 682     memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
 683 #endif
 684     cmd->epoch_last_run = 0;
 685     free(cmd->exit_reason);
 686     cmd->exit_reason = NULL;
 687     free(cmd->output);
 688     cmd->output = NULL;
 689 }
 690 
 691 static void
 692 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 693 {
 694     crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
 695               rsc ? rsc->active : NULL, cmd);
 696 
 697     if (rsc && (rsc->active == cmd)) {
 698         rsc->active = NULL;
 699         mainloop_set_trigger(rsc->work);
 700     }
 701 
 702     if (!rsc) {
 703         cmd->rsc_deleted = 1;
 704     }
 705 
 706     /* reset original timeout so client notification has correct information */
 707     cmd->timeout = cmd->timeout_orig;
 708 
 709     send_cmd_complete_notify(cmd);
 710 
 711     if (cmd->interval_ms && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
 712         if (rsc) {
 713             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 714             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 715         }
 716         free_lrmd_cmd(cmd);
 717     } else if (cmd->interval_ms == 0) {
 718         if (rsc) {
 719             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 720         }
 721         free_lrmd_cmd(cmd);
 722     } else {
 723         /* Clear all the values pertaining just to the last iteration of a recurring op. */
 724         cmd_reset(cmd);
 725     }
 726 }
 727 
 728 static int
 729 ocf2uniform_rc(int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 730 {
 731     switch (rc) {
 732         case PCMK_OCF_DEGRADED:
 733         case PCMK_OCF_DEGRADED_PROMOTED:
 734             break;
 735         default:
 736             if (rc < 0 || rc > PCMK_OCF_FAILED_PROMOTED) {
 737                 return PCMK_OCF_UNKNOWN_ERROR;
 738             }
 739     }
 740 
 741     return rc;
 742 }
 743 
 744 static int
 745 stonith2uniform_rc(const char *action, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 746 {
 747     switch (rc) {
 748         case pcmk_ok:
 749             rc = PCMK_OCF_OK;
 750             break;
 751 
 752         case -ENODEV:
 753             /* This should be possible only for probes in practice, but
 754              * interpret for all actions to be safe.
 755              */
 756             if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
 757                 rc = PCMK_OCF_NOT_RUNNING;
 758             } else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) {
 759                 rc = PCMK_OCF_OK;
 760             } else {
 761                 rc = PCMK_OCF_NOT_INSTALLED;
 762             }
 763             break;
 764 
 765         case -EOPNOTSUPP:
 766             rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
 767             break;
 768 
 769         case -ETIME:
 770         case -ETIMEDOUT:
 771             rc = PCMK_OCF_TIMEOUT;
 772             break;
 773 
 774         default:
 775             rc = PCMK_OCF_UNKNOWN_ERROR;
 776             break;
 777     }
 778     return rc;
 779 }
 780 
 781 #if SUPPORT_NAGIOS
 782 static int
 783 nagios2uniform_rc(const char *action, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 784 {
 785     if (rc < 0) {
 786         return PCMK_OCF_UNKNOWN_ERROR;
 787     }
 788 
 789     switch (rc) {
 790         case NAGIOS_STATE_OK:
 791             return PCMK_OCF_OK;
 792         case NAGIOS_INSUFFICIENT_PRIV:
 793             return PCMK_OCF_INSUFFICIENT_PRIV;
 794         case NAGIOS_NOT_INSTALLED:
 795             return PCMK_OCF_NOT_INSTALLED;
 796         case NAGIOS_STATE_WARNING:
 797         case NAGIOS_STATE_CRITICAL:
 798         case NAGIOS_STATE_UNKNOWN:
 799         case NAGIOS_STATE_DEPENDENT:
 800         default:
 801             return PCMK_OCF_UNKNOWN_ERROR;
 802     }
 803 
 804     return PCMK_OCF_UNKNOWN_ERROR;
 805 }
 806 #endif
 807 
 808 static int
 809 get_uniform_rc(const char *standard, const char *action, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 810 {
 811     if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) {
 812         return ocf2uniform_rc(rc);
 813     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
 814         return stonith2uniform_rc(action, rc);
 815     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 816         return rc;
 817     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) {
 818         return rc;
 819 #if SUPPORT_NAGIOS
 820     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
 821         return nagios2uniform_rc(action, rc);
 822 #endif
 823     } else {
 824         return services_get_ocf_exitcode(action, rc);
 825     }
 826 }
 827 
 828 static int
 829 action_get_uniform_rc(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 830 {
 831     lrmd_cmd_t *cmd = action->cb_data;
 832     return get_uniform_rc(action->standard, cmd->action, action->rc);
 833 }
 834 
 835 struct notify_new_client_data {
 836     xmlNode *notify;
 837     pcmk__client_t *new_client;
 838 };
 839 
 840 static void
 841 notify_one_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 842 {
 843     pcmk__client_t *client = value;
 844     struct notify_new_client_data *data = user_data;
 845 
 846     if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
 847         send_client_notify(key, (gpointer) client, (gpointer) data->notify);
 848     }
 849 }
 850 
 851 void
 852 notify_of_new_client(pcmk__client_t *new_client)
     /* [previous][next][first][last][top][bottom][index][help] */
 853 {
 854     struct notify_new_client_data data;
 855 
 856     data.new_client = new_client;
 857     data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 858     crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
 859     crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
 860     pcmk__foreach_ipc_client(notify_one_client, &data);
 861     free_xml(data.notify);
 862 }
 863 
 864 static char *
 865 parse_exit_reason(const char *output)
     /* [previous][next][first][last][top][bottom][index][help] */
 866 {
 867     const char *cur = NULL;
 868     const char *last = NULL;
 869     static int cookie_len = 0;
 870     char *eol = NULL;
 871     size_t reason_len = EXIT_REASON_MAX_LEN;
 872 
 873     if (output == NULL) {
 874         return NULL;
 875     }
 876 
 877     if (!cookie_len) {
 878         cookie_len = strlen(PCMK_OCF_REASON_PREFIX);
 879     }
 880 
 881     cur = strstr(output, PCMK_OCF_REASON_PREFIX);
 882     for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
 883         /* skip over the cookie delimiter string */
 884         cur += cookie_len;
 885         last = cur;
 886     }
 887     if (last == NULL) {
 888         return NULL;
 889     }
 890 
 891     // Truncate everything after a new line, and limit reason string size
 892     eol = strchr(last, '\n');
 893     if (eol) {
 894         reason_len = QB_MIN(reason_len, eol - last);
 895     }
 896     return strndup(last, reason_len);
 897 }
 898 
 899 void
 900 client_disconnect_cleanup(const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 901 {
 902     GHashTableIter iter;
 903     lrmd_rsc_t *rsc = NULL;
 904     char *key = NULL;
 905 
 906     g_hash_table_iter_init(&iter, rsc_list);
 907     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
 908         if (rsc->call_opts & lrmd_opt_drop_recurring) {
 909             /* This client is disconnecting, drop any recurring operations
 910              * it may have initiated on the resource */
 911             cancel_all_recurring(rsc, client_id);
 912         }
 913     }
 914 }
 915 
 916 static void
 917 action_complete(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 918 {
 919     lrmd_rsc_t *rsc;
 920     lrmd_cmd_t *cmd = action->cb_data;
 921     const char *rclass = NULL;
 922 
 923 #ifdef PCMK__TIME_USE_CGT
 924     bool goagain = false;
 925 #endif
 926 
 927     if (!cmd) {
 928         crm_err("Completed executor action (%s) does not match any known operations",
 929                 action->id);
 930         return;
 931     }
 932 
 933 #ifdef PCMK__TIME_USE_CGT
 934     if (cmd->exec_rc != action->rc) {
 935         cmd->epoch_rcchange = time(NULL);
 936     }
 937 #endif
 938 
 939     cmd->last_pid = action->pid;
 940     cmd->exec_rc = action_get_uniform_rc(action);
 941     cmd->lrmd_op_status = action->status;
 942     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 943 
 944     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
 945         rclass = resources_find_service_class(rsc->type);
 946     } else if(rsc) {
 947         rclass = rsc->class;
 948     }
 949 
 950 #ifdef PCMK__TIME_USE_CGT
 951     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 952         if ((cmd->exec_rc == PCMK_OCF_OK)
 953             && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
 954             /* systemd returns from start and stop actions after the action
 955              * begins, not after it completes. We have to jump through a few
 956              * hoops so that we don't report 'complete' to the rest of pacemaker
 957              * until it's actually done.
 958              */
 959             goagain = true;
 960             cmd->real_action = cmd->action;
 961             cmd->action = strdup("monitor");
 962 
 963         } else if (cmd->real_action != NULL) {
 964             // This is follow-up monitor to check whether start/stop completed
 965             if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE)
 966                 && (cmd->exec_rc == PCMK_OCF_PENDING)) {
 967                 goagain = true;
 968 
 969             } else if ((cmd->exec_rc == PCMK_OCF_OK)
 970                        && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 971                 goagain = true;
 972 
 973             } else {
 974                 int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 975                 int timeout_left = cmd->timeout_orig - time_sum;
 976 
 977                 crm_debug("%s systemd %s is now complete (elapsed=%dms, "
 978                           "remaining=%dms): %s (%d)",
 979                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
 980                           services_ocf_exitcode_str(cmd->exec_rc),
 981                           cmd->exec_rc);
 982                 cmd_original_times(cmd);
 983 
 984                 // Monitors may return "not running", but start/stop shouldn't
 985                 if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE)
 986                     && (cmd->exec_rc == PCMK_OCF_NOT_RUNNING)) {
 987 
 988                     if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
 989                         cmd->exec_rc = PCMK_OCF_UNKNOWN_ERROR;
 990                     } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 991                         cmd->exec_rc = PCMK_OCF_OK;
 992                     }
 993                 }
 994             }
 995         }
 996     }
 997 #endif
 998 
 999 #if SUPPORT_NAGIOS
1000     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
1001         if (action_matches(cmd, "monitor", 0)
1002             && (cmd->exec_rc == PCMK_OCF_OK)) {
1003             /* Successfully executed --version for the nagios plugin */
1004             cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
1005 
1006         } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) && cmd->exec_rc != PCMK_OCF_OK) {
1007 #ifdef PCMK__TIME_USE_CGT
1008             goagain = true;
1009 #endif
1010         }
1011     }
1012 #endif
1013 
1014 #ifdef PCMK__TIME_USE_CGT
1015     if (goagain) {
1016         int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
1017         int timeout_left = cmd->timeout_orig - time_sum;
1018         int delay = cmd->timeout_orig / 10;
1019 
1020         if(delay >= timeout_left && timeout_left > 20) {
1021             delay = timeout_left/2;
1022         }
1023 
1024         delay = QB_MIN(2000, delay);
1025         if (delay < timeout_left) {
1026             cmd->start_delay = delay;
1027             cmd->timeout = timeout_left;
1028 
1029             if(cmd->exec_rc == PCMK_OCF_OK) {
1030                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
1031                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
1032 
1033             } else if(cmd->exec_rc == PCMK_OCF_PENDING) {
1034                 crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
1035                          cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
1036 
1037             } else {
1038                 crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
1039                            cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay);
1040             }
1041 
1042             cmd_reset(cmd);
1043             if(rsc) {
1044                 rsc->active = NULL;
1045             }
1046             schedule_lrmd_cmd(rsc, cmd);
1047 
1048             /* Don't finalize cmd, we're not done with it yet */
1049             return;
1050 
1051         } else {
1052             crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
1053                        cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left);
1054             cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
1055             cmd->exec_rc = PCMK_OCF_TIMEOUT;
1056             cmd_original_times(cmd);
1057         }
1058     }
1059 #endif
1060 
1061     if (action->stderr_data) {
1062         cmd->output = strdup(action->stderr_data);
1063         cmd->exit_reason = parse_exit_reason(action->stderr_data);
1064 
1065     } else if (action->stdout_data) {
1066         cmd->output = strdup(action->stdout_data);
1067     }
1068 
1069     cmd_finalize(cmd, rsc);
1070 }
1071 
1072 /*!
1073  * \internal
1074  * \brief Determine operation status of a stonith operation
1075  *
1076  * Non-stonith resource operations get their operation status directly from the
1077  * service library, but the fencer does not have an equivalent, so we must infer
1078  * an operation status from the fencer API's return code.
1079  *
1080  * \param[in] action       Name of action performed on stonith resource
1081  * \param[in] interval_ms  Action interval
1082  * \param[in] rc           Action result from fencer
1083  *
1084  * \return Operation status corresponding to fencer API return code
1085  */
1086 static int
1087 stonith_rc2status(const char *action, guint interval_ms, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
1088 {
1089     int status = PCMK_LRM_OP_DONE;
1090 
1091     switch (rc) {
1092         case pcmk_ok:
1093             break;
1094 
1095         case -EOPNOTSUPP:
1096         case -EPROTONOSUPPORT:
1097             status = PCMK_LRM_OP_NOTSUPPORTED;
1098             break;
1099 
1100         case -ETIME:
1101         case -ETIMEDOUT:
1102             status = PCMK_LRM_OP_TIMEOUT;
1103             break;
1104 
1105         case -ENOTCONN:
1106         case -ECOMM:
1107             // Couldn't talk to fencer
1108             status = PCMK_LRM_OP_ERROR;
1109             break;
1110 
1111         case -ENODEV:
1112             // The device is not registered with the fencer
1113             status = PCMK_LRM_OP_ERROR;
1114             break;
1115 
1116         default:
1117             break;
1118     }
1119     return status;
1120 }
1121 
1122 static void
1123 stonith_action_complete(lrmd_cmd_t * cmd, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
1124 {
1125     // This can be NULL if resource was removed before command completed
1126     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
1127 
1128     cmd->exec_rc = stonith2uniform_rc(cmd->action, rc);
1129 
1130     /* This function may be called with status already set to cancelled, if a
1131      * pending action was aborted. Otherwise, we need to determine status from
1132      * the fencer return code.
1133      */
1134     if (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED) {
1135         cmd->lrmd_op_status = stonith_rc2status(cmd->action, cmd->interval_ms,
1136                                                 rc);
1137 
1138         // Certain successful actions change the known state of the resource
1139         if (rsc && (cmd->exec_rc == PCMK_OCF_OK)) {
1140             if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1141                 rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
1142             } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1143                 rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
1144             }
1145         }
1146     }
1147 
1148     /* The recurring timer should not be running at this point in any case, but
1149      * as a failsafe, stop it if it is.
1150      */
1151     stop_recurring_timer(cmd);
1152 
1153     /* Reschedule this command if appropriate. If a recurring command is *not*
1154      * rescheduled, its status must be PCMK_LRM_OP_CANCELLED, otherwise it will
1155      * not be removed from recurring_ops by cmd_finalize().
1156      */
1157     if (rsc && (cmd->interval_ms > 0)
1158         && (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED)) {
1159         start_recurring_timer(cmd);
1160     }
1161 
1162     cmd_finalize(cmd, rsc);
1163 }
1164 
1165 static void
1166 lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
1167 {
1168     stonith_action_complete(data->userdata, data->rc);
1169 }
1170 
1171 void
1172 stonith_connection_failed(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1173 {
1174     GHashTableIter iter;
1175     GList *cmd_list = NULL;
1176     GList *cmd_iter = NULL;
1177     lrmd_rsc_t *rsc = NULL;
1178     char *key = NULL;
1179 
1180     g_hash_table_iter_init(&iter, rsc_list);
1181     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
1182         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1183             /* If we registered this fence device, we don't know whether the
1184              * fencer still has the registration or not. Cause future probes to
1185              * return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or
1186              * started successfully. This is especially important if the
1187              * controller also went away (possibly due to a cluster layer
1188              * restart) and won't receive our client notification of any
1189              * monitors finalized below.
1190              */
1191             if (rsc->st_probe_rc == pcmk_ok) {
1192                 rsc->st_probe_rc = pcmk_err_generic;
1193             }
1194 
1195             if (rsc->active) {
1196                 cmd_list = g_list_append(cmd_list, rsc->active);
1197             }
1198             if (rsc->recurring_ops) {
1199                 cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
1200             }
1201             if (rsc->pending_ops) {
1202                 cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
1203             }
1204             rsc->pending_ops = rsc->recurring_ops = NULL;
1205         }
1206     }
1207 
1208     if (!cmd_list) {
1209         return;
1210     }
1211 
1212     crm_err("Connection to fencer failed, finalizing %d pending operations",
1213             g_list_length(cmd_list));
1214     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1215         stonith_action_complete(cmd_iter->data, -ENOTCONN);
1216     }
1217     g_list_free(cmd_list);
1218 }
1219 
1220 /*!
1221  * \internal
1222  * \brief Execute a stonith resource "start" action
1223  *
1224  * Start a stonith resource by registering it with the fencer.
1225  * (Stonith agents don't have a start command.)
1226  *
1227  * \param[in] stonith_api  Connection to fencer
1228  * \param[in] rsc          Stonith resource to start
1229  * \param[in] cmd          Start command to execute
1230  *
1231  * \return pcmk_ok on success, -errno otherwise
1232  */
1233 static int
1234 execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1235 {
1236     char *key = NULL;
1237     char *value = NULL;
1238     stonith_key_value_t *device_params = NULL;
1239     int rc = pcmk_ok;
1240 
1241     // Convert command parameters to stonith API key/values
1242     if (cmd->params) {
1243         GHashTableIter iter;
1244 
1245         g_hash_table_iter_init(&iter, cmd->params);
1246         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1247             device_params = stonith_key_value_add(device_params, key, value);
1248         }
1249     }
1250 
1251     /* The fencer will automatically register devices via CIB notifications
1252      * when the CIB changes, but to avoid a possible race condition between
1253      * the fencer receiving the notification and the executor requesting that
1254      * resource, the executor registers the device as well. The fencer knows how
1255      * to handle duplicate registrations.
1256      */
1257     rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
1258                                             cmd->rsc_id, rsc->provider,
1259                                             rsc->type, device_params);
1260 
1261     stonith_key_value_freeall(device_params, 1, 1);
1262     return rc;
1263 }
1264 
1265 /*!
1266  * \internal
1267  * \brief Execute a stonith resource "stop" action
1268  *
1269  * Stop a stonith resource by unregistering it with the fencer.
1270  * (Stonith agents don't have a stop command.)
1271  *
1272  * \param[in] stonith_api  Connection to fencer
1273  * \param[in] rsc          Stonith resource to stop
1274  *
1275  * \return pcmk_ok on success, -errno otherwise
1276  */
1277 static inline int
1278 execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1279 {
1280     /* @TODO Failure would indicate a problem communicating with fencer;
1281      * perhaps we should try reconnecting and retrying a few times?
1282      */
1283     return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
1284                                             rsc->rsc_id);
1285 }
1286 
1287 /*!
1288  * \internal
1289  * \brief Initiate a stonith resource agent recurring "monitor" action
1290  *
1291  * \param[in] stonith_api  Connection to fencer
1292  * \param[in] rsc          Stonith resource to monitor
1293  * \param[in] cmd          Monitor command being executed
1294  *
1295  * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1296  */
1297 static inline int
1298 execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1299 {
1300     int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
1301                                         cmd->timeout / 1000);
1302 
1303     rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
1304                                               "lrmd_stonith_callback",
1305                                               lrmd_stonith_callback);
1306     if (rc == TRUE) {
1307         rsc->active = cmd;
1308         rc = pcmk_ok;
1309     } else {
1310         rc = -pcmk_err_generic;
1311     }
1312     return rc;
1313 }
1314 
1315 static void
1316 lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1317 {
1318     int rc = 0;
1319     bool do_monitor = FALSE;
1320 
1321     stonith_t *stonith_api = get_stonith_connection();
1322 
1323     if (!stonith_api) {
1324         rc = -ENOTCONN;
1325 
1326     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1327         rc = execd_stonith_start(stonith_api, rsc, cmd);
1328         if (rc == 0) {
1329             do_monitor = TRUE;
1330         }
1331 
1332     } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1333         rc = execd_stonith_stop(stonith_api, rsc);
1334 
1335     } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1336         if (cmd->interval_ms > 0) {
1337             do_monitor = TRUE;
1338         } else {
1339             rc = rsc->st_probe_rc;
1340         }
1341     }
1342 
1343     if (do_monitor) {
1344         rc = execd_stonith_monitor(stonith_api, rsc, cmd);
1345         if (rc == pcmk_ok) {
1346             // Don't clean up yet, we will find out result of the monitor later
1347             return;
1348         }
1349     }
1350 
1351     stonith_action_complete(cmd, rc);
1352 }
1353 
1354 static int
1355 lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1356 {
1357     svc_action_t *action = NULL;
1358     GHashTable *params_copy = NULL;
1359 
1360     CRM_ASSERT(rsc);
1361     CRM_ASSERT(cmd);
1362 
1363     crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
1364               rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1365 
1366 #if SUPPORT_NAGIOS
1367     /* Recurring operations are cancelled anyway for a stop operation */
1368     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
1369         && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1370 
1371         cmd->exec_rc = PCMK_OCF_OK;
1372         goto exec_done;
1373     }
1374 #endif
1375 
1376     params_copy = pcmk__str_table_dup(cmd->params);
1377 
1378     action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider,
1379                                      rsc->type,
1380                                      normalize_action_name(rsc, cmd->action),
1381                                      cmd->interval_ms, cmd->timeout,
1382                                      params_copy, cmd->service_flags);
1383 
1384     if (!action) {
1385         crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
1386         cmd->exec_rc = PCMK_OCF_UNKNOWN_ERROR;
1387         cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
1388         goto exec_done;
1389     }
1390 
1391     if (action->rc != 0) {
1392         cmd->exec_rc = action->rc;
1393         cmd->lrmd_op_status = action->status;
1394         services_action_free(action);
1395         goto exec_done;
1396     }
1397 
1398     action->cb_data = cmd;
1399 
1400     /* 'cmd' may not be valid after this point if
1401      * services_action_async() returned TRUE
1402      *
1403      * Upstart and systemd both synchronously determine monitor/status
1404      * results and call action_complete (which may free 'cmd') if necessary.
1405      */
1406     if (services_action_async(action, action_complete)) {
1407         return TRUE;
1408     }
1409 
1410     cmd->exec_rc = action->rc;
1411     if(action->status != PCMK_LRM_OP_DONE) {
1412         cmd->lrmd_op_status = action->status;
1413     } else {
1414         cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
1415     }
1416     services_action_free(action);
1417     action = NULL;
1418 
1419   exec_done:
1420     cmd_finalize(cmd, rsc);
1421     return TRUE;
1422 }
1423 
1424 static gboolean
1425 lrmd_rsc_execute(lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1426 {
1427     lrmd_cmd_t *cmd = NULL;
1428 
1429     CRM_CHECK(rsc != NULL, return FALSE);
1430 
1431     if (rsc->active) {
1432         crm_trace("%s is still active", rsc->rsc_id);
1433         return TRUE;
1434     }
1435 
1436     if (rsc->pending_ops) {
1437         GList *first = rsc->pending_ops;
1438 
1439         cmd = first->data;
1440         if (cmd->delay_id) {
1441             crm_trace
1442                 ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
1443                  cmd->rsc_id, cmd->action, cmd->start_delay);
1444             return TRUE;
1445         }
1446         rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1447         g_list_free_1(first);
1448 
1449 #ifdef PCMK__TIME_USE_CGT
1450         get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1451 #endif
1452         cmd->epoch_last_run = time(NULL);
1453     }
1454 
1455     if (!cmd) {
1456         crm_trace("Nothing further to do for %s", rsc->rsc_id);
1457         return TRUE;
1458     }
1459 
1460     rsc->active = cmd;          /* only one op at a time for a rsc */
1461     if (cmd->interval_ms) {
1462         rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1463     }
1464 
1465     log_execute(cmd);
1466 
1467     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1468         lrmd_rsc_execute_stonith(rsc, cmd);
1469     } else {
1470         lrmd_rsc_execute_service_lib(rsc, cmd);
1471     }
1472 
1473     return TRUE;
1474 }
1475 
1476 static gboolean
1477 lrmd_rsc_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1478 {
1479     return lrmd_rsc_execute(user_data);
1480 }
1481 
1482 void
1483 free_rsc(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1484 {
1485     GList *gIter = NULL;
1486     lrmd_rsc_t *rsc = data;
1487     int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1488                                   pcmk__str_casei);
1489 
1490     gIter = rsc->pending_ops;
1491     while (gIter != NULL) {
1492         GList *next = gIter->next;
1493         lrmd_cmd_t *cmd = gIter->data;
1494 
1495         /* command was never executed */
1496         cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1497         cmd_finalize(cmd, NULL);
1498 
1499         gIter = next;
1500     }
1501     /* frees list, but not list elements. */
1502     g_list_free(rsc->pending_ops);
1503 
1504     gIter = rsc->recurring_ops;
1505     while (gIter != NULL) {
1506         GList *next = gIter->next;
1507         lrmd_cmd_t *cmd = gIter->data;
1508 
1509         if (is_stonith) {
1510             cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1511             /* If a stonith command is in-flight, just mark it as cancelled;
1512              * it is not safe to finalize/free the cmd until the stonith api
1513              * says it has either completed or timed out.
1514              */
1515             if (rsc->active != cmd) {
1516                 cmd_finalize(cmd, NULL);
1517             }
1518         } else {
1519             /* This command is already handed off to service library,
1520              * let service library cancel it and tell us via the callback
1521              * when it is cancelled. The rsc can be safely destroyed
1522              * even if we are waiting for the cancel result */
1523             services_action_cancel(rsc->rsc_id,
1524                                    normalize_action_name(rsc, cmd->action),
1525                                    cmd->interval_ms);
1526         }
1527 
1528         gIter = next;
1529     }
1530     /* frees list, but not list elements. */
1531     g_list_free(rsc->recurring_ops);
1532 
1533     free(rsc->rsc_id);
1534     free(rsc->class);
1535     free(rsc->provider);
1536     free(rsc->type);
1537     mainloop_destroy_trigger(rsc->work);
1538 
1539     free(rsc);
1540 }
1541 
1542 static int
1543 process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1544                     xmlNode **reply)
1545 {
1546     int rc = pcmk_ok;
1547     const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
1548     const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
1549 
1550     if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
1551         crm_err("Cluster API version must be greater than or equal to %s, not %s",
1552                 LRMD_MIN_PROTOCOL_VERSION, protocol_version);
1553         rc = -EPROTO;
1554     }
1555 
1556     if (crm_is_true(is_ipc_provider)) {
1557 #ifdef PCMK__COMPILE_REMOTE
1558         if ((client->remote != NULL) && client->remote->tls_handshake_complete) {
1559             // This is a remote connection from a cluster node's controller
1560             ipc_proxy_add_provider(client);
1561         } else {
1562             rc = -EACCES;
1563         }
1564 #else
1565         rc = -EPROTONOSUPPORT;
1566 #endif
1567     }
1568 
1569     *reply = create_lrmd_reply(__func__, rc, call_id);
1570     crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
1571     crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
1572     crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1573 
1574     return rc;
1575 }
1576 
1577 static int
1578 process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1579 {
1580     int rc = pcmk_ok;
1581     lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1582     lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1583 
1584     if (dup &&
1585         pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1586         pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1587 
1588         crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1589         free_rsc(rsc);
1590         return rc;
1591     }
1592 
1593     g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1594     crm_info("Cached agent information for '%s'", rsc->rsc_id);
1595     return rc;
1596 }
1597 
1598 static xmlNode *
1599 process_lrmd_get_rsc_info(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1600 {
1601     int rc = pcmk_ok;
1602     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1603     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1604     xmlNode *reply = NULL;
1605     lrmd_rsc_t *rsc = NULL;
1606 
1607     if (rsc_id == NULL) {
1608         rc = -ENODEV;
1609     } else {
1610         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1611         if (rsc == NULL) {
1612             crm_info("Agent information for '%s' not in cache", rsc_id);
1613             rc = -ENODEV;
1614         }
1615     }
1616 
1617     reply = create_lrmd_reply(__func__, rc, call_id);
1618     if (rsc) {
1619         crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
1620         crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
1621         crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
1622         crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
1623     }
1624     return reply;
1625 }
1626 
1627 static int
1628 process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
     /* [previous][next][first][last][top][bottom][index][help] */
1629                             xmlNode *request)
1630 {
1631     int rc = pcmk_ok;
1632     lrmd_rsc_t *rsc = NULL;
1633     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1634     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1635 
1636     if (!rsc_id) {
1637         return -ENODEV;
1638     }
1639 
1640     rsc = g_hash_table_lookup(rsc_list, rsc_id);
1641     if (rsc == NULL) {
1642         crm_info("Ignoring unregistration of resource '%s', which is not registered",
1643                  rsc_id);
1644         return pcmk_ok;
1645     }
1646 
1647     if (rsc->active) {
1648         /* let the caller know there are still active ops on this rsc to watch for */
1649         crm_trace("Operation (0x%p) still in progress for unregistered resource %s",
1650                   rsc->active, rsc_id);
1651         rc = -EINPROGRESS;
1652     }
1653 
1654     g_hash_table_remove(rsc_list, rsc_id);
1655 
1656     return rc;
1657 }
1658 
1659 static int
1660 process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1661 {
1662     lrmd_rsc_t *rsc = NULL;
1663     lrmd_cmd_t *cmd = NULL;
1664     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1665     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1666     int call_id;
1667 
1668     if (!rsc_id) {
1669         return -EINVAL;
1670     }
1671     if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1672         crm_info("Resource '%s' not found (%d active resources)",
1673                  rsc_id, g_hash_table_size(rsc_list));
1674         return -ENODEV;
1675     }
1676 
1677     cmd = create_lrmd_cmd(request, client);
1678     call_id = cmd->call_id;
1679 
1680     /* Don't reference cmd after handing it off to be scheduled.
1681      * The cmd could get merged and freed. */
1682     schedule_lrmd_cmd(rsc, cmd);
1683 
1684     return call_id;
1685 }
1686 
1687 static int
1688 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1689 {
1690     GList *gIter = NULL;
1691     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1692 
1693     /* How to cancel an action.
1694      * 1. Check pending ops list, if it hasn't been handed off
1695      *    to the service library or stonith recurring list remove
1696      *    it there and that will stop it.
1697      * 2. If it isn't in the pending ops list, then it's either a
1698      *    recurring op in the stonith recurring list, or the service
1699      *    library's recurring list.  Stop it there
1700      * 3. If not found in any lists, then this operation has either
1701      *    been executed already and is not a recurring operation, or
1702      *    never existed.
1703      */
1704     if (!rsc) {
1705         return -ENODEV;
1706     }
1707 
1708     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1709         lrmd_cmd_t *cmd = gIter->data;
1710 
1711         if (action_matches(cmd, action, interval_ms)) {
1712             cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1713             cmd_finalize(cmd, rsc);
1714             return pcmk_ok;
1715         }
1716     }
1717 
1718     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1719         /* The service library does not handle stonith operations.
1720          * We have to handle recurring stonith operations ourselves. */
1721         for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1722             lrmd_cmd_t *cmd = gIter->data;
1723 
1724             if (action_matches(cmd, action, interval_ms)) {
1725                 cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1726                 if (rsc->active != cmd) {
1727                     cmd_finalize(cmd, rsc);
1728                 }
1729                 return pcmk_ok;
1730             }
1731         }
1732     } else if (services_action_cancel(rsc_id,
1733                                       normalize_action_name(rsc, action),
1734                                       interval_ms) == TRUE) {
1735         /* The service library will tell the action_complete callback function
1736          * this action was cancelled, which will destroy the cmd and remove
1737          * it from the recurring_op list. Do not do that in this function
1738          * if the service library says it cancelled it. */
1739         return pcmk_ok;
1740     }
1741 
1742     return -EOPNOTSUPP;
1743 }
1744 
1745 static void
1746 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1747 {
1748     GList *cmd_list = NULL;
1749     GList *cmd_iter = NULL;
1750 
1751     /* Notice a copy of each list is created when concat is called.
1752      * This prevents odd behavior from occurring when the cmd_list
1753      * is iterated through later on.  It is possible the cancel_op
1754      * function may end up modifying the recurring_ops and pending_ops
1755      * lists.  If we did not copy those lists, our cmd_list iteration
1756      * could get messed up.*/
1757     if (rsc->recurring_ops) {
1758         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1759     }
1760     if (rsc->pending_ops) {
1761         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1762     }
1763     if (!cmd_list) {
1764         return;
1765     }
1766 
1767     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1768         lrmd_cmd_t *cmd = cmd_iter->data;
1769 
1770         if (cmd->interval_ms == 0) {
1771             continue;
1772         }
1773 
1774         if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1775             continue;
1776         }
1777 
1778         cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1779     }
1780     /* frees only the copied list data, not the cmds */
1781     g_list_free(cmd_list);
1782 }
1783 
1784 static int
1785 process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1786 {
1787     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1788     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1789     const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
1790     guint interval_ms = 0;
1791 
1792     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
1793 
1794     if (!rsc_id || !action) {
1795         return -EINVAL;
1796     }
1797 
1798     return cancel_op(rsc_id, action, interval_ms);
1799 }
1800 
1801 static void
1802 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1803 {
1804     xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
1805 
1806     crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
1807     for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1808         lrmd_cmd_t *cmd = item->data;
1809         xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
1810 
1811         crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
1812                     (cmd->real_action? cmd->real_action : cmd->action));
1813         crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
1814         crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
1815     }
1816 }
1817 
1818 static xmlNode *
1819 process_lrmd_get_recurring(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1820 {
1821     int rc = pcmk_ok;
1822     const char *rsc_id = NULL;
1823     lrmd_rsc_t *rsc = NULL;
1824     xmlNode *reply = NULL;
1825     xmlNode *rsc_xml = NULL;
1826 
1827     // Resource ID is optional
1828     rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
1829     if (rsc_xml) {
1830         rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
1831     }
1832     if (rsc_xml) {
1833         rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1834     }
1835 
1836     // If resource ID is specified, resource must exist
1837     if (rsc_id != NULL) {
1838         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1839         if (rsc == NULL) {
1840             crm_info("Resource '%s' not found (%d active resources)",
1841                      rsc_id, g_hash_table_size(rsc_list));
1842             rc = -ENODEV;
1843         }
1844     }
1845 
1846     reply = create_lrmd_reply(__func__, rc, call_id);
1847 
1848     // If resource ID is not specified, check all resources
1849     if (rsc_id == NULL) {
1850         GHashTableIter iter;
1851         char *key = NULL;
1852 
1853         g_hash_table_iter_init(&iter, rsc_list);
1854         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1855                                       (gpointer *) &rsc)) {
1856             add_recurring_op_xml(reply, rsc);
1857         }
1858     } else if (rsc) {
1859         add_recurring_op_xml(reply, rsc);
1860     }
1861     return reply;
1862 }
1863 
1864 void
1865 process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1866 {
1867     int rc = pcmk_ok;
1868     int call_id = 0;
1869     const char *op = crm_element_value(request, F_LRMD_OPERATION);
1870     int do_reply = 0;
1871     int do_notify = 0;
1872     xmlNode *reply = NULL;
1873 
1874     /* Certain IPC commands may be done only by privileged users (i.e. root or
1875      * hacluster), because they would otherwise provide a means of bypassing
1876      * ACLs.
1877      */
1878     bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
1879 
1880     crm_trace("Processing %s operation from %s", op, client->id);
1881     crm_element_value_int(request, F_LRMD_CALLID, &call_id);
1882 
1883     if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
1884 #ifdef PCMK__COMPILE_REMOTE
1885         if (allowed) {
1886             ipc_proxy_forward_client(client, request);
1887         } else {
1888             rc = -EACCES;
1889         }
1890 #else
1891         rc = -EPROTONOSUPPORT;
1892 #endif
1893         do_reply = 1;
1894     } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
1895         rc = process_lrmd_signon(client, request, call_id, &reply);
1896         do_reply = 1;
1897     } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
1898         if (allowed) {
1899             rc = process_lrmd_rsc_register(client, id, request);
1900             do_notify = 1;
1901         } else {
1902             rc = -EACCES;
1903         }
1904         do_reply = 1;
1905     } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
1906         if (allowed) {
1907             reply = process_lrmd_get_rsc_info(request, call_id);
1908         } else {
1909             rc = -EACCES;
1910         }
1911         do_reply = 1;
1912     } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
1913         if (allowed) {
1914             rc = process_lrmd_rsc_unregister(client, id, request);
1915             /* don't notify anyone about failed un-registers */
1916             if (rc == pcmk_ok || rc == -EINPROGRESS) {
1917                 do_notify = 1;
1918             }
1919         } else {
1920             rc = -EACCES;
1921         }
1922         do_reply = 1;
1923     } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
1924         if (allowed) {
1925             rc = process_lrmd_rsc_exec(client, id, request);
1926         } else {
1927             rc = -EACCES;
1928         }
1929         do_reply = 1;
1930     } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
1931         if (allowed) {
1932             rc = process_lrmd_rsc_cancel(client, id, request);
1933         } else {
1934             rc = -EACCES;
1935         }
1936         do_reply = 1;
1937     } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
1938         do_notify = 1;
1939         do_reply = 1;
1940     } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
1941         if (allowed) {
1942             xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
1943 
1944             CRM_LOG_ASSERT(data != NULL);
1945             pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
1946         } else {
1947             rc = -EACCES;
1948         }
1949     } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
1950         if (allowed) {
1951             rc = process_lrmd_alert_exec(client, id, request);
1952         } else {
1953             rc = -EACCES;
1954         }
1955         do_reply = 1;
1956     } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
1957         if (allowed) {
1958             reply = process_lrmd_get_recurring(request, call_id);
1959         } else {
1960             rc = -EACCES;
1961         }
1962         do_reply = 1;
1963     } else {
1964         rc = -EOPNOTSUPP;
1965         do_reply = 1;
1966         crm_err("Unknown IPC request '%s' from client %s",
1967                 op, pcmk__client_name(client));
1968     }
1969 
1970     if (rc == -EACCES) {
1971         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
1972                  op, pcmk__client_name(client));
1973     }
1974 
1975     crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
1976               op, client->id, rc, do_reply, do_notify);
1977 
1978     if (do_reply) {
1979         int send_rc = pcmk_rc_ok;
1980 
1981         if (reply == NULL) {
1982             reply = create_lrmd_reply(__func__, rc, call_id);
1983         }
1984         send_rc = lrmd_server_send_reply(client, id, reply);
1985         free_xml(reply);
1986         if (send_rc != pcmk_rc_ok) {
1987             crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d",
1988                      pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc);
1989         }
1990     }
1991 
1992     if (do_notify) {
1993         send_generic_notify(rc, request);
1994     }
1995 }

/* [previous][next][first][last][top][bottom][index][help] */