root/daemons/execd/execd_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. time_is_set
  2. get_current_time
  3. time_diff_ms
  4. cmd_original_times
  5. log_finished
  6. log_execute
  7. normalize_action_name
  8. build_rsc_from_xml
  9. create_lrmd_cmd
  10. stop_recurring_timer
  11. free_lrmd_cmd
  12. stonith_recurring_op_helper
  13. start_recurring_timer
  14. start_delay_helper
  15. merge_recurring_duplicate
  16. schedule_lrmd_cmd
  17. create_lrmd_reply
  18. send_client_notify
  19. send_cmd_complete_notify
  20. send_generic_notify
  21. cmd_reset
  22. cmd_finalize
  23. ocf2uniform_rc
  24. stonith2uniform_rc
  25. nagios2uniform_rc
  26. get_uniform_rc
  27. action_get_uniform_rc
  28. notify_one_client
  29. notify_of_new_client
  30. parse_exit_reason
  31. client_disconnect_cleanup
  32. action_complete
  33. stonith_rc2status
  34. stonith_action_complete
  35. lrmd_stonith_callback
  36. stonith_connection_failed
  37. execd_stonith_start
  38. execd_stonith_stop
  39. execd_stonith_monitor
  40. lrmd_rsc_execute_stonith
  41. lrmd_rsc_execute_service_lib
  42. lrmd_rsc_execute
  43. lrmd_rsc_dispatch
  44. free_rsc
  45. process_lrmd_signon
  46. process_lrmd_rsc_register
  47. process_lrmd_get_rsc_info
  48. process_lrmd_rsc_unregister
  49. process_lrmd_rsc_exec
  50. cancel_op
  51. cancel_all_recurring
  52. process_lrmd_rsc_cancel
  53. add_recurring_op_xml
  54. process_lrmd_get_recurring
  55. process_lrmd_message

   1 /*
   2  * Copyright 2012-2020 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <glib.h>
  13 
  14 // Check whether we have a high-resolution monotonic clock
  15 #undef PCMK__TIME_USE_CGT
  16 #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC)
  17 #  define PCMK__TIME_USE_CGT
  18 #  include <time.h>  /* clock_gettime */
  19 #endif
  20 
  21 #include <unistd.h>
  22 
  23 #include <crm/crm.h>
  24 #include <crm/services.h>
  25 #include <crm/common/mainloop.h>
  26 #include <crm/common/ipc.h>
  27 #include <crm/common/ipc_internal.h>
  28 #include <crm/msg_xml.h>
  29 
  30 #include "pacemaker-execd.h"
  31 
  32 #define EXIT_REASON_MAX_LEN 128  // Initial length limit used when parsing an exit reason
  33 
  34 GHashTable *rsc_list = NULL;  // Resource objects (lrmd_rsc_t), looked up by resource ID
  35 
  36 typedef struct lrmd_cmd_s {  // One executor command (a requested resource action)
  37     int timeout;             // Action timeout, read from F_LRMD_TIMEOUT
  38     guint interval_ms;       // Recurrence interval; 0 for a one-shot command
  39     int start_delay;         // Start delay; used as the interval of the delay timer
  40     int timeout_orig;        // Timeout as originally requested; restored before notifying
  41 
  42     int call_id;             // Call ID read from F_LRMD_CALLID, echoed in notifications
  43     int exec_rc;             // Result code, reported to clients as F_LRMD_EXEC_RC
  44     int lrmd_op_status;      // Operation status, reported as F_LRMD_OP_STATUS
  45 
  46     int call_opts;           // Call options read from F_LRMD_CALLOPTS (lrmd_opt_* bits)
  47     /* Timer ids, must be removed on cmd destruction. */
  48     int delay_id;            // GLib source ID of the start-delay timer (0 if none)
  49     int stonith_recurring_id;    // GLib source ID of the interval timer (0 if none)
  50 
  51     int rsc_deleted;         // Set to 1 when finalized without a resource object
  52 
  53     int service_flags;       // Flags such as SVC_ACTION_LEAVE_GROUP
  54 
  55     char *client_id;         // Copy of the requesting client's ID
  56     char *origin;            // Copied from F_LRMD_ORIGIN
  57     char *rsc_id;            // Resource ID, copied from F_LRMD_RSC_ID
  58     char *action;            // Requested action name, copied from F_LRMD_RSC_ACTION
  59     char *real_action;       // If set, reported to clients instead of action
  60     char *exit_reason;       // Reported as F_LRMD_RSC_EXIT_REASON
  61     char *output;            // Reported as F_LRMD_RSC_OUTPUT
  62     char *userdata_str;      // Copied from F_LRMD_RSC_USERDATA_STR, echoed in notifications
  63 
  64     /* We can track operation queue time and run time, to be saved with the CIB
  65      * resource history (and displayed in cluster status). We need
  66      * high-resolution monotonic time for this purpose, so we use
  67      * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature
  68      * is disabled).
  69      *
  70      * However, we also need epoch timestamps for recording the time the command
  71      * last ran and the time its return value last changed, for use in time
  72      * displays (as opposed to interval calculations). We keep time_t values for
  73      * this purpose.
  74      *
  75      * The last run time is used for both purposes, so we keep redundant
  76      * monotonic and epoch values for this. Technically the two could represent
  77      * different times, but since time_t has only second resolution and the
  78      * values are used for distinct purposes, that is not significant.
  79      */
  80 #ifdef PCMK__TIME_USE_CGT
  81     /* Recurring and systemd operations may involve more than one executor
  82      * command per operation, so they need info about the original and the most
  83      * recent.
  84      */
  85     struct timespec t_first_run;    // When op first ran
  86     struct timespec t_run;          // When op most recently ran
  87     struct timespec t_first_queue;  // When op was first queued
  88     struct timespec t_queue;        // When op was most recently queued
  89 #endif
  90     time_t epoch_last_run;          // Epoch timestamp of when op last ran
  91     time_t epoch_rcchange;          // Epoch timestamp of when rc last changed
  92 
  93     int first_notify_sent;          // Nonzero once a completion notification was sent
  94     int last_notify_rc;             // exec_rc carried by the most recent notification
  95     int last_notify_op_status;      // lrmd_op_status carried by the most recent notification
  96     int last_pid;                   // If nonzero, logged as the PID when the action finishes
  97 
  98     GHashTable *params;             // Built by xml2list() from the resource XML
  99 } lrmd_cmd_t;
 100 
 101 static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc);
 102 static gboolean lrmd_rsc_dispatch(gpointer user_data);
 103 static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id);
 104 
 105 #ifdef PCMK__TIME_USE_CGT
 106 
 107 /*!
 108  * \internal
 109  * \brief Check whether a struct timespec has been set
 110  *
 111  * \param[in] timespec  Time to check
 112  *
 113  * \return true if timespec has been set (i.e. is nonzero), false otherwise
 114  */
 115 static inline bool
 116 time_is_set(struct timespec *timespec)
     /* [previous][next][first][last][top][bottom][index][help] */
 117 {
 118     return (timespec != NULL) &&
 119            ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0));
 120 }
 121 
 122 /*
 123  * \internal
 124  * \brief Set a timespec (and its original if unset) to the current time
 125  *
 126  * \param[out] t_current  Where to store current time
 127  * \param[out] t_orig     Where to copy t_current if unset
 128  */
 129 static void
 130 get_current_time(struct timespec *t_current, struct timespec *t_orig)
     /* [previous][next][first][last][top][bottom][index][help] */
 131 {
 132     clock_gettime(CLOCK_MONOTONIC, t_current);
 133     if ((t_orig != NULL) && !time_is_set(t_orig)) {
 134         *t_orig = *t_current;
 135     }
 136 }
 137 
 138 /*!
 139  * \internal
 140  * \brief Return difference between two times in milliseconds
 141  *
 142  * \param[in] now  More recent time (or NULL to use current time)
 143  * \param[in] old  Earlier time
 144  *
 145  * \return milliseconds difference (or 0 if old is NULL or unset)
 146  *
 147  * \note Can overflow on 32bit machines when the differences is around
 148  *       24 days or more.
 149  */
 150 static int
 151 time_diff_ms(struct timespec *now, struct timespec *old)
     /* [previous][next][first][last][top][bottom][index][help] */
 152 {
 153     int diff_ms = 0;
 154 
 155     if (time_is_set(old)) {
 156         struct timespec local_now = { 0, };
 157 
 158         if (now == NULL) {
 159             clock_gettime(CLOCK_MONOTONIC, &local_now);
 160             now = &local_now;
 161         }
 162         diff_ms = (now->tv_sec - old->tv_sec) * 1000
 163                   + (now->tv_nsec - old->tv_nsec) / 1000000;
 164     }
 165     return diff_ms;
 166 }
 167 
 168 /*!
 169  * \internal
 170  * \brief Reset a command's operation times to their original values.
 171  *
 172  * Reset a command's run and queued timestamps to the timestamps of the original
 173  * command, so we report the entire time since then and not just the time since
 174  * the most recent command (for recurring and systemd operations).
 175  *
 176  * \param[in] cmd  Executor command object to reset
 177  *
 178  * \note It's not obvious what the queued time should be for a systemd
 179  *       start/stop operation, which might go like this:
 180  *         initial command queued 5ms, runs 3s
 181  *         monitor command queued 10ms, runs 10s
 182  *         monitor command queued 10ms, runs 10s
 183  *       Is the queued time for that operation 5ms, 10ms or 25ms? The current
 184  *       implementation will report 5ms. If it's 25ms, then we need to
 185  *       subtract 20ms from the total exec time so as not to count it twice.
 186  *       We can implement that later if it matters to anyone ...
 187  */
 188 static void
 189 cmd_original_times(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 190 {
 191     cmd->t_run = cmd->t_first_run;
 192     cmd->t_queue = cmd->t_first_queue;
 193 }
 194 #endif
 195 
 196 static void
 197 log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time)
     /* [previous][next][first][last][top][bottom][index][help] */
 198 {
 199     char pid_str[32] = { 0, };
 200     int log_level = LOG_INFO;
 201 
 202     if (cmd->last_pid) {
 203         snprintf(pid_str, 32, "%d", cmd->last_pid);
 204     }
 205 
 206     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 207         log_level = LOG_DEBUG;
 208     }
 209 #ifdef PCMK__TIME_USE_CGT
 210     do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d"
 211                " (execution time %dms, queue time %dms)",
 212                cmd->rsc_id, cmd->action, cmd->call_id,
 213                (cmd->last_pid? ", PID " : ""), pid_str, cmd->exec_rc,
 214                exec_time, queue_time);
 215 #else
 216     do_crm_log(log_level, "%s %s (call %d%s%s) exited with status %d"
 217                cmd->rsc_id, cmd->action, cmd->call_id,
 218                (cmd->last_pid? ", PID " : ""), pid_str, cmd->exec_rc);
 219 #endif
 220 }
 221 
 222 static void
 223 log_execute(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 224 {
 225     int log_level = LOG_INFO;
 226 
 227     if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 228         log_level = LOG_DEBUG;
 229     }
 230 
 231     do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d",
 232                cmd->rsc_id, cmd->action, cmd->call_id);
 233 }
 234 
 235 static const char *
 236 normalize_action_name(lrmd_rsc_t * rsc, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 237 {
 238     if (pcmk__str_eq(action, "monitor", pcmk__str_casei) &&
 239         pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) {
 240         return "status";
 241     }
 242     return action;
 243 }
 244 
 245 static lrmd_rsc_t *
 246 build_rsc_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 247 {
 248     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 249     lrmd_rsc_t *rsc = NULL;
 250 
 251     rsc = calloc(1, sizeof(lrmd_rsc_t));
 252 
 253     crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts);
 254 
 255     rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 256     rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS);
 257     rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER);
 258     rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE);
 259     rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc);
 260     rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running"
 261     return rsc;
 262 }
 263 
 264 static lrmd_cmd_t *
 265 create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client)
     /* [previous][next][first][last][top][bottom][index][help] */
 266 {
 267     int call_options = 0;
 268     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR);
 269     lrmd_cmd_t *cmd = NULL;
 270 
 271     cmd = calloc(1, sizeof(lrmd_cmd_t));
 272 
 273     crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options);
 274     cmd->call_opts = call_options;
 275     cmd->client_id = strdup(client->id);
 276 
 277     crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id);
 278     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms);
 279     crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout);
 280     crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay);
 281     cmd->timeout_orig = cmd->timeout;
 282 
 283     cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN);
 284     cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION);
 285     cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR);
 286     cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID);
 287 
 288     cmd->params = xml2list(rsc_xml);
 289 
 290     if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) {
 291         crm_debug("Setting flag to leave pid group on timeout and "
 292                   "only kill action pid for " PCMK__OP_FMT,
 293                   cmd->rsc_id, cmd->action, cmd->interval_ms);
 294         cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__,
 295                                                 LOG_TRACE, "Action",
 296                                                 cmd->action, 0,
 297                                                 SVC_ACTION_LEAVE_GROUP,
 298                                                 "SVC_ACTION_LEAVE_GROUP");
 299     }
 300     return cmd;
 301 }
 302 
 303 static void
 304 stop_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 305 {
 306     if (cmd) {
 307         if (cmd->stonith_recurring_id) {
 308             g_source_remove(cmd->stonith_recurring_id);
 309         }
 310         cmd->stonith_recurring_id = 0;
 311     }
 312 }
 313 
 314 static void
 315 free_lrmd_cmd(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 316 {
 317     stop_recurring_timer(cmd);
 318     if (cmd->delay_id) {
 319         g_source_remove(cmd->delay_id);
 320     }
 321     if (cmd->params) {
 322         g_hash_table_destroy(cmd->params);
 323     }
 324     free(cmd->origin);
 325     free(cmd->action);
 326     free(cmd->real_action);
 327     free(cmd->userdata_str);
 328     free(cmd->rsc_id);
 329     free(cmd->output);
 330     free(cmd->exit_reason);
 331     free(cmd->client_id);
 332     free(cmd);
 333 }
 334 
 335 static gboolean
 336 stonith_recurring_op_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 337 {
 338     lrmd_cmd_t *cmd = data;
 339     lrmd_rsc_t *rsc;
 340 
 341     cmd->stonith_recurring_id = 0;
 342 
 343     if (!cmd->rsc_id) {
 344         return FALSE;
 345     }
 346 
 347     rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
 348 
 349     CRM_ASSERT(rsc != NULL);
 350     /* take it out of recurring_ops list, and put it in the pending ops
 351      * to be executed */
 352     rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 353     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 354 #ifdef PCMK__TIME_USE_CGT
 355     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 356 #endif
 357     mainloop_set_trigger(rsc->work);
 358 
 359     return FALSE;
 360 }
 361 
 362 static inline void
 363 start_recurring_timer(lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 364 {
 365     if (cmd && (cmd->interval_ms > 0)) {
 366         cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms,
 367                                                   stonith_recurring_op_helper,
 368                                                   cmd);
 369     }
 370 }
 371 
 372 static gboolean
 373 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 374 {
 375     lrmd_cmd_t *cmd = data;
 376     lrmd_rsc_t *rsc = NULL;
 377 
 378     cmd->delay_id = 0;
 379     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 380 
 381     if (rsc) {
 382         mainloop_set_trigger(rsc->work);
 383     }
 384 
 385     return FALSE;
 386 }
 387 
 388 static gboolean
 389 merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 390 {
 391     GListPtr gIter = NULL;
 392     lrmd_cmd_t * dup = NULL;
 393     gboolean dup_pending = FALSE;
 394 
 395     if (cmd->interval_ms == 0) {
 396         return 0;
 397     }
 398 
 399     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
 400         dup = gIter->data;
 401         if (pcmk__str_eq(cmd->action, dup->action, pcmk__str_casei)
 402             && (cmd->interval_ms == dup->interval_ms)) {
 403             dup_pending = TRUE;
 404             goto merge_dup;
 405         }
 406     }
 407 
 408     /* if dup is in recurring_ops list, that means it has already executed
 409      * and is in the interval loop. we can't just remove it in this case. */
 410     for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
 411         dup = gIter->data;
 412         if (pcmk__str_eq(cmd->action, dup->action, pcmk__str_casei)
 413             && (cmd->interval_ms == dup->interval_ms)) {
 414             goto merge_dup;
 415         }
 416     }
 417 
 418     return FALSE;
 419 merge_dup:
 420 
 421 
 422     /* This should not occur. If it does, we need to investigate how something
 423      * like this is possible in the controller.
 424      */
 425     crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT
 426              "), merging with previous op entry",
 427              rsc->rsc_id, normalize_action_name(rsc, dup->action),
 428              dup->interval_ms);
 429 
 430     /* merge */
 431     dup->first_notify_sent = 0;
 432     free(dup->userdata_str);
 433     dup->userdata_str = cmd->userdata_str;
 434     cmd->userdata_str = NULL;
 435     dup->call_id = cmd->call_id;
 436 
 437     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
 438         /* if we are waiting for the next interval, kick it off now */
 439         if (dup_pending == TRUE) {
 440             stop_recurring_timer(cmd);
 441             stonith_recurring_op_helper(cmd);
 442         }
 443 
 444     } else if (dup_pending == FALSE) {
 445         /* if we've already handed this to the service lib, kick off an early execution */
 446         services_action_kick(rsc->rsc_id,
 447                              normalize_action_name(rsc, dup->action),
 448                              dup->interval_ms);
 449     }
 450     free_lrmd_cmd(cmd);
 451 
 452     return TRUE;
 453 }
 454 
 455 static void
 456 schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 457 {
 458     gboolean dup_processed = FALSE;
 459     CRM_CHECK(cmd != NULL, return);
 460     CRM_CHECK(rsc != NULL, return);
 461 
 462     crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id);
 463 
 464     dup_processed = merge_recurring_duplicate(rsc, cmd);
 465     if (dup_processed) {
 466         /* duplicate recurring cmd found, cmds merged */
 467         return;
 468     }
 469 
 470     /* The controller expects the executor to automatically cancel
 471      * recurring operations before a resource stops.
 472      */
 473     if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 474         cancel_all_recurring(rsc, NULL);
 475     }
 476 
 477     rsc->pending_ops = g_list_append(rsc->pending_ops, cmd);
 478 #ifdef PCMK__TIME_USE_CGT
 479     get_current_time(&(cmd->t_queue), &(cmd->t_first_queue));
 480 #endif
 481     mainloop_set_trigger(rsc->work);
 482 
 483     if (cmd->start_delay) {
 484         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
 485     }
 486 }
 487 
 488 static xmlNode *
 489 create_lrmd_reply(const char *origin, int rc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 490 {
 491     xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY);
 492 
 493     crm_xml_add(reply, F_LRMD_ORIGIN, origin);
 494     crm_xml_add_int(reply, F_LRMD_RC, rc);
 495     crm_xml_add_int(reply, F_LRMD_CALLID, call_id);
 496     return reply;
 497 }
 498 
 499 static void
 500 send_client_notify(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 501 {
 502     xmlNode *update_msg = user_data;
 503     pcmk__client_t *client = value;
 504     int rc;
 505     int log_level = LOG_WARNING;
 506     const char *msg = NULL;
 507 
 508     CRM_CHECK(client != NULL, return);
 509     if (client->name == NULL) {
 510         crm_trace("Skipping notification to client without name");
 511         return;
 512     }
 513     if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) {
 514         /* We only want to notify clients of the executor IPC API. If we are
 515          * running as Pacemaker Remote, we may have clients proxied to other
 516          * IPC services in the cluster, so skip those.
 517          */
 518         crm_trace("Skipping executor API notification to %s IPC client",
 519                   client->name);
 520         return;
 521     }
 522 
 523     rc = lrmd_server_send_notify(client, update_msg);
 524     if (rc == pcmk_rc_ok) {
 525         return;
 526     }
 527 
 528     switch (rc) {
 529         case ENOTCONN:
 530         case EPIPE: // Client exited without waiting for notification
 531             log_level = LOG_INFO;
 532             msg = "Disconnected";
 533             break;
 534 
 535         default:
 536             msg = pcmk_rc_str(rc);
 537             break;
 538     }
 539     do_crm_log(log_level,
 540                "Could not notify client %s/%s: %s " CRM_XS " rc=%d",
 541                client->name, client->id, msg, rc);
 542 }
 543 
 544 static void
 545 send_cmd_complete_notify(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 546 {
 547     xmlNode *notify = NULL;
 548 
 549 #ifdef PCMK__TIME_USE_CGT
 550     int exec_time = time_diff_ms(NULL, &(cmd->t_run));
 551     int queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue));
 552 
 553     log_finished(cmd, exec_time, queue_time);
 554 #else
 555     log_finished(cmd, 0, 0);
 556 #endif
 557 
 558     /* if the first notify result for a cmd has already been sent earlier, and the
 559      * the option to only send notifies on result changes is set. Check to see
 560      * if the last result is the same as the new one. If so, suppress this update */
 561     if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) {
 562         if (cmd->last_notify_rc == cmd->exec_rc &&
 563             cmd->last_notify_op_status == cmd->lrmd_op_status) {
 564 
 565             /* only send changes */
 566             return;
 567         }
 568 
 569     }
 570 
 571     cmd->first_notify_sent = 1;
 572     cmd->last_notify_rc = cmd->exec_rc;
 573     cmd->last_notify_op_status = cmd->lrmd_op_status;
 574 
 575     notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 576 
 577     crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 578     crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout);
 579     crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
 580     crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay);
 581     crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc);
 582     crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status);
 583     crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id);
 584     crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted);
 585 
 586     crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME,
 587                    (long long) cmd->epoch_last_run);
 588     crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME,
 589                    (long long) cmd->epoch_rcchange);
 590 #ifdef PCMK__TIME_USE_CGT
 591     crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time);
 592     crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time);
 593 #endif
 594 
 595     crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC);
 596     crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id);
 597     if(cmd->real_action) {
 598         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action);
 599     } else {
 600         crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action);
 601     }
 602     crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str);
 603     crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output);
 604     crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason);
 605 
 606     if (cmd->params) {
 607         char *key = NULL;
 608         char *value = NULL;
 609         GHashTableIter iter;
 610 
 611         xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS);
 612 
 613         g_hash_table_iter_init(&iter, cmd->params);
 614         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 615             hash2smartfield((gpointer) key, (gpointer) value, args);
 616         }
 617     }
 618     if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) {
 619         pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id);
 620 
 621         if (client) {
 622             send_client_notify(client->id, client, notify);
 623         }
 624     } else {
 625         pcmk__foreach_ipc_client(send_client_notify, notify);
 626     }
 627 
 628     free_xml(notify);
 629 }
 630 
 631 static void
 632 send_generic_notify(int rc, xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
 633 {
 634     if (pcmk__ipc_client_count() != 0) {
 635         int call_id = 0;
 636         xmlNode *notify = NULL;
 637         xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
 638         const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
 639         const char *op = crm_element_value(request, F_LRMD_OPERATION);
 640 
 641         crm_element_value_int(request, F_LRMD_CALLID, &call_id);
 642 
 643         notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 644         crm_xml_add(notify, F_LRMD_ORIGIN, __func__);
 645         crm_xml_add_int(notify, F_LRMD_RC, rc);
 646         crm_xml_add_int(notify, F_LRMD_CALLID, call_id);
 647         crm_xml_add(notify, F_LRMD_OPERATION, op);
 648         crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id);
 649 
 650         pcmk__foreach_ipc_client(send_client_notify, notify);
 651 
 652         free_xml(notify);
 653     }
 654 }
 655 
 656 static void
 657 cmd_reset(lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 658 {
 659     cmd->lrmd_op_status = 0;
 660     cmd->last_pid = 0;
 661 #ifdef PCMK__TIME_USE_CGT
 662     memset(&cmd->t_run, 0, sizeof(cmd->t_run));
 663     memset(&cmd->t_queue, 0, sizeof(cmd->t_queue));
 664 #endif
 665     cmd->epoch_last_run = 0;
 666     free(cmd->exit_reason);
 667     cmd->exit_reason = NULL;
 668     free(cmd->output);
 669     cmd->output = NULL;
 670 }
 671 
 672 static void
 673 cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 674 {
 675     crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action,
 676               rsc ? rsc->active : NULL, cmd);
 677 
 678     if (rsc && (rsc->active == cmd)) {
 679         rsc->active = NULL;
 680         mainloop_set_trigger(rsc->work);
 681     }
 682 
 683     if (!rsc) {
 684         cmd->rsc_deleted = 1;
 685     }
 686 
 687     /* reset original timeout so client notification has correct information */
 688     cmd->timeout = cmd->timeout_orig;
 689 
 690     send_cmd_complete_notify(cmd);
 691 
 692     if (cmd->interval_ms && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) {
 693         if (rsc) {
 694             rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd);
 695             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 696         }
 697         free_lrmd_cmd(cmd);
 698     } else if (cmd->interval_ms == 0) {
 699         if (rsc) {
 700             rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd);
 701         }
 702         free_lrmd_cmd(cmd);
 703     } else {
 704         /* Clear all the values pertaining just to the last iteration of a recurring op. */
 705         cmd_reset(cmd);
 706     }
 707 }
 708 
 709 static int
 710 ocf2uniform_rc(int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 711 {
 712     switch (rc) {
 713         case PCMK_OCF_DEGRADED:
 714         case PCMK_OCF_DEGRADED_MASTER:
 715             break;
 716         default:
 717             if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER)
 718                 return PCMK_OCF_UNKNOWN_ERROR;
 719     }
 720 
 721     return rc;
 722 }
 723 
 724 static int
 725 stonith2uniform_rc(const char *action, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 726 {
 727     switch (rc) {
 728         case pcmk_ok:
 729             rc = PCMK_OCF_OK;
 730             break;
 731 
 732         case -ENODEV:
 733             /* This should be possible only for probes in practice, but
 734              * interpret for all actions to be safe.
 735              */
 736             if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) {
 737                 rc = PCMK_OCF_NOT_RUNNING;
 738             } else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) {
 739                 rc = PCMK_OCF_OK;
 740             } else {
 741                 rc = PCMK_OCF_NOT_INSTALLED;
 742             }
 743             break;
 744 
 745         case -EOPNOTSUPP:
 746             rc = PCMK_OCF_UNIMPLEMENT_FEATURE;
 747             break;
 748 
 749         case -ETIME:
 750         case -ETIMEDOUT:
 751             rc = PCMK_OCF_TIMEOUT;
 752             break;
 753 
 754         default:
 755             rc = PCMK_OCF_UNKNOWN_ERROR;
 756             break;
 757     }
 758     return rc;
 759 }
 760 
 761 #if SUPPORT_NAGIOS
 762 static int
 763 nagios2uniform_rc(const char *action, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 764 {
 765     if (rc < 0) {
 766         return PCMK_OCF_UNKNOWN_ERROR;
 767     }
 768 
 769     switch (rc) {
 770         case NAGIOS_STATE_OK:
 771             return PCMK_OCF_OK;
 772         case NAGIOS_INSUFFICIENT_PRIV:
 773             return PCMK_OCF_INSUFFICIENT_PRIV;
 774         case NAGIOS_NOT_INSTALLED:
 775             return PCMK_OCF_NOT_INSTALLED;
 776         case NAGIOS_STATE_WARNING:
 777         case NAGIOS_STATE_CRITICAL:
 778         case NAGIOS_STATE_UNKNOWN:
 779         case NAGIOS_STATE_DEPENDENT:
 780         default:
 781             return PCMK_OCF_UNKNOWN_ERROR;
 782     }
 783 
 784     return PCMK_OCF_UNKNOWN_ERROR;
 785 }
 786 #endif
 787 
 788 static int
 789 get_uniform_rc(const char *standard, const char *action, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 790 {
 791     if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)) {
 792         return ocf2uniform_rc(rc);
 793     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
 794         return stonith2uniform_rc(action, rc);
 795     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 796         return rc;
 797     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_casei)) {
 798         return rc;
 799 #if SUPPORT_NAGIOS
 800     } else if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
 801         return nagios2uniform_rc(action, rc);
 802 #endif
 803     } else {
 804         return services_get_ocf_exitcode(action, rc);
 805     }
 806 }
 807 
 808 static int
 809 action_get_uniform_rc(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 810 {
 811     lrmd_cmd_t *cmd = action->cb_data;
 812     return get_uniform_rc(action->standard, cmd->action, action->rc);
 813 }
 814 
     // Context handed to notify_one_client() for each client visited
 815 struct notify_new_client_data {
 816     xmlNode *notify;            // notification XML to send to the other clients
 817     pcmk__client_t *new_client; // client that just connected (not notified itself)
 818 };
 819 
 820 static void
 821 notify_one_client(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 822 {
 823     pcmk__client_t *client = value;
 824     struct notify_new_client_data *data = user_data;
 825 
 826     if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) {
 827         send_client_notify(key, (gpointer) client, (gpointer) data->notify);
 828     }
 829 }
 830 
 831 void
 832 notify_of_new_client(pcmk__client_t *new_client)
     /* [previous][next][first][last][top][bottom][index][help] */
 833 {
 834     struct notify_new_client_data data;
 835 
 836     data.new_client = new_client;
 837     data.notify = create_xml_node(NULL, T_LRMD_NOTIFY);
 838     crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__);
 839     crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT);
 840     pcmk__foreach_ipc_client(notify_one_client, &data);
 841     free_xml(data.notify);
 842 }
 843 
 844 static char *
 845 parse_exit_reason(const char *output)
     /* [previous][next][first][last][top][bottom][index][help] */
 846 {
 847     const char *cur = NULL;
 848     const char *last = NULL;
 849     static int cookie_len = 0;
 850     char *eol = NULL;
 851     size_t reason_len = EXIT_REASON_MAX_LEN;
 852 
 853     if (output == NULL) {
 854         return NULL;
 855     }
 856 
 857     if (!cookie_len) {
 858         cookie_len = strlen(PCMK_OCF_REASON_PREFIX);
 859     }
 860 
 861     cur = strstr(output, PCMK_OCF_REASON_PREFIX);
 862     for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) {
 863         /* skip over the cookie delimiter string */
 864         cur += cookie_len;
 865         last = cur;
 866     }
 867     if (last == NULL) {
 868         return NULL;
 869     }
 870 
 871     // Truncate everything after a new line, and limit reason string size
 872     eol = strchr(last, '\n');
 873     if (eol) {
 874         reason_len = QB_MIN(reason_len, eol - last);
 875     }
 876     return strndup(last, reason_len);
 877 }
 878 
 879 void
 880 client_disconnect_cleanup(const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 881 {
 882     GHashTableIter iter;
 883     lrmd_rsc_t *rsc = NULL;
 884     char *key = NULL;
 885 
 886     g_hash_table_iter_init(&iter, rsc_list);
 887     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
 888         if (rsc->call_opts & lrmd_opt_drop_recurring) {
 889             /* This client is disconnecting, drop any recurring operations
 890              * it may have initiated on the resource */
 891             cancel_all_recurring(rsc, client_id);
 892         }
 893     }
 894 }
 895 
     /*!
      * \internal
      * \brief Handle the result of an action run via the services library
      *
      * This is the completion callback given to services_action_async(). It
      * copies the action's result into the owning command, then either
      * re-schedules the command (a systemd start/stop whose follow-up monitor
      * has not settled yet, or a failed nagios start) or records the output and
      * finalizes the command.
      *
      * \param[in] action  Completed action; its cb_data is expected to point to
      *                    the lrmd_cmd_t it was created for
      */
 896 static void
 897 action_complete(svc_action_t * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 898 {
 899     lrmd_rsc_t *rsc;
 900     lrmd_cmd_t *cmd = action->cb_data;
 901     const char *rclass = NULL;
 902 
 903 #ifdef PCMK__TIME_USE_CGT
         // Set when the command must run again instead of being finalized
 904     bool goagain = false;
 905 #endif
 906 
 907     if (!cmd) {
 908         crm_err("Completed executor action (%s) does not match any known operations",
 909                 action->id);
 910         return;
 911     }
 912 
 913 #ifdef PCMK__TIME_USE_CGT
         // Timestamp the moment the reported rc differs from the stored one
 914     if (cmd->exec_rc != action->rc) {
 915         cmd->epoch_rcchange = time(NULL);
 916     }
 917 #endif
 918 
 919     cmd->last_pid = action->pid;
 920     cmd->exec_rc = action_get_uniform_rc(action);
 921     cmd->lrmd_op_status = action->status;
         // rsc is NULL if the command has no resource ID or it is not in rsc_list
 922     rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL;
 923 
         // For "service" class resources, look up the class actually in use
 924     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) {
 925         rclass = resources_find_service_class(rsc->type);
 926     } else if(rsc) {
 927         rclass = rsc->class;
 928     }
 929 
 930 #ifdef PCMK__TIME_USE_CGT
 931     if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) {
 932         if ((cmd->exec_rc == PCMK_OCF_OK)
 933             && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) {
 934             /* systemd returns from start and stop actions after the action
 935              * begins, not after it completes. We have to jump through a few
 936              * hoops so that we don't report 'complete' to the rest of pacemaker
 937              * until it's actually done.
 938              */
 939             goagain = true;
                 // Remember the requested action; the command now runs as a monitor
 940             cmd->real_action = cmd->action;
 941             cmd->action = strdup("monitor");
 942 
 943         } else if (cmd->real_action != NULL) {
 944             // This is follow-up monitor to check whether start/stop completed
 945             if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE)
 946                 && (cmd->exec_rc == PCMK_OCF_PENDING)) {
 947                 goagain = true;
 948 
                 // A stop whose monitor still reports success is checked again
 949             } else if ((cmd->exec_rc == PCMK_OCF_OK)
 950                        && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 951                 goagain = true;
 952 
 953             } else {
 954                 int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 955                 int timeout_left = cmd->timeout_orig - time_sum;
 956 
 957                 crm_debug("%s systemd %s is now complete (elapsed=%dms, "
 958                           "remaining=%dms): %s (%d)",
 959                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left,
 960                           services_ocf_exitcode_str(cmd->exec_rc),
 961                           cmd->exec_rc);
 962                 cmd_original_times(cmd);
 963 
 964                 // Monitors may return "not running", but start/stop shouldn't
 965                 if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE)
 966                     && (cmd->exec_rc == PCMK_OCF_NOT_RUNNING)) {
 967 
 968                     if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) {
 969                         cmd->exec_rc = PCMK_OCF_UNKNOWN_ERROR;
 970                     } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) {
 971                         cmd->exec_rc = PCMK_OCF_OK;
 972                     }
 973                 }
 974             }
 975         }
 976     }
 977 #endif
 978 
 979 #if SUPPORT_NAGIOS
 980     if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) {
 981         if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei) &&
 982             (cmd->interval_ms == 0) && cmd->exec_rc == PCMK_OCF_OK) {
 983             /* Successfully executed --version for the nagios plugin */
 984             cmd->exec_rc = PCMK_OCF_NOT_RUNNING;
 985 
 986         } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) && cmd->exec_rc != PCMK_OCF_OK) {
 987 #ifdef PCMK__TIME_USE_CGT
                 // A failed nagios start is retried while time remains
 988             goagain = true;
 989 #endif
 990         }
 991     }
 992 #endif
 993 
 994 #ifdef PCMK__TIME_USE_CGT
 995     if (goagain) {
 996         int time_sum = time_diff_ms(NULL, &(cmd->t_first_run));
 997         int timeout_left = cmd->timeout_orig - time_sum;
             // Default retry delay: a tenth of the original timeout
 998         int delay = cmd->timeout_orig / 10;
 999 
             // If that does not fit, use half the remaining time (when above 20ms)
1000         if(delay >= timeout_left && timeout_left > 20) {
1001             delay = timeout_left/2;
1002         }
1003 
             // Never wait longer than 2000ms between attempts
1004         delay = QB_MIN(2000, delay);
1005         if (delay < timeout_left) {
1006             cmd->start_delay = delay;
1007             cmd->timeout = timeout_left;
1008 
1009             if(cmd->exec_rc == PCMK_OCF_OK) {
1010                 crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
1011                           cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay);
1012 
1013             } else if(cmd->exec_rc == PCMK_OCF_PENDING) {
1014                 crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
1015                          cmd->rsc_id, cmd->action, time_sum, timeout_left, delay);
1016 
1017             } else {
1018                 crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)",
1019                            cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay);
1020             }
1021 
1022             cmd_reset(cmd);
                 // lrmd_rsc_execute() runs nothing while rsc->active is set
1023             if(rsc) {
1024                 rsc->active = NULL;
1025             }
1026             schedule_lrmd_cmd(rsc, cmd);
1027 
1028             /* Don't finalize cmd, we're not done with it yet */
1029             return;
1030 
1031         } else {
1032             crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)",
1033                        cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left);
1034             cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT;
1035             cmd->exec_rc = PCMK_OCF_TIMEOUT;
1036             cmd_original_times(cmd);
1037         }
1038     }
1039 #endif
1040 
         // stderr wins over stdout as the output; exit reason comes from stderr only
1041     if (action->stderr_data) {
1042         cmd->output = strdup(action->stderr_data);
1043         cmd->exit_reason = parse_exit_reason(action->stderr_data);
1044 
1045     } else if (action->stdout_data) {
1046         cmd->output = strdup(action->stdout_data);
1047     }
1048 
1049     cmd_finalize(cmd, rsc);
1050 }
1051 
1052 /*!
1053  * \internal
1054  * \brief Determine operation status of a stonith operation
1055  *
1056  * Non-stonith resource operations get their operation status directly from the
1057  * service library, but the fencer does not have an equivalent, so we must infer
1058  * an operation status from the fencer API's return code.
1059  *
1060  * \param[in] action       Name of action performed on stonith resource
1061  * \param[in] interval_ms  Action interval
1062  * \param[in] rc           Action result from fencer
1063  *
1064  * \return Operation status corresponding to fencer API return code
1065  */
1066 static int
1067 stonith_rc2status(const char *action, guint interval_ms, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
1068 {
1069     int status = PCMK_LRM_OP_DONE;
1070 
1071     switch (rc) {
1072         case pcmk_ok:
1073             break;
1074 
1075         case -EOPNOTSUPP:
1076         case -EPROTONOSUPPORT:
1077             status = PCMK_LRM_OP_NOTSUPPORTED;
1078             break;
1079 
1080         case -ETIME:
1081         case -ETIMEDOUT:
1082             status = PCMK_LRM_OP_TIMEOUT;
1083             break;
1084 
1085         case -ENOTCONN:
1086         case -ECOMM:
1087             // Couldn't talk to fencer
1088             status = PCMK_LRM_OP_ERROR;
1089             break;
1090 
1091         case -ENODEV:
1092             // The device is not registered with the fencer
1093             status = PCMK_LRM_OP_ERROR;
1094             break;
1095 
1096         default:
1097             break;
1098     }
1099     return status;
1100 }
1101 
1102 static void
1103 stonith_action_complete(lrmd_cmd_t * cmd, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
1104 {
1105     // This can be NULL if resource was removed before command completed
1106     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id);
1107 
1108     cmd->exec_rc = stonith2uniform_rc(cmd->action, rc);
1109 
1110     /* This function may be called with status already set to cancelled, if a
1111      * pending action was aborted. Otherwise, we need to determine status from
1112      * the fencer return code.
1113      */
1114     if (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED) {
1115         cmd->lrmd_op_status = stonith_rc2status(cmd->action, cmd->interval_ms,
1116                                                 rc);
1117 
1118         // Certain successful actions change the known state of the resource
1119         if (rsc && (cmd->exec_rc == PCMK_OCF_OK)) {
1120             if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1121                 rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK
1122             } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1123                 rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING
1124             }
1125         }
1126     }
1127 
1128     /* The recurring timer should not be running at this point in any case, but
1129      * as a failsafe, stop it if it is.
1130      */
1131     stop_recurring_timer(cmd);
1132 
1133     /* Reschedule this command if appropriate. If a recurring command is *not*
1134      * rescheduled, its status must be PCMK_LRM_OP_CANCELLED, otherwise it will
1135      * not be removed from recurring_ops by cmd_finalize().
1136      */
1137     if (rsc && (cmd->interval_ms > 0)
1138         && (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED)) {
1139         start_recurring_timer(cmd);
1140     }
1141 
1142     cmd_finalize(cmd, rsc);
1143 }
1144 
1145 static void
1146 lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
1147 {
1148     stonith_action_complete(data->userdata, data->rc);
1149 }
1150 
1151 void
1152 stonith_connection_failed(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1153 {
1154     GHashTableIter iter;
1155     GList *cmd_list = NULL;
1156     GList *cmd_iter = NULL;
1157     lrmd_rsc_t *rsc = NULL;
1158     char *key = NULL;
1159 
1160     g_hash_table_iter_init(&iter, rsc_list);
1161     while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) {
1162         if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1163             /* If we registered this fence device, we don't know whether the
1164              * fencer still has the registration or not. Cause future probes to
1165              * return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or
1166              * started successfully. This is especially important if the
1167              * controller also went away (possibly due to a cluster layer
1168              * restart) and won't receive our client notification of any
1169              * monitors finalized below.
1170              */
1171             if (rsc->st_probe_rc == pcmk_ok) {
1172                 rsc->st_probe_rc = pcmk_err_generic;
1173             }
1174 
1175             if (rsc->active) {
1176                 cmd_list = g_list_append(cmd_list, rsc->active);
1177             }
1178             if (rsc->recurring_ops) {
1179                 cmd_list = g_list_concat(cmd_list, rsc->recurring_ops);
1180             }
1181             if (rsc->pending_ops) {
1182                 cmd_list = g_list_concat(cmd_list, rsc->pending_ops);
1183             }
1184             rsc->pending_ops = rsc->recurring_ops = NULL;
1185         }
1186     }
1187 
1188     if (!cmd_list) {
1189         return;
1190     }
1191 
1192     crm_err("Connection to fencer failed, finalizing %d pending operations",
1193             g_list_length(cmd_list));
1194     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1195         stonith_action_complete(cmd_iter->data, -ENOTCONN);
1196     }
1197     g_list_free(cmd_list);
1198 }
1199 
1200 /*!
1201  * \internal
1202  * \brief Execute a stonith resource "start" action
1203  *
1204  * Start a stonith resource by registering it with the fencer.
1205  * (Stonith agents don't have a start command.)
1206  *
1207  * \param[in] stonith_api  Connection to fencer
1208  * \param[in] rsc          Stonith resource to start
1209  * \param[in] cmd          Start command to execute
1210  *
1211  * \return pcmk_ok on success, -errno otherwise
1212  */
1213 static int
1214 execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1215 {
1216     char *key = NULL;
1217     char *value = NULL;
1218     stonith_key_value_t *device_params = NULL;
1219     int rc = pcmk_ok;
1220 
1221     // Convert command parameters to stonith API key/values
1222     if (cmd->params) {
1223         GHashTableIter iter;
1224 
1225         g_hash_table_iter_init(&iter, cmd->params);
1226         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
1227             device_params = stonith_key_value_add(device_params, key, value);
1228         }
1229     }
1230 
1231     /* The fencer will automatically register devices via CIB notifications
1232      * when the CIB changes, but to avoid a possible race condition between
1233      * the fencer receiving the notification and the executor requesting that
1234      * resource, the executor registers the device as well. The fencer knows how
1235      * to handle duplicate registrations.
1236      */
1237     rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call,
1238                                             cmd->rsc_id, rsc->provider,
1239                                             rsc->type, device_params);
1240 
1241     stonith_key_value_freeall(device_params, 1, 1);
1242     return rc;
1243 }
1244 
1245 /*!
1246  * \internal
1247  * \brief Execute a stonith resource "stop" action
1248  *
1249  * Stop a stonith resource by unregistering it with the fencer.
1250  * (Stonith agents don't have a stop command.)
1251  *
1252  * \param[in] stonith_api  Connection to fencer
1253  * \param[in] rsc          Stonith resource to stop
1254  *
1255  * \return pcmk_ok on success, -errno otherwise
1256  */
1257 static inline int
1258 execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1259 {
1260     /* @TODO Failure would indicate a problem communicating with fencer;
1261      * perhaps we should try reconnecting and retrying a few times?
1262      */
1263     return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call,
1264                                             rsc->rsc_id);
1265 }
1266 
1267 /*!
1268  * \internal
1269  * \brief Initiate a stonith resource agent recurring "monitor" action
1270  *
1271  * \param[in] stonith_api  Connection to fencer
1272  * \param[in] rsc          Stonith resource to monitor
1273  * \param[in] cmd          Monitor command being executed
1274  *
1275  * \return pcmk_ok if monitor was successfully initiated, -errno otherwise
1276  */
1277 static inline int
1278 execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1279 {
1280     int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id,
1281                                         cmd->timeout / 1000);
1282 
1283     rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd,
1284                                               "lrmd_stonith_callback",
1285                                               lrmd_stonith_callback);
1286     if (rc == TRUE) {
1287         rsc->active = cmd;
1288         rc = pcmk_ok;
1289     } else {
1290         rc = -pcmk_err_generic;
1291     }
1292     return rc;
1293 }
1294 
1295 static void
1296 lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1297 {
1298     int rc = 0;
1299     bool do_monitor = FALSE;
1300 
1301     stonith_t *stonith_api = get_stonith_connection();
1302 
1303     if (!stonith_api) {
1304         rc = -ENOTCONN;
1305 
1306     } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
1307         rc = execd_stonith_start(stonith_api, rsc, cmd);
1308         if (rc == 0) {
1309             do_monitor = TRUE;
1310         }
1311 
1312     } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1313         rc = execd_stonith_stop(stonith_api, rsc);
1314 
1315     } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1316         if (cmd->interval_ms > 0) {
1317             do_monitor = TRUE;
1318         } else {
1319             rc = rsc->st_probe_rc;
1320         }
1321     }
1322 
1323     if (do_monitor) {
1324         rc = execd_stonith_monitor(stonith_api, rsc, cmd);
1325         if (rc == pcmk_ok) {
1326             // Don't clean up yet, we will find out result of the monitor later
1327             return;
1328         }
1329     }
1330 
1331     stonith_action_complete(cmd, rc);
1332 }
1333 
1334 static int
1335 lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
1336 {
1337     svc_action_t *action = NULL;
1338     GHashTable *params_copy = NULL;
1339 
1340     CRM_ASSERT(rsc);
1341     CRM_ASSERT(cmd);
1342 
1343     crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s",
1344               rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type);
1345 
1346 #if SUPPORT_NAGIOS
1347     /* Recurring operations are cancelled anyway for a stop operation */
1348     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)
1349         && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
1350 
1351         cmd->exec_rc = PCMK_OCF_OK;
1352         goto exec_done;
1353     }
1354 #endif
1355 
1356     params_copy = crm_str_table_dup(cmd->params);
1357 
1358     action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider,
1359                                      rsc->type,
1360                                      normalize_action_name(rsc, cmd->action),
1361                                      cmd->interval_ms, cmd->timeout,
1362                                      params_copy, cmd->service_flags);
1363 
1364     if (!action) {
1365         crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id);
1366         cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
1367         goto exec_done;
1368     }
1369 
1370     action->cb_data = cmd;
1371 
1372     /* 'cmd' may not be valid after this point if
1373      * services_action_async() returned TRUE
1374      *
1375      * Upstart and systemd both synchronously determine monitor/status
1376      * results and call action_complete (which may free 'cmd') if necessary.
1377      */
1378     if (services_action_async(action, action_complete)) {
1379         return TRUE;
1380     }
1381 
1382     cmd->exec_rc = action->rc;
1383     if(action->status != PCMK_LRM_OP_DONE) {
1384         cmd->lrmd_op_status = action->status;
1385     } else {
1386         cmd->lrmd_op_status = PCMK_LRM_OP_ERROR;
1387     }
1388     services_action_free(action);
1389     action = NULL;
1390 
1391   exec_done:
1392     cmd_finalize(cmd, rsc);
1393     return TRUE;
1394 }
1395 
1396 static gboolean
1397 lrmd_rsc_execute(lrmd_rsc_t * rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1398 {
1399     lrmd_cmd_t *cmd = NULL;
1400 
1401     CRM_CHECK(rsc != NULL, return FALSE);
1402 
1403     if (rsc->active) {
1404         crm_trace("%s is still active", rsc->rsc_id);
1405         return TRUE;
1406     }
1407 
1408     if (rsc->pending_ops) {
1409         GList *first = rsc->pending_ops;
1410 
1411         cmd = first->data;
1412         if (cmd->delay_id) {
1413             crm_trace
1414                 ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms",
1415                  cmd->rsc_id, cmd->action, cmd->start_delay);
1416             return TRUE;
1417         }
1418         rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first);
1419         g_list_free_1(first);
1420 
1421 #ifdef PCMK__TIME_USE_CGT
1422         get_current_time(&(cmd->t_run), &(cmd->t_first_run));
1423 #endif
1424         cmd->epoch_last_run = time(NULL);
1425     }
1426 
1427     if (!cmd) {
1428         crm_trace("Nothing further to do for %s", rsc->rsc_id);
1429         return TRUE;
1430     }
1431 
1432     rsc->active = cmd;          /* only one op at a time for a rsc */
1433     if (cmd->interval_ms) {
1434         rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd);
1435     }
1436 
1437     log_execute(cmd);
1438 
1439     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1440         lrmd_rsc_execute_stonith(rsc, cmd);
1441     } else {
1442         lrmd_rsc_execute_service_lib(rsc, cmd);
1443     }
1444 
1445     return TRUE;
1446 }
1447 
1448 static gboolean
1449 lrmd_rsc_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1450 {
1451     return lrmd_rsc_execute(user_data);
1452 }
1453 
1454 void
1455 free_rsc(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1456 {
1457     GListPtr gIter = NULL;
1458     lrmd_rsc_t *rsc = data;
1459     int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH,
1460                                   pcmk__str_casei);
1461 
1462     gIter = rsc->pending_ops;
1463     while (gIter != NULL) {
1464         GListPtr next = gIter->next;
1465         lrmd_cmd_t *cmd = gIter->data;
1466 
1467         /* command was never executed */
1468         cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1469         cmd_finalize(cmd, NULL);
1470 
1471         gIter = next;
1472     }
1473     /* frees list, but not list elements. */
1474     g_list_free(rsc->pending_ops);
1475 
1476     gIter = rsc->recurring_ops;
1477     while (gIter != NULL) {
1478         GListPtr next = gIter->next;
1479         lrmd_cmd_t *cmd = gIter->data;
1480 
1481         if (is_stonith) {
1482             cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1483             /* If a stonith command is in-flight, just mark it as cancelled;
1484              * it is not safe to finalize/free the cmd until the stonith api
1485              * says it has either completed or timed out.
1486              */
1487             if (rsc->active != cmd) {
1488                 cmd_finalize(cmd, NULL);
1489             }
1490         } else {
1491             /* This command is already handed off to service library,
1492              * let service library cancel it and tell us via the callback
1493              * when it is cancelled. The rsc can be safely destroyed
1494              * even if we are waiting for the cancel result */
1495             services_action_cancel(rsc->rsc_id,
1496                                    normalize_action_name(rsc, cmd->action),
1497                                    cmd->interval_ms);
1498         }
1499 
1500         gIter = next;
1501     }
1502     /* frees list, but not list elements. */
1503     g_list_free(rsc->recurring_ops);
1504 
1505     free(rsc->rsc_id);
1506     free(rsc->class);
1507     free(rsc->provider);
1508     free(rsc->type);
1509     mainloop_destroy_trigger(rsc->work);
1510 
1511     free(rsc);
1512 }
1513 
1514 static int
1515 process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
     /* [previous][next][first][last][top][bottom][index][help] */
1516                     xmlNode **reply)
1517 {
1518     int rc = pcmk_ok;
1519     const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER);
1520     const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
1521 
1522     if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
1523         crm_err("Cluster API version must be greater than or equal to %s, not %s",
1524                 LRMD_MIN_PROTOCOL_VERSION, protocol_version);
1525         rc = -EPROTO;
1526     }
1527 
1528     if (crm_is_true(is_ipc_provider)) {
1529 #ifdef SUPPORT_REMOTE
1530         if ((client->remote != NULL) && client->remote->tls_handshake_complete) {
1531             // This is a remote connection from a cluster node's controller
1532             ipc_proxy_add_provider(client);
1533         } else {
1534             rc = -EACCES;
1535         }
1536 #else
1537         rc = -EPROTONOSUPPORT;
1538 #endif
1539     }
1540 
1541     *reply = create_lrmd_reply(__func__, rc, call_id);
1542     crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER);
1543     crm_xml_add(*reply, F_LRMD_CLIENTID, client->id);
1544     crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
1545 
1546     return rc;
1547 }
1548 
1549 static int
1550 process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1551 {
1552     int rc = pcmk_ok;
1553     lrmd_rsc_t *rsc = build_rsc_from_xml(request);
1554     lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id);
1555 
1556     if (dup &&
1557         pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) &&
1558         pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) {
1559 
1560         crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id);
1561         free_rsc(rsc);
1562         return rc;
1563     }
1564 
1565     g_hash_table_replace(rsc_list, rsc->rsc_id, rsc);
1566     crm_info("Cached agent information for '%s'", rsc->rsc_id);
1567     return rc;
1568 }
1569 
1570 static xmlNode *
1571 process_lrmd_get_rsc_info(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1572 {
1573     int rc = pcmk_ok;
1574     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1575     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1576     xmlNode *reply = NULL;
1577     lrmd_rsc_t *rsc = NULL;
1578 
1579     if (rsc_id == NULL) {
1580         rc = -ENODEV;
1581     } else {
1582         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1583         if (rsc == NULL) {
1584             crm_info("Agent information for '%s' not in cache", rsc_id);
1585             rc = -ENODEV;
1586         }
1587     }
1588 
1589     reply = create_lrmd_reply(__func__, rc, call_id);
1590     if (rsc) {
1591         crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id);
1592         crm_xml_add(reply, F_LRMD_CLASS, rsc->class);
1593         crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider);
1594         crm_xml_add(reply, F_LRMD_TYPE, rsc->type);
1595     }
1596     return reply;
1597 }
1598 
1599 static int
1600 process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id,
     /* [previous][next][first][last][top][bottom][index][help] */
1601                             xmlNode *request)
1602 {
1603     int rc = pcmk_ok;
1604     lrmd_rsc_t *rsc = NULL;
1605     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1606     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1607 
1608     if (!rsc_id) {
1609         return -ENODEV;
1610     }
1611 
1612     rsc = g_hash_table_lookup(rsc_list, rsc_id);
1613     if (rsc == NULL) {
1614         crm_info("Ignoring unregistration of resource '%s', which is not registered",
1615                  rsc_id);
1616         return pcmk_ok;
1617     }
1618 
1619     if (rsc->active) {
1620         /* let the caller know there are still active ops on this rsc to watch for */
1621         crm_trace("Operation (0x%p) still in progress for unregistered resource %s",
1622                   rsc->active, rsc_id);
1623         rc = -EINPROGRESS;
1624     }
1625 
1626     g_hash_table_remove(rsc_list, rsc_id);
1627 
1628     return rc;
1629 }
1630 
1631 static int
1632 process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1633 {
1634     lrmd_rsc_t *rsc = NULL;
1635     lrmd_cmd_t *cmd = NULL;
1636     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1637     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1638     int call_id;
1639 
1640     if (!rsc_id) {
1641         return -EINVAL;
1642     }
1643     if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) {
1644         crm_info("Resource '%s' not found (%d active resources)",
1645                  rsc_id, g_hash_table_size(rsc_list));
1646         return -ENODEV;
1647     }
1648 
1649     cmd = create_lrmd_cmd(request, client);
1650     call_id = cmd->call_id;
1651 
1652     /* Don't reference cmd after handing it off to be scheduled.
1653      * The cmd could get merged and freed. */
1654     schedule_lrmd_cmd(rsc, cmd);
1655 
1656     return call_id;
1657 }
1658 
1659 static int
1660 cancel_op(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1661 {
1662     GListPtr gIter = NULL;
1663     lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id);
1664 
1665     /* How to cancel an action.
1666      * 1. Check pending ops list, if it hasn't been handed off
1667      *    to the service library or stonith recurring list remove
1668      *    it there and that will stop it.
1669      * 2. If it isn't in the pending ops list, then it's either a
1670      *    recurring op in the stonith recurring list, or the service
1671      *    library's recurring list.  Stop it there
1672      * 3. If not found in any lists, then this operation has either
1673      *    been executed already and is not a recurring operation, or
1674      *    never existed.
1675      */
1676     if (!rsc) {
1677         return -ENODEV;
1678     }
1679 
1680     for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) {
1681         lrmd_cmd_t *cmd = gIter->data;
1682 
1683         if (pcmk__str_eq(cmd->action, action, pcmk__str_casei)
1684             && (cmd->interval_ms == interval_ms)) {
1685 
1686             cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1687             cmd_finalize(cmd, rsc);
1688             return pcmk_ok;
1689         }
1690     }
1691 
1692     if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) {
1693         /* The service library does not handle stonith operations.
1694          * We have to handle recurring stonith operations ourselves. */
1695         for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) {
1696             lrmd_cmd_t *cmd = gIter->data;
1697 
1698             if (pcmk__str_eq(cmd->action, action, pcmk__str_casei)
1699                 && (cmd->interval_ms == interval_ms)) {
1700 
1701                 cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED;
1702                 if (rsc->active != cmd) {
1703                     cmd_finalize(cmd, rsc);
1704                 }
1705                 return pcmk_ok;
1706             }
1707         }
1708     } else if (services_action_cancel(rsc_id,
1709                                       normalize_action_name(rsc, action),
1710                                       interval_ms) == TRUE) {
1711         /* The service library will tell the action_complete callback function
1712          * this action was cancelled, which will destroy the cmd and remove
1713          * it from the recurring_op list. Do not do that in this function
1714          * if the service library says it cancelled it. */
1715         return pcmk_ok;
1716     }
1717 
1718     return -EOPNOTSUPP;
1719 }
1720 
1721 static void
1722 cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1723 {
1724     GList *cmd_list = NULL;
1725     GList *cmd_iter = NULL;
1726 
1727     /* Notice a copy of each list is created when concat is called.
1728      * This prevents odd behavior from occurring when the cmd_list
1729      * is iterated through later on.  It is possible the cancel_op
1730      * function may end up modifying the recurring_ops and pending_ops
1731      * lists.  If we did not copy those lists, our cmd_list iteration
1732      * could get messed up.*/
1733     if (rsc->recurring_ops) {
1734         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops));
1735     }
1736     if (rsc->pending_ops) {
1737         cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops));
1738     }
1739     if (!cmd_list) {
1740         return;
1741     }
1742 
1743     for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) {
1744         lrmd_cmd_t *cmd = cmd_iter->data;
1745 
1746         if (cmd->interval_ms == 0) {
1747             continue;
1748         }
1749 
1750         if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) {
1751             continue;
1752         }
1753 
1754         cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms);
1755     }
1756     /* frees only the copied list data, not the cmds */
1757     g_list_free(cmd_list);
1758 }
1759 
1760 static int
1761 process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1762 {
1763     xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR);
1764     const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1765     const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION);
1766     guint interval_ms = 0;
1767 
1768     crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms);
1769 
1770     if (!rsc_id || !action) {
1771         return -EINVAL;
1772     }
1773 
1774     return cancel_op(rsc_id, action, interval_ms);
1775 }
1776 
1777 static void
1778 add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1779 {
1780     xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC);
1781 
1782     crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id);
1783     for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) {
1784         lrmd_cmd_t *cmd = item->data;
1785         xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP);
1786 
1787         crm_xml_add(op_xml, F_LRMD_RSC_ACTION,
1788                     (cmd->real_action? cmd->real_action : cmd->action));
1789         crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms);
1790         crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig);
1791     }
1792 }
1793 
1794 static xmlNode *
1795 process_lrmd_get_recurring(xmlNode *request, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1796 {
1797     int rc = pcmk_ok;
1798     const char *rsc_id = NULL;
1799     lrmd_rsc_t *rsc = NULL;
1800     xmlNode *reply = NULL;
1801     xmlNode *rsc_xml = NULL;
1802 
1803     // Resource ID is optional
1804     rsc_xml = first_named_child(request, F_LRMD_CALLDATA);
1805     if (rsc_xml) {
1806         rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC);
1807     }
1808     if (rsc_xml) {
1809         rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID);
1810     }
1811 
1812     // If resource ID is specified, resource must exist
1813     if (rsc_id != NULL) {
1814         rsc = g_hash_table_lookup(rsc_list, rsc_id);
1815         if (rsc == NULL) {
1816             crm_info("Resource '%s' not found (%d active resources)",
1817                      rsc_id, g_hash_table_size(rsc_list));
1818             rc = -ENODEV;
1819         }
1820     }
1821 
1822     reply = create_lrmd_reply(__func__, rc, call_id);
1823 
1824     // If resource ID is not specified, check all resources
1825     if (rsc_id == NULL) {
1826         GHashTableIter iter;
1827         char *key = NULL;
1828 
1829         g_hash_table_iter_init(&iter, rsc_list);
1830         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
1831                                       (gpointer *) &rsc)) {
1832             add_recurring_op_xml(reply, rsc);
1833         }
1834     } else if (rsc) {
1835         add_recurring_op_xml(reply, rsc);
1836     }
1837     return reply;
1838 }
1839 
1840 void
1841 process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request)
     /* [previous][next][first][last][top][bottom][index][help] */
1842 {
1843     int rc = pcmk_ok;
1844     int call_id = 0;
1845     const char *op = crm_element_value(request, F_LRMD_OPERATION);
1846     int do_reply = 0;
1847     int do_notify = 0;
1848     xmlNode *reply = NULL;
1849 
1850 #if ENABLE_ACL
1851     /* Certain IPC commands may be done only by privileged users (i.e. root or
1852      * hacluster) when ACLs are enabled, because they would otherwise provide a
1853      * means of bypassing ACLs.
1854      */
1855     bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged);
1856 #else
1857     bool allowed = true;
1858 #endif
1859 
1860     crm_trace("Processing %s operation from %s", op, client->id);
1861     crm_element_value_int(request, F_LRMD_CALLID, &call_id);
1862 
1863     if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) {
1864 #ifdef SUPPORT_REMOTE
1865         if (allowed) {
1866             ipc_proxy_forward_client(client, request);
1867         } else {
1868             rc = -EACCES;
1869         }
1870 #else
1871         rc = -EPROTONOSUPPORT;
1872 #endif
1873         do_reply = 1;
1874     } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
1875         rc = process_lrmd_signon(client, request, call_id, &reply);
1876         do_reply = 1;
1877     } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) {
1878         if (allowed) {
1879             rc = process_lrmd_rsc_register(client, id, request);
1880             do_notify = 1;
1881         } else {
1882             rc = -EACCES;
1883         }
1884         do_reply = 1;
1885     } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) {
1886         if (allowed) {
1887             reply = process_lrmd_get_rsc_info(request, call_id);
1888         } else {
1889             rc = -EACCES;
1890         }
1891         do_reply = 1;
1892     } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) {
1893         if (allowed) {
1894             rc = process_lrmd_rsc_unregister(client, id, request);
1895             /* don't notify anyone about failed un-registers */
1896             if (rc == pcmk_ok || rc == -EINPROGRESS) {
1897                 do_notify = 1;
1898             }
1899         } else {
1900             rc = -EACCES;
1901         }
1902         do_reply = 1;
1903     } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) {
1904         if (allowed) {
1905             rc = process_lrmd_rsc_exec(client, id, request);
1906         } else {
1907             rc = -EACCES;
1908         }
1909         do_reply = 1;
1910     } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) {
1911         if (allowed) {
1912             rc = process_lrmd_rsc_cancel(client, id, request);
1913         } else {
1914             rc = -EACCES;
1915         }
1916         do_reply = 1;
1917     } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) {
1918         do_notify = 1;
1919         do_reply = 1;
1920     } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) {
1921         if (allowed) {
1922             xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA);
1923 
1924             CRM_LOG_ASSERT(data != NULL);
1925             pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG));
1926         } else {
1927             rc = -EACCES;
1928         }
1929     } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) {
1930         if (allowed) {
1931             rc = process_lrmd_alert_exec(client, id, request);
1932         } else {
1933             rc = -EACCES;
1934         }
1935         do_reply = 1;
1936     } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) {
1937         if (allowed) {
1938             reply = process_lrmd_get_recurring(request, call_id);
1939         } else {
1940             rc = -EACCES;
1941         }
1942         do_reply = 1;
1943     } else {
1944         rc = -EOPNOTSUPP;
1945         do_reply = 1;
1946         crm_err("Unknown IPC request '%s' from %s", op, client->name);
1947     }
1948 
1949     if (rc == -EACCES) {
1950         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
1951                  op, pcmk__client_name(client));
1952     }
1953 
1954     crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d",
1955               op, client->id, rc, do_reply, do_notify);
1956 
1957     if (do_reply) {
1958         int send_rc = pcmk_rc_ok;
1959 
1960         if (reply == NULL) {
1961             reply = create_lrmd_reply(__func__, rc, call_id);
1962         }
1963         send_rc = lrmd_server_send_reply(client, id, reply);
1964         free_xml(reply);
1965         if (send_rc != pcmk_rc_ok) {
1966             crm_warn("Reply to client %s failed: %s " CRM_XS " %d",
1967                      client->name, pcmk_rc_str(send_rc), send_rc);
1968         }
1969     }
1970 
1971     if (do_notify) {
1972         send_generic_notify(rc, request);
1973     }
1974 }

/* [previous][next][first][last][top][bottom][index][help] */