root/daemons/fenced/fenced_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. is_action_required
  2. get_action_delay_max
  3. get_action_delay_base
  4. get_action_timeout
  5. free_async_command
  6. create_async_command
  7. get_action_limit
  8. get_active_cmds
  9. fork_cb
  10. get_agent_metadata_cb
  11. stonith_device_execute
  12. stonith_device_dispatch
  13. start_delay_helper
  14. schedule_stonith_command
  15. free_device
  16. free_device_list
  17. init_device_list
  18. build_port_aliases
  19. free_metadata_cache
  20. init_metadata_cache
  21. get_agent_metadata
  22. is_nodeid_required
  23. add_action
  24. read_action_metadata
  25. map_action
  26. xml2device_params
  27. build_device_from_xml
  28. target_list_type
  29. schedule_internal_command
  30. string_in_list
  31. status_search_cb
  32. dynamic_list_search_cb
  33. device_params_diff
  34. device_has_duplicate
  35. stonith_device_register
  36. stonith_device_remove
  37. count_active_levels
  38. free_topology_entry
  39. free_topology_list
  40. init_topology_list
  41. stonith_level_key
  42. stonith_level_kind
  43. parse_device_list
  44. stonith_level_register
  45. stonith_level_remove
  46. stonith_device_action
  47. search_devices_record_result
  48. localhost_is_eligible
  49. can_fence_host_with_device
  50. search_devices
  51. get_capable_devices
  52. add_action_specific_attributes
  53. add_disallowed
  54. add_action_reply
  55. stonith_query_capable_device_cb
  56. stonith_query
  57. log_operation
  58. stonith_send_async_reply
  59. cancel_stonith_command
  60. st_child_done
  61. sort_device_priority
  62. stonith_fence_get_devices_cb
  63. stonith_fence
  64. stonith_construct_reply
  65. stonith_construct_async_reply
  66. fencing_peer_active
  67. check_alternate_host
  68. stonith_send_reply
  69. remove_relay_op
  70. handle_request
  71. handle_reply
  72. stonith_command

   1 /*
   2  * Copyright 2009-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <stdio.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 #include <sys/stat.h>
  17 #include <unistd.h>
  18 #include <sys/utsname.h>
  19 
  20 #include <stdlib.h>
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <ctype.h>
  24 
  25 #include <crm/crm.h>
  26 #include <crm/msg_xml.h>
  27 #include <crm/common/ipc.h>
  28 #include <crm/common/ipc_internal.h>
  29 #include <crm/cluster/internal.h>
  30 #include <crm/common/mainloop.h>
  31 
  32 #include <crm/stonith-ng.h>
  33 #include <crm/fencing/internal.h>
  34 #include <crm/common/xml.h>
  35 
  36 #include <pacemaker-fenced.h>
  37 
   /* Registered fencing devices, looked up by device ID string; entries are
    * released with free_device() (see init_device_list()) */
   38 GHashTable *device_list = NULL;
   /* NOTE(review): presumably the fencing topology table; it is not used in
    * the part of the file reviewed here -- confirm against its users */
   39 GHashTable *topology = NULL;
   /* Every async_command_t made by create_async_command() that has not yet
    * been released by free_async_command() */
   40 GList *cmd_list = NULL;
  41 
   /* Bookkeeping for one search for devices able to perform an action against
    * a host: counts replies and hands the capable devices to a callback */
   42 struct device_search_s {
   43     /* target of fence action */
   44     char *host;
   45     /* requested fence action */
   46     char *action;
   47     /* timeout to use if a device is queried dynamically for possible targets */
   48     int per_device_timeout;
   49     /* number of registered fencing devices at time of request */
   50     int replies_needed;
   51     /* number of device replies received so far */
   52     int replies_received;
   53     /* whether the target is eligible to perform requested action (or off) */
   54     bool allow_suicide;
   55 
   56     /* private data to pass to search callback function */
   57     void *user_data;
   58     /* function to call when all replies have been received */
   59     void (*callback) (GList * devices, void *user_data);
   60     /* devices capable of performing requested action (or off if remapping) */
   61     GList *capable;
   62 };
  63 
  64 static gboolean stonith_device_dispatch(gpointer user_data);
  65 static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data);
  66 static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
  67                                const char *client_id);
  68 
  69 static void search_devices_record_result(struct device_search_s *search, const char *device,
  70                                          gboolean can_fence);
  71 
  72 static xmlNode * get_agent_metadata(const char *agent);
  73 static void read_action_metadata(stonith_device_t *device);
  74 
   /* One fencing command, from creation (create_async_command()) through
    * queuing on a device and execution, until free_async_command() */
   75 typedef struct async_command_s {
   76 
   77     int id; /* read from F_STONITH_CALLID of the request */
   78     int pid;
   79     int fd_stdout;
   80     int options; /* read from F_STONITH_CALLOPTS of the request */
   81     int default_timeout; /* seconds; read from F_STONITH_TIMEOUT */
   82     int timeout; /* seconds; default_timeout unless get_action_timeout() overrides it */
   83 
   84     int start_delay; /* seconds; read from F_STONITH_DELAY, -1 disables delays */
   85     int delay_id; /* GLib source ID of the pending start-delay timer, 0 if none */
   86 
   87     char *op; /* copy of F_STONITH_OPERATION */
   88     char *origin; /* copy of F_ORIG */
   89     char *client; /* copy of F_STONITH_CLIENTID */
   90     char *client_name; /* copy of F_STONITH_CLIENTNAME */
   91     char *remote_op_id; /* copy of F_STONITH_REMOTE_OP_ID, may be NULL */
   92 
   93     char *victim; /* copy of F_STONITH_TARGET, may be NULL */
   94     uint32_t victim_nodeid; /* peer ID of victim, set when the device has include_nodeid */
   95     char *action; /* copy of F_STONITH_ACTION */
   96     char *device; /* ID of the device the command was last scheduled on */
   97 
   98     GList *device_list; /* list data is released with free() */
   99     GList *device_next;
  100 
  101     void *internal_user_data;
  102     void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data);
  103     guint timer_sigterm;
  104     guint timer_sigkill;
  105     /*! If the operation timed out, this is the last signal
  106      *  we sent to the process to get it to terminate */
  107     int last_timeout_signo;
  108 
  109     stonith_device_t *active_on; /* device running the command, set by fork_cb() */
  110     stonith_device_t *activating_on; /* device the command is being started on, cleared by fork_cb() */
  111 } async_command_t;
 112 
 113 static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
 114                                               xmlNode * data, int rc);
 115 
 116 static gboolean
 117 is_action_required(const char *action, stonith_device_t *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 118 {
 119     return device && device->automatic_unfencing && pcmk__str_eq(action, "on",
 120                                                                  pcmk__str_casei);
 121 }
 122 
 123 static int
 124 get_action_delay_max(stonith_device_t * device, const char * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 125 {
 126     const char *value = NULL;
 127     int delay_max = 0;
 128 
 129     if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
 130         return 0;
 131     }
 132 
 133     value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
 134     if (value) {
 135        delay_max = crm_parse_interval_spec(value) / 1000;
 136     }
 137 
 138     return delay_max;
 139 }
 140 
 141 static int
 142 get_action_delay_base(stonith_device_t * device, const char * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 143 {
 144     const char *value = NULL;
 145     int delay_base = 0;
 146 
 147     if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
 148         return 0;
 149     }
 150 
 151     value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
 152     if (value) {
 153        delay_base = crm_parse_interval_spec(value) / 1000;
 154     }
 155 
 156     return delay_base;
 157 }
 158 
 159 /*!
 160  * \internal
 161  * \brief Override STONITH timeout with pcmk_*_timeout if available
 162  *
 163  * \param[in] device           STONITH device to use
 164  * \param[in] action           STONITH action name
 165  * \param[in] default_timeout  Timeout to use if device does not have
 166  *                             a pcmk_*_timeout parameter for action
 167  *
 168  * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
 169  * \note For consistency, it would be nice if reboot/off/on timeouts could be
 170  *       set the same way as start/stop/monitor timeouts, i.e. with an
 171  *       <operation> entry in the fencing resource configuration. However that
 172  *       is insufficient because fencing devices may be registered directly via
 173  *       the fencer's register_device() API instead of going through the CIB
 174  *       (e.g. stonith_admin uses it for its -R option, and the executor uses it
 175  *       to ensure a device is registered when a command is issued). As device
 176  *       properties, pcmk_*_timeout parameters can be grabbed by the fencer when
 177  *       the device is registered, whether by CIB change or API call.
 178  */
 179 static int
 180 get_action_timeout(stonith_device_t * device, const char *action, int default_timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
 181 {
 182     if (action && device && device->params) {
 183         char buffer[64] = { 0, };
 184         const char *value = NULL;
 185 
 186         /* If "reboot" was requested but the device does not support it,
 187          * we will remap to "off", so check timeout for "off" instead
 188          */
 189         if (pcmk__str_eq(action, "reboot", pcmk__str_casei)
 190             && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
 191             crm_trace("%s doesn't support reboot, using timeout for off instead",
 192                       device->id);
 193             action = "off";
 194         }
 195 
 196         /* If the device config specified an action-specific timeout, use it */
 197         snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
 198         value = g_hash_table_lookup(device->params, buffer);
 199         if (value) {
 200             return atoi(value);
 201         }
 202     }
 203     return default_timeout;
 204 }
 205 
 206 static void
 207 free_async_command(async_command_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209     if (!cmd) {
 210         return;
 211     }
 212 
 213     if (cmd->delay_id) {
 214         g_source_remove(cmd->delay_id);
 215     }
 216 
 217     cmd_list = g_list_remove(cmd_list, cmd);
 218 
 219     g_list_free_full(cmd->device_list, free);
 220     free(cmd->device);
 221     free(cmd->action);
 222     free(cmd->victim);
 223     free(cmd->remote_op_id);
 224     free(cmd->client);
 225     free(cmd->client_name);
 226     free(cmd->origin);
 227     free(cmd->op);
 228     free(cmd);
 229 }
 230 
 231 static async_command_t *
 232 create_async_command(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234     async_command_t *cmd = NULL;
 235     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
 236     const char *action = crm_element_value(op, F_STONITH_ACTION);
 237 
 238     CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL);
 239 
 240     crm_log_xml_trace(msg, "Command");
 241     cmd = calloc(1, sizeof(async_command_t));
 242     crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
 243     crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
 244     crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
 245     cmd->timeout = cmd->default_timeout;
 246     // Value -1 means disable any static/random fencing delays
 247     crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
 248 
 249     cmd->origin = crm_element_value_copy(msg, F_ORIG);
 250     cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
 251     cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
 252     cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
 253     cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
 254     cmd->action = strdup(action);
 255     cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET);
 256     cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
 257 
 258     CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL);
 259     CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient"));
 260 
 261     cmd->done_cb = st_child_done;
 262     cmd_list = g_list_append(cmd_list, cmd);
 263     return cmd;
 264 }
 265 
 266 static int
 267 get_action_limit(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 268 {
 269     const char *value = NULL;
 270     int action_limit = 1;
 271 
 272     value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
 273     if ((value == NULL)
 274         || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
 275         || (action_limit == 0)) {
 276         action_limit = 1;
 277     }
 278     return action_limit;
 279 }
 280 
 281 static int
 282 get_active_cmds(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 283 {
 284     int counter = 0;
 285     GList *gIter = NULL;
 286     GList *gIterNext = NULL;
 287 
 288     CRM_CHECK(device != NULL, return 0);
 289 
 290     for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
 291         async_command_t *cmd = gIter->data;
 292 
 293         gIterNext = gIter->next;
 294 
 295         if (cmd->active_on == device) {
 296             counter++;
 297         }
 298     }
 299 
 300     return counter;
 301 }
 302 
 303 static void
 304 fork_cb(GPid pid, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 305 {
 306     async_command_t *cmd = (async_command_t *) user_data;
 307     stonith_device_t * device =
 308         /* in case of a retry we've done the move from
 309            activating_on to active_on already
 310          */
 311         cmd->activating_on?cmd->activating_on:cmd->active_on;
 312 
 313     CRM_ASSERT(device);
 314     crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
 315               cmd->action, pid,
 316               ((cmd->victim == NULL)? "" : " targeting "),
 317               ((cmd->victim == NULL)? "" : cmd->victim),
 318               device->id, cmd->timeout);
 319     cmd->active_on = device;
 320     cmd->activating_on = NULL;
 321 }
 322 
 323 static int
 324 get_agent_metadata_cb(gpointer data) {
     /* [previous][next][first][last][top][bottom][index][help] */
 325     stonith_device_t *device = data;
 326 
 327     device->agent_metadata = get_agent_metadata(device->agent);
 328     if (device->agent_metadata) {
 329         read_action_metadata(device);
 330         stonith__device_parameter_flags(&(device->flags), device->id,
 331                                         device->agent_metadata);
 332         return G_SOURCE_REMOVE;
 333     } else {
 334         guint period_ms = pcmk__mainloop_timer_get_period(device->timer);
 335         if (period_ms < 160 * 1000) {
 336             mainloop_timer_set_period(device->timer, 2 * period_ms);
 337         }
 338         return G_SOURCE_CONTINUE;
 339     }
 340 }
 341 
 342 static gboolean
 343 stonith_device_execute(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 344 {
 345     int exec_rc = 0;
 346     const char *action_str = NULL;
 347     const char *host_arg = NULL;
 348     async_command_t *cmd = NULL;
 349     stonith_action_t *action = NULL;
 350     int active_cmds = 0;
 351     int action_limit = 0;
 352     GList *gIter = NULL;
 353     GList *gIterNext = NULL;
 354 
 355     CRM_CHECK(device != NULL, return FALSE);
 356 
 357     active_cmds = get_active_cmds(device);
 358     action_limit = get_action_limit(device);
 359     if (action_limit > -1 && active_cmds >= action_limit) {
 360         crm_trace("%s is over its action limit of %d (%u active action%s)",
 361                   device->id, action_limit, active_cmds,
 362                   pcmk__plural_s(active_cmds));
 363         return TRUE;
 364     }
 365 
 366     for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
 367         async_command_t *pending_op = gIter->data;
 368 
 369         gIterNext = gIter->next;
 370 
 371         if (pending_op && pending_op->delay_id) {
 372             crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
 373                       "waiting for start delay of %ds",
 374                       pending_op->action,
 375                       ((pending_op->victim == NULL)? "" : " targeting "),
 376                       ((pending_op->victim == NULL)? "" : pending_op->victim),
 377                       device->id, pending_op->start_delay);
 378             continue;
 379         }
 380 
 381         device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
 382         g_list_free_1(gIter);
 383 
 384         cmd = pending_op;
 385         break;
 386     }
 387 
 388     if (cmd == NULL) {
 389         crm_trace("No actions using %s are needed", device->id);
 390         return TRUE;
 391     }
 392 
 393     if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
 394         if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) {
 395             pcmk__panic(__func__);
 396             goto done;
 397 
 398         } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) {
 399             pcmk__panic(__func__);
 400             goto done;
 401 
 402         } else {
 403             crm_info("Faking success for %s watchdog operation", cmd->action);
 404             cmd->done_cb(0, 0, NULL, cmd);
 405             goto done;
 406         }
 407     }
 408 
 409 #if SUPPORT_CIBSECRETS
 410     if (pcmk__substitute_secrets(device->id, device->params) != pcmk_rc_ok) {
 411         /* replacing secrets failed! */
 412         if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 413             /* don't fail on stop! */
 414             crm_info("Proceeding with stop operation for %s", device->id);
 415 
 416         } else {
 417             crm_err("Considering %s unconfigured: Failed to get secrets",
 418                     device->id);
 419             exec_rc = PCMK_OCF_NOT_CONFIGURED;
 420             cmd->done_cb(0, exec_rc, NULL, cmd);
 421             goto done;
 422         }
 423     }
 424 #endif
 425 
 426     action_str = cmd->action;
 427     if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)
 428         && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
 429 
 430         crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent);
 431         action_str = "off";
 432     }
 433 
 434     if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
 435         host_arg = "port";
 436 
 437     } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
 438         host_arg = "plug";
 439     }
 440 
 441     action = stonith_action_create(device->agent,
 442                                    action_str,
 443                                    cmd->victim,
 444                                    cmd->victim_nodeid,
 445                                    cmd->timeout, device->params,
 446                                    device->aliases, host_arg);
 447 
 448     /* for async exec, exec_rc is negative for early error exit
 449        otherwise handling of success/errors is done via callbacks */
 450     cmd->activating_on = device;
 451     exec_rc = stonith_action_execute_async(action, (void *)cmd,
 452                                            cmd->done_cb, fork_cb);
 453 
 454     if (exec_rc < 0) {
 455         crm_warn("Operation '%s'%s%s using %s failed: %s " CRM_XS " rc=%d",
 456                  cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 457                  device->id, pcmk_strerror(exec_rc), exec_rc);
 458         cmd->activating_on = NULL;
 459         cmd->done_cb(0, exec_rc, NULL, cmd);
 460     }
 461 
 462 done:
 463     /* Device might get triggered to work by multiple fencing commands
 464      * simultaneously. Trigger the device again to make sure any
 465      * remaining concurrent commands get executed. */
 466     if (device->pending_ops) {
 467         mainloop_set_trigger(device->work);
 468     }
 469     return TRUE;
 470 }
 471 
 472 static gboolean
 473 stonith_device_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 474 {
 475     return stonith_device_execute(user_data);
 476 }
 477 
 478 static gboolean
 479 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 480 {
 481     async_command_t *cmd = data;
 482     stonith_device_t *device = NULL;
 483 
 484     cmd->delay_id = 0;
 485     device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
 486 
 487     if (device) {
 488         mainloop_set_trigger(device->work);
 489     }
 490 
 491     return FALSE;
 492 }
 493 
 494 static void
 495 schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 496 {
 497     int delay_max = 0;
 498     int delay_base = 0;
 499     int requested_delay = cmd->start_delay;
 500 
 501     CRM_CHECK(cmd != NULL, return);
 502     CRM_CHECK(device != NULL, return);
 503 
 504     if (cmd->device) {
 505         free(cmd->device);
 506     }
 507 
 508     if (device->include_nodeid && cmd->victim) {
 509         crm_node_t *node = crm_get_peer(0, cmd->victim);
 510 
 511         cmd->victim_nodeid = node->id;
 512     }
 513 
 514     cmd->device = strdup(device->id);
 515     cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
 516 
 517     if (cmd->remote_op_id) {
 518         crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
 519                   "with op id %.8s and timeout %ds",
 520                   cmd->action,
 521                   cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 522                   device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
 523     } else {
 524         crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
 525                   cmd->action,
 526                   cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 527                   device->id, cmd->client, cmd->timeout);
 528     }
 529 
 530     device->pending_ops = g_list_append(device->pending_ops, cmd);
 531     mainloop_set_trigger(device->work);
 532 
 533     // Value -1 means disable any static/random fencing delays
 534     if (requested_delay < 0) {
 535         return;
 536     }
 537 
 538     delay_max = get_action_delay_max(device, cmd->action);
 539     delay_base = get_action_delay_base(device, cmd->action);
 540     if (delay_max == 0) {
 541         delay_max = delay_base;
 542     }
 543     if (delay_max < delay_base) {
 544         crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
 545                  PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
 546                  "(limiting to maximum delay)",
 547                  delay_base, delay_max, cmd->action, device->id);
 548         delay_base = delay_max;
 549     }
 550     if (delay_max > 0) {
 551         // coverity[dont_call] We're not using rand() for security
 552         cmd->start_delay +=
 553             ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
 554             + delay_base;
 555     }
 556 
 557     if (cmd->start_delay > 0) {
 558         crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
 559                    " timeout=%ds requested_delay=%ds base=%ds max=%ds",
 560                    cmd->action,
 561                    cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 562                    device->id, cmd->start_delay, cmd->timeout,
 563                    requested_delay, delay_base, delay_max);
 564         cmd->delay_id =
 565             g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
 566     }
 567 }
 568 
 569 static void
 570 free_device(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 571 {
 572     GList *gIter = NULL;
 573     stonith_device_t *device = data;
 574 
 575     g_hash_table_destroy(device->params);
 576     g_hash_table_destroy(device->aliases);
 577 
 578     for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
 579         async_command_t *cmd = gIter->data;
 580 
 581         crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
 582         cmd->done_cb(0, -ENODEV, NULL, cmd);
 583     }
 584     g_list_free(device->pending_ops);
 585 
 586     g_list_free_full(device->targets, free);
 587 
 588     if (device->timer) {
 589         mainloop_timer_stop(device->timer);
 590         mainloop_timer_del(device->timer);
 591     }
 592 
 593     mainloop_destroy_trigger(device->work);
 594 
 595     free_xml(device->agent_metadata);
 596     free(device->namespace);
 597     free(device->on_target_actions);
 598     free(device->agent);
 599     free(device->id);
 600     free(device);
 601 }
 602 
 603 void free_device_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 604 {
 605     if (device_list != NULL) {
 606         g_hash_table_destroy(device_list);
 607         device_list = NULL;
 608     }
 609 }
 610 
 611 void
 612 init_device_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 613 {
 614     if (device_list == NULL) {
 615         device_list = pcmk__strkey_table(NULL, free_device);
 616     }
 617 }
 618 
 619 static GHashTable *
 620 build_port_aliases(const char *hostmap, GList ** targets)
     /* [previous][next][first][last][top][bottom][index][help] */
 621 {
 622     char *name = NULL;
 623     int last = 0, lpc = 0, max = 0, added = 0;
 624     GHashTable *aliases = pcmk__strikey_table(free, free);
 625 
 626     if (hostmap == NULL) {
 627         return aliases;
 628     }
 629 
 630     max = strlen(hostmap);
 631     for (; lpc <= max; lpc++) {
 632         switch (hostmap[lpc]) {
 633                 /* Assignment chars */
 634             case '=':
 635             case ':':
 636                 if (lpc > last) {
 637                     free(name);
 638                     name = calloc(1, 1 + lpc - last);
 639                     memcpy(name, hostmap + last, lpc - last);
 640                 }
 641                 last = lpc + 1;
 642                 break;
 643 
 644                 /* Delimeter chars */
 645                 /* case ',': Potentially used to specify multiple ports */
 646             case 0:
 647             case ';':
 648             case ' ':
 649             case '\t':
 650                 if (name) {
 651                     char *value = NULL;
 652 
 653                     value = calloc(1, 1 + lpc - last);
 654                     memcpy(value, hostmap + last, lpc - last);
 655 
 656                     crm_debug("Adding alias '%s'='%s'", name, value);
 657                     g_hash_table_replace(aliases, name, value);
 658                     if (targets) {
 659                         *targets = g_list_append(*targets, strdup(value));
 660                     }
 661                     value = NULL;
 662                     name = NULL;
 663                     added++;
 664 
 665                 } else if (lpc > last) {
 666                     crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
 667                 }
 668 
 669                 last = lpc + 1;
 670                 break;
 671         }
 672 
 673         if (hostmap[lpc] == 0) {
 674             break;
 675         }
 676     }
 677 
 678     if (added == 0) {
 679         crm_info("No host mappings detected in '%s'", hostmap);
 680     }
 681 
 682     free(name);
 683     return aliases;
 684 }
 685 
  /* Cache of raw agent metadata strings keyed by agent name; created lazily
   * by init_metadata_cache(), keys and values are released with free() */
  686 GHashTable *metadata_cache = NULL;
 687 
 688 void
 689 free_metadata_cache(void) {
     /* [previous][next][first][last][top][bottom][index][help] */
 690     if (metadata_cache != NULL) {
 691         g_hash_table_destroy(metadata_cache);
 692         metadata_cache = NULL;
 693     }
 694 }
 695 
 696 static void
 697 init_metadata_cache(void) {
     /* [previous][next][first][last][top][bottom][index][help] */
 698     if (metadata_cache == NULL) {
 699         metadata_cache = pcmk__strkey_table(free, free);
 700     }
 701 }
 702 
 703 static xmlNode *
 704 get_agent_metadata(const char *agent)
     /* [previous][next][first][last][top][bottom][index][help] */
 705 {
 706     xmlNode *xml = NULL;
 707     char *buffer = NULL;
 708 
 709     init_metadata_cache();
 710     buffer = g_hash_table_lookup(metadata_cache, agent);
 711     if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
 712         return NULL;
 713 
 714     } else if(buffer == NULL) {
 715         stonith_t *st = stonith_api_new();
 716         int rc;
 717 
 718         if (st == NULL) {
 719             crm_warn("Could not get agent meta-data: "
 720                      "API memory allocation failed");
 721             return NULL;
 722         }
 723         rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
 724         stonith_api_delete(st);
 725         if (rc || !buffer) {
 726             crm_err("Could not retrieve metadata for fencing agent %s", agent);
 727             return NULL;
 728         }
 729         g_hash_table_replace(metadata_cache, strdup(agent), buffer);
 730     }
 731 
 732     xml = string2xml(buffer);
 733 
 734     return xml;
 735 }
 736 
 737 static gboolean
 738 is_nodeid_required(xmlNode * xml)
     /* [previous][next][first][last][top][bottom][index][help] */
 739 {
 740     xmlXPathObjectPtr xpath = NULL;
 741 
 742     if (stand_alone) {
 743         return FALSE;
 744     }
 745 
 746     if (!xml) {
 747         return FALSE;
 748     }
 749 
 750     xpath = xpath_search(xml, "//parameter[@name='nodeid']");
 751     if (numXpathResults(xpath)  <= 0) {
 752         freeXpathObject(xpath);
 753         return FALSE;
 754     }
 755 
 756     freeXpathObject(xpath);
 757     return TRUE;
 758 }
 759 
 760 #define MAX_ACTION_LEN 256
 761 
 762 static char *
 763 add_action(char *actions, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 764 {
 765     int offset = 0;
 766 
 767     if (actions == NULL) {
 768         actions = calloc(1, MAX_ACTION_LEN);
 769     } else {
 770         offset = strlen(actions);
 771     }
 772 
 773     if (offset > 0) {
 774         offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " ");
 775     }
 776     offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action);
 777 
 778     return actions;
 779 }
 780 
 781 static void
 782 read_action_metadata(stonith_device_t *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 783 {
 784     xmlXPathObjectPtr xpath = NULL;
 785     int max = 0;
 786     int lpc = 0;
 787 
 788     if (device->agent_metadata == NULL) {
 789         return;
 790     }
 791 
 792     xpath = xpath_search(device->agent_metadata, "//action");
 793     max = numXpathResults(xpath);
 794 
 795     if (max <= 0) {
 796         freeXpathObject(xpath);
 797         return;
 798     }
 799 
 800     for (lpc = 0; lpc < max; lpc++) {
 801         const char *on_target = NULL;
 802         const char *action = NULL;
 803         xmlNode *match = getXpathResult(xpath, lpc);
 804 
 805         CRM_LOG_ASSERT(match != NULL);
 806         if(match == NULL) { continue; };
 807 
 808         on_target = crm_element_value(match, "on_target");
 809         action = crm_element_value(match, "name");
 810 
 811         if(pcmk__str_eq(action, "list", pcmk__str_casei)) {
 812             stonith__set_device_flags(device->flags, device->id,
 813                                       st_device_supports_list);
 814         } else if(pcmk__str_eq(action, "status", pcmk__str_casei)) {
 815             stonith__set_device_flags(device->flags, device->id,
 816                                       st_device_supports_status);
 817         } else if(pcmk__str_eq(action, "reboot", pcmk__str_casei)) {
 818             stonith__set_device_flags(device->flags, device->id,
 819                                       st_device_supports_reboot);
 820         } else if (pcmk__str_eq(action, "on", pcmk__str_casei)) {
 821             /* "automatic" means the cluster will unfence node when it joins */
 822             const char *automatic = crm_element_value(match, "automatic");
 823 
 824             /* "required" is a deprecated synonym for "automatic" */
 825             const char *required = crm_element_value(match, "required");
 826 
 827             if (crm_is_true(automatic) || crm_is_true(required)) {
 828                 device->automatic_unfencing = TRUE;
 829             }
 830         }
 831 
 832         if (action && crm_is_true(on_target)) {
 833             device->on_target_actions = add_action(device->on_target_actions, action);
 834         }
 835     }
 836 
 837     freeXpathObject(xpath);
 838 }
 839 
 840 /*!
 841  * \internal
 842  * \brief Set a pcmk_*_action parameter if not already set
 843  *
 844  * \param[in,out] params  Device parameters
 845  * \param[in]     action  Name of action
 846  * \param[in]     value   Value to use if action is not already set
 847  */
 848 static void
 849 map_action(GHashTable *params, const char *action, const char *value)
     /* [previous][next][first][last][top][bottom][index][help] */
 850 {
 851     char *key = crm_strdup_printf("pcmk_%s_action", action);
 852 
 853     if (g_hash_table_lookup(params, key)) {
 854         crm_warn("Ignoring %s='%s', see %s instead",
 855                  STONITH_ATTR_ACTION_OP, value, key);
 856         free(key);
 857     } else {
 858         crm_warn("Mapping %s='%s' to %s='%s'",
 859                  STONITH_ATTR_ACTION_OP, value, key, value);
 860         g_hash_table_insert(params, key, strdup(value));
 861     }
 862 }
 863 
 864 /*!
 865  * \internal
 866  * \brief Create device parameter table from XML
 867  *
 868  * \param[in]     name    Device name (used for logging only)
 869  * \param[in,out] params  Device parameters
 870  */
 871 static GHashTable *
 872 xml2device_params(const char *name, xmlNode *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 873 {
 874     GHashTable *params = xml2list(dev);
 875     const char *value;
 876 
 877     /* Action should never be specified in the device configuration,
 878      * but we support it for users who are familiar with other software
 879      * that worked that way.
 880      */
 881     value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
 882     if (value != NULL) {
 883         crm_warn("%s has '%s' parameter, which should never be specified in configuration",
 884                  name, STONITH_ATTR_ACTION_OP);
 885 
 886         if (*value == '\0') {
 887             crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
 888 
 889         } else if (strcmp(value, "reboot") == 0) {
 890             crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
 891                      STONITH_ATTR_ACTION_OP);
 892 
 893         } else if (strcmp(value, "off") == 0) {
 894             map_action(params, "reboot", value);
 895 
 896         } else {
 897             map_action(params, "off", value);
 898             map_action(params, "reboot", value);
 899         }
 900 
 901         g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
 902     }
 903 
 904     return params;
 905 }
 906 
 907 static stonith_device_t *
 908 build_device_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 909 {
 910     const char *value;
 911     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
 912     stonith_device_t *device = NULL;
 913     char *agent = crm_element_value_copy(dev, "agent");
 914 
 915     CRM_CHECK(agent != NULL, return device);
 916 
 917     device = calloc(1, sizeof(stonith_device_t));
 918 
 919     CRM_CHECK(device != NULL, {free(agent); return device;});
 920 
 921     device->id = crm_element_value_copy(dev, XML_ATTR_ID);
 922     device->agent = agent;
 923     device->namespace = crm_element_value_copy(dev, "namespace");
 924     device->params = xml2device_params(device->id, dev);
 925 
 926     value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
 927     if (value) {
 928         device->targets = stonith__parse_targets(value);
 929     }
 930 
 931     value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
 932     device->aliases = build_port_aliases(value, &(device->targets));
 933 
 934     device->agent_metadata = get_agent_metadata(device->agent);
 935     if (device->agent_metadata) {
 936         read_action_metadata(device);
 937         stonith__device_parameter_flags(&(device->flags), device->id,
 938                                         device->agent_metadata);
 939     } else {
 940         if (device->timer == NULL) {
 941             device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
 942                                            TRUE, get_agent_metadata_cb, device);
 943         }
 944         if (!mainloop_timer_running(device->timer)) {
 945             mainloop_timer_start(device->timer);
 946         }
 947     }
 948 
 949     value = g_hash_table_lookup(device->params, "nodeid");
 950     if (!value) {
 951         device->include_nodeid = is_nodeid_required(device->agent_metadata);
 952     }
 953 
 954     value = crm_element_value(dev, "rsc_provides");
 955     if (pcmk__str_eq(value, "unfencing", pcmk__str_casei)) {
 956         device->automatic_unfencing = TRUE;
 957     }
 958 
 959     if (is_action_required("on", device)) {
 960         crm_info("Fencing device '%s' requires unfencing", device->id);
 961     }
 962 
 963     if (device->on_target_actions) {
 964         crm_info("Fencing device '%s' requires actions (%s) to be executed "
 965                  "on target", device->id, device->on_target_actions);
 966     }
 967 
 968     device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
 969     /* TODO: Hook up priority */
 970 
 971     return device;
 972 }
 973 
 974 static const char *
 975 target_list_type(stonith_device_t * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 976 {
 977     const char *check_type = NULL;
 978 
 979     check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
 980 
 981     if (check_type == NULL) {
 982 
 983         if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
 984             check_type = "static-list";
 985         } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
 986             check_type = "static-list";
 987         } else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
 988             check_type = "dynamic-list";
 989         } else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
 990             check_type = "status";
 991         } else {
 992             check_type = "none";
 993         }
 994     }
 995 
 996     return check_type;
 997 }
 998 
 999 static void
1000 schedule_internal_command(const char *origin,
     /* [previous][next][first][last][top][bottom][index][help] */
1001                           stonith_device_t * device,
1002                           const char *action,
1003                           const char *victim,
1004                           int timeout,
1005                           void *internal_user_data,
1006                           void (*done_cb) (GPid pid, int rc, const char *output,
1007                                            gpointer user_data))
1008 {
1009     async_command_t *cmd = NULL;
1010 
1011     cmd = calloc(1, sizeof(async_command_t));
1012 
1013     cmd->id = -1;
1014     cmd->default_timeout = timeout ? timeout : 60;
1015     cmd->timeout = cmd->default_timeout;
1016     cmd->action = strdup(action);
1017     cmd->victim = victim ? strdup(victim) : NULL;
1018     cmd->device = strdup(device->id);
1019     cmd->origin = strdup(origin);
1020     cmd->client = strdup(crm_system_name);
1021     cmd->client_name = strdup(crm_system_name);
1022 
1023     cmd->internal_user_data = internal_user_data;
1024     cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
1025 
1026     schedule_stonith_command(cmd, device);
1027 }
1028 
1029 gboolean
1030 string_in_list(GList *list, const char *item)
     /* [previous][next][first][last][top][bottom][index][help] */
1031 {
1032     int lpc = 0;
1033     int max = g_list_length(list);
1034 
1035     for (lpc = 0; lpc < max; lpc++) {
1036         const char *value = g_list_nth_data(list, lpc);
1037 
1038         if (pcmk__str_eq(item, value, pcmk__str_casei)) {
1039             return TRUE;
1040         } else {
1041             crm_trace("%d: '%s' != '%s'", lpc, item, value);
1042         }
1043     }
1044     return FALSE;
1045 }
1046 
1047 static void
1048 status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1049 {
1050     async_command_t *cmd = user_data;
1051     struct device_search_s *search = cmd->internal_user_data;
1052     stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
1053     gboolean can = FALSE;
1054 
1055     free_async_command(cmd);
1056 
1057     if (!dev) {
1058         search_devices_record_result(search, NULL, FALSE);
1059         return;
1060     }
1061 
1062     mainloop_set_trigger(dev->work);
1063 
1064     if (rc == 1 /* unknown */ ) {
1065         crm_trace("Host %s is not known by %s", search->host, dev->id);
1066 
1067     } else if (rc == 0 /* active */  || rc == 2 /* inactive */ ) {
1068         crm_trace("Host %s is known by %s", search->host, dev->id);
1069         can = TRUE;
1070 
1071     } else {
1072         crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host,
1073                    rc);
1074     }
1075     search_devices_record_result(search, dev->id, can);
1076 }
1077 
1078 static void
1079 dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1080 {
1081     async_command_t *cmd = user_data;
1082     struct device_search_s *search = cmd->internal_user_data;
1083     stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
1084     gboolean can_fence = FALSE;
1085 
1086     free_async_command(cmd);
1087 
1088     /* Host/alias must be in the list output to be eligible to be fenced
1089      *
1090      * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
1091      *  if the guest is still listed despite being moved to another machine
1092      */
1093     if (!dev) {
1094         search_devices_record_result(search, NULL, FALSE);
1095         return;
1096     }
1097 
1098     mainloop_set_trigger(dev->work);
1099 
1100     /* If we successfully got the targets earlier, don't disable. */
1101     if (rc != 0 && !dev->targets) {
1102         crm_notice("Disabling port list queries for %s: %s "
1103                    CRM_XS " rc=%d", dev->id, output, rc);
1104         /* Fall back to status */
1105         g_hash_table_replace(dev->params,
1106                              strdup(PCMK_STONITH_HOST_CHECK), strdup("status"));
1107 
1108         g_list_free_full(dev->targets, free);
1109         dev->targets = NULL;
1110     } else if (!rc) {
1111         crm_info("Refreshing port list for %s", dev->id);
1112         g_list_free_full(dev->targets, free);
1113         dev->targets = stonith__parse_targets(output);
1114         dev->targets_age = time(NULL);
1115     }
1116 
1117     if (dev->targets) {
1118         const char *alias = g_hash_table_lookup(dev->aliases, search->host);
1119 
1120         if (!alias) {
1121             alias = search->host;
1122         }
1123         if (string_in_list(dev->targets, alias)) {
1124             can_fence = TRUE;
1125         }
1126     }
1127     search_devices_record_result(search, dev->id, can_fence);
1128 }
1129 
1130 /*!
1131  * \internal
1132  * \brief Returns true if any key in first is not in second or second has a different value for key
1133  */
1134 static int
1135 device_params_diff(GHashTable *first, GHashTable *second) {
     /* [previous][next][first][last][top][bottom][index][help] */
1136     char *key = NULL;
1137     char *value = NULL;
1138     GHashTableIter gIter;
1139 
1140     g_hash_table_iter_init(&gIter, first);
1141     while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
1142 
1143         if(strstr(key, "CRM_meta") == key) {
1144             continue;
1145         } else if(strcmp(key, "crm_feature_set") == 0) {
1146             continue;
1147         } else {
1148             char *other_value = g_hash_table_lookup(second, key);
1149 
1150             if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
1151                 crm_trace("Different value for %s: %s != %s", key, other_value, value);
1152                 return 1;
1153             }
1154         }
1155     }
1156 
1157     return 0;
1158 }
1159 
1160 /*!
1161  * \internal
1162  * \brief Checks to see if an identical device already exists in the device_list
1163  */
1164 static stonith_device_t *
1165 device_has_duplicate(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
1166 {
1167     stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
1168 
1169     if (!dup) {
1170         crm_trace("No match for %s", device->id);
1171         return NULL;
1172 
1173     } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
1174         crm_trace("Different agent: %s != %s", dup->agent, device->agent);
1175         return NULL;
1176     }
1177 
1178     /* Use calculate_operation_digest() here? */
1179     if (device_params_diff(device->params, dup->params) ||
1180         device_params_diff(dup->params, device->params)) {
1181         return NULL;
1182     }
1183 
1184     crm_trace("Match");
1185     return dup;
1186 }
1187 
1188 int
1189 stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1190 {
1191     stonith_device_t *dup = NULL;
1192     stonith_device_t *device = build_device_from_xml(msg);
1193     guint ndevices = 0;
1194 
1195     CRM_CHECK(device != NULL, return -ENOMEM);
1196 
1197     dup = device_has_duplicate(device);
1198     if (dup) {
1199         ndevices = g_hash_table_size(device_list);
1200         crm_debug("Device '%s' already in device list (%d active device%s)",
1201                   device->id, ndevices, pcmk__plural_s(ndevices));
1202         free_device(device);
1203         device = dup;
1204         dup = g_hash_table_lookup(device_list, device->id);
1205         dup->dirty = FALSE;
1206 
1207     } else {
1208         stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
1209 
1210         if (from_cib && old && old->api_registered) {
1211             /* If the cib is writing over an entry that is shared with a stonith client,
1212              * copy any pending ops that currently exist on the old entry to the new one.
1213              * Otherwise the pending ops will be reported as failures
1214              */
1215             crm_info("Overwriting existing entry for %s from CIB", device->id);
1216             device->pending_ops = old->pending_ops;
1217             device->api_registered = TRUE;
1218             old->pending_ops = NULL;
1219             if (device->pending_ops) {
1220                 mainloop_set_trigger(device->work);
1221             }
1222         }
1223         g_hash_table_replace(device_list, device->id, device);
1224 
1225         ndevices = g_hash_table_size(device_list);
1226         crm_notice("Added '%s' to device list (%d active device%s)",
1227                    device->id, ndevices, pcmk__plural_s(ndevices));
1228     }
1229     if (desc) {
1230         *desc = device->id;
1231     }
1232 
1233     if (from_cib) {
1234         device->cib_registered = TRUE;
1235     } else {
1236         device->api_registered = TRUE;
1237     }
1238 
1239     return pcmk_ok;
1240 }
1241 
1242 int
1243 stonith_device_remove(const char *id, gboolean from_cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1244 {
1245     stonith_device_t *device = g_hash_table_lookup(device_list, id);
1246     guint ndevices = 0;
1247 
1248     if (!device) {
1249         ndevices = g_hash_table_size(device_list);
1250         crm_info("Device '%s' not found (%d active device%s)",
1251                  id, ndevices, pcmk__plural_s(ndevices));
1252         return pcmk_ok;
1253     }
1254 
1255     if (from_cib) {
1256         device->cib_registered = FALSE;
1257     } else {
1258         device->verified = FALSE;
1259         device->api_registered = FALSE;
1260     }
1261 
1262     if (!device->cib_registered && !device->api_registered) {
1263         g_hash_table_remove(device_list, id);
1264         ndevices = g_hash_table_size(device_list);
1265         crm_info("Removed '%s' from device list (%d active device%s)",
1266                  id, ndevices, pcmk__plural_s(ndevices));
1267     } else {
1268         crm_trace("Not removing '%s' from device list (%d active) because "
1269                   "still registered via:%s%s",
1270                   id, g_hash_table_size(device_list),
1271                   (device->cib_registered? " cib" : ""),
1272                   (device->api_registered? " api" : ""));
1273     }
1274     return pcmk_ok;
1275 }
1276 
1277 /*!
1278  * \internal
1279  * \brief Return the number of stonith levels registered for a node
1280  *
1281  * \param[in] tp  Node's topology table entry
1282  *
1283  * \return Number of non-NULL levels in topology entry
1284  * \note This function is used only for log messages.
1285  */
1286 static int
1287 count_active_levels(stonith_topology_t * tp)
     /* [previous][next][first][last][top][bottom][index][help] */
1288 {
1289     int lpc = 0;
1290     int count = 0;
1291 
1292     for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
1293         if (tp->levels[lpc] != NULL) {
1294             count++;
1295         }
1296     }
1297     return count;
1298 }
1299 
1300 static void
1301 free_topology_entry(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1302 {
1303     stonith_topology_t *tp = data;
1304 
1305     int lpc = 0;
1306 
1307     for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
1308         if (tp->levels[lpc] != NULL) {
1309             g_list_free_full(tp->levels[lpc], free);
1310         }
1311     }
1312     free(tp->target);
1313     free(tp->target_value);
1314     free(tp->target_pattern);
1315     free(tp->target_attribute);
1316     free(tp);
1317 }
1318 
1319 void
1320 free_topology_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1321 {
1322     if (topology != NULL) {
1323         g_hash_table_destroy(topology);
1324         topology = NULL;
1325     }
1326 }
1327 
1328 void
1329 init_topology_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1330 {
1331     if (topology == NULL) {
1332         topology = pcmk__strkey_table(NULL, free_topology_entry);
1333     }
1334 }
1335 
1336 char *stonith_level_key(xmlNode *level, int mode)
     /* [previous][next][first][last][top][bottom][index][help] */
1337 {
1338     if(mode == -1) {
1339         mode = stonith_level_kind(level);
1340     }
1341 
1342     switch(mode) {
1343         case 0:
1344             return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
1345         case 1:
1346             return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
1347         case 2:
1348             {
1349                 const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
1350                 const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE);
1351 
1352                 if(name && value) {
1353                     return crm_strdup_printf("%s=%s", name, value);
1354                 }
1355             }
1356         default:
1357             return crm_strdup_printf("Unknown-%d-%s", mode, ID(level));
1358     }
1359 }
1360 
1361 int stonith_level_kind(xmlNode * level)
     /* [previous][next][first][last][top][bottom][index][help] */
1362 {
1363     int mode = 0;
1364     const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET);
1365 
1366     if(target == NULL) {
1367         mode++;
1368         target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN);
1369     }
1370 
1371     if(stand_alone == FALSE && target == NULL) {
1372 
1373         mode++;
1374 
1375         if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) {
1376             mode++;
1377 
1378         } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) {
1379             mode++;
1380         }
1381     }
1382 
1383     return mode;
1384 }
1385 
1386 static stonith_key_value_t *
1387 parse_device_list(const char *devices)
     /* [previous][next][first][last][top][bottom][index][help] */
1388 {
1389     int lpc = 0;
1390     int max = 0;
1391     int last = 0;
1392     stonith_key_value_t *output = NULL;
1393 
1394     if (devices == NULL) {
1395         return output;
1396     }
1397 
1398     max = strlen(devices);
1399     for (lpc = 0; lpc <= max; lpc++) {
1400         if (devices[lpc] == ',' || devices[lpc] == 0) {
1401             char *line = strndup(devices + last, lpc - last);
1402 
1403             output = stonith_key_value_add(output, NULL, line);
1404             free(line);
1405 
1406             last = lpc + 1;
1407         }
1408     }
1409 
1410     return output;
1411 }
1412 
1413 /*!
1414  * \internal
1415  * \brief Register a STONITH level for a target
1416  *
1417  * Given an XML request specifying the target name, level index, and device IDs
1418  * for the level, this will create an entry for the target in the global topology
1419  * table if one does not already exist, then append the specified device IDs to
1420  * the entry's device list for the specified level.
1421  *
1422  * \param[in]  msg   XML request for STONITH level registration
1423  * \param[out] desc  If not NULL, will be set to string representation ("TARGET[LEVEL]")
1424  *
1425  * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index
1426  */
1427 int
1428 stonith_level_register(xmlNode *msg, char **desc)
     /* [previous][next][first][last][top][bottom][index][help] */
1429 {
1430     int id = 0;
1431     xmlNode *level;
1432     int mode;
1433     char *target;
1434 
1435     stonith_topology_t *tp;
1436     stonith_key_value_t *dIter = NULL;
1437     stonith_key_value_t *devices = NULL;
1438 
1439     /* Allow the XML here to point to the level tag directly, or wrapped in
1440      * another tag. If directly, don't search by xpath, because it might give
1441      * multiple hits (e.g. if the XML is the CIB).
1442      */
1443     if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) {
1444         level = msg;
1445     } else {
1446         level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
1447     }
1448     CRM_CHECK(level != NULL, return -EINVAL);
1449 
1450     mode = stonith_level_kind(level);
1451     target = stonith_level_key(level, mode);
1452     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
1453 
1454     if (desc) {
1455         *desc = crm_strdup_printf("%s[%d]", target, id);
1456     }
1457 
1458     /* Sanity-check arguments */
1459     if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
1460         crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
1461         free(target);
1462         crm_log_xml_err(level, "Bad topology");
1463         return -EINVAL;
1464     }
1465 
1466     /* Find or create topology table entry */
1467     tp = g_hash_table_lookup(topology, target);
1468     if (tp == NULL) {
1469         tp = calloc(1, sizeof(stonith_topology_t));
1470         tp->kind = mode;
1471         tp->target = target;
1472         tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
1473         tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
1474         tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
1475 
1476         g_hash_table_replace(topology, tp->target, tp);
1477         crm_trace("Added %s (%d) to the topology (%d active entries)",
1478                   target, mode, g_hash_table_size(topology));
1479     } else {
1480         free(target);
1481     }
1482 
1483     if (tp->levels[id] != NULL) {
1484         crm_info("Adding to the existing %s[%d] topology entry",
1485                  tp->target, id);
1486     }
1487 
1488     devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
1489     for (dIter = devices; dIter; dIter = dIter->next) {
1490         const char *device = dIter->value;
1491 
1492         crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
1493         tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
1494     }
1495     stonith_key_value_freeall(devices, 1, 1);
1496 
1497     {
1498         int nlevels = count_active_levels(tp);
1499 
1500         crm_info("Target %s has %d active fencing level%s",
1501                  tp->target, nlevels, pcmk__plural_s(nlevels));
1502     }
1503     return pcmk_ok;
1504 }
1505 
1506 int
1507 stonith_level_remove(xmlNode *msg, char **desc)
     /* [previous][next][first][last][top][bottom][index][help] */
1508 {
1509     int id = 0;
1510     stonith_topology_t *tp;
1511     char *target;
1512 
1513     /* Unlike additions, removal requests should always have one level tag */
1514     xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
1515 
1516     CRM_CHECK(level != NULL, return -EINVAL);
1517 
1518     target = stonith_level_key(level, -1);
1519     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
1520     if (desc) {
1521         *desc = crm_strdup_printf("%s[%d]", target, id);
1522     }
1523 
1524     /* Sanity-check arguments */
1525     if (id >= ST_LEVEL_MAX) {
1526         free(target);
1527         return -EINVAL;
1528     }
1529 
1530     tp = g_hash_table_lookup(topology, target);
1531     if (tp == NULL) {
1532         guint nentries = g_hash_table_size(topology);
1533 
1534         crm_info("No fencing topology found for %s (%d active %s)",
1535                  target, nentries,
1536                  pcmk__plural_alt(nentries, "entry", "entries"));
1537 
1538     } else if (id == 0 && g_hash_table_remove(topology, target)) {
1539         guint nentries = g_hash_table_size(topology);
1540 
1541         crm_info("Removed all fencing topology entries related to %s "
1542                  "(%d active %s remaining)", target, nentries,
1543                  pcmk__plural_alt(nentries, "entry", "entries"));
1544 
1545     } else if (id > 0 && tp->levels[id] != NULL) {
1546         guint nlevels;
1547 
1548         g_list_free_full(tp->levels[id], free);
1549         tp->levels[id] = NULL;
1550 
1551         nlevels = count_active_levels(tp);
1552         crm_info("Removed level %d from fencing topology for %s "
1553                  "(%d active level%s remaining)",
1554                  id, target, nlevels, pcmk__plural_s(nlevels));
1555     }
1556 
1557     free(target);
1558     return pcmk_ok;
1559 }
1560 
1561 /*!
1562  * \internal
1563  * \brief Schedule an (asynchronous) action directly on a stonith device
1564  *
1565  * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
1566  * directly on a specified device. Only list, monitor, and status actions are
1567  * expected to use this call, though it should work with any agent command.
1568  *
1569  * \param[in]  msg     API message XML with desired action
1570  * \param[out] output  Unused
1571  *
1572  * \return -EINPROGRESS on success, -errno otherwise
1573  * \note If the action is monitor, the device must be registered via the API
1574  *       (CIB registration is not sufficient), because monitor should not be
1575  *       possible unless the device is "started" (API registered).
1576  */
1577 static int
1578 stonith_device_action(xmlNode * msg, char **output)
     /* [previous][next][first][last][top][bottom][index][help] */
1579 {
1580     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
1581     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
1582     const char *id = crm_element_value(dev, F_STONITH_DEVICE);
1583     const char *action = crm_element_value(op, F_STONITH_ACTION);
1584     async_command_t *cmd = NULL;
1585     stonith_device_t *device = NULL;
1586 
1587     if ((id == NULL) || (action == NULL)) {
1588         crm_info("Malformed API action request: device %s, action %s",
1589                  (id? id : "not specified"),
1590                  (action? action : "not specified"));
1591         return -EPROTO;
1592     }
1593 
1594     device = g_hash_table_lookup(device_list, id);
1595     if ((device == NULL)
1596         || (!device->api_registered && !strcmp(action, "monitor"))) {
1597 
1598         // Monitors may run only on "started" (API-registered) devices
1599         crm_info("Ignoring API '%s' action request because device %s not found",
1600                  action, id);
1601         return -ENODEV;
1602     }
1603 
1604     cmd = create_async_command(msg);
1605     if (cmd == NULL) {
1606         return -EPROTO;
1607     }
1608 
1609     schedule_stonith_command(cmd, device);
1610     return -EINPROGRESS;
1611 }
1612 
1613 static void
1614 search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
     /* [previous][next][first][last][top][bottom][index][help] */
1615 {
1616     search->replies_received++;
1617 
1618     if (can_fence && device) {
1619         search->capable = g_list_append(search->capable, strdup(device));
1620     }
1621 
1622     if (search->replies_needed == search->replies_received) {
1623 
1624         guint ndevices = g_list_length(search->capable);
1625 
1626         crm_debug("Search found %d device%s that can perform '%s' targeting %s",
1627                   ndevices, pcmk__plural_s(ndevices),
1628                   (search->action? search->action : "unknown action"),
1629                   (search->host? search->host : "any node"));
1630 
1631         search->callback(search->capable, search->user_data);
1632         free(search->host);
1633         free(search->action);
1634         free(search);
1635     }
1636 }
1637 
1638 /*!
1639  * \internal
1640  * \brief Check whether the local host is allowed to execute a fencing action
1641  *
1642  * \param[in] device         Fence device to check
1643  * \param[in] action         Fence action to check
1644  * \param[in] target         Hostname of fence target
1645  * \param[in] allow_suicide  Whether self-fencing is allowed for this operation
1646  *
1647  * \return TRUE if local host is allowed to execute action, FALSE otherwise
1648  */
1649 static gboolean
1650 localhost_is_eligible(const stonith_device_t *device, const char *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1651                       const char *target, gboolean allow_suicide)
1652 {
1653     gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
1654                                                 pcmk__str_casei);
1655 
1656     if (device && action && device->on_target_actions
1657         && strstr(device->on_target_actions, action)) {
1658         if (!localhost_is_target) {
1659             crm_trace("Operation '%s' using %s can only be executed for "
1660                       "local host, not %s", action, device->id, target);
1661             return FALSE;
1662         }
1663 
1664     } else if (localhost_is_target && !allow_suicide) {
1665         crm_trace("'%s' operation does not support self-fencing", action);
1666         return FALSE;
1667     }
1668     return TRUE;
1669 }
1670 
1671 static void
1672 can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search)
     /* [previous][next][first][last][top][bottom][index][help] */
1673 {
1674     gboolean can = FALSE;
1675     const char *check_type = NULL;
1676     const char *host = search->host;
1677     const char *alias = NULL;
1678 
1679     CRM_LOG_ASSERT(dev != NULL);
1680 
1681     if (dev == NULL) {
1682         goto search_report_results;
1683     } else if (host == NULL) {
1684         can = TRUE;
1685         goto search_report_results;
1686     }
1687 
1688     /* Short-circuit query if this host is not allowed to perform the action */
1689     if (pcmk__str_eq(search->action, "reboot", pcmk__str_casei)) {
1690         /* A "reboot" *might* get remapped to "off" then "on", so short-circuit
1691          * only if all three are disallowed. If only one or two are disallowed,
1692          * we'll report that with the results. We never allow suicide for
1693          * remapped "on" operations because the host is off at that point.
1694          */
1695         if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide)
1696             && !localhost_is_eligible(dev, "off", host, search->allow_suicide)
1697             && !localhost_is_eligible(dev, "on", host, FALSE)) {
1698             goto search_report_results;
1699         }
1700     } else if (!localhost_is_eligible(dev, search->action, host,
1701                                       search->allow_suicide)) {
1702         goto search_report_results;
1703     }
1704 
1705     alias = g_hash_table_lookup(dev->aliases, host);
1706     if (alias == NULL) {
1707         alias = host;
1708     }
1709 
1710     check_type = target_list_type(dev);
1711 
1712     if (pcmk__str_eq(check_type, "none", pcmk__str_casei)) {
1713         can = TRUE;
1714 
1715     } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) {
1716 
1717         /* Presence in the hostmap is sufficient
1718          * Only use if all hosts on which the device can be active can always fence all listed hosts
1719          */
1720 
1721         if (string_in_list(dev->targets, host)) {
1722             can = TRUE;
1723         } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
1724                    && g_hash_table_lookup(dev->aliases, host)) {
1725             can = TRUE;
1726         }
1727 
1728     } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) {
1729         time_t now = time(NULL);
1730 
1731         if (dev->targets == NULL || dev->targets_age + 60 < now) {
1732             crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
1733                       check_type, dev->id, search->host, search->action);
1734 
1735             schedule_internal_command(__func__, dev, "list", NULL,
1736                                       search->per_device_timeout, search, dynamic_list_search_cb);
1737 
1738             /* we'll respond to this search request async in the cb */
1739             return;
1740         }
1741 
1742         if (string_in_list(dev->targets, alias)) {
1743             can = TRUE;
1744         }
1745 
1746     } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) {
1747         crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
1748                   check_type, dev->id, search->host, search->action);
1749         schedule_internal_command(__func__, dev, "status", search->host,
1750                                   search->per_device_timeout, search, status_search_cb);
1751         /* we'll respond to this search request async in the cb */
1752         return;
1753     } else {
1754         crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
1755         check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
1756     }
1757 
1758     if (pcmk__str_eq(host, alias, pcmk__str_casei)) {
1759         crm_notice("%s is%s eligible to fence (%s) %s: %s",
1760                    dev->id, (can? "" : " not"), search->action, host,
1761                    check_type);
1762     } else {
1763         crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s",
1764                    dev->id, (can? "" : " not"), search->action, host, alias,
1765                    check_type);
1766     }
1767 
1768   search_report_results:
1769     search_devices_record_result(search, dev ? dev->id : NULL, can);
1770 }
1771 
1772 static void
1773 search_devices(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1774 {
1775     stonith_device_t *dev = value;
1776     struct device_search_s *search = user_data;
1777 
1778     can_fence_host_with_device(dev, search);
1779 }
1780 
1781 #define DEFAULT_QUERY_TIMEOUT 20
1782 static void
1783 get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
     /* [previous][next][first][last][top][bottom][index][help] */
1784                     void (*callback) (GList * devices, void *user_data))
1785 {
1786     struct device_search_s *search;
1787     int per_device_timeout = DEFAULT_QUERY_TIMEOUT;
1788     int devices_needing_async_query = 0;
1789     char *key = NULL;
1790     const char *check_type = NULL;
1791     GHashTableIter gIter;
1792     stonith_device_t *device = NULL;
1793     guint ndevices = g_hash_table_size(device_list);
1794 
1795     if (ndevices == 0) {
1796         callback(NULL, user_data);
1797         return;
1798     }
1799 
1800     search = calloc(1, sizeof(struct device_search_s));
1801     if (!search) {
1802         callback(NULL, user_data);
1803         return;
1804     }
1805 
1806     g_hash_table_iter_init(&gIter, device_list);
1807     while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) {
1808         check_type = target_list_type(device);
1809         if (pcmk__strcase_any_of(check_type, "status", "dynamic-list", NULL)) {
1810             devices_needing_async_query++;
1811         }
1812     }
1813 
1814     /* If we have devices that require an async event in order to know what
1815      * nodes they can fence, we have to give the events a timeout. The total
1816      * query timeout is divided among those events. */
1817     if (devices_needing_async_query) {
1818         per_device_timeout = timeout / devices_needing_async_query;
1819         if (!per_device_timeout) {
1820             crm_err("Fencing timeout %ds is too low; using %ds, "
1821                     "but consider raising to at least %ds",
1822                     timeout, DEFAULT_QUERY_TIMEOUT,
1823                     DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
1824             per_device_timeout = DEFAULT_QUERY_TIMEOUT;
1825         } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) {
1826             crm_notice("Fencing timeout %ds is low for the current "
1827                        "configuration; consider raising to at least %ds",
1828                        timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
1829         }
1830     }
1831 
1832     search->host = host ? strdup(host) : NULL;
1833     search->action = action ? strdup(action) : NULL;
1834     search->per_device_timeout = per_device_timeout;
1835     /* We are guaranteed this many replies. Even if a device gets
1836      * unregistered some how during the async search, we will get
1837      * the correct number of replies. */
1838     search->replies_needed = ndevices;
1839     search->allow_suicide = suicide;
1840     search->callback = callback;
1841     search->user_data = user_data;
1842     /* kick off the search */
1843 
1844     crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
1845               ndevices, pcmk__plural_s(ndevices),
1846               (search->action? search->action : "unknown action"),
1847               (search->host? search->host : "any node"));
1848     g_hash_table_foreach(device_list, search_devices, search);
1849 }
1850 
1851 struct st_query_data {
1852     xmlNode *reply;
1853     char *remote_peer;
1854     char *client_id;
1855     char *target;
1856     char *action;
1857     int call_options;
1858 };
1859 
1860 /*!
1861  * \internal
1862  * \brief Add action-specific attributes to query reply XML
1863  *
1864  * \param[in,out] xml     XML to add attributes to
1865  * \param[in]     action  Fence action
1866  * \param[in]     device  Fence device
1867  */
1868 static void
1869 add_action_specific_attributes(xmlNode *xml, const char *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1870                                stonith_device_t *device)
1871 {
1872     int action_specific_timeout;
1873     int delay_max;
1874     int delay_base;
1875 
1876     CRM_CHECK(xml && action && device, return);
1877 
1878     if (is_action_required(action, device)) {
1879         crm_trace("Action '%s' is required using %s", action, device->id);
1880         crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
1881     }
1882 
1883     action_specific_timeout = get_action_timeout(device, action, 0);
1884     if (action_specific_timeout) {
1885         crm_trace("Action '%s' has timeout %dms using %s",
1886                   action, action_specific_timeout, device->id);
1887         crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
1888     }
1889 
1890     delay_max = get_action_delay_max(device, action);
1891     if (delay_max > 0) {
1892         crm_trace("Action '%s' has maximum random delay %dms using %s",
1893                   action, delay_max, device->id);
1894         crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000);
1895     }
1896 
1897     delay_base = get_action_delay_base(device, action);
1898     if (delay_base > 0) {
1899         crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000);
1900     }
1901 
1902     if ((delay_max > 0) && (delay_base == 0)) {
1903         crm_trace("Action '%s' has maximum random delay %dms using %s",
1904                   action, delay_max, device->id);
1905     } else if ((delay_max == 0) && (delay_base > 0)) {
1906         crm_trace("Action '%s' has a static delay of %dms using %s",
1907                   action, delay_base, device->id);
1908     } else if ((delay_max > 0) && (delay_base > 0)) {
1909         crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen "
1910                   "maximum delay of %dms using %s",
1911                   action, delay_base, delay_max, device->id);
1912     }
1913 }
1914 
1915 /*!
1916  * \internal
1917  * \brief Add "disallowed" attribute to query reply XML if appropriate
1918  *
1919  * \param[in,out] xml            XML to add attribute to
1920  * \param[in]     action         Fence action
1921  * \param[in]     device         Fence device
1922  * \param[in]     target         Fence target
1923  * \param[in]     allow_suicide  Whether self-fencing is allowed
1924  */
1925 static void
1926 add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1927                const char *target, gboolean allow_suicide)
1928 {
1929     if (!localhost_is_eligible(device, action, target, allow_suicide)) {
1930         crm_trace("Action '%s' using %s is disallowed for local host",
1931                   action, device->id);
1932         crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE);
1933     }
1934 }
1935 
1936 /*!
1937  * \internal
1938  * \brief Add child element with action-specific values to query reply XML
1939  *
1940  * \param[in,out] xml            XML to add attribute to
1941  * \param[in]     action         Fence action
1942  * \param[in]     device         Fence device
1943  * \param[in]     target         Fence target
1944  * \param[in]     allow_suicide  Whether self-fencing is allowed
1945  */
1946 static void
1947 add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1948                const char *target, gboolean allow_suicide)
1949 {
1950     xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);
1951 
1952     crm_xml_add(child, XML_ATTR_ID, action);
1953     add_action_specific_attributes(child, action, device);
1954     add_disallowed(child, action, device, target, allow_suicide);
1955 }
1956 
1957 static void
1958 stonith_query_capable_device_cb(GList * devices, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1959 {
1960     struct st_query_data *query = user_data;
1961     int available_devices = 0;
1962     xmlNode *dev = NULL;
1963     xmlNode *list = NULL;
1964     GList *lpc = NULL;
1965 
1966     /* Pack the results into XML */
1967     list = create_xml_node(NULL, __func__);
1968     crm_xml_add(list, F_STONITH_TARGET, query->target);
1969     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
1970         stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
1971         const char *action = query->action;
1972 
1973         if (!device) {
1974             /* It is possible the device got unregistered while
1975              * determining who can fence the target */
1976             continue;
1977         }
1978 
1979         available_devices++;
1980 
1981         dev = create_xml_node(list, F_STONITH_DEVICE);
1982         crm_xml_add(dev, XML_ATTR_ID, device->id);
1983         crm_xml_add(dev, "namespace", device->namespace);
1984         crm_xml_add(dev, "agent", device->agent);
1985         crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
1986 
1987         /* If the originating fencer wants to reboot the node, and we have a
1988          * capable device that doesn't support "reboot", remap to "off" instead.
1989          */
1990         if (!pcmk_is_set(device->flags, st_device_supports_reboot)
1991             && pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) {
1992             crm_trace("%s doesn't support reboot, using values for off instead",
1993                       device->id);
1994             action = "off";
1995         }
1996 
1997         /* Add action-specific values if available */
1998         add_action_specific_attributes(dev, action, device);
1999         if (pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) {
2000             /* A "reboot" *might* get remapped to "off" then "on", so after
2001              * sending the "reboot"-specific values in the main element, we add
2002              * sub-elements for "off" and "on" values.
2003              *
2004              * We short-circuited earlier if "reboot", "off" and "on" are all
2005              * disallowed for the local host. However if only one or two are
2006              * disallowed, we send back the results and mark which ones are
2007              * disallowed. If "reboot" is disallowed, this might cause problems
2008              * with older fencer versions, which won't check for it. Older
2009              * versions will ignore "off" and "on", so they are not a problem.
2010              */
2011             add_disallowed(dev, action, device, query->target,
2012                            pcmk_is_set(query->call_options, st_opt_allow_suicide));
2013             add_action_reply(dev, "off", device, query->target,
2014                              pcmk_is_set(query->call_options, st_opt_allow_suicide));
2015             add_action_reply(dev, "on", device, query->target, FALSE);
2016         }
2017 
2018         /* A query without a target wants device parameters */
2019         if (query->target == NULL) {
2020             xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
2021 
2022             g_hash_table_foreach(device->params, hash2field, attrs);
2023         }
2024     }
2025 
2026     crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
2027     if (query->target) {
2028         crm_debug("Found %d matching device%s for target '%s'",
2029                   available_devices, pcmk__plural_s(available_devices),
2030                   query->target);
2031     } else {
2032         crm_debug("%d device%s installed",
2033                   available_devices, pcmk__plural_s(available_devices));
2034     }
2035 
2036     if (list != NULL) {
2037         crm_log_xml_trace(list, "Add query results");
2038         add_message_xml(query->reply, F_STONITH_CALLDATA, list);
2039     }
2040     stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id);
2041 
2042     free_xml(query->reply);
2043     free(query->remote_peer);
2044     free(query->client_id);
2045     free(query->target);
2046     free(query->action);
2047     free(query);
2048     free_xml(list);
2049     g_list_free_full(devices, free);
2050 }
2051 
2052 static void
2053 stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options)
     /* [previous][next][first][last][top][bottom][index][help] */
2054 {
2055     struct st_query_data *query = NULL;
2056     const char *action = NULL;
2057     const char *target = NULL;
2058     int timeout = 0;
2059     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER);
2060 
2061     crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
2062     if (dev) {
2063         const char *device = crm_element_value(dev, F_STONITH_DEVICE);
2064 
2065         target = crm_element_value(dev, F_STONITH_TARGET);
2066         action = crm_element_value(dev, F_STONITH_ACTION);
2067         if (device && pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
2068             /* No query or reply necessary */
2069             return;
2070         }
2071     }
2072 
2073     crm_log_xml_debug(msg, "Query");
2074     query = calloc(1, sizeof(struct st_query_data));
2075 
2076     query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
2077     query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
2078     query->client_id = client_id ? strdup(client_id) : NULL;
2079     query->target = target ? strdup(target) : NULL;
2080     query->action = action ? strdup(action) : NULL;
2081     query->call_options = call_options;
2082 
2083     get_capable_devices(target, action, timeout,
2084                         pcmk_is_set(call_options, st_opt_allow_suicide),
2085                         query, stonith_query_capable_device_cb);
2086 }
2087 
2088 #define ST_LOG_OUTPUT_MAX 512
2089 static void
2090 log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output, gboolean op_merged)
     /* [previous][next][first][last][top][bottom][index][help] */
2091 {
2092     if (rc == 0) {
2093         next = NULL;
2094     }
2095 
2096     if (cmd->victim != NULL) {
2097         do_crm_log(((rc == 0)? LOG_NOTICE : LOG_ERR),
2098                    "Operation '%s' [%d] (%scall %d from %s) targeting %s "
2099                    "using %s returned %d (%s)%s%s",
2100                    cmd->action, pid, (op_merged? "merged " : ""), cmd->id,
2101                    cmd->client_name, cmd->victim,
2102                    cmd->device, rc, pcmk_strerror(rc),
2103                    (next? ", retrying with " : ""), (next ? next : ""));
2104     } else {
2105         do_crm_log_unlikely(((rc == 0)? LOG_DEBUG : LOG_NOTICE),
2106                             "Operation '%s' [%d]%s using %s returned %d (%s)%s%s",
2107                             cmd->action, pid, (op_merged? " (merged)" : ""),
2108                             cmd->device, rc, pcmk_strerror(rc),
2109                             (next? ", retrying with " : ""), (next ? next : ""));
2110     }
2111 
2112     if (output) {
2113         // Output may have multiple lines
2114         char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
2115 
2116         crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output);
2117         free(prefix);
2118     }
2119 }
2120 
2121 static void
2122 stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
2123 {
2124     xmlNode *reply = NULL;
2125     gboolean bcast = FALSE;
2126 
2127     reply = stonith_construct_async_reply(cmd, output, NULL, rc);
2128 
2129     if (pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
2130         /* Too verbose to log */
2131         crm_trace("Metadata query for %s", cmd->device);
2132         output = NULL;
2133 
2134     } else if (pcmk__str_any_of(cmd->action, "monitor", "list", "status", NULL)) {
2135         crm_trace("Never broadcast '%s' replies", cmd->action);
2136 
2137     } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) {
2138         crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim);
2139         crm_xml_add(reply, F_SUBTYPE, "broadcast");
2140         bcast = TRUE;
2141     }
2142 
2143     log_operation(cmd, rc, pid, NULL, output, (options & st_reply_opt_merged ? TRUE : FALSE));
2144     crm_log_xml_trace(reply, "Reply");
2145 
2146     if (options & st_reply_opt_merged) {
2147         crm_xml_add(reply, F_STONITH_MERGED, "true");
2148     }
2149 
2150     if (bcast) {
2151         crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
2152         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
2153 
2154     } else if (cmd->origin) {
2155         crm_trace("Directed reply to %s", cmd->origin);
2156         send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
2157 
2158     } else {
2159         crm_trace("Directed local %ssync reply to %s",
2160                   (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
2161         do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
2162     }
2163 
2164     if (stand_alone) {
2165         /* Do notification with a clean data object */
2166         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
2167 
2168         crm_xml_add_int(notify_data, F_STONITH_RC, rc);
2169         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
2170         crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
2171         crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
2172         crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
2173         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
2174         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
2175 
2176         do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
2177         do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
2178     }
2179 
2180     free_xml(reply);
2181 }
2182 
2183 static void
2184 cancel_stonith_command(async_command_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
2185 {
2186     stonith_device_t *device;
2187 
2188     CRM_CHECK(cmd != NULL, return);
2189 
2190     if (!cmd->device) {
2191         return;
2192     }
2193 
2194     device = g_hash_table_lookup(device_list, cmd->device);
2195 
2196     if (device) {
2197         crm_trace("Cancel scheduled '%s' action using %s",
2198                   cmd->action, device->id);
2199         device->pending_ops = g_list_remove(device->pending_ops, cmd);
2200     }
2201 }
2202 
2203 static void
2204 st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2205 {
2206     stonith_device_t *device = NULL;
2207     stonith_device_t *next_device = NULL;
2208     async_command_t *cmd = user_data;
2209 
2210     GList *gIter = NULL;
2211     GList *gIterNext = NULL;
2212 
2213     CRM_CHECK(cmd != NULL, return);
2214 
2215     cmd->active_on = NULL;
2216 
2217     /* The device is ready to do something else now */
2218     device = g_hash_table_lookup(device_list, cmd->device);
2219     if (device) {
2220         if (!device->verified && (rc == pcmk_ok) &&
2221             (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
2222 
2223             device->verified = TRUE;
2224         }
2225 
2226         mainloop_set_trigger(device->work);
2227     }
2228 
2229     crm_debug("Operation '%s' using %s returned %d (%d devices remaining)",
2230               cmd->action, cmd->device, rc, g_list_length(cmd->device_next));
2231 
2232     if (rc == 0) {
2233         GList *iter;
2234         /* see if there are any required devices left to execute for this op */
2235         for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
2236             next_device = g_hash_table_lookup(device_list, iter->data);
2237 
2238             if (next_device != NULL && is_action_required(cmd->action, next_device)) {
2239                 cmd->device_next = iter->next;
2240                 break;
2241             }
2242             next_device = NULL;
2243         }
2244 
2245     } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
2246         /* if this device didn't work out, see if there are any others we can try.
2247          * if the failed device was 'required', we can't pick another device. */
2248         next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
2249         cmd->device_next = cmd->device_next->next;
2250     }
2251 
2252     /* this operation requires more fencing, hooray! */
2253     if (next_device) {
2254         log_operation(cmd, rc, pid, next_device->id, output, FALSE);
2255 
2256         schedule_stonith_command(cmd, next_device);
2257         /* Prevent cmd from being freed */
2258         cmd = NULL;
2259         goto done;
2260     }
2261 
2262     stonith_send_async_reply(cmd, output, rc, pid, st_reply_opt_none);
2263 
2264     if (rc != 0) {
2265         goto done;
2266     }
2267 
2268     /* Check to see if any operations are scheduled to do the exact
2269      * same thing that just completed.  If so, rather than
2270      * performing the same fencing operation twice, return the result
2271      * of this operation for all pending commands it matches. */
2272     for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
2273         async_command_t *cmd_other = gIter->data;
2274 
2275         gIterNext = gIter->next;
2276 
2277         if (cmd == cmd_other) {
2278             continue;
2279         }
2280 
2281         /* A pending scheduled command matches the command that just finished if.
2282          * 1. The client connections are different.
2283          * 2. The node victim is the same.
2284          * 3. The fencing action is the same.
2285          * 4. The device scheduled to execute the action is the same.
2286          */
2287         if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
2288             !pcmk__str_eq(cmd->victim, cmd_other->victim, pcmk__str_casei) ||
2289             !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_casei) ||
2290             !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
2291 
2292             continue;
2293         }
2294 
2295         /* Duplicate merging will do the right thing for either type of remapped
2296          * reboot. If the executing fencer remapped an unsupported reboot to
2297          * off, then cmd->action will be reboot and will be merged with any
2298          * other reboot requests. If the originating fencer remapped a
2299          * topology reboot to off then on, we will get here once with
2300          * cmd->action "off" and once with "on", and they will be merged
2301          * separately with similar requests.
2302          */
2303         crm_notice("Merging fencing action '%s' targeting %s originating from "
2304                    "client %s with identical fencing request from client %s",
2305                    cmd_other->action, cmd_other->victim, cmd_other->client_name,
2306                    cmd->client_name);
2307 
2308         cmd_list = g_list_remove_link(cmd_list, gIter);
2309 
2310         stonith_send_async_reply(cmd_other, output, rc, pid, st_reply_opt_merged);
2311         cancel_stonith_command(cmd_other);
2312 
2313         free_async_command(cmd_other);
2314         g_list_free_1(gIter);
2315     }
2316 
2317   done:
2318     free_async_command(cmd);
2319 }
2320 
2321 static gint
2322 sort_device_priority(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
2323 {
2324     const stonith_device_t *dev_a = a;
2325     const stonith_device_t *dev_b = b;
2326 
2327     if (dev_a->priority > dev_b->priority) {
2328         return -1;
2329     } else if (dev_a->priority < dev_b->priority) {
2330         return 1;
2331     }
2332     return 0;
2333 }
2334 
2335 static void
2336 stonith_fence_get_devices_cb(GList * devices, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2337 {
2338     async_command_t *cmd = user_data;
2339     stonith_device_t *device = NULL;
2340     guint ndevices = g_list_length(devices);
2341 
2342     crm_info("Found %d matching device%s for target '%s'",
2343              ndevices, pcmk__plural_s(ndevices), cmd->victim);
2344 
2345     if (devices != NULL) {
2346         /* Order based on priority */
2347         devices = g_list_sort(devices, sort_device_priority);
2348         device = g_hash_table_lookup(device_list, devices->data);
2349 
2350         if (device) {
2351             cmd->device_list = devices;
2352             cmd->device_next = devices->next;
2353             devices = NULL;     /* list owned by cmd now */
2354         }
2355     }
2356 
2357     /* we have a device, schedule it for fencing. */
2358     if (device) {
2359         schedule_stonith_command(cmd, device);
2360         /* in progress */
2361         return;
2362     }
2363 
2364     /* no device found! */
2365     stonith_send_async_reply(cmd, NULL, -ENODEV, 0, st_reply_opt_none);
2366 
2367     free_async_command(cmd);
2368     g_list_free_full(devices, free);
2369 }
2370 
2371 static int
2372 stonith_fence(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
2373 {
2374     const char *device_id = NULL;
2375     stonith_device_t *device = NULL;
2376     async_command_t *cmd = create_async_command(msg);
2377     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
2378 
2379     if (cmd == NULL) {
2380         return -EPROTO;
2381     }
2382 
2383     device_id = crm_element_value(dev, F_STONITH_DEVICE);
2384     if (device_id) {
2385         device = g_hash_table_lookup(device_list, device_id);
2386         if (device == NULL) {
2387             crm_err("Requested device '%s' is not available", device_id);
2388             return -ENODEV;
2389         }
2390         schedule_stonith_command(cmd, device);
2391 
2392     } else {
2393         const char *host = crm_element_value(dev, F_STONITH_TARGET);
2394 
2395         if (cmd->options & st_opt_cs_nodeid) {
2396             int nodeid;
2397             crm_node_t *node;
2398 
2399             pcmk__scan_min_int(host, &nodeid, 0);
2400             node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
2401             if (node) {
2402                 host = node->uname;
2403             }
2404         }
2405 
2406         /* If we get to here, then self-fencing is implicitly allowed */
2407         get_capable_devices(host, cmd->action, cmd->default_timeout,
2408                             TRUE, cmd, stonith_fence_get_devices_cb);
2409     }
2410 
2411     return -EINPROGRESS;
2412 }
2413 
2414 xmlNode *
2415 stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
2416 {
2417     xmlNode *reply = NULL;
2418 
2419     reply = create_xml_node(NULL, T_STONITH_REPLY);
2420 
2421     crm_xml_add(reply, "st_origin", __func__);
2422     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
2423     crm_xml_add(reply, "st_output", output);
2424     crm_xml_add_int(reply, F_STONITH_RC, rc);
2425 
2426     if (request == NULL) {
2427         /* Most likely, this is the result of a stonith operation that was
2428          * initiated before we came up. Unfortunately that means we lack enough
2429          * information to provide clients with a full result.
2430          *
2431          * @TODO Maybe synchronize this information at start-up?
2432          */
2433         crm_warn("Missing request information for client notifications for "
2434                  "operation with result %d (initiated before we came up?)", rc);
2435 
2436     } else {
2437         const char *name = NULL;
2438         const char *value = NULL;
2439 
2440         const char *names[] = {
2441             F_STONITH_OPERATION,
2442             F_STONITH_CALLID,
2443             F_STONITH_CLIENTID,
2444             F_STONITH_CLIENTNAME,
2445             F_STONITH_REMOTE_OP_ID,
2446             F_STONITH_CALLOPTS
2447         };
2448 
2449         crm_trace("Creating a result reply with%s reply output (rc=%d)",
2450                   (data? "" : "out"), rc);
2451         for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
2452             name = names[lpc];
2453             value = crm_element_value(request, name);
2454             crm_xml_add(reply, name, value);
2455         }
2456         if (data != NULL) {
2457             add_message_xml(reply, F_STONITH_CALLDATA, data);
2458         }
2459     }
2460     return reply;
2461 }
2462 
2463 static xmlNode *
2464 stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
2465 {
2466     xmlNode *reply = NULL;
2467 
2468     crm_trace("Creating a basic reply");
2469     reply = create_xml_node(NULL, T_STONITH_REPLY);
2470 
2471     crm_xml_add(reply, "st_origin", __func__);
2472     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
2473 
2474     crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
2475     crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
2476     crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
2477     crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
2478     crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
2479     crm_xml_add(reply, F_STONITH_TARGET, cmd->victim);
2480     crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
2481     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
2482     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
2483     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
2484 
2485     crm_xml_add_int(reply, F_STONITH_RC, rc);
2486 
2487     crm_xml_add(reply, "st_output", output);
2488 
2489     if (data != NULL) {
2490         crm_info("Attaching reply output");
2491         add_message_xml(reply, F_STONITH_CALLDATA, data);
2492     }
2493     return reply;
2494 }
2495 
2496 bool fencing_peer_active(crm_node_t *peer)
     /* [previous][next][first][last][top][bottom][index][help] */
2497 {
2498     if (peer == NULL) {
2499         return FALSE;
2500     } else if (peer->uname == NULL) {
2501         return FALSE;
2502     } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
2503         return TRUE;
2504     }
2505     return FALSE;
2506 }
2507 
2508 /*!
2509  * \internal
2510  * \brief Determine if we need to use an alternate node to
2511  * fence the target. If so return that node's uname
2512  *
2513  * \retval NULL, no alternate host
2514  * \retval uname, uname of alternate host to use
2515  */
2516 static const char *
2517 check_alternate_host(const char *target)
     /* [previous][next][first][last][top][bottom][index][help] */
2518 {
2519     const char *alternate_host = NULL;
2520 
2521     crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target);
2522     if (find_topology_for_host(target) && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
2523         GHashTableIter gIter;
2524         crm_node_t *entry = NULL;
2525 
2526         g_hash_table_iter_init(&gIter, crm_peer_cache);
2527         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
2528             crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target);
2529             if (fencing_peer_active(entry)
2530                 && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) {
2531                 alternate_host = entry->uname;
2532                 break;
2533             }
2534         }
2535         if (alternate_host == NULL) {
2536             crm_err("No alternate host available to handle request "
2537                     "for self-fencing with topology");
2538             g_hash_table_iter_init(&gIter, crm_peer_cache);
2539             while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
2540                 crm_notice("Peer[%d] %s", entry->id, entry->uname);
2541             }
2542         }
2543     }
2544 
2545     return alternate_host;
2546 }
2547 
2548 static void
2549 stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
     /* [previous][next][first][last][top][bottom][index][help] */
2550                    const char *client_id)
2551 {
2552     if (remote_peer) {
2553         send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE);
2554     } else {
2555         do_local_reply(reply, client_id,
2556                        pcmk_is_set(call_options, st_opt_sync_call),
2557                        (remote_peer != NULL));
2558     }
2559 }
2560 
2561 static void 
2562 remove_relay_op(xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
2563 {
2564     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE);
2565     const char *relay_op_id = NULL; 
2566     const char *op_id = NULL;
2567     const char *client_name = NULL;
2568     const char *target = NULL; 
2569     remote_fencing_op_t *relay_op = NULL; 
2570 
2571     if (dev) { 
2572         target = crm_element_value(dev, F_STONITH_TARGET); 
2573     }
2574 
2575     relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY);
2576     op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
2577     client_name = crm_element_value(request, F_STONITH_CLIENTNAME);
2578 
2579     /* Delete RELAY operation. */
2580     if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
2581         relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
2582 
2583         if (relay_op) {
2584             GHashTableIter iter;
2585             remote_fencing_op_t *list_op = NULL; 
2586             g_hash_table_iter_init(&iter, stonith_remote_op_list);
2587 
2588             /* If the operation to be deleted is registered as a duplicate, delete the registration. */
2589             while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
2590                 GList *dup_iter = NULL;
2591                 if (list_op != relay_op) {
2592                     for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
2593                         remote_fencing_op_t *other = dup_iter->data;
2594                         if (other == relay_op) {
2595                             other->duplicates = g_list_remove(other->duplicates, relay_op);
2596                             break;
2597                         }
2598                     }
2599                 }
2600             }
2601             crm_debug("Deleting relay op %s ('%s' targeting %s for %s), "
2602                       "replaced by op %s ('%s' targeting %s for %s)",
2603                       relay_op->id, relay_op->action, relay_op->target,
2604                       relay_op->client_name, op_id, relay_op->action, target,
2605                       client_name);
2606 
2607             g_hash_table_remove(stonith_remote_op_list, relay_op_id);
2608         }
2609     }
2610 }
2611 
2612 static int
2613 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     /* [previous][next][first][last][top][bottom][index][help] */
2614                xmlNode *request, const char *remote_peer)
2615 {
2616     int call_options = 0;
2617     int rc = -EOPNOTSUPP;
2618 
2619     xmlNode *data = NULL;
2620     xmlNode *reply = NULL;
2621 
2622     char *output = NULL;
2623     const char *op = crm_element_value(request, F_STONITH_OPERATION);
2624     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
2625 
2626     /* IPC commands related to fencing configuration may be done only by
2627      * privileged users (i.e. root or hacluster), because all other users should
2628      * go through the CIB to have ACLs applied.
2629      *
2630      * If no client was given, this is a peer request, which is always allowed.
2631      */
2632     bool allowed = (client == NULL)
2633                    || pcmk_is_set(client->flags, pcmk__client_privileged);
2634 
2635     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
2636 
2637     if (pcmk_is_set(call_options, st_opt_sync_call)) {
2638         CRM_ASSERT(client == NULL || client->request_id == id);
2639     }
2640 
2641     if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
2642         xmlNode *reply = create_xml_node(NULL, "reply");
2643 
2644         CRM_ASSERT(client);
2645         crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
2646         crm_xml_add(reply, F_STONITH_CLIENTID, client->id);
2647         pcmk__ipc_send_xml(client, id, reply, flags);
2648         client->request_id = 0;
2649         free_xml(reply);
2650         return 0;
2651 
2652     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
2653         rc = stonith_device_action(request, &output);
2654 
2655     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
2656         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
2657         const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
2658         int op_timeout = 0;
2659 
2660         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
2661         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
2662         return 0;
2663 
2664     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
2665         if (remote_peer) {
2666             create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */
2667         }
2668 
2669         /* Delete the DC node RELAY operation. */
2670         remove_relay_op(request);
2671 
2672         stonith_query(request, remote_peer, client_id, call_options);
2673         return 0;
2674 
2675     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
2676         const char *flag_name = NULL;
2677 
2678         CRM_ASSERT(client);
2679         flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE);
2680         if (flag_name) {
2681             crm_debug("Enabling %s callbacks for client %s",
2682                       flag_name, pcmk__client_name(client));
2683             pcmk__set_client_flags(client, get_stonith_flag(flag_name));
2684         }
2685 
2686         flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE);
2687         if (flag_name) {
2688             crm_debug("Disabling %s callbacks for client %s",
2689                       flag_name, pcmk__client_name(client));
2690             pcmk__clear_client_flags(client, get_stonith_flag(flag_name));
2691         }
2692 
2693         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
2694         return 0;
2695 
2696     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
2697         xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2698 
2699         crm_notice("Received forwarded fencing request from "
2700                    "%s%s to fence (%s) peer %s",
2701                    ((client == NULL)? "peer" : "client"),
2702                    ((client == NULL)? remote_peer : pcmk__client_name(client)),
2703                    crm_element_value(dev, F_STONITH_ACTION),
2704                    crm_element_value(dev, F_STONITH_TARGET));
2705 
2706         if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
2707             rc = -EINPROGRESS;
2708         }
2709 
2710     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
2711 
2712         if (remote_peer || stand_alone) {
2713             rc = stonith_fence(request);
2714 
2715         } else if (call_options & st_opt_manual_ack) {
2716             remote_fencing_op_t *rop = NULL;
2717             xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2718             const char *target = crm_element_value(dev, F_STONITH_TARGET);
2719 
2720             crm_notice("Received manual confirmation that %s is fenced", target);
2721             rop = initiate_remote_stonith_op(client, request, TRUE);
2722             rc = stonith_manual_ack(request, rop);
2723 
2724         } else {
2725             const char *alternate_host = NULL;
2726             xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2727             const char *target = crm_element_value(dev, F_STONITH_TARGET);
2728             const char *action = crm_element_value(dev, F_STONITH_ACTION);
2729             const char *device = crm_element_value(dev, F_STONITH_DEVICE);
2730 
2731             if (client) {
2732                 int tolerance = 0;
2733 
2734                 crm_notice("Client %s wants to fence (%s) %s using %s",
2735                            pcmk__client_name(client), action,
2736                            target, (device? device : "any device"));
2737 
2738                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
2739 
2740                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
2741                     rc = 0;
2742                     goto done;
2743                 }
2744 
2745             } else {
2746                 crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
2747                            remote_peer, action, target, device ? device : "(any)");
2748             }
2749 
2750             alternate_host = check_alternate_host(target);
2751 
2752             if (alternate_host && client) {
2753                 const char *client_id = NULL;
2754                 remote_fencing_op_t *op = NULL;
2755 
2756                 crm_notice("Forwarding self-fencing request to peer %s"
2757                            "due to topology", alternate_host);
2758 
2759                 if (client->id) {
2760                     client_id = client->id;
2761                 } else {
2762                     client_id = crm_element_value(request, F_STONITH_CLIENTID);
2763                 }
2764 
2765                 /* Create an operation for RELAY and send the ID in the RELAY message. */
2766                 /* When a QUERY response is received, delete the RELAY operation to avoid the existence of duplicate operations. */
2767                 op = create_remote_stonith_op(client_id, request, FALSE);
2768 
2769                 crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY);
2770                 crm_xml_add(request, F_STONITH_CLIENTID, client->id);
2771                 crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id);
2772                 send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
2773                                      FALSE);
2774                 rc = -EINPROGRESS;
2775 
2776             } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
2777                 rc = -EINPROGRESS;
2778             }
2779         }
2780 
2781     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
2782         rc = stonith_fence_history(request, &data, remote_peer, call_options);
2783         if (call_options & st_opt_discard_reply) {
2784             /* we don't expect answers to the broadcast
2785              * we might have sent out
2786              */
2787             free_xml(data);
2788             return pcmk_ok;
2789         }
2790 
2791     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
2792         const char *device_id = NULL;
2793 
2794         if (allowed) {
2795             rc = stonith_device_register(request, &device_id, FALSE);
2796         } else {
2797             rc = -EACCES;
2798         }
2799         do_stonith_notify_device(call_options, op, rc, device_id);
2800 
2801     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
2802         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
2803         const char *device_id = crm_element_value(dev, XML_ATTR_ID);
2804 
2805         if (allowed) {
2806             rc = stonith_device_remove(device_id, FALSE);
2807         } else {
2808             rc = -EACCES;
2809         }
2810         do_stonith_notify_device(call_options, op, rc, device_id);
2811 
2812     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
2813         char *device_id = NULL;
2814 
2815         if (allowed) {
2816             rc = stonith_level_register(request, &device_id);
2817         } else {
2818             rc = -EACCES;
2819         }
2820         do_stonith_notify_level(call_options, op, rc, device_id);
2821         free(device_id);
2822 
2823     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
2824         char *device_id = NULL;
2825 
2826         if (allowed) {
2827             rc = stonith_level_remove(request, &device_id);
2828         } else {
2829             rc = -EACCES;
2830         }
2831         do_stonith_notify_level(call_options, op, rc, device_id);
2832 
2833     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
2834         int node_id = 0;
2835         const char *name = NULL;
2836 
2837         crm_element_value_int(request, XML_ATTR_ID, &node_id);
2838         name = crm_element_value(request, XML_ATTR_UNAME);
2839         reap_crm_member(node_id, name);
2840 
2841         return pcmk_ok;
2842 
2843     } else {
2844         crm_err("Unknown IPC request %s from %s %s", op,
2845                 ((client == NULL)? "peer" : "client"),
2846                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
2847     }
2848 
2849   done:
2850 
2851     if (rc == -EACCES) {
2852         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
2853                  crm_str(op), pcmk__client_name(client));
2854     }
2855 
2856     /* Always reply unless the request is in process still.
2857      * If in progress, a reply will happen async after the request
2858      * processing is finished */
2859     if (rc != -EINPROGRESS) {
2860         crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
2861                   id, pcmk_is_set(call_options, st_opt_sync_call), call_options,
2862                   crm_element_value(request, F_STONITH_CALLOPTS));
2863 
2864         if (pcmk_is_set(call_options, st_opt_sync_call)) {
2865             CRM_ASSERT(client == NULL || client->request_id == id);
2866         }
2867         reply = stonith_construct_reply(request, output, data, rc);
2868         stonith_send_reply(reply, call_options, remote_peer, client_id);
2869     }
2870 
2871     free(output);
2872     free_xml(data);
2873     free_xml(reply);
2874 
2875     return rc;
2876 }
2877 
2878 static void
2879 handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
     /* [previous][next][first][last][top][bottom][index][help] */
2880 {
2881     const char *op = crm_element_value(request, F_STONITH_OPERATION);
2882 
2883     if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
2884         process_remote_stonith_query(request);
2885     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
2886         process_remote_stonith_exec(request);
2887     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
2888         /* Reply to a complex fencing op */
2889         process_remote_stonith_exec(request);
2890     } else {
2891         crm_err("Unknown %s reply from %s %s", op,
2892                 ((client == NULL)? "peer" : "client"),
2893                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
2894         crm_log_xml_warn(request, "UnknownOp");
2895     }
2896 }
2897 
2898 void
2899 stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
     /* [previous][next][first][last][top][bottom][index][help] */
2900                 xmlNode *request, const char *remote_peer)
2901 {
2902     int call_options = 0;
2903     int rc = 0;
2904     gboolean is_reply = FALSE;
2905 
2906     /* Copy op for reporting. The original might get freed by handle_reply()
2907      * before we use it in crm_debug():
2908      *     handle_reply()
2909      *     |- process_remote_stonith_exec()
2910      *     |-- remote_op_done()
2911      *     |--- handle_local_reply_and_notify()
2912      *     |---- crm_xml_add(...F_STONITH_OPERATION...)
2913      *     |--- free_xml(op->request)
2914      */
2915     char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
2916 
2917     if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) {
2918         is_reply = TRUE;
2919     }
2920 
2921     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
2922     crm_debug("Processing %s%s %u from %s %s with call options 0x%08x",
2923               op, (is_reply? " reply" : ""), id,
2924               ((client == NULL)? "peer" : "client"),
2925               ((client == NULL)? remote_peer : pcmk__client_name(client)),
2926               call_options);
2927 
2928     if (pcmk_is_set(call_options, st_opt_sync_call)) {
2929         CRM_ASSERT(client == NULL || client->request_id == id);
2930     }
2931 
2932     if (is_reply) {
2933         handle_reply(client, request, remote_peer);
2934     } else {
2935         rc = handle_request(client, id, flags, request, remote_peer);
2936     }
2937 
2938     crm_debug("Processed %s%s from %s %s: %s (rc=%d)",
2939               op, (is_reply? " reply" : ""),
2940               ((client == NULL)? "peer" : "client"),
2941               ((client == NULL)? remote_peer : pcmk__client_name(client)),
2942               ((rc > 0)? "" : pcmk_strerror(rc)), rc);
2943     free(op);
2944 }

/* [previous][next][first][last][top][bottom][index][help] */