root/daemons/fenced/fenced_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. is_action_required
  2. get_action_delay_max
  3. get_action_delay_base
  4. get_action_timeout
  5. free_async_command
  6. create_async_command
  7. get_action_limit
  8. get_active_cmds
  9. fork_cb
  10. stonith_device_execute
  11. stonith_device_dispatch
  12. start_delay_helper
  13. schedule_stonith_command
  14. free_device
  15. free_device_list
  16. init_device_list
  17. build_port_aliases
  18. free_metadata_cache
  19. init_metadata_cache
  20. get_agent_metadata
  21. is_nodeid_required
  22. add_action
  23. read_action_metadata
  24. map_action
  25. xml2device_params
  26. build_device_from_xml
  27. target_list_type
  28. schedule_internal_command
  29. string_in_list
  30. status_search_cb
  31. dynamic_list_search_cb
  32. device_params_diff
  33. device_has_duplicate
  34. stonith_device_register
  35. stonith_device_remove
  36. count_active_levels
  37. free_topology_entry
  38. free_topology_list
  39. init_topology_list
  40. stonith_level_key
  41. stonith_level_kind
  42. parse_device_list
  43. stonith_level_register
  44. stonith_level_remove
  45. stonith_device_action
  46. search_devices_record_result
  47. localhost_is_eligible
  48. can_fence_host_with_device
  49. search_devices
  50. get_capable_devices
  51. add_action_specific_attributes
  52. add_disallowed
  53. add_action_reply
  54. stonith_query_capable_device_cb
  55. stonith_query
  56. log_operation
  57. stonith_send_async_reply
  58. cancel_stonith_command
  59. st_child_done
  60. sort_device_priority
  61. stonith_fence_get_devices_cb
  62. stonith_fence
  63. stonith_construct_reply
  64. stonith_construct_async_reply
  65. fencing_peer_active
  66. check_alternate_host
  67. stonith_send_reply
  68. remove_relay_op
  69. handle_request
  70. handle_reply
  71. stonith_command

   1 /*
   2  * Copyright 2009-2020 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <stdio.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 #include <sys/stat.h>
  17 #include <unistd.h>
  18 #include <sys/utsname.h>
  19 
  20 #include <stdlib.h>
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <ctype.h>
  24 
  25 #include <crm/crm.h>
  26 #include <crm/msg_xml.h>
  27 #include <crm/common/ipc.h>
  28 #include <crm/common/ipc_internal.h>
  29 #include <crm/cluster/internal.h>
  30 #include <crm/common/mainloop.h>
  31 
  32 #include <crm/stonith-ng.h>
  33 #include <crm/fencing/internal.h>
  34 #include <crm/common/xml.h>
  35 
  36 #include <pacemaker-fenced.h>
  37 
   38 GHashTable *device_list = NULL; /* registered devices, looked up by device ID; values are released with free_device() */
   39 GHashTable *topology = NULL; /* NOTE(review): presumably the fencing topology levels -- not used in this part of the file, confirm */
   40 GList *cmd_list = NULL; /* every live async_command_t: appended by create_async_command(), removed by free_async_command() */
  41 
   /* Bookkeeping for one search for devices able to fence a target: replies
    * are counted per device and the callback receives the capable devices. */
   42 struct device_search_s {
   43     /* target of fence action */
   44     char *host;
   45     /* requested fence action */
   46     char *action;
   47     /* timeout to use if a device is queried dynamically for possible targets */
   48     int per_device_timeout;
   49     /* number of registered fencing devices at time of request */
   50     int replies_needed;
   51     /* number of device replies received so far */
   52     int replies_received;
   53     /* whether the target is eligible to perform requested action (or off) */
   54     bool allow_suicide;
   55 
   56     /* private data to pass to search callback function */
   57     void *user_data;
   58     /* function to call when all replies have been received */
   59     void (*callback) (GList * devices, void *user_data);
   60     /* devices capable of performing requested action (or off if remapping) */
   61     GListPtr capable;
   62 };
  63 
   /* Forward declarations of functions defined later in this file */
   64 static gboolean stonith_device_dispatch(gpointer user_data);
   65 static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data);
   66 static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
   67                                const char *client_id);
   68 
   69 static void search_devices_record_result(struct device_search_s *search, const char *device,
   70                                          gboolean can_fence);
  71 
   /* One fencing command from request to completion; instances are created by
    * create_async_command() and released by free_async_command(). */
   72 typedef struct async_command_s {
   73 
          /* call ID, read from F_STONITH_CALLID of the request */
   74     int id;
   75     int pid;
   76     int fd_stdout;
          /* call options, read from F_STONITH_CALLOPTS of the request */
   77     int options;
          /* timeout given in the request (F_STONITH_TIMEOUT) */
   78     int default_timeout; /* seconds */
          /* timeout actually used: default_timeout unless the device overrides it */
   79     int timeout; /* seconds */
   80 
          /* requested delay (F_STONITH_DELAY) plus any device delay added when
           * the command is scheduled; a negative request disables device delays */
   81     int start_delay; /* seconds */
          /* GLib timeout source ID while the start delay is pending, else 0 */
   82     int delay_id;
   83 
          /* strings copied from the request; all are freed with the command */
   84     char *op;
   85     char *origin;
   86     char *client;
   87     char *client_name;
   88     char *remote_op_id;
   89 
          /* target of the action (F_STONITH_TARGET); may be NULL */
   90     char *victim;
          /* target's node ID, filled in only when the device includes node IDs */
   91     uint32_t victim_nodeid;
   92     char *action;
          /* ID of the device the command is scheduled on */
   93     char *device;
   94     char *mode;
   95 
          /* NOTE(review): presumably the candidate devices and the next one to
           * try -- not populated in this part of the file, confirm */
   96     GListPtr device_list;
   97     GListPtr device_next;
   98 
   99     void *internal_user_data;
          /* completion callback; create_async_command() sets it to st_child_done */
  100     void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data);
  101     guint timer_sigterm;
  102     guint timer_sigkill;
  103     /*! If the operation timed out, this is the last signal
  104      *  we sent to the process to get it to terminate */
  105     int last_timeout_signo;
  106 
          /* device the command is running on (set once the agent has forked) */
  107     stonith_device_t *active_on;
          /* device the command is being started on (cleared by fork_cb) */
  108     stonith_device_t *activating_on;
  109 } async_command_t;
 110 
  /* Forward declaration; defined later in this file */
  111 static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
  112                                               xmlNode * data, int rc);
 113 
 114 static gboolean
 115 is_action_required(const char *action, stonith_device_t *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 116 {
 117     return device && device->automatic_unfencing && pcmk__str_eq(action, "on",
 118                                                                  pcmk__str_casei);
 119 }
 120 
 121 static int
 122 get_action_delay_max(stonith_device_t * device, const char * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 123 {
 124     const char *value = NULL;
 125     int delay_max = 0;
 126 
 127     if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
 128         return 0;
 129     }
 130 
 131     value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
 132     if (value) {
 133        delay_max = crm_parse_interval_spec(value) / 1000;
 134     }
 135 
 136     return delay_max;
 137 }
 138 
 139 static int
 140 get_action_delay_base(stonith_device_t * device, const char * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 141 {
 142     const char *value = NULL;
 143     int delay_base = 0;
 144 
 145     if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
 146         return 0;
 147     }
 148 
 149     value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
 150     if (value) {
 151        delay_base = crm_parse_interval_spec(value) / 1000;
 152     }
 153 
 154     return delay_base;
 155 }
 156 
 157 /*!
 158  * \internal
 159  * \brief Override STONITH timeout with pcmk_*_timeout if available
 160  *
 161  * \param[in] device           STONITH device to use
 162  * \param[in] action           STONITH action name
 163  * \param[in] default_timeout  Timeout to use if device does not have
 164  *                             a pcmk_*_timeout parameter for action
 165  *
 166  * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
 167  * \note For consistency, it would be nice if reboot/off/on timeouts could be
 168  *       set the same way as start/stop/monitor timeouts, i.e. with an
 169  *       <operation> entry in the fencing resource configuration. However that
 170  *       is insufficient because fencing devices may be registered directly via
 171  *       the fencer's register_device() API instead of going through the CIB
 172  *       (e.g. stonith_admin uses it for its -R option, and the executor uses it
 173  *       to ensure a device is registered when a command is issued). As device
 174  *       properties, pcmk_*_timeout parameters can be grabbed by the fencer when
 175  *       the device is registered, whether by CIB change or API call.
 176  */
 177 static int
 178 get_action_timeout(stonith_device_t * device, const char *action, int default_timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
 179 {
 180     if (action && device && device->params) {
 181         char buffer[64] = { 0, };
 182         const char *value = NULL;
 183 
 184         /* If "reboot" was requested but the device does not support it,
 185          * we will remap to "off", so check timeout for "off" instead
 186          */
 187         if (pcmk__str_eq(action, "reboot", pcmk__str_casei)
 188             && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
 189             crm_trace("%s doesn't support reboot, using timeout for off instead",
 190                       device->id);
 191             action = "off";
 192         }
 193 
 194         /* If the device config specified an action-specific timeout, use it */
 195         snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
 196         value = g_hash_table_lookup(device->params, buffer);
 197         if (value) {
 198             return atoi(value);
 199         }
 200     }
 201     return default_timeout;
 202 }
 203 
 204 static void
 205 free_async_command(async_command_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 206 {
 207     if (!cmd) {
 208         return;
 209     }
 210 
 211     if (cmd->delay_id) {
 212         g_source_remove(cmd->delay_id);
 213     }
 214 
 215     cmd_list = g_list_remove(cmd_list, cmd);
 216 
 217     g_list_free_full(cmd->device_list, free);
 218     free(cmd->device);
 219     free(cmd->action);
 220     free(cmd->victim);
 221     free(cmd->remote_op_id);
 222     free(cmd->client);
 223     free(cmd->client_name);
 224     free(cmd->origin);
 225     free(cmd->mode);
 226     free(cmd->op);
 227     free(cmd);
 228 }
 229 
 230 static async_command_t *
 231 create_async_command(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 232 {
 233     async_command_t *cmd = NULL;
 234     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
 235     const char *action = crm_element_value(op, F_STONITH_ACTION);
 236 
 237     CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL);
 238 
 239     crm_log_xml_trace(msg, "Command");
 240     cmd = calloc(1, sizeof(async_command_t));
 241     crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
 242     crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
 243     crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
 244     cmd->timeout = cmd->default_timeout;
 245     // Value -1 means disable any static/random fencing delays
 246     crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
 247 
 248     cmd->origin = crm_element_value_copy(msg, F_ORIG);
 249     cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
 250     cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
 251     cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
 252     cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
 253     cmd->action = strdup(action);
 254     cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET);
 255     cmd->mode = crm_element_value_copy(op, F_STONITH_MODE);
 256     cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
 257 
 258     CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL);
 259     CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient"));
 260 
 261     cmd->done_cb = st_child_done;
 262     cmd_list = g_list_append(cmd_list, cmd);
 263     return cmd;
 264 }
 265 
 266 static int
 267 get_action_limit(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 268 {
 269     const char *value = NULL;
 270     int action_limit = 1;
 271 
 272     value = g_hash_table_lookup(device->params, STONITH_ATTR_ACTION_LIMIT);
 273     if (value) {
 274        action_limit = crm_parse_int(value, "1");
 275        if (action_limit == 0) {
 276            /* pcmk_action_limit should not be 0. Enforce it to be 1. */
 277            action_limit = 1;
 278        }
 279     }
 280 
 281     return action_limit;
 282 }
 283 
 284 static int
 285 get_active_cmds(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 286 {
 287     int counter = 0;
 288     GListPtr gIter = NULL;
 289     GListPtr gIterNext = NULL;
 290 
 291     CRM_CHECK(device != NULL, return 0);
 292 
 293     for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
 294         async_command_t *cmd = gIter->data;
 295 
 296         gIterNext = gIter->next;
 297 
 298         if (cmd->active_on == device) {
 299             counter++;
 300         }
 301     }
 302 
 303     return counter;
 304 }
 305 
 306 static void
 307 fork_cb(GPid pid, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 308 {
 309     async_command_t *cmd = (async_command_t *) user_data;
 310     stonith_device_t * device =
 311         /* in case of a retry we've done the move from
 312            activating_on to active_on already
 313          */
 314         cmd->activating_on?cmd->activating_on:cmd->active_on;
 315 
 316     CRM_ASSERT(device);
 317     crm_debug("Operation '%s'%s%s on %s now running with pid=%d, timeout=%ds",
 318                   cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 319                   device->id, pid, cmd->timeout);
 320     cmd->active_on = device;
 321     cmd->activating_on = NULL;
 322 }
 323 
 324 static gboolean
 325 stonith_device_execute(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 326 {
 327     int exec_rc = 0;
 328     const char *action_str = NULL;
 329     const char *host_arg = NULL;
 330     async_command_t *cmd = NULL;
 331     stonith_action_t *action = NULL;
 332     int active_cmds = 0;
 333     int action_limit = 0;
 334     GListPtr gIter = NULL;
 335     GListPtr gIterNext = NULL;
 336 
 337     CRM_CHECK(device != NULL, return FALSE);
 338 
 339     active_cmds = get_active_cmds(device);
 340     action_limit = get_action_limit(device);
 341     if (action_limit > -1 && active_cmds >= action_limit) {
 342         crm_trace("%s is over its action limit of %d (%u active action%s)",
 343                   device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : "");
 344         return TRUE;
 345     }
 346 
 347     for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
 348         async_command_t *pending_op = gIter->data;
 349 
 350         gIterNext = gIter->next;
 351 
 352         if (pending_op && pending_op->delay_id) {
 353             crm_trace
 354                 ("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds",
 355                  pending_op->action, pending_op->victim ? " targeting " : "",
 356                  pending_op->victim ? pending_op->victim : "",
 357                  device->id, pending_op->start_delay);
 358             continue;
 359         }
 360 
 361         device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
 362         g_list_free_1(gIter);
 363 
 364         cmd = pending_op;
 365         break;
 366     }
 367 
 368     if (cmd == NULL) {
 369         crm_trace("Nothing further to do for %s for now", device->id);
 370         return TRUE;
 371     }
 372 
 373     if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
 374         if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) {
 375             pcmk__panic(__func__);
 376             goto done;
 377 
 378         } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) {
 379             pcmk__panic(__func__);
 380             goto done;
 381 
 382         } else {
 383             crm_info("Faking success for %s watchdog operation", cmd->action);
 384             cmd->done_cb(0, 0, NULL, cmd);
 385             goto done;
 386         }
 387     }
 388 
 389 #if SUPPORT_CIBSECRETS
 390     if (pcmk__substitute_secrets(device->id, device->params) != pcmk_rc_ok) {
 391         /* replacing secrets failed! */
 392         if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 393             /* don't fail on stop! */
 394             crm_info("proceeding with the stop operation for %s", device->id);
 395 
 396         } else {
 397             crm_err("failed to get secrets for %s, "
 398                     "considering resource not configured", device->id);
 399             exec_rc = PCMK_OCF_NOT_CONFIGURED;
 400             cmd->done_cb(0, exec_rc, NULL, cmd);
 401             goto done;
 402         }
 403     }
 404 #endif
 405 
 406     action_str = cmd->action;
 407     if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)
 408         && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
 409 
 410         crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent);
 411         action_str = "off";
 412     }
 413 
 414     if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
 415         host_arg = "port";
 416 
 417     } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
 418         host_arg = "plug";
 419     }
 420 
 421     action = stonith_action_create(device->agent,
 422                                    action_str,
 423                                    cmd->victim,
 424                                    cmd->victim_nodeid,
 425                                    cmd->timeout, device->params,
 426                                    device->aliases, host_arg);
 427 
 428     /* for async exec, exec_rc is negative for early error exit
 429        otherwise handling of success/errors is done via callbacks */
 430     cmd->activating_on = device;
 431     exec_rc = stonith_action_execute_async(action, (void *)cmd,
 432                                            cmd->done_cb, fork_cb);
 433 
 434     if (exec_rc < 0) {
 435         crm_warn("Operation '%s'%s%s on %s failed: %s (%d)",
 436                  cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 437                  device->id, pcmk_strerror(exec_rc), exec_rc);
 438         cmd->activating_on = NULL;
 439         cmd->done_cb(0, exec_rc, NULL, cmd);
 440     }
 441 
 442 done:
 443     /* Device might get triggered to work by multiple fencing commands
 444      * simultaneously. Trigger the device again to make sure any
 445      * remaining concurrent commands get executed. */
 446     if (device->pending_ops) {
 447         mainloop_set_trigger(device->work);
 448     }
 449     return TRUE;
 450 }
 451 
 452 static gboolean
 453 stonith_device_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 454 {
 455     return stonith_device_execute(user_data);
 456 }
 457 
 458 static gboolean
 459 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 460 {
 461     async_command_t *cmd = data;
 462     stonith_device_t *device = NULL;
 463 
 464     cmd->delay_id = 0;
 465     device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
 466 
 467     if (device) {
 468         mainloop_set_trigger(device->work);
 469     }
 470 
 471     return FALSE;
 472 }
 473 
 474 static void
 475 schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 476 {
 477     int delay_max = 0;
 478     int delay_base = 0;
 479     int requested_delay = cmd->start_delay;
 480 
 481     CRM_CHECK(cmd != NULL, return);
 482     CRM_CHECK(device != NULL, return);
 483 
 484     if (cmd->device) {
 485         free(cmd->device);
 486     }
 487 
 488     if (device->include_nodeid && cmd->victim) {
 489         crm_node_t *node = crm_get_peer(0, cmd->victim);
 490 
 491         cmd->victim_nodeid = node->id;
 492     }
 493 
 494     cmd->device = strdup(device->id);
 495     cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
 496 
 497     if (cmd->remote_op_id) {
 498         crm_debug("Scheduling '%s' action%s%s on %s for remote peer %s with op id (%s) (timeout=%ds)",
 499                   cmd->action,
 500                   cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 501                   device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
 502     } else {
 503         crm_debug("Scheduling '%s' action%s%s on %s for %s (timeout=%ds)",
 504                   cmd->action,
 505                   cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 506                   device->id, cmd->client, cmd->timeout);
 507     }
 508 
 509     device->pending_ops = g_list_append(device->pending_ops, cmd);
 510     mainloop_set_trigger(device->work);
 511 
 512     // Value -1 means disable any static/random fencing delays
 513     if (requested_delay < 0) {
 514         return;
 515     }
 516 
 517     delay_max = get_action_delay_max(device, cmd->action);
 518     delay_base = get_action_delay_base(device, cmd->action);
 519     if (delay_max == 0) {
 520         delay_max = delay_base;
 521     }
 522     if (delay_max < delay_base) {
 523         crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
 524                  "for %s on %s - limiting to max-delay",
 525                  delay_base, delay_max, cmd->action, device->id);
 526         delay_base = delay_max;
 527     }
 528     if (delay_max > 0) {
 529         // coverity[dont_call] We're not using rand() for security
 530         cmd->start_delay +=
 531             ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
 532             + delay_base;
 533     }
 534 
 535     if (cmd->start_delay > 0) {
 536         crm_notice("Delaying '%s' action%s%s on %s for %ds (timeout=%ds, "
 537                    "requested_delay=%ds, base=%ds, max=%ds)",
 538                    cmd->action,
 539                    cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 540                    device->id, cmd->start_delay, cmd->timeout,
 541                    requested_delay, delay_base, delay_max);
 542         cmd->delay_id =
 543             g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
 544     }
 545 }
 546 
 547 static void
 548 free_device(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 549 {
 550     GListPtr gIter = NULL;
 551     stonith_device_t *device = data;
 552 
 553     g_hash_table_destroy(device->params);
 554     g_hash_table_destroy(device->aliases);
 555 
 556     for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
 557         async_command_t *cmd = gIter->data;
 558 
 559         crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
 560         cmd->done_cb(0, -ENODEV, NULL, cmd);
 561     }
 562     g_list_free(device->pending_ops);
 563 
 564     g_list_free_full(device->targets, free);
 565 
 566     mainloop_destroy_trigger(device->work);
 567 
 568     free_xml(device->agent_metadata);
 569     free(device->namespace);
 570     free(device->on_target_actions);
 571     free(device->agent);
 572     free(device->id);
 573     free(device);
 574 }
 575 
 576 void free_device_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 577 {
 578     if (device_list != NULL) {
 579         g_hash_table_destroy(device_list);
 580         device_list = NULL;
 581     }
 582 }
 583 
 584 void
 585 init_device_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 586 {
 587     if (device_list == NULL) {
 588         device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
 589                                             free_device);
 590     }
 591 }
 592 
 593 static GHashTable *
 594 build_port_aliases(const char *hostmap, GListPtr * targets)
     /* [previous][next][first][last][top][bottom][index][help] */
 595 {
 596     char *name = NULL;
 597     int last = 0, lpc = 0, max = 0, added = 0;
 598     GHashTable *aliases = crm_strcase_table_new();
 599 
 600     if (hostmap == NULL) {
 601         return aliases;
 602     }
 603 
 604     max = strlen(hostmap);
 605     for (; lpc <= max; lpc++) {
 606         switch (hostmap[lpc]) {
 607                 /* Assignment chars */
 608             case '=':
 609             case ':':
 610                 if (lpc > last) {
 611                     free(name);
 612                     name = calloc(1, 1 + lpc - last);
 613                     memcpy(name, hostmap + last, lpc - last);
 614                 }
 615                 last = lpc + 1;
 616                 break;
 617 
 618                 /* Delimeter chars */
 619                 /* case ',': Potentially used to specify multiple ports */
 620             case 0:
 621             case ';':
 622             case ' ':
 623             case '\t':
 624                 if (name) {
 625                     char *value = NULL;
 626 
 627                     value = calloc(1, 1 + lpc - last);
 628                     memcpy(value, hostmap + last, lpc - last);
 629 
 630                     crm_debug("Adding alias '%s'='%s'", name, value);
 631                     g_hash_table_replace(aliases, name, value);
 632                     if (targets) {
 633                         *targets = g_list_append(*targets, strdup(value));
 634                     }
 635                     value = NULL;
 636                     name = NULL;
 637                     added++;
 638 
 639                 } else if (lpc > last) {
 640                     crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
 641                 }
 642 
 643                 last = lpc + 1;
 644                 break;
 645         }
 646 
 647         if (hostmap[lpc] == 0) {
 648             break;
 649         }
 650     }
 651 
 652     if (added == 0) {
 653         crm_info("No host mappings detected in '%s'", hostmap);
 654     }
 655 
 656     free(name);
 657     return aliases;
 658 }
 659 
  660 GHashTable *metadata_cache = NULL; /* agent name -> metadata text, filled by get_agent_metadata() */
 661 
 662 void
 663 free_metadata_cache(void) {
     /* [previous][next][first][last][top][bottom][index][help] */
 664     if (metadata_cache != NULL) {
 665         g_hash_table_destroy(metadata_cache);
 666         metadata_cache = NULL;
 667     }
 668 }
 669 
 670 static void
 671 init_metadata_cache(void) {
     /* [previous][next][first][last][top][bottom][index][help] */
 672     if (metadata_cache == NULL) {
 673         metadata_cache = crm_str_table_new();
 674     }
 675 }
 676 
 677 static xmlNode *
 678 get_agent_metadata(const char *agent)
     /* [previous][next][first][last][top][bottom][index][help] */
 679 {
 680     xmlNode *xml = NULL;
 681     char *buffer = NULL;
 682 
 683     init_metadata_cache();
 684     buffer = g_hash_table_lookup(metadata_cache, agent);
 685     if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
 686         return NULL;
 687 
 688     } else if(buffer == NULL) {
 689         stonith_t *st = stonith_api_new();
 690         int rc;
 691 
 692         if (st == NULL) {
 693             crm_warn("Could not get agent meta-data: "
 694                      "API memory allocation failed");
 695             return NULL;
 696         }
 697         rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10);
 698         stonith_api_delete(st);
 699         if (rc || !buffer) {
 700             crm_err("Could not retrieve metadata for fencing agent %s", agent);
 701             return NULL;
 702         }
 703         g_hash_table_replace(metadata_cache, strdup(agent), buffer);
 704     }
 705 
 706     xml = string2xml(buffer);
 707 
 708     return xml;
 709 }
 710 
 711 static gboolean
 712 is_nodeid_required(xmlNode * xml)
     /* [previous][next][first][last][top][bottom][index][help] */
 713 {
 714     xmlXPathObjectPtr xpath = NULL;
 715 
 716     if (stand_alone) {
 717         return FALSE;
 718     }
 719 
 720     if (!xml) {
 721         return FALSE;
 722     }
 723 
 724     xpath = xpath_search(xml, "//parameter[@name='nodeid']");
 725     if (numXpathResults(xpath)  <= 0) {
 726         freeXpathObject(xpath);
 727         return FALSE;
 728     }
 729 
 730     freeXpathObject(xpath);
 731     return TRUE;
 732 }
 733 
 734 #define MAX_ACTION_LEN 256
 735 
 736 static char *
 737 add_action(char *actions, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 738 {
 739     int offset = 0;
 740 
 741     if (actions == NULL) {
 742         actions = calloc(1, MAX_ACTION_LEN);
 743     } else {
 744         offset = strlen(actions);
 745     }
 746 
 747     if (offset > 0) {
 748         offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " ");
 749     }
 750     offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action);
 751 
 752     return actions;
 753 }
 754 
 755 static void
 756 read_action_metadata(stonith_device_t *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 757 {
 758     xmlXPathObjectPtr xpath = NULL;
 759     int max = 0;
 760     int lpc = 0;
 761 
 762     if (device->agent_metadata == NULL) {
 763         return;
 764     }
 765 
 766     xpath = xpath_search(device->agent_metadata, "//action");
 767     max = numXpathResults(xpath);
 768 
 769     if (max <= 0) {
 770         freeXpathObject(xpath);
 771         return;
 772     }
 773 
 774     for (lpc = 0; lpc < max; lpc++) {
 775         const char *on_target = NULL;
 776         const char *action = NULL;
 777         xmlNode *match = getXpathResult(xpath, lpc);
 778 
 779         CRM_LOG_ASSERT(match != NULL);
 780         if(match == NULL) { continue; };
 781 
 782         on_target = crm_element_value(match, "on_target");
 783         action = crm_element_value(match, "name");
 784 
 785         if(pcmk__str_eq(action, "list", pcmk__str_casei)) {
 786             stonith__set_device_flags(device->flags, device->id,
 787                                       st_device_supports_list);
 788         } else if(pcmk__str_eq(action, "status", pcmk__str_casei)) {
 789             stonith__set_device_flags(device->flags, device->id,
 790                                       st_device_supports_status);
 791         } else if(pcmk__str_eq(action, "reboot", pcmk__str_casei)) {
 792             stonith__set_device_flags(device->flags, device->id,
 793                                       st_device_supports_reboot);
 794         } else if (pcmk__str_eq(action, "on", pcmk__str_casei)) {
 795             /* "automatic" means the cluster will unfence node when it joins */
 796             const char *automatic = crm_element_value(match, "automatic");
 797 
 798             /* "required" is a deprecated synonym for "automatic" */
 799             const char *required = crm_element_value(match, "required");
 800 
 801             if (crm_is_true(automatic) || crm_is_true(required)) {
 802                 device->automatic_unfencing = TRUE;
 803             }
 804         }
 805 
 806         if (action && crm_is_true(on_target)) {
 807             device->on_target_actions = add_action(device->on_target_actions, action);
 808         }
 809     }
 810 
 811     freeXpathObject(xpath);
 812 }
 813 
 814 /*!
 815  * \internal
 816  * \brief Set a pcmk_*_action parameter if not already set
 817  *
 818  * \param[in,out] params  Device parameters
 819  * \param[in]     action  Name of action
 820  * \param[in]     value   Value to use if action is not already set
 821  */
 822 static void
 823 map_action(GHashTable *params, const char *action, const char *value)
     /* [previous][next][first][last][top][bottom][index][help] */
 824 {
 825     char *key = crm_strdup_printf("pcmk_%s_action", action);
 826 
 827     if (g_hash_table_lookup(params, key)) {
 828         crm_warn("Ignoring %s='%s', see %s instead",
 829                  STONITH_ATTR_ACTION_OP, value, key);
 830         free(key);
 831     } else {
 832         crm_warn("Mapping %s='%s' to %s='%s'",
 833                  STONITH_ATTR_ACTION_OP, value, key, value);
 834         g_hash_table_insert(params, key, strdup(value));
 835     }
 836 }
 837 
 838 /*!
 839  * \internal
 840  * \brief Create device parameter table from XML
 841  *
 842  * \param[in]     name    Device name (used for logging only)
 843  * \param[in,out] params  Device parameters
 844  */
 845 static GHashTable *
 846 xml2device_params(const char *name, xmlNode *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 847 {
 848     GHashTable *params = xml2list(dev);
 849     const char *value;
 850 
 851     /* Action should never be specified in the device configuration,
 852      * but we support it for users who are familiar with other software
 853      * that worked that way.
 854      */
 855     value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
 856     if (value != NULL) {
 857         crm_warn("%s has '%s' parameter, which should never be specified in configuration",
 858                  name, STONITH_ATTR_ACTION_OP);
 859 
 860         if (*value == '\0') {
 861             crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
 862 
 863         } else if (strcmp(value, "reboot") == 0) {
 864             crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
 865                      STONITH_ATTR_ACTION_OP);
 866 
 867         } else if (strcmp(value, "off") == 0) {
 868             map_action(params, "reboot", value);
 869 
 870         } else {
 871             map_action(params, "off", value);
 872             map_action(params, "reboot", value);
 873         }
 874 
 875         g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
 876     }
 877 
 878     return params;
 879 }
 880 
 881 static stonith_device_t *
 882 build_device_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 883 {
 884     const char *value;
 885     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
 886     stonith_device_t *device = NULL;
 887     char *agent = crm_element_value_copy(dev, "agent");
 888 
 889     CRM_CHECK(agent != NULL, return device);
 890 
 891     device = calloc(1, sizeof(stonith_device_t));
 892 
 893     CRM_CHECK(device != NULL, {free(agent); return device;});
 894 
 895     device->id = crm_element_value_copy(dev, XML_ATTR_ID);
 896     device->agent = agent;
 897     device->namespace = crm_element_value_copy(dev, "namespace");
 898     device->params = xml2device_params(device->id, dev);
 899 
 900     value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST);
 901     if (value) {
 902         device->targets = stonith__parse_targets(value);
 903     }
 904 
 905     value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP);
 906     device->aliases = build_port_aliases(value, &(device->targets));
 907 
 908     device->agent_metadata = get_agent_metadata(device->agent);
 909     if (device->agent_metadata) {
 910         read_action_metadata(device);
 911         stonith__device_parameter_flags(&(device->flags), device->id,
 912                                         device->agent_metadata);
 913     }
 914 
 915     value = g_hash_table_lookup(device->params, "nodeid");
 916     if (!value) {
 917         device->include_nodeid = is_nodeid_required(device->agent_metadata);
 918     }
 919 
 920     value = crm_element_value(dev, "rsc_provides");
 921     if (pcmk__str_eq(value, "unfencing", pcmk__str_casei)) {
 922         device->automatic_unfencing = TRUE;
 923     }
 924 
 925     if (is_action_required("on", device)) {
 926         crm_info("The fencing device '%s' requires unfencing", device->id);
 927     }
 928 
 929     if (device->on_target_actions) {
 930         crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node",
 931                  device->id, device->on_target_actions);
 932     }
 933 
 934     device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
 935     /* TODO: Hook up priority */
 936 
 937     return device;
 938 }
 939 
 940 static const char *
 941 target_list_type(stonith_device_t * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 942 {
 943     const char *check_type = NULL;
 944 
 945     check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK);
 946 
 947     if (check_type == NULL) {
 948 
 949         if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) {
 950             check_type = "static-list";
 951         } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) {
 952             check_type = "static-list";
 953         } else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
 954             check_type = "dynamic-list";
 955         } else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
 956             check_type = "status";
 957         } else {
 958             check_type = "none";
 959         }
 960     }
 961 
 962     return check_type;
 963 }
 964 
 965 static void
 966 schedule_internal_command(const char *origin,
     /* [previous][next][first][last][top][bottom][index][help] */
 967                           stonith_device_t * device,
 968                           const char *action,
 969                           const char *victim,
 970                           int timeout,
 971                           void *internal_user_data,
 972                           void (*done_cb) (GPid pid, int rc, const char *output,
 973                                            gpointer user_data))
 974 {
 975     async_command_t *cmd = NULL;
 976 
 977     cmd = calloc(1, sizeof(async_command_t));
 978 
 979     cmd->id = -1;
 980     cmd->default_timeout = timeout ? timeout : 60;
 981     cmd->timeout = cmd->default_timeout;
 982     cmd->action = strdup(action);
 983     cmd->victim = victim ? strdup(victim) : NULL;
 984     cmd->device = strdup(device->id);
 985     cmd->origin = strdup(origin);
 986     cmd->client = strdup(crm_system_name);
 987     cmd->client_name = strdup(crm_system_name);
 988 
 989     cmd->internal_user_data = internal_user_data;
 990     cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
 991 
 992     schedule_stonith_command(cmd, device);
 993 }
 994 
 995 gboolean
 996 string_in_list(GListPtr list, const char *item)
     /* [previous][next][first][last][top][bottom][index][help] */
 997 {
 998     int lpc = 0;
 999     int max = g_list_length(list);
1000 
1001     for (lpc = 0; lpc < max; lpc++) {
1002         const char *value = g_list_nth_data(list, lpc);
1003 
1004         if (pcmk__str_eq(item, value, pcmk__str_casei)) {
1005             return TRUE;
1006         } else {
1007             crm_trace("%d: '%s' != '%s'", lpc, item, value);
1008         }
1009     }
1010     return FALSE;
1011 }
1012 
1013 static void
1014 status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1015 {
1016     async_command_t *cmd = user_data;
1017     struct device_search_s *search = cmd->internal_user_data;
1018     stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
1019     gboolean can = FALSE;
1020 
1021     free_async_command(cmd);
1022 
1023     if (!dev) {
1024         search_devices_record_result(search, NULL, FALSE);
1025         return;
1026     }
1027 
1028     mainloop_set_trigger(dev->work);
1029 
1030     if (rc == 1 /* unknown */ ) {
1031         crm_trace("Host %s is not known by %s", search->host, dev->id);
1032 
1033     } else if (rc == 0 /* active */  || rc == 2 /* inactive */ ) {
1034         crm_trace("Host %s is known by %s", search->host, dev->id);
1035         can = TRUE;
1036 
1037     } else {
1038         crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host,
1039                    rc);
1040     }
1041     search_devices_record_result(search, dev->id, can);
1042 }
1043 
1044 static void
1045 dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1046 {
1047     async_command_t *cmd = user_data;
1048     struct device_search_s *search = cmd->internal_user_data;
1049     stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
1050     gboolean can_fence = FALSE;
1051 
1052     free_async_command(cmd);
1053 
1054     /* Host/alias must be in the list output to be eligible to be fenced
1055      *
1056      * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
1057      *  if the guest is still listed despite being moved to another machine
1058      */
1059     if (!dev) {
1060         search_devices_record_result(search, NULL, FALSE);
1061         return;
1062     }
1063 
1064     mainloop_set_trigger(dev->work);
1065 
1066     /* If we successfully got the targets earlier, don't disable. */
1067     if (rc != 0 && !dev->targets) {
1068         crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output);
1069         /* Fall back to status */
1070         g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status"));
1071 
1072         g_list_free_full(dev->targets, free);
1073         dev->targets = NULL;
1074     } else if (!rc) {
1075         crm_info("Refreshing port list for %s", dev->id);
1076         g_list_free_full(dev->targets, free);
1077         dev->targets = stonith__parse_targets(output);
1078         dev->targets_age = time(NULL);
1079     }
1080 
1081     if (dev->targets) {
1082         const char *alias = g_hash_table_lookup(dev->aliases, search->host);
1083 
1084         if (!alias) {
1085             alias = search->host;
1086         }
1087         if (string_in_list(dev->targets, alias)) {
1088             can_fence = TRUE;
1089         }
1090     }
1091     search_devices_record_result(search, dev->id, can_fence);
1092 }
1093 
1094 /*!
1095  * \internal
1096  * \brief Returns true if any key in first is not in second or second has a different value for key
1097  */
1098 static int
1099 device_params_diff(GHashTable *first, GHashTable *second) {
     /* [previous][next][first][last][top][bottom][index][help] */
1100     char *key = NULL;
1101     char *value = NULL;
1102     GHashTableIter gIter;
1103 
1104     g_hash_table_iter_init(&gIter, first);
1105     while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
1106 
1107         if(strstr(key, "CRM_meta") == key) {
1108             continue;
1109         } else if(strcmp(key, "crm_feature_set") == 0) {
1110             continue;
1111         } else {
1112             char *other_value = g_hash_table_lookup(second, key);
1113 
1114             if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
1115                 crm_trace("Different value for %s: %s != %s", key, other_value, value);
1116                 return 1;
1117             }
1118         }
1119     }
1120 
1121     return 0;
1122 }
1123 
1124 /*!
1125  * \internal
1126  * \brief Checks to see if an identical device already exists in the device_list
1127  */
1128 static stonith_device_t *
1129 device_has_duplicate(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
1130 {
1131     stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
1132 
1133     if (!dup) {
1134         crm_trace("No match for %s", device->id);
1135         return NULL;
1136 
1137     } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
1138         crm_trace("Different agent: %s != %s", dup->agent, device->agent);
1139         return NULL;
1140     }
1141 
1142     /* Use calculate_operation_digest() here? */
1143     if (device_params_diff(device->params, dup->params) ||
1144         device_params_diff(dup->params, device->params)) {
1145         return NULL;
1146     }
1147 
1148     crm_trace("Match");
1149     return dup;
1150 }
1151 
1152 int
1153 stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1154 {
1155     stonith_device_t *dup = NULL;
1156     stonith_device_t *device = build_device_from_xml(msg);
1157 
1158     CRM_CHECK(device != NULL, return -ENOMEM);
1159 
1160     dup = device_has_duplicate(device);
1161     if (dup) {
1162         crm_debug("Device '%s' already existed in device list (%d active devices)", device->id,
1163                    g_hash_table_size(device_list));
1164         free_device(device);
1165         device = dup;
1166 
1167     } else {
1168         stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
1169 
1170         if (from_cib && old && old->api_registered) {
1171             /* If the cib is writing over an entry that is shared with a stonith client,
1172              * copy any pending ops that currently exist on the old entry to the new one.
1173              * Otherwise the pending ops will be reported as failures
1174              */
1175             crm_info("Overwriting an existing entry for %s from the cib", device->id);
1176             device->pending_ops = old->pending_ops;
1177             device->api_registered = TRUE;
1178             old->pending_ops = NULL;
1179             if (device->pending_ops) {
1180                 mainloop_set_trigger(device->work);
1181             }
1182         }
1183         g_hash_table_replace(device_list, device->id, device);
1184 
1185         crm_notice("Added '%s' to the device list (%d active devices)", device->id,
1186                    g_hash_table_size(device_list));
1187     }
1188     if (desc) {
1189         *desc = device->id;
1190     }
1191 
1192     if (from_cib) {
1193         device->cib_registered = TRUE;
1194     } else {
1195         device->api_registered = TRUE;
1196     }
1197 
1198     return pcmk_ok;
1199 }
1200 
1201 int
1202 stonith_device_remove(const char *id, gboolean from_cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1203 {
1204     stonith_device_t *device = g_hash_table_lookup(device_list, id);
1205 
1206     if (!device) {
1207         crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list));
1208         return pcmk_ok;
1209     }
1210 
1211     if (from_cib) {
1212         device->cib_registered = FALSE;
1213     } else {
1214         device->verified = FALSE;
1215         device->api_registered = FALSE;
1216     }
1217 
1218     if (!device->cib_registered && !device->api_registered) {
1219         g_hash_table_remove(device_list, id);
1220         crm_info("Removed '%s' from the device list (%d active devices)",
1221                  id, g_hash_table_size(device_list));
1222     } else {
1223         crm_trace("Not removing '%s' from the device list (%d active devices) "
1224                   "- still %s%s_registered", id, g_hash_table_size(device_list),
1225                   device->cib_registered?"cib":"", device->api_registered?"api":"");
1226     }
1227     return pcmk_ok;
1228 }
1229 
1230 /*!
1231  * \internal
1232  * \brief Return the number of stonith levels registered for a node
1233  *
1234  * \param[in] tp  Node's topology table entry
1235  *
1236  * \return Number of non-NULL levels in topology entry
1237  * \note This function is used only for log messages.
1238  */
1239 static int
1240 count_active_levels(stonith_topology_t * tp)
     /* [previous][next][first][last][top][bottom][index][help] */
1241 {
1242     int lpc = 0;
1243     int count = 0;
1244 
1245     for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
1246         if (tp->levels[lpc] != NULL) {
1247             count++;
1248         }
1249     }
1250     return count;
1251 }
1252 
1253 static void
1254 free_topology_entry(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1255 {
1256     stonith_topology_t *tp = data;
1257 
1258     int lpc = 0;
1259 
1260     for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
1261         if (tp->levels[lpc] != NULL) {
1262             g_list_free_full(tp->levels[lpc], free);
1263         }
1264     }
1265     free(tp->target);
1266     free(tp->target_value);
1267     free(tp->target_pattern);
1268     free(tp->target_attribute);
1269     free(tp);
1270 }
1271 
1272 void
1273 free_topology_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1274 {
1275     if (topology != NULL) {
1276         g_hash_table_destroy(topology);
1277         topology = NULL;
1278     }
1279 }
1280 
1281 void
1282 init_topology_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1283 {
1284     if (topology == NULL) {
1285         topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL,
1286                                          free_topology_entry);
1287     }
1288 }
1289 
1290 char *stonith_level_key(xmlNode *level, int mode)
     /* [previous][next][first][last][top][bottom][index][help] */
1291 {
1292     if(mode == -1) {
1293         mode = stonith_level_kind(level);
1294     }
1295 
1296     switch(mode) {
1297         case 0:
1298             return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
1299         case 1:
1300             return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
1301         case 2:
1302             {
1303                 const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
1304                 const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE);
1305 
1306                 if(name && value) {
1307                     return crm_strdup_printf("%s=%s", name, value);
1308                 }
1309             }
1310         default:
1311             return crm_strdup_printf("Unknown-%d-%s", mode, ID(level));
1312     }
1313 }
1314 
1315 int stonith_level_kind(xmlNode * level)
     /* [previous][next][first][last][top][bottom][index][help] */
1316 {
1317     int mode = 0;
1318     const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET);
1319 
1320     if(target == NULL) {
1321         mode++;
1322         target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN);
1323     }
1324 
1325     if(stand_alone == FALSE && target == NULL) {
1326 
1327         mode++;
1328 
1329         if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) {
1330             mode++;
1331 
1332         } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) {
1333             mode++;
1334         }
1335     }
1336 
1337     return mode;
1338 }
1339 
1340 static stonith_key_value_t *
1341 parse_device_list(const char *devices)
     /* [previous][next][first][last][top][bottom][index][help] */
1342 {
1343     int lpc = 0;
1344     int max = 0;
1345     int last = 0;
1346     stonith_key_value_t *output = NULL;
1347 
1348     if (devices == NULL) {
1349         return output;
1350     }
1351 
1352     max = strlen(devices);
1353     for (lpc = 0; lpc <= max; lpc++) {
1354         if (devices[lpc] == ',' || devices[lpc] == 0) {
1355             char *line = strndup(devices + last, lpc - last);
1356 
1357             output = stonith_key_value_add(output, NULL, line);
1358             free(line);
1359 
1360             last = lpc + 1;
1361         }
1362     }
1363 
1364     return output;
1365 }
1366 
1367 /*!
1368  * \internal
1369  * \brief Register a STONITH level for a target
1370  *
1371  * Given an XML request specifying the target name, level index, and device IDs
1372  * for the level, this will create an entry for the target in the global topology
1373  * table if one does not already exist, then append the specified device IDs to
1374  * the entry's device list for the specified level.
1375  *
1376  * \param[in]  msg   XML request for STONITH level registration
1377  * \param[out] desc  If not NULL, will be set to string representation ("TARGET[LEVEL]")
1378  *
1379  * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index
1380  */
1381 int
1382 stonith_level_register(xmlNode *msg, char **desc)
     /* [previous][next][first][last][top][bottom][index][help] */
1383 {
1384     int id = 0;
1385     xmlNode *level;
1386     int mode;
1387     char *target;
1388 
1389     stonith_topology_t *tp;
1390     stonith_key_value_t *dIter = NULL;
1391     stonith_key_value_t *devices = NULL;
1392 
1393     /* Allow the XML here to point to the level tag directly, or wrapped in
1394      * another tag. If directly, don't search by xpath, because it might give
1395      * multiple hits (e.g. if the XML is the CIB).
1396      */
1397     if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) {
1398         level = msg;
1399     } else {
1400         level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
1401     }
1402     CRM_CHECK(level != NULL, return -EINVAL);
1403 
1404     mode = stonith_level_kind(level);
1405     target = stonith_level_key(level, mode);
1406     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
1407 
1408     if (desc) {
1409         *desc = crm_strdup_printf("%s[%d]", target, id);
1410     }
1411 
1412     /* Sanity-check arguments */
1413     if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
1414         crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
1415         free(target);
1416         crm_log_xml_err(level, "Bad topology");
1417         return -EINVAL;
1418     }
1419 
1420     /* Find or create topology table entry */
1421     tp = g_hash_table_lookup(topology, target);
1422     if (tp == NULL) {
1423         tp = calloc(1, sizeof(stonith_topology_t));
1424         tp->kind = mode;
1425         tp->target = target;
1426         tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
1427         tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
1428         tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
1429 
1430         g_hash_table_replace(topology, tp->target, tp);
1431         crm_trace("Added %s (%d) to the topology (%d active entries)",
1432                   target, mode, g_hash_table_size(topology));
1433     } else {
1434         free(target);
1435     }
1436 
1437     if (tp->levels[id] != NULL) {
1438         crm_info("Adding to the existing %s[%d] topology entry",
1439                  tp->target, id);
1440     }
1441 
1442     devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
1443     for (dIter = devices; dIter; dIter = dIter->next) {
1444         const char *device = dIter->value;
1445 
1446         crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
1447         tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
1448     }
1449     stonith_key_value_freeall(devices, 1, 1);
1450 
1451     crm_info("Target %s has %d active fencing levels",
1452              tp->target, count_active_levels(tp));
1453     return pcmk_ok;
1454 }
1455 
1456 int
1457 stonith_level_remove(xmlNode *msg, char **desc)
     /* [previous][next][first][last][top][bottom][index][help] */
1458 {
1459     int id = 0;
1460     stonith_topology_t *tp;
1461     char *target;
1462 
1463     /* Unlike additions, removal requests should always have one level tag */
1464     xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
1465 
1466     CRM_CHECK(level != NULL, return -EINVAL);
1467 
1468     target = stonith_level_key(level, -1);
1469     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
1470     if (desc) {
1471         *desc = crm_strdup_printf("%s[%d]", target, id);
1472     }
1473 
1474     /* Sanity-check arguments */
1475     if (id >= ST_LEVEL_MAX) {
1476         free(target);
1477         return -EINVAL;
1478     }
1479 
1480     tp = g_hash_table_lookup(topology, target);
1481     if (tp == NULL) {
1482         crm_info("Topology for %s not found (%d active entries)",
1483                  target, g_hash_table_size(topology));
1484 
1485     } else if (id == 0 && g_hash_table_remove(topology, target)) {
1486         crm_info("Removed all %s related entries from the topology (%d active entries)",
1487                  target, g_hash_table_size(topology));
1488 
1489     } else if (id > 0 && tp->levels[id] != NULL) {
1490         g_list_free_full(tp->levels[id], free);
1491         tp->levels[id] = NULL;
1492 
1493         crm_info("Removed level '%d' from topology for %s (%d active levels remaining)",
1494                  id, target, count_active_levels(tp));
1495     }
1496 
1497     free(target);
1498     return pcmk_ok;
1499 }
1500 
1501 /*!
1502  * \internal
1503  * \brief Schedule an (asynchronous) action directly on a stonith device
1504  *
1505  * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
1506  * directly on a specified device. Only list, monitor, and status actions are
1507  * expected to use this call, though it should work with any agent command.
1508  *
1509  * \param[in]  msg     API message XML with desired action
1510  * \param[out] output  Unused
1511  *
1512  * \return -EINPROGRESS on success, -errno otherwise
1513  * \note If the action is monitor, the device must be registered via the API
1514  *       (CIB registration is not sufficient), because monitor should not be
1515  *       possible unless the device is "started" (API registered).
1516  */
1517 static int
1518 stonith_device_action(xmlNode * msg, char **output)
     /* [previous][next][first][last][top][bottom][index][help] */
1519 {
1520     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
1521     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
1522     const char *id = crm_element_value(dev, F_STONITH_DEVICE);
1523     const char *action = crm_element_value(op, F_STONITH_ACTION);
1524     async_command_t *cmd = NULL;
1525     stonith_device_t *device = NULL;
1526 
1527     if ((id == NULL) || (action == NULL)) {
1528         crm_info("Malformed API action request: device %s, action %s",
1529                  (id? id : "not specified"),
1530                  (action? action : "not specified"));
1531         return -EPROTO;
1532     }
1533 
1534     device = g_hash_table_lookup(device_list, id);
1535     if ((device == NULL)
1536         || (!device->api_registered && !strcmp(action, "monitor"))) {
1537 
1538         // Monitors may run only on "started" (API-registered) devices
1539         crm_info("Ignoring API '%s' action request because device %s not found",
1540                  action, id);
1541         return -ENODEV;
1542     }
1543 
1544     cmd = create_async_command(msg);
1545     if (cmd == NULL) {
1546         return -EPROTO;
1547     }
1548 
1549     schedule_stonith_command(cmd, device);
1550     return -EINPROGRESS;
1551 }
1552 
1553 static void
1554 search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
     /* [previous][next][first][last][top][bottom][index][help] */
1555 {
1556     search->replies_received++;
1557 
1558     if (can_fence && device) {
1559         search->capable = g_list_append(search->capable, strdup(device));
1560     }
1561 
1562     if (search->replies_needed == search->replies_received) {
1563 
1564         crm_debug("Finished Search. %d devices can perform action (%s) on node %s",
1565                   g_list_length(search->capable),
1566                   search->action ? search->action : "<unknown>",
1567                   search->host ? search->host : "<anyone>");
1568 
1569         search->callback(search->capable, search->user_data);
1570         free(search->host);
1571         free(search->action);
1572         free(search);
1573     }
1574 }
1575 
1576 /*!
1577  * \internal
1578  * \brief Check whether the local host is allowed to execute a fencing action
1579  *
1580  * \param[in] device         Fence device to check
1581  * \param[in] action         Fence action to check
1582  * \param[in] target         Hostname of fence target
1583  * \param[in] allow_suicide  Whether self-fencing is allowed for this operation
1584  *
1585  * \return TRUE if local host is allowed to execute action, FALSE otherwise
1586  */
1587 static gboolean
1588 localhost_is_eligible(const stonith_device_t *device, const char *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1589                       const char *target, gboolean allow_suicide)
1590 {
1591     gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
1592                                                 pcmk__str_casei);
1593 
1594     if (device && action && device->on_target_actions
1595         && strstr(device->on_target_actions, action)) {
1596         if (!localhost_is_target) {
1597             crm_trace("'%s' operation with %s can only be executed for localhost not %s",
1598                       action, device->id, target);
1599             return FALSE;
1600         }
1601 
1602     } else if (localhost_is_target && !allow_suicide) {
1603         crm_trace("'%s' operation does not support self-fencing", action);
1604         return FALSE;
1605     }
1606     return TRUE;
1607 }
1608 
1609 static void
1610 can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search)
     /* [previous][next][first][last][top][bottom][index][help] */
1611 {
1612     gboolean can = FALSE;
1613     const char *check_type = NULL;
1614     const char *host = search->host;
1615     const char *alias = NULL;
1616 
1617     CRM_LOG_ASSERT(dev != NULL);
1618 
1619     if (dev == NULL) {
1620         goto search_report_results;
1621     } else if (host == NULL) {
1622         can = TRUE;
1623         goto search_report_results;
1624     }
1625 
1626     /* Short-circuit query if this host is not allowed to perform the action */
1627     if (pcmk__str_eq(search->action, "reboot", pcmk__str_casei)) {
1628         /* A "reboot" *might* get remapped to "off" then "on", so short-circuit
1629          * only if all three are disallowed. If only one or two are disallowed,
1630          * we'll report that with the results. We never allow suicide for
1631          * remapped "on" operations because the host is off at that point.
1632          */
1633         if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide)
1634             && !localhost_is_eligible(dev, "off", host, search->allow_suicide)
1635             && !localhost_is_eligible(dev, "on", host, FALSE)) {
1636             goto search_report_results;
1637         }
1638     } else if (!localhost_is_eligible(dev, search->action, host,
1639                                       search->allow_suicide)) {
1640         goto search_report_results;
1641     }
1642 
1643     alias = g_hash_table_lookup(dev->aliases, host);
1644     if (alias == NULL) {
1645         alias = host;
1646     }
1647 
1648     check_type = target_list_type(dev);
1649 
1650     if (pcmk__str_eq(check_type, "none", pcmk__str_casei)) {
1651         can = TRUE;
1652 
1653     } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) {
1654 
1655         /* Presence in the hostmap is sufficient
1656          * Only use if all hosts on which the device can be active can always fence all listed hosts
1657          */
1658 
1659         if (string_in_list(dev->targets, host)) {
1660             can = TRUE;
1661         } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)
1662                    && g_hash_table_lookup(dev->aliases, host)) {
1663             can = TRUE;
1664         }
1665 
1666     } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) {
1667         time_t now = time(NULL);
1668 
1669         if (dev->targets == NULL || dev->targets_age + 60 < now) {
1670             crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
1671                       check_type, dev->id, search->host, search->action);
1672 
1673             schedule_internal_command(__func__, dev, "list", NULL,
1674                                       search->per_device_timeout, search, dynamic_list_search_cb);
1675 
1676             /* we'll respond to this search request async in the cb */
1677             return;
1678         }
1679 
1680         if (string_in_list(dev->targets, alias)) {
1681             can = TRUE;
1682         }
1683 
1684     } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) {
1685         crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
1686                   check_type, dev->id, search->host, search->action);
1687         schedule_internal_command(__func__, dev, "status", search->host,
1688                                   search->per_device_timeout, search, status_search_cb);
1689         /* we'll respond to this search request async in the cb */
1690         return;
1691     } else {
1692         crm_err("Invalid value for " STONITH_ATTR_HOSTCHECK ": %s", check_type);
1693         check_type = "Invalid " STONITH_ATTR_HOSTCHECK;
1694     }
1695 
1696     if (pcmk__str_eq(host, alias, pcmk__str_casei)) {
1697         crm_notice("%s is%s eligible to fence (%s) %s: %s",
1698                    dev->id, (can? "" : " not"), search->action, host,
1699                    check_type);
1700     } else {
1701         crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s",
1702                    dev->id, (can? "" : " not"), search->action, host, alias,
1703                    check_type);
1704     }
1705 
1706   search_report_results:
1707     search_devices_record_result(search, dev ? dev->id : NULL, can);
1708 }
1709 
1710 static void
1711 search_devices(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1712 {
1713     stonith_device_t *dev = value;
1714     struct device_search_s *search = user_data;
1715 
1716     can_fence_host_with_device(dev, search);
1717 }
1718 
1719 #define DEFAULT_QUERY_TIMEOUT 20
1720 static void
1721 get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
     /* [previous][next][first][last][top][bottom][index][help] */
1722                     void (*callback) (GList * devices, void *user_data))
1723 {
1724     struct device_search_s *search;
1725     int per_device_timeout = DEFAULT_QUERY_TIMEOUT;
1726     int devices_needing_async_query = 0;
1727     char *key = NULL;
1728     const char *check_type = NULL;
1729     GHashTableIter gIter;
1730     stonith_device_t *device = NULL;
1731 
1732     if (!g_hash_table_size(device_list)) {
1733         callback(NULL, user_data);
1734         return;
1735     }
1736 
1737     search = calloc(1, sizeof(struct device_search_s));
1738     if (!search) {
1739         callback(NULL, user_data);
1740         return;
1741     }
1742 
1743     g_hash_table_iter_init(&gIter, device_list);
1744     while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) {
1745         check_type = target_list_type(device);
1746         if (pcmk__strcase_any_of(check_type, "status", "dynamic-list", NULL)) {
1747             devices_needing_async_query++;
1748         }
1749     }
1750 
1751     /* If we have devices that require an async event in order to know what
1752      * nodes they can fence, we have to give the events a timeout. The total
1753      * query timeout is divided among those events. */
1754     if (devices_needing_async_query) {
1755         per_device_timeout = timeout / devices_needing_async_query;
1756         if (!per_device_timeout) {
1757             crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds",
1758                     timeout, DEFAULT_QUERY_TIMEOUT,
1759                     DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
1760             per_device_timeout = DEFAULT_QUERY_TIMEOUT;
1761         } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) {
1762             crm_notice("STONITH timeout %ds is low for the current configuration;"
1763                        " consider raising to at least %ds",
1764                        timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
1765         }
1766     }
1767 
1768     search->host = host ? strdup(host) : NULL;
1769     search->action = action ? strdup(action) : NULL;
1770     search->per_device_timeout = per_device_timeout;
1771     /* We are guaranteed this many replies. Even if a device gets
1772      * unregistered some how during the async search, we will get
1773      * the correct number of replies. */
1774     search->replies_needed = g_hash_table_size(device_list);
1775     search->allow_suicide = suicide;
1776     search->callback = callback;
1777     search->user_data = user_data;
1778     /* kick off the search */
1779 
1780     crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s",
1781               search->replies_needed,
1782               search->action ? search->action : "<unknown>",
1783               search->host ? search->host : "<anyone>");
1784     g_hash_table_foreach(device_list, search_devices, search);
1785 }
1786 
/* State for one device query: filled in by stonith_query(), then consumed and
 * freed by stonith_query_capable_device_cb() once the device search is done.
 */
1787 struct st_query_data {
     /* Reply message built from the request; sent, then freed, by the callback */
1788     xmlNode *reply;
     /* Copy of the requesting peer's name (NULL if none was given) */
1789     char *remote_peer;
     /* Copy of the requesting client's ID (NULL if none was given) */
1790     char *client_id;
     /* Copy of the queried target (NULL when the query named no target) */
1791     char *target;
     /* Copy of the queried fence action (NULL if the request had none) */
1792     char *action;
     /* Call options of the request (checked for st_opt_allow_suicide) */
1793     int call_options;
1794 };
1795 
1796 /*!
1797  * \internal
1798  * \brief Add action-specific attributes to query reply XML
1799  *
1800  * \param[in,out] xml     XML to add attributes to
1801  * \param[in]     action  Fence action
1802  * \param[in]     device  Fence device
1803  */
1804 static void
1805 add_action_specific_attributes(xmlNode *xml, const char *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1806                                stonith_device_t *device)
1807 {
1808     int action_specific_timeout;
1809     int delay_max;
1810     int delay_base;
1811 
1812     CRM_CHECK(xml && action && device, return);
1813 
1814     if (is_action_required(action, device)) {
1815         crm_trace("Action '%s' is required on %s", action, device->id);
1816         crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
1817     }
1818 
1819     action_specific_timeout = get_action_timeout(device, action, 0);
1820     if (action_specific_timeout) {
1821         crm_trace("Action '%s' has timeout %dms on %s",
1822                   action, action_specific_timeout, device->id);
1823         crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
1824     }
1825 
1826     delay_max = get_action_delay_max(device, action);
1827     if (delay_max > 0) {
1828         crm_trace("Action '%s' has maximum random delay %dms on %s",
1829                   action, delay_max, device->id);
1830         crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000);
1831     }
1832 
1833     delay_base = get_action_delay_base(device, action);
1834     if (delay_base > 0) {
1835         crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000);
1836     }
1837 
1838     if ((delay_max > 0) && (delay_base == 0)) {
1839         crm_trace("Action '%s' has maximum random delay %dms on %s",
1840                   action, delay_max, device->id);
1841     } else if ((delay_max == 0) && (delay_base > 0)) {
1842         crm_trace("Action '%s' has a static delay of %dms on %s",
1843                   action, delay_base, device->id);
1844     } else if ((delay_max > 0) && (delay_base > 0)) {
1845         crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen "
1846                   "maximum delay of %dms on %s",
1847                   action, delay_base, delay_max, device->id);
1848     }
1849 }
1850 
1851 /*!
1852  * \internal
1853  * \brief Add "disallowed" attribute to query reply XML if appropriate
1854  *
1855  * \param[in,out] xml            XML to add attribute to
1856  * \param[in]     action         Fence action
1857  * \param[in]     device         Fence device
1858  * \param[in]     target         Fence target
1859  * \param[in]     allow_suicide  Whether self-fencing is allowed
1860  */
1861 static void
1862 add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1863                const char *target, gboolean allow_suicide)
1864 {
1865     if (!localhost_is_eligible(device, action, target, allow_suicide)) {
1866         crm_trace("Action '%s' on %s is disallowed for local host",
1867                   action, device->id);
1868         crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE);
1869     }
1870 }
1871 
1872 /*!
1873  * \internal
1874  * \brief Add child element with action-specific values to query reply XML
1875  *
1876  * \param[in,out] xml            XML to add attribute to
1877  * \param[in]     action         Fence action
1878  * \param[in]     device         Fence device
1879  * \param[in]     target         Fence target
1880  * \param[in]     allow_suicide  Whether self-fencing is allowed
1881  */
1882 static void
1883 add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1884                const char *target, gboolean allow_suicide)
1885 {
1886     xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);
1887 
1888     crm_xml_add(child, XML_ATTR_ID, action);
1889     add_action_specific_attributes(child, action, device);
1890     add_disallowed(child, action, device, target, allow_suicide);
1891 }
1892 
1893 static void
1894 stonith_query_capable_device_cb(GList * devices, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1895 {
1896     struct st_query_data *query = user_data;
1897     int available_devices = 0;
1898     xmlNode *dev = NULL;
1899     xmlNode *list = NULL;
1900     GListPtr lpc = NULL;
1901 
1902     /* Pack the results into XML */
1903     list = create_xml_node(NULL, __func__);
1904     crm_xml_add(list, F_STONITH_TARGET, query->target);
1905     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
1906         stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
1907         const char *action = query->action;
1908 
1909         if (!device) {
1910             /* It is possible the device got unregistered while
1911              * determining who can fence the target */
1912             continue;
1913         }
1914 
1915         available_devices++;
1916 
1917         dev = create_xml_node(list, F_STONITH_DEVICE);
1918         crm_xml_add(dev, XML_ATTR_ID, device->id);
1919         crm_xml_add(dev, "namespace", device->namespace);
1920         crm_xml_add(dev, "agent", device->agent);
1921         crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
1922 
1923         /* If the originating fencer wants to reboot the node, and we have a
1924          * capable device that doesn't support "reboot", remap to "off" instead.
1925          */
1926         if (!pcmk_is_set(device->flags, st_device_supports_reboot)
1927             && pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) {
1928             crm_trace("%s doesn't support reboot, using values for off instead",
1929                       device->id);
1930             action = "off";
1931         }
1932 
1933         /* Add action-specific values if available */
1934         add_action_specific_attributes(dev, action, device);
1935         if (pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) {
1936             /* A "reboot" *might* get remapped to "off" then "on", so after
1937              * sending the "reboot"-specific values in the main element, we add
1938              * sub-elements for "off" and "on" values.
1939              *
1940              * We short-circuited earlier if "reboot", "off" and "on" are all
1941              * disallowed for the local host. However if only one or two are
1942              * disallowed, we send back the results and mark which ones are
1943              * disallowed. If "reboot" is disallowed, this might cause problems
1944              * with older fencer versions, which won't check for it. Older
1945              * versions will ignore "off" and "on", so they are not a problem.
1946              */
1947             add_disallowed(dev, action, device, query->target,
1948                            pcmk_is_set(query->call_options, st_opt_allow_suicide));
1949             add_action_reply(dev, "off", device, query->target,
1950                              pcmk_is_set(query->call_options, st_opt_allow_suicide));
1951             add_action_reply(dev, "on", device, query->target, FALSE);
1952         }
1953 
1954         /* A query without a target wants device parameters */
1955         if (query->target == NULL) {
1956             xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
1957 
1958             g_hash_table_foreach(device->params, hash2field, attrs);
1959         }
1960     }
1961 
1962     crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
1963     if (query->target) {
1964         crm_debug("Found %d matching devices for '%s'", available_devices, query->target);
1965     } else {
1966         crm_debug("%d devices installed", available_devices);
1967     }
1968 
1969     if (list != NULL) {
1970         crm_log_xml_trace(list, "Add query results");
1971         add_message_xml(query->reply, F_STONITH_CALLDATA, list);
1972     }
1973     stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id);
1974 
1975     free_xml(query->reply);
1976     free(query->remote_peer);
1977     free(query->client_id);
1978     free(query->target);
1979     free(query->action);
1980     free(query);
1981     free_xml(list);
1982     g_list_free_full(devices, free);
1983 }
1984 
1985 static void
1986 stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options)
     /* [previous][next][first][last][top][bottom][index][help] */
1987 {
1988     struct st_query_data *query = NULL;
1989     const char *action = NULL;
1990     const char *target = NULL;
1991     int timeout = 0;
1992     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER);
1993 
1994     crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
1995     if (dev) {
1996         const char *device = crm_element_value(dev, F_STONITH_DEVICE);
1997 
1998         target = crm_element_value(dev, F_STONITH_TARGET);
1999         action = crm_element_value(dev, F_STONITH_ACTION);
2000         if (device && pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
2001             /* No query or reply necessary */
2002             return;
2003         }
2004     }
2005 
2006     crm_log_xml_debug(msg, "Query");
2007     query = calloc(1, sizeof(struct st_query_data));
2008 
2009     query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
2010     query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
2011     query->client_id = client_id ? strdup(client_id) : NULL;
2012     query->target = target ? strdup(target) : NULL;
2013     query->action = action ? strdup(action) : NULL;
2014     query->call_options = call_options;
2015 
2016     get_capable_devices(target, action, timeout,
2017                         pcmk_is_set(call_options, st_opt_allow_suicide),
2018                         query, stonith_query_capable_device_cb);
2019 }
2020 
2021 #define ST_LOG_OUTPUT_MAX 512
2022 static void
2023 log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output, gboolean op_merged)
     /* [previous][next][first][last][top][bottom][index][help] */
2024 {
2025     if (rc == 0) {
2026         next = NULL;
2027     }
2028 
2029     if (cmd->victim != NULL) {
2030         do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR,
2031                    "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned%s: %d (%s)%s%s",
2032                    cmd->action, pid, cmd->id, cmd->client_name, cmd->victim,
2033                    cmd->device, (op_merged? " (merged)" : ""),
2034                    rc, pcmk_strerror(rc),
2035                    (next? ", retrying with " : ""), (next ? next : ""));
2036     } else {
2037         do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
2038                             "Operation '%s' [%d] for device '%s' returned%s: %d (%s)%s%s",
2039                             cmd->action, pid, cmd->device,
2040                             (op_merged? " (merged)" : ""),
2041                             rc, pcmk_strerror(rc),
2042                             (next? ", retrying with " : ""), (next ? next : ""));
2043     }
2044 
2045     if (output) {
2046         /* Logging the whole string confuses syslog when the string is xml */
2047         char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid);
2048 
2049         crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output);
2050         free(prefix);
2051     }
2052 }
2053 
2054 static void
2055 stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
2056 {
2057     xmlNode *reply = NULL;
2058     gboolean bcast = FALSE;
2059 
2060     reply = stonith_construct_async_reply(cmd, output, NULL, rc);
2061 
2062     if (pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
2063         /* Too verbose to log */
2064         crm_trace("Metadata query for %s", cmd->device);
2065         output = NULL;
2066 
2067     } else if (pcmk__str_any_of(cmd->action, "monitor", "list", "status", NULL)) {
2068         crm_trace("Never broadcast '%s' replies", cmd->action);
2069 
2070     } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) {
2071         crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim);
2072         crm_xml_add(reply, F_SUBTYPE, "broadcast");
2073         bcast = TRUE;
2074     }
2075 
2076     log_operation(cmd, rc, pid, NULL, output, (options & st_reply_opt_merged ? TRUE : FALSE));
2077     crm_log_xml_trace(reply, "Reply");
2078 
2079     if (options & st_reply_opt_merged) {
2080         crm_xml_add(reply, F_STONITH_MERGED, "true");
2081     }
2082 
2083     if (bcast) {
2084         crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
2085         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
2086 
2087     } else if (cmd->origin) {
2088         crm_trace("Directed reply to %s", cmd->origin);
2089         send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
2090 
2091     } else {
2092         crm_trace("Directed local %ssync reply to %s",
2093                   (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
2094         do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
2095     }
2096 
2097     if (stand_alone) {
2098         /* Do notification with a clean data object */
2099         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
2100 
2101         crm_xml_add_int(notify_data, F_STONITH_RC, rc);
2102         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
2103         crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
2104         crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
2105         crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
2106         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
2107         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
2108 
2109         do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
2110         do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
2111     }
2112 
2113     free_xml(reply);
2114 }
2115 
2116 static void
2117 cancel_stonith_command(async_command_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
2118 {
2119     stonith_device_t *device;
2120 
2121     CRM_CHECK(cmd != NULL, return);
2122 
2123     if (!cmd->device) {
2124         return;
2125     }
2126 
2127     device = g_hash_table_lookup(device_list, cmd->device);
2128 
2129     if (device) {
2130         crm_trace("Cancel scheduled '%s' action on %s", cmd->action, device->id);
2131         device->pending_ops = g_list_remove(device->pending_ops, cmd);
2132     }
2133 }
2134 
/* Completion callback for a device operation.
 *
 * pid, rc and output describe the finished operation; user_data is the
 * async_command_t that was executed. The function:
 *   - releases the device for further work (and marks it verified after a
 *     successful "list", "monitor" or "status"),
 *   - picks another device from cmd->device_next when the command has to
 *     continue, rescheduling the command on it,
 *   - otherwise sends the reply and, on success, also answers every pending
 *     command in cmd_list that asked for the same thing,
 *   - frees the command unless it was rescheduled.
 */
2135 static void
2136 st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2137 {
2138     stonith_device_t *device = NULL;
2139     stonith_device_t *next_device = NULL;
2140     async_command_t *cmd = user_data;
2141 
2142     GListPtr gIter = NULL;
2143     GListPtr gIterNext = NULL;
2144 
2145     CRM_CHECK(cmd != NULL, return);
2146 
2147     cmd->active_on = NULL;
2148 
2149     /* The device is ready to do something else now */
2150     device = g_hash_table_lookup(device_list, cmd->device);
2151     if (device) {
2152         if (!device->verified && (rc == pcmk_ok) &&
2153             (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
2154 
2155             device->verified = TRUE;
2156         }
2157 
2158         mainloop_set_trigger(device->work);
2159     }
2160 
2161     crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)",
2162               cmd->action, cmd->device, rc, g_list_length(cmd->device_next));
2163 
2164     if (rc == 0) {
2165         GListPtr iter;
2166         /* see if there are any required devices left to execute for this op */
2167         for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
2168             next_device = g_hash_table_lookup(device_list, iter->data);
2169 
2170             if (next_device != NULL && is_action_required(cmd->action, next_device)) {
                 /* cmd->device_next only advances when a required device is
                  * found; otherwise it is left as it was */
2171                 cmd->device_next = iter->next;
2172                 break;
2173             }
2174             next_device = NULL;
2175         }
2176 
2177     } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
2178         /* if this device didn't work out, see if there are any others we can try.
2179          * if the failed device was 'required', we can't pick another device. */
         /* The lookup may yield NULL if that device is no longer registered,
          * in which case the failure is reported below */
2180         next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
2181         cmd->device_next = cmd->device_next->next;
2182     }
2183 
2184     /* this operation requires more fencing, hooray! */
2185     if (next_device) {
2186         log_operation(cmd, rc, pid, next_device->id, output, FALSE);
2187 
2188         schedule_stonith_command(cmd, next_device);
2189         /* Prevent cmd from being freed */
2190         cmd = NULL;
2191         goto done;
2192     }
2193 
2194     stonith_send_async_reply(cmd, output, rc, pid, st_reply_opt_none);
2195 
     /* Only successful results are shared with duplicate requests */
2196     if (rc != 0) {
2197         goto done;
2198     }
2199 
2200     /* Check to see if any operations are scheduled to do the exact
2201      * same thing that just completed.  If so, rather than
2202      * performing the same fencing operation twice, return the result
2203      * of this operation for all pending commands it matches. */
2204     for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
2205         async_command_t *cmd_other = gIter->data;
2206 
         /* Remember the successor now: gIter may be unlinked and freed below */
2207         gIterNext = gIter->next;
2208 
2209         if (cmd == cmd_other) {
2210             continue;
2211         }
2212 
2213         /* A pending scheduled command matches the command that just finished if.
2214          * 1. The client connections are different.
2215          * 2. The node victim is the same.
2216          * 3. The fencing action is the same.
2217          * 4. The device scheduled to execute the action is the same.
2218          */
2219         if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
2220             !pcmk__str_eq(cmd->victim, cmd_other->victim, pcmk__str_casei) ||
2221             !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_casei) ||
2222             !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
2223 
2224             continue;
2225         }
2226 
2227         /* Duplicate merging will do the right thing for either type of remapped
2228          * reboot. If the executing fencer remapped an unsupported reboot to
2229          * off, then cmd->action will be reboot and will be merged with any
2230          * other reboot requests. If the originating fencer remapped a
2231          * topology reboot to off then on, we will get here once with
2232          * cmd->action "off" and once with "on", and they will be merged
2233          * separately with similar requests.
2234          */
2235         crm_notice
2236             ("Merging stonith action '%s' targeting %s originating from client %s with identical stonith request from client %s",
2237              cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name);
2238 
         /* Detach the list node first; the node itself is released with
          * g_list_free_1() after the duplicate has been answered and freed */
2239         cmd_list = g_list_remove_link(cmd_list, gIter);
2240 
2241         stonith_send_async_reply(cmd_other, output, rc, pid, st_reply_opt_merged);
2242         cancel_stonith_command(cmd_other);
2243 
2244         free_async_command(cmd_other);
2245         g_list_free_1(gIter);
2246     }
2247 
2248   done:
     /* cmd is NULL here when the command was rescheduled on another device */
2249     free_async_command(cmd);
2250 }
2251 
2252 static gint
2253 sort_device_priority(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
2254 {
2255     const stonith_device_t *dev_a = a;
2256     const stonith_device_t *dev_b = b;
2257 
2258     if (dev_a->priority > dev_b->priority) {
2259         return -1;
2260     } else if (dev_a->priority < dev_b->priority) {
2261         return 1;
2262     }
2263     return 0;
2264 }
2265 
2266 static void
2267 stonith_fence_get_devices_cb(GList * devices, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2268 {
2269     async_command_t *cmd = user_data;
2270     stonith_device_t *device = NULL;
2271 
2272     crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim);
2273 
2274     if (devices != NULL) {
2275         /* Order based on priority */
2276         devices = g_list_sort(devices, sort_device_priority);
2277         device = g_hash_table_lookup(device_list, devices->data);
2278 
2279         if (device) {
2280             cmd->device_list = devices;
2281             cmd->device_next = devices->next;
2282             devices = NULL;     /* list owned by cmd now */
2283         }
2284     }
2285 
2286     /* we have a device, schedule it for fencing. */
2287     if (device) {
2288         schedule_stonith_command(cmd, device);
2289         /* in progress */
2290         return;
2291     }
2292 
2293     /* no device found! */
2294     stonith_send_async_reply(cmd, NULL, -ENODEV, 0, st_reply_opt_none);
2295 
2296     free_async_command(cmd);
2297     g_list_free_full(devices, free);
2298 }
2299 
2300 static int
2301 stonith_fence(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
2302 {
2303     const char *device_id = NULL;
2304     stonith_device_t *device = NULL;
2305     async_command_t *cmd = create_async_command(msg);
2306     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
2307 
2308     if (cmd == NULL) {
2309         return -EPROTO;
2310     }
2311 
2312     device_id = crm_element_value(dev, F_STONITH_DEVICE);
2313     if (device_id) {
2314         device = g_hash_table_lookup(device_list, device_id);
2315         if (device == NULL) {
2316             crm_err("Requested device '%s' is not available", device_id);
2317             return -ENODEV;
2318         }
2319         schedule_stonith_command(cmd, device);
2320 
2321     } else {
2322         const char *host = crm_element_value(dev, F_STONITH_TARGET);
2323 
2324         if (cmd->options & st_opt_cs_nodeid) {
2325             int nodeid = crm_atoi(host, NULL);
2326             crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY);
2327 
2328             if (node) {
2329                 host = node->uname;
2330             }
2331         }
2332 
2333         /* If we get to here, then self-fencing is implicitly allowed */
2334         get_capable_devices(host, cmd->action, cmd->default_timeout,
2335                             TRUE, cmd, stonith_fence_get_devices_cb);
2336     }
2337 
2338     return -EINPROGRESS;
2339 }
2340 
2341 xmlNode *
2342 stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
2343 {
2344     xmlNode *reply = NULL;
2345 
2346     reply = create_xml_node(NULL, T_STONITH_REPLY);
2347 
2348     crm_xml_add(reply, "st_origin", __func__);
2349     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
2350     crm_xml_add(reply, "st_output", output);
2351     crm_xml_add_int(reply, F_STONITH_RC, rc);
2352 
2353     if (request == NULL) {
2354         /* Most likely, this is the result of a stonith operation that was
2355          * initiated before we came up. Unfortunately that means we lack enough
2356          * information to provide clients with a full result.
2357          *
2358          * @TODO Maybe synchronize this information at start-up?
2359          */
2360         crm_warn("Missing request information for client notifications for "
2361                  "operation with result %d (initiated before we came up?)", rc);
2362 
2363     } else {
2364         const char *name = NULL;
2365         const char *value = NULL;
2366 
2367         const char *names[] = {
2368             F_STONITH_OPERATION,
2369             F_STONITH_CALLID,
2370             F_STONITH_CLIENTID,
2371             F_STONITH_CLIENTNAME,
2372             F_STONITH_REMOTE_OP_ID,
2373             F_STONITH_CALLOPTS
2374         };
2375 
2376         crm_trace("Creating a result reply with%s reply output (rc=%d)",
2377                   (data? "" : "out"), rc);
2378         for (int lpc = 0; lpc < DIMOF(names); lpc++) {
2379             name = names[lpc];
2380             value = crm_element_value(request, name);
2381             crm_xml_add(reply, name, value);
2382         }
2383         if (data != NULL) {
2384             add_message_xml(reply, F_STONITH_CALLDATA, data);
2385         }
2386     }
2387     return reply;
2388 }
2389 
2390 static xmlNode *
2391 stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
2392 {
2393     xmlNode *reply = NULL;
2394 
2395     crm_trace("Creating a basic reply");
2396     reply = create_xml_node(NULL, T_STONITH_REPLY);
2397 
2398     crm_xml_add(reply, "st_origin", __func__);
2399     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
2400 
2401     crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
2402     crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
2403     crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
2404     crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
2405     crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
2406     crm_xml_add(reply, F_STONITH_TARGET, cmd->victim);
2407     crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
2408     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
2409     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
2410     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
2411 
2412     crm_xml_add_int(reply, F_STONITH_RC, rc);
2413 
2414     crm_xml_add(reply, "st_output", output);
2415 
2416     if (data != NULL) {
2417         crm_info("Attaching reply output");
2418         add_message_xml(reply, F_STONITH_CALLDATA, data);
2419     }
2420     return reply;
2421 }
2422 
2423 bool fencing_peer_active(crm_node_t *peer)
     /* [previous][next][first][last][top][bottom][index][help] */
2424 {
2425     if (peer == NULL) {
2426         return FALSE;
2427     } else if (peer->uname == NULL) {
2428         return FALSE;
2429     } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
2430         return TRUE;
2431     }
2432     return FALSE;
2433 }
2434 
2435 /*!
2436  * \internal
2437  * \brief Determine if we need to use an alternate node to
2438  * fence the target. If so return that node's uname
2439  *
2440  * \retval NULL, no alternate host
2441  * \retval uname, uname of alternate host to use
2442  */
2443 static const char *
2444 check_alternate_host(const char *target)
     /* [previous][next][first][last][top][bottom][index][help] */
2445 {
2446     const char *alternate_host = NULL;
2447 
2448     crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target);
2449     if (find_topology_for_host(target) && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
2450         GHashTableIter gIter;
2451         crm_node_t *entry = NULL;
2452 
2453         g_hash_table_iter_init(&gIter, crm_peer_cache);
2454         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
2455             crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target);
2456             if (fencing_peer_active(entry)
2457                 && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) {
2458                 alternate_host = entry->uname;
2459                 break;
2460             }
2461         }
2462         if (alternate_host == NULL) {
2463             crm_err("No alternate host available to handle complex self fencing request");
2464             g_hash_table_iter_init(&gIter, crm_peer_cache);
2465             while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
2466                 crm_notice("Peer[%d] %s", entry->id, entry->uname);
2467             }
2468         }
2469     }
2470 
2471     return alternate_host;
2472 }
2473 
2474 static void
2475 stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
     /* [previous][next][first][last][top][bottom][index][help] */
2476                    const char *client_id)
2477 {
2478     if (remote_peer) {
2479         send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE);
2480     } else {
2481         do_local_reply(reply, client_id,
2482                        pcmk_is_set(call_options, st_opt_sync_call),
2483                        (remote_peer != NULL));
2484     }
2485 }
2486 
2487 static void 
2488 remove_relay_op(xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
2489 {
2490     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE);
2491     const char *relay_op_id = NULL; 
2492     const char *op_id = NULL;
2493     const char *client_name = NULL;
2494     const char *target = NULL; 
2495     remote_fencing_op_t *relay_op = NULL; 
2496 
2497     if (dev) { 
2498         target = crm_element_value(dev, F_STONITH_TARGET); 
2499     }
2500 
2501     relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY);
2502     op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
2503     client_name = crm_element_value(request, F_STONITH_CLIENTNAME);
2504 
2505     /* Delete RELAY operation. */
2506     if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
2507         relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
2508 
2509         if (relay_op) {
2510             GHashTableIter iter;
2511             remote_fencing_op_t *list_op = NULL; 
2512             g_hash_table_iter_init(&iter, stonith_remote_op_list);
2513 
2514             /* If the operation to be deleted is registered as a duplicate, delete the registration. */
2515             while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
2516                 GListPtr dup_iter = NULL;
2517                 if (list_op != relay_op) {
2518                     for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
2519                         remote_fencing_op_t *other = dup_iter->data;
2520                         if (other == relay_op) {
2521                             other->duplicates = g_list_remove(other->duplicates, relay_op);
2522                             break;
2523                         }
2524                     }
2525                 }
2526             }
2527             crm_info("Delete the relay op : %s - %s of %s for %s.(replaced by op : %s - %s of %s for %s)",
2528                   relay_op->id, relay_op->action, relay_op->target, relay_op->client_name,
2529                   op_id, relay_op->action, target, client_name);
2530 
2531             g_hash_table_remove(stonith_remote_op_list, relay_op_id);
2532         }
2533     }
2534 }
2535 
2536 static int
2537 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     /* [previous][next][first][last][top][bottom][index][help] */
2538                xmlNode *request, const char *remote_peer)
2539 {
2540     int call_options = 0;
2541     int rc = -EOPNOTSUPP;
2542 
2543     xmlNode *data = NULL;
2544     xmlNode *reply = NULL;
2545 
2546     char *output = NULL;
2547     const char *op = crm_element_value(request, F_STONITH_OPERATION);
2548     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
2549 
2550 #if ENABLE_ACL
2551     /* IPC commands related to fencing configuration may be done only by
2552      * privileged users (i.e. root or hacluster) when ACLs are supported,
2553      * because all other users should go through the CIB to have ACLs applied.
2554      *
2555      * If no client was given, this is a peer request, which is always allowed.
2556      */
2557     bool allowed = (client == NULL)
2558                    || pcmk_is_set(client->flags, pcmk__client_privileged);
2559 #else
2560     bool allowed = true;
2561 #endif
2562 
2563     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
2564 
2565     if (pcmk_is_set(call_options, st_opt_sync_call)) {
2566         CRM_ASSERT(client == NULL || client->request_id == id);
2567     }
2568 
2569     if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
2570         xmlNode *reply = create_xml_node(NULL, "reply");
2571 
2572         CRM_ASSERT(client);
2573         crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
2574         crm_xml_add(reply, F_STONITH_CLIENTID, client->id);
2575         pcmk__ipc_send_xml(client, id, reply, flags);
2576         client->request_id = 0;
2577         free_xml(reply);
2578         return 0;
2579 
2580     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
2581         rc = stonith_device_action(request, &output);
2582 
2583     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
2584         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
2585         const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
2586         int op_timeout = 0;
2587 
2588         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
2589         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
2590         return 0;
2591 
2592     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
2593         if (remote_peer) {
2594             create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */
2595         }
2596 
2597         /* Delete the DC node RELAY operation. */
2598         remove_relay_op(request);
2599 
2600         stonith_query(request, remote_peer, client_id, call_options);
2601         return 0;
2602 
2603     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
2604         const char *flag_name = NULL;
2605 
2606         CRM_ASSERT(client);
2607         flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE);
2608         if (flag_name) {
2609             crm_debug("Enabling %s callbacks for %s (%s)",
2610                       flag_name, client->name, client->id);
2611             pcmk__set_client_flags(client, get_stonith_flag(flag_name));
2612         }
2613 
2614         flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE);
2615         if (flag_name) {
2616             crm_debug("Disabling %s callbacks for %s (%s)",
2617                       flag_name, client->name, client->id);
2618             pcmk__clear_client_flags(client, get_stonith_flag(flag_name));
2619         }
2620 
2621         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
2622         return 0;
2623 
2624     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
2625         xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2626 
2627         crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s",
2628                    stonith_our_uname,
2629                    client ? client->name : remote_peer,
2630                    crm_element_value(dev, F_STONITH_ACTION),
2631                    crm_element_value(dev, F_STONITH_TARGET));
2632 
2633         if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
2634             rc = -EINPROGRESS;
2635         }
2636 
2637     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
2638 
2639         if (remote_peer || stand_alone) {
2640             rc = stonith_fence(request);
2641 
2642         } else if (call_options & st_opt_manual_ack) {
2643             remote_fencing_op_t *rop = NULL;
2644             xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2645             const char *target = crm_element_value(dev, F_STONITH_TARGET);
2646 
2647             crm_notice("Received manual confirmation that %s is fenced", target);
2648             rop = initiate_remote_stonith_op(client, request, TRUE);
2649             rc = stonith_manual_ack(request, rop);
2650 
2651         } else {
2652             const char *alternate_host = NULL;
2653             xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2654             const char *target = crm_element_value(dev, F_STONITH_TARGET);
2655             const char *action = crm_element_value(dev, F_STONITH_ACTION);
2656             const char *device = crm_element_value(dev, F_STONITH_DEVICE);
2657 
2658             if (client) {
2659                 int tolerance = 0;
2660 
2661                 crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'",
2662                            client->name, client->id, action, target, device ? device : "(any)");
2663 
2664                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
2665 
2666                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
2667                     rc = 0;
2668                     goto done;
2669                 }
2670 
2671             } else {
2672                 crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
2673                            remote_peer, action, target, device ? device : "(any)");
2674             }
2675 
2676             alternate_host = check_alternate_host(target);
2677 
2678             if (alternate_host && client) {
2679                 const char *client_id = NULL;
2680                 remote_fencing_op_t *op = NULL;
2681 
2682                 crm_notice("Forwarding complex self fencing request to peer %s", alternate_host);
2683 
2684                 if (client->id) {
2685                     client_id = client->id;
2686                 } else {
2687                     client_id = crm_element_value(request, F_STONITH_CLIENTID);
2688                 }
2689 
2690                 /* Create an operation for RELAY and send the ID in the RELAY message. */
2691                 /* When a QUERY response is received, delete the RELAY operation to avoid the existence of duplicate operations. */
2692                 op = create_remote_stonith_op(client_id, request, FALSE);
2693 
2694                 crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY);
2695                 crm_xml_add(request, F_STONITH_CLIENTID, client->id);
2696                 crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id);
2697                 send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
2698                                      FALSE);
2699                 rc = -EINPROGRESS;
2700 
2701             } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
2702                 rc = -EINPROGRESS;
2703             }
2704         }
2705 
2706     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
2707         rc = stonith_fence_history(request, &data, remote_peer, call_options);
2708         if (call_options & st_opt_discard_reply) {
2709             /* we don't expect answers to the broadcast
2710              * we might have sent out
2711              */
2712             free_xml(data);
2713             return pcmk_ok;
2714         }
2715 
2716     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
2717         const char *device_id = NULL;
2718 
2719         if (allowed) {
2720             rc = stonith_device_register(request, &device_id, FALSE);
2721         } else {
2722             rc = -EACCES;
2723         }
2724         do_stonith_notify_device(call_options, op, rc, device_id);
2725 
2726     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
2727         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
2728         const char *device_id = crm_element_value(dev, XML_ATTR_ID);
2729 
2730         if (allowed) {
2731             rc = stonith_device_remove(device_id, FALSE);
2732         } else {
2733             rc = -EACCES;
2734         }
2735         do_stonith_notify_device(call_options, op, rc, device_id);
2736 
2737     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
2738         char *device_id = NULL;
2739 
2740         if (allowed) {
2741             rc = stonith_level_register(request, &device_id);
2742         } else {
2743             rc = -EACCES;
2744         }
2745         do_stonith_notify_level(call_options, op, rc, device_id);
2746         free(device_id);
2747 
2748     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
2749         char *device_id = NULL;
2750 
2751         if (allowed) {
2752             rc = stonith_level_remove(request, &device_id);
2753         } else {
2754             rc = -EACCES;
2755         }
2756         do_stonith_notify_level(call_options, op, rc, device_id);
2757 
2758     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
2759         int node_id = 0;
2760         const char *name = NULL;
2761 
2762         crm_element_value_int(request, XML_ATTR_ID, &node_id);
2763         name = crm_element_value(request, XML_ATTR_UNAME);
2764         reap_crm_member(node_id, name);
2765 
2766         return pcmk_ok;
2767 
2768     } else {
2769         crm_err("Unknown IPC request %s from %s",
2770                 op, (client? client->name : remote_peer));
2771     }
2772 
2773   done:
2774 
2775     if (rc == -EACCES) {
2776         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
2777                  crm_str(op), pcmk__client_name(client));
2778     }
2779 
2780     /* Always reply unless the request is in process still.
2781      * If in progress, a reply will happen async after the request
2782      * processing is finished */
2783     if (rc != -EINPROGRESS) {
2784         crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
2785                   id, pcmk_is_set(call_options, st_opt_sync_call), call_options,
2786                   crm_element_value(request, F_STONITH_CALLOPTS));
2787 
2788         if (pcmk_is_set(call_options, st_opt_sync_call)) {
2789             CRM_ASSERT(client == NULL || client->request_id == id);
2790         }
2791         reply = stonith_construct_reply(request, output, data, rc);
2792         stonith_send_reply(reply, call_options, remote_peer, client_id);
2793     }
2794 
2795     free(output);
2796     free_xml(data);
2797     free_xml(reply);
2798 
2799     return rc;
2800 }
2801 
2802 static void
2803 handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
     /* [previous][next][first][last][top][bottom][index][help] */
2804 {
2805     const char *op = crm_element_value(request, F_STONITH_OPERATION);
2806 
2807     if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
2808         process_remote_stonith_query(request);
2809     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
2810         process_remote_stonith_exec(request);
2811     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
2812         /* Reply to a complex fencing op */
2813         process_remote_stonith_exec(request);
2814     } else {
2815         crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer);
2816         crm_log_xml_warn(request, "UnknownOp");
2817     }
2818 }
2819 
2820 void
2821 stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
     /* [previous][next][first][last][top][bottom][index][help] */
2822                 xmlNode *request, const char *remote_peer)
2823 {
2824     int call_options = 0;
2825     int rc = 0;
2826     gboolean is_reply = FALSE;
2827 
2828     /* Copy op for reporting. The original might get freed by handle_reply()
2829      * before we use it in crm_debug():
2830      *     handle_reply()
2831      *     |- process_remote_stonith_exec()
2832      *     |-- remote_op_done()
2833      *     |--- handle_local_reply_and_notify()
2834      *     |---- crm_xml_add(...F_STONITH_OPERATION...)
2835      *     |--- free_xml(op->request)
2836      */
2837     char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
2838 
2839     if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) {
2840         is_reply = TRUE;
2841     }
2842 
2843     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
2844     crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "",
2845               id, client ? client->name : remote_peer, call_options);
2846 
2847     if (pcmk_is_set(call_options, st_opt_sync_call)) {
2848         CRM_ASSERT(client == NULL || client->request_id == id);
2849     }
2850 
2851     if (is_reply) {
2852         handle_reply(client, request, remote_peer);
2853     } else {
2854         rc = handle_request(client, id, flags, request, remote_peer);
2855     }
2856 
2857     crm_debug("Processed %s%s from %s: %s (%d)", op,
2858               is_reply ? " reply" : "", client ? client->name : remote_peer,
2859               rc > 0 ? "" : pcmk_strerror(rc), rc);
2860 
2861     free(op);
2862 }

/* [previous][next][first][last][top][bottom][index][help] */