root/daemons/fenced/fenced_commands.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. is_action_required
  2. get_action_delay_max
  3. get_action_delay_base
  4. get_action_timeout
  5. free_async_command
  6. create_async_command
  7. get_action_limit
  8. get_active_cmds
  9. fork_cb
  10. get_agent_metadata_cb
  11. stonith_device_execute
  12. stonith_device_dispatch
  13. start_delay_helper
  14. schedule_stonith_command
  15. free_device
  16. free_device_list
  17. init_device_list
  18. build_port_aliases
  19. free_metadata_cache
  20. init_metadata_cache
  21. get_agent_metadata
  22. is_nodeid_required
  23. add_action
  24. read_action_metadata
  25. map_action
  26. xml2device_params
  27. target_list_type
  28. build_device_from_xml
  29. schedule_internal_command
  30. string_in_list
  31. status_search_cb
  32. dynamic_list_search_cb
  33. device_params_diff
  34. device_has_duplicate
  35. stonith_device_register
  36. stonith_device_remove
  37. count_active_levels
  38. free_topology_entry
  39. free_topology_list
  40. init_topology_list
  41. stonith_level_key
  42. stonith_level_kind
  43. parse_device_list
  44. stonith_level_register
  45. stonith_level_remove
  46. stonith_device_action
  47. search_devices_record_result
  48. localhost_is_eligible
  49. can_fence_host_with_device
  50. search_devices
  51. get_capable_devices
  52. add_action_specific_attributes
  53. add_disallowed
  54. add_action_reply
  55. stonith_query_capable_device_cb
  56. stonith_query
  57. log_operation
  58. stonith_send_async_reply
  59. cancel_stonith_command
  60. st_child_done
  61. sort_device_priority
  62. stonith_fence_get_devices_cb
  63. stonith_fence
  64. stonith_construct_reply
  65. stonith_construct_async_reply
  66. fencing_peer_active
  67. check_alternate_host
  68. stonith_send_reply
  69. remove_relay_op
  70. handle_request
  71. handle_reply
  72. stonith_command

   1 /*
   2  * Copyright 2009-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <stdio.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 #include <sys/stat.h>
  17 #include <unistd.h>
  18 #include <sys/utsname.h>
  19 
  20 #include <stdlib.h>
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <ctype.h>
  24 
  25 #include <crm/crm.h>
  26 #include <crm/msg_xml.h>
  27 #include <crm/common/ipc.h>
  28 #include <crm/common/ipc_internal.h>
  29 #include <crm/cluster/internal.h>
  30 #include <crm/common/mainloop.h>
  31 
  32 #include <crm/stonith-ng.h>
  33 #include <crm/fencing/internal.h>
  34 #include <crm/common/xml.h>
  35 
  36 #include <pacemaker-fenced.h>
  37 
  38 GHashTable *device_list = NULL;
  39 GHashTable *topology = NULL;
  40 GList *cmd_list = NULL;
  41 
  42 struct device_search_s {
  43     /* target of fence action */
  44     char *host;
  45     /* requested fence action */
  46     char *action;
  47     /* timeout to use if a device is queried dynamically for possible targets */
  48     int per_device_timeout;
  49     /* number of registered fencing devices at time of request */
  50     int replies_needed;
  51     /* number of device replies received so far */
  52     int replies_received;
  53     /* whether the target is eligible to perform requested action (or off) */
  54     bool allow_suicide;
  55 
  56     /* private data to pass to search callback function */
  57     void *user_data;
  58     /* function to call when all replies have been received */
  59     void (*callback) (GList * devices, void *user_data);
  60     /* devices capable of performing requested action (or off if remapping) */
  61     GList *capable;
  62 };
  63 
  64 static gboolean stonith_device_dispatch(gpointer user_data);
  65 static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data);
  66 static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
  67                                const char *client_id);
  68 
  69 static void search_devices_record_result(struct device_search_s *search, const char *device,
  70                                          gboolean can_fence);
  71 
  72 static int get_agent_metadata(const char *agent, xmlNode **metadata);
  73 static void read_action_metadata(stonith_device_t *device);
  74 
  75 typedef struct async_command_s {
  76 
  77     int id;
  78     int pid;
  79     int fd_stdout;
  80     int options;
  81     int default_timeout; /* seconds */
  82     int timeout; /* seconds */
  83 
  84     int start_delay; /* seconds */
  85     int delay_id;
  86 
  87     char *op;
  88     char *origin;
  89     char *client;
  90     char *client_name;
  91     char *remote_op_id;
  92 
  93     char *victim;
  94     uint32_t victim_nodeid;
  95     char *action;
  96     char *device;
  97 
  98     GList *device_list;
  99     GList *device_next;
 100 
 101     void *internal_user_data;
 102     void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data);
 103     guint timer_sigterm;
 104     guint timer_sigkill;
 105     /*! If the operation timed out, this is the last signal
 106      *  we sent to the process to get it to terminate */
 107     int last_timeout_signo;
 108 
 109     stonith_device_t *active_on;
 110     stonith_device_t *activating_on;
 111 } async_command_t;
 112 
 113 static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
 114                                               xmlNode * data, int rc);
 115 
 116 static gboolean
 117 is_action_required(const char *action, stonith_device_t *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 118 {
 119     return device && device->automatic_unfencing && pcmk__str_eq(action, "on",
 120                                                                  pcmk__str_casei);
 121 }
 122 
 123 static int
 124 get_action_delay_max(stonith_device_t * device, const char * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 125 {
 126     const char *value = NULL;
 127     int delay_max = 0;
 128 
 129     if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
 130         return 0;
 131     }
 132 
 133     value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX);
 134     if (value) {
 135        delay_max = crm_parse_interval_spec(value) / 1000;
 136     }
 137 
 138     return delay_max;
 139 }
 140 
 141 static int
 142 get_action_delay_base(stonith_device_t * device, const char * action)
     /* [previous][next][first][last][top][bottom][index][help] */
 143 {
 144     const char *value = NULL;
 145     int delay_base = 0;
 146 
 147     if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) {
 148         return 0;
 149     }
 150 
 151     value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE);
 152     if (value) {
 153        delay_base = crm_parse_interval_spec(value) / 1000;
 154     }
 155 
 156     return delay_base;
 157 }
 158 
 159 /*!
 160  * \internal
 161  * \brief Override STONITH timeout with pcmk_*_timeout if available
 162  *
 163  * \param[in] device           STONITH device to use
 164  * \param[in] action           STONITH action name
 165  * \param[in] default_timeout  Timeout to use if device does not have
 166  *                             a pcmk_*_timeout parameter for action
 167  *
 168  * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout
 169  * \note For consistency, it would be nice if reboot/off/on timeouts could be
 170  *       set the same way as start/stop/monitor timeouts, i.e. with an
 171  *       <operation> entry in the fencing resource configuration. However that
 172  *       is insufficient because fencing devices may be registered directly via
 173  *       the fencer's register_device() API instead of going through the CIB
 174  *       (e.g. stonith_admin uses it for its -R option, and the executor uses it
 175  *       to ensure a device is registered when a command is issued). As device
 176  *       properties, pcmk_*_timeout parameters can be grabbed by the fencer when
 177  *       the device is registered, whether by CIB change or API call.
 178  */
 179 static int
 180 get_action_timeout(stonith_device_t * device, const char *action, int default_timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
 181 {
 182     if (action && device && device->params) {
 183         char buffer[64] = { 0, };
 184         const char *value = NULL;
 185 
 186         /* If "reboot" was requested but the device does not support it,
 187          * we will remap to "off", so check timeout for "off" instead
 188          */
 189         if (pcmk__str_eq(action, "reboot", pcmk__str_casei)
 190             && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
 191             crm_trace("%s doesn't support reboot, using timeout for off instead",
 192                       device->id);
 193             action = "off";
 194         }
 195 
 196         /* If the device config specified an action-specific timeout, use it */
 197         snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action);
 198         value = g_hash_table_lookup(device->params, buffer);
 199         if (value) {
 200             return atoi(value);
 201         }
 202     }
 203     return default_timeout;
 204 }
 205 
 206 static void
 207 free_async_command(async_command_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209     if (!cmd) {
 210         return;
 211     }
 212 
 213     if (cmd->delay_id) {
 214         g_source_remove(cmd->delay_id);
 215     }
 216 
 217     cmd_list = g_list_remove(cmd_list, cmd);
 218 
 219     g_list_free_full(cmd->device_list, free);
 220     free(cmd->device);
 221     free(cmd->action);
 222     free(cmd->victim);
 223     free(cmd->remote_op_id);
 224     free(cmd->client);
 225     free(cmd->client_name);
 226     free(cmd->origin);
 227     free(cmd->op);
 228     free(cmd);
 229 }
 230 
 231 static async_command_t *
 232 create_async_command(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234     async_command_t *cmd = NULL;
 235     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
 236     const char *action = crm_element_value(op, F_STONITH_ACTION);
 237 
 238     CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL);
 239 
 240     crm_log_xml_trace(msg, "Command");
 241     cmd = calloc(1, sizeof(async_command_t));
 242     crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id));
 243     crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
 244     crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
 245     cmd->timeout = cmd->default_timeout;
 246     // Value -1 means disable any static/random fencing delays
 247     crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
 248 
 249     cmd->origin = crm_element_value_copy(msg, F_ORIG);
 250     cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
 251     cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID);
 252     cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME);
 253     cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION);
 254     cmd->action = strdup(action);
 255     cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET);
 256     cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE);
 257 
 258     CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL);
 259     CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient"));
 260 
 261     cmd->done_cb = st_child_done;
 262     cmd_list = g_list_append(cmd_list, cmd);
 263     return cmd;
 264 }
 265 
 266 static int
 267 get_action_limit(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 268 {
 269     const char *value = NULL;
 270     int action_limit = 1;
 271 
 272     value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT);
 273     if ((value == NULL)
 274         || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok)
 275         || (action_limit == 0)) {
 276         action_limit = 1;
 277     }
 278     return action_limit;
 279 }
 280 
 281 static int
 282 get_active_cmds(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 283 {
 284     int counter = 0;
 285     GList *gIter = NULL;
 286     GList *gIterNext = NULL;
 287 
 288     CRM_CHECK(device != NULL, return 0);
 289 
 290     for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
 291         async_command_t *cmd = gIter->data;
 292 
 293         gIterNext = gIter->next;
 294 
 295         if (cmd->active_on == device) {
 296             counter++;
 297         }
 298     }
 299 
 300     return counter;
 301 }
 302 
 303 static void
 304 fork_cb(GPid pid, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 305 {
 306     async_command_t *cmd = (async_command_t *) user_data;
 307     stonith_device_t * device =
 308         /* in case of a retry we've done the move from
 309            activating_on to active_on already
 310          */
 311         cmd->activating_on?cmd->activating_on:cmd->active_on;
 312 
 313     CRM_ASSERT(device);
 314     crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout",
 315               cmd->action, pid,
 316               ((cmd->victim == NULL)? "" : " targeting "),
 317               ((cmd->victim == NULL)? "" : cmd->victim),
 318               device->id, cmd->timeout);
 319     cmd->active_on = device;
 320     cmd->activating_on = NULL;
 321 }
 322 
 323 static int
 324 get_agent_metadata_cb(gpointer data) {
     /* [previous][next][first][last][top][bottom][index][help] */
 325     stonith_device_t *device = data;
 326     guint period_ms;
 327 
 328     switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
 329         case pcmk_rc_ok:
 330             if (device->agent_metadata) {
 331                 read_action_metadata(device);
 332                 stonith__device_parameter_flags(&(device->flags), device->id,
 333                                         device->agent_metadata);
 334             }
 335             return G_SOURCE_REMOVE;
 336 
 337         case EAGAIN:
 338             period_ms = pcmk__mainloop_timer_get_period(device->timer);
 339             if (period_ms < 160 * 1000) {
 340                 mainloop_timer_set_period(device->timer, 2 * period_ms);
 341             }
 342             return G_SOURCE_CONTINUE;
 343 
 344         default:
 345             return G_SOURCE_REMOVE;
 346     }
 347 }
 348 
 349 static gboolean
 350 stonith_device_execute(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 351 {
 352     int exec_rc = 0;
 353     const char *action_str = NULL;
 354     const char *host_arg = NULL;
 355     async_command_t *cmd = NULL;
 356     stonith_action_t *action = NULL;
 357     int active_cmds = 0;
 358     int action_limit = 0;
 359     GList *gIter = NULL;
 360     GList *gIterNext = NULL;
 361 
 362     CRM_CHECK(device != NULL, return FALSE);
 363 
 364     active_cmds = get_active_cmds(device);
 365     action_limit = get_action_limit(device);
 366     if (action_limit > -1 && active_cmds >= action_limit) {
 367         crm_trace("%s is over its action limit of %d (%u active action%s)",
 368                   device->id, action_limit, active_cmds,
 369                   pcmk__plural_s(active_cmds));
 370         return TRUE;
 371     }
 372 
 373     for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) {
 374         async_command_t *pending_op = gIter->data;
 375 
 376         gIterNext = gIter->next;
 377 
 378         if (pending_op && pending_op->delay_id) {
 379             crm_trace("Operation '%s'%s%s using %s was asked to run too early, "
 380                       "waiting for start delay of %ds",
 381                       pending_op->action,
 382                       ((pending_op->victim == NULL)? "" : " targeting "),
 383                       ((pending_op->victim == NULL)? "" : pending_op->victim),
 384                       device->id, pending_op->start_delay);
 385             continue;
 386         }
 387 
 388         device->pending_ops = g_list_remove_link(device->pending_ops, gIter);
 389         g_list_free_1(gIter);
 390 
 391         cmd = pending_op;
 392         break;
 393     }
 394 
 395     if (cmd == NULL) {
 396         crm_trace("No actions using %s are needed", device->id);
 397         return TRUE;
 398     }
 399 
 400     if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) {
 401         if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) {
 402             pcmk__panic(__func__);
 403             goto done;
 404 
 405         } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) {
 406             pcmk__panic(__func__);
 407             goto done;
 408 
 409         } else {
 410             crm_info("Faking success for %s watchdog operation", cmd->action);
 411             cmd->done_cb(0, 0, NULL, cmd);
 412             goto done;
 413         }
 414     }
 415 
 416 #if SUPPORT_CIBSECRETS
 417     if (pcmk__substitute_secrets(device->id, device->params) != pcmk_rc_ok) {
 418         /* replacing secrets failed! */
 419         if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 420             /* don't fail on stop! */
 421             crm_info("Proceeding with stop operation for %s", device->id);
 422 
 423         } else {
 424             crm_err("Considering %s unconfigured: Failed to get secrets",
 425                     device->id);
 426             exec_rc = PCMK_OCF_NOT_CONFIGURED;
 427             cmd->done_cb(0, exec_rc, NULL, cmd);
 428             goto done;
 429         }
 430     }
 431 #endif
 432 
 433     action_str = cmd->action;
 434     if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)
 435         && !pcmk_is_set(device->flags, st_device_supports_reboot)) {
 436 
 437         crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent);
 438         action_str = "off";
 439     }
 440 
 441     if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) {
 442         host_arg = "port";
 443 
 444     } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) {
 445         host_arg = "plug";
 446     }
 447 
 448     action = stonith_action_create(device->agent,
 449                                    action_str,
 450                                    cmd->victim,
 451                                    cmd->victim_nodeid,
 452                                    cmd->timeout, device->params,
 453                                    device->aliases, host_arg);
 454 
 455     /* for async exec, exec_rc is negative for early error exit
 456        otherwise handling of success/errors is done via callbacks */
 457     cmd->activating_on = device;
 458     exec_rc = stonith_action_execute_async(action, (void *)cmd,
 459                                            cmd->done_cb, fork_cb);
 460 
 461     if (exec_rc < 0) {
 462         crm_warn("Operation '%s'%s%s using %s failed: %s " CRM_XS " rc=%d",
 463                  cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 464                  device->id, pcmk_strerror(exec_rc), exec_rc);
 465         cmd->activating_on = NULL;
 466         cmd->done_cb(0, exec_rc, NULL, cmd);
 467     }
 468 
 469 done:
 470     /* Device might get triggered to work by multiple fencing commands
 471      * simultaneously. Trigger the device again to make sure any
 472      * remaining concurrent commands get executed. */
 473     if (device->pending_ops) {
 474         mainloop_set_trigger(device->work);
 475     }
 476     return TRUE;
 477 }
 478 
 479 static gboolean
 480 stonith_device_dispatch(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 481 {
 482     return stonith_device_execute(user_data);
 483 }
 484 
 485 static gboolean
 486 start_delay_helper(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 487 {
 488     async_command_t *cmd = data;
 489     stonith_device_t *device = NULL;
 490 
 491     cmd->delay_id = 0;
 492     device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
 493 
 494     if (device) {
 495         mainloop_set_trigger(device->work);
 496     }
 497 
 498     return FALSE;
 499 }
 500 
 501 static void
 502 schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
 503 {
 504     int delay_max = 0;
 505     int delay_base = 0;
 506     int requested_delay = cmd->start_delay;
 507 
 508     CRM_CHECK(cmd != NULL, return);
 509     CRM_CHECK(device != NULL, return);
 510 
 511     if (cmd->device) {
 512         free(cmd->device);
 513     }
 514 
 515     if (device->include_nodeid && cmd->victim) {
 516         crm_node_t *node = crm_get_peer(0, cmd->victim);
 517 
 518         cmd->victim_nodeid = node->id;
 519     }
 520 
 521     cmd->device = strdup(device->id);
 522     cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout);
 523 
 524     if (cmd->remote_op_id) {
 525         crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s "
 526                   "with op id %.8s and timeout %ds",
 527                   cmd->action,
 528                   cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 529                   device->id, cmd->origin, cmd->remote_op_id, cmd->timeout);
 530     } else {
 531         crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds",
 532                   cmd->action,
 533                   cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 534                   device->id, cmd->client, cmd->timeout);
 535     }
 536 
 537     device->pending_ops = g_list_append(device->pending_ops, cmd);
 538     mainloop_set_trigger(device->work);
 539 
 540     // Value -1 means disable any static/random fencing delays
 541     if (requested_delay < 0) {
 542         return;
 543     }
 544 
 545     delay_max = get_action_delay_max(device, cmd->action);
 546     delay_base = get_action_delay_base(device, cmd->action);
 547     if (delay_max == 0) {
 548         delay_max = delay_base;
 549     }
 550     if (delay_max < delay_base) {
 551         crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than "
 552                  PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s "
 553                  "(limiting to maximum delay)",
 554                  delay_base, delay_max, cmd->action, device->id);
 555         delay_base = delay_max;
 556     }
 557     if (delay_max > 0) {
 558         // coverity[dont_call] We're not using rand() for security
 559         cmd->start_delay +=
 560             ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
 561             + delay_base;
 562     }
 563 
 564     if (cmd->start_delay > 0) {
 565         crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS
 566                    " timeout=%ds requested_delay=%ds base=%ds max=%ds",
 567                    cmd->action,
 568                    cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
 569                    device->id, cmd->start_delay, cmd->timeout,
 570                    requested_delay, delay_base, delay_max);
 571         cmd->delay_id =
 572             g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
 573     }
 574 }
 575 
 576 static void
 577 free_device(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 578 {
 579     GList *gIter = NULL;
 580     stonith_device_t *device = data;
 581 
 582     g_hash_table_destroy(device->params);
 583     g_hash_table_destroy(device->aliases);
 584 
 585     for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) {
 586         async_command_t *cmd = gIter->data;
 587 
 588         crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
 589         cmd->done_cb(0, -ENODEV, NULL, cmd);
 590     }
 591     g_list_free(device->pending_ops);
 592 
 593     g_list_free_full(device->targets, free);
 594 
 595     if (device->timer) {
 596         mainloop_timer_stop(device->timer);
 597         mainloop_timer_del(device->timer);
 598     }
 599 
 600     mainloop_destroy_trigger(device->work);
 601 
 602     free_xml(device->agent_metadata);
 603     free(device->namespace);
 604     free(device->on_target_actions);
 605     free(device->agent);
 606     free(device->id);
 607     free(device);
 608 }
 609 
 610 void free_device_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 611 {
 612     if (device_list != NULL) {
 613         g_hash_table_destroy(device_list);
 614         device_list = NULL;
 615     }
 616 }
 617 
 618 void
 619 init_device_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 620 {
 621     if (device_list == NULL) {
 622         device_list = pcmk__strkey_table(NULL, free_device);
 623     }
 624 }
 625 
 626 static GHashTable *
 627 build_port_aliases(const char *hostmap, GList ** targets)
     /* [previous][next][first][last][top][bottom][index][help] */
 628 {
 629     char *name = NULL;
 630     int last = 0, lpc = 0, max = 0, added = 0;
 631     GHashTable *aliases = pcmk__strikey_table(free, free);
 632 
 633     if (hostmap == NULL) {
 634         return aliases;
 635     }
 636 
 637     max = strlen(hostmap);
 638     for (; lpc <= max; lpc++) {
 639         switch (hostmap[lpc]) {
 640                 /* Assignment chars */
 641             case '=':
 642             case ':':
 643                 if (lpc > last) {
 644                     free(name);
 645                     name = calloc(1, 1 + lpc - last);
 646                     memcpy(name, hostmap + last, lpc - last);
 647                 }
 648                 last = lpc + 1;
 649                 break;
 650 
 651                 /* Delimeter chars */
 652                 /* case ',': Potentially used to specify multiple ports */
 653             case 0:
 654             case ';':
 655             case ' ':
 656             case '\t':
 657                 if (name) {
 658                     char *value = NULL;
 659 
 660                     value = calloc(1, 1 + lpc - last);
 661                     memcpy(value, hostmap + last, lpc - last);
 662 
 663                     crm_debug("Adding alias '%s'='%s'", name, value);
 664                     g_hash_table_replace(aliases, name, value);
 665                     if (targets) {
 666                         *targets = g_list_append(*targets, strdup(value));
 667                     }
 668                     value = NULL;
 669                     name = NULL;
 670                     added++;
 671 
 672                 } else if (lpc > last) {
 673                     crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last);
 674                 }
 675 
 676                 last = lpc + 1;
 677                 break;
 678         }
 679 
 680         if (hostmap[lpc] == 0) {
 681             break;
 682         }
 683     }
 684 
 685     if (added == 0) {
 686         crm_info("No host mappings detected in '%s'", hostmap);
 687     }
 688 
 689     free(name);
 690     return aliases;
 691 }
 692 
 693 GHashTable *metadata_cache = NULL;
 694 
 695 void
 696 free_metadata_cache(void) {
     /* [previous][next][first][last][top][bottom][index][help] */
 697     if (metadata_cache != NULL) {
 698         g_hash_table_destroy(metadata_cache);
 699         metadata_cache = NULL;
 700     }
 701 }
 702 
 703 static void
 704 init_metadata_cache(void) {
     /* [previous][next][first][last][top][bottom][index][help] */
 705     if (metadata_cache == NULL) {
 706         metadata_cache = pcmk__strkey_table(free, free);
 707     }
 708 }
 709 
 710 int
 711 get_agent_metadata(const char *agent, xmlNode ** metadata)
     /* [previous][next][first][last][top][bottom][index][help] */
 712 {
 713     char *buffer = NULL;
 714 
 715     if (metadata == NULL) {
 716         return EINVAL;
 717     }
 718     *metadata = NULL;
 719     if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) {
 720         return pcmk_rc_ok;
 721     }
 722     init_metadata_cache();
 723     buffer = g_hash_table_lookup(metadata_cache, agent);
 724     if (buffer == NULL) {
 725         stonith_t *st = stonith_api_new();
 726         int rc;
 727 
 728         if (st == NULL) {
 729             crm_warn("Could not get agent meta-data: "
 730                      "API memory allocation failed");
 731             return EAGAIN;
 732         }
 733         rc = st->cmds->metadata(st, st_opt_sync_call, agent,
 734                                 NULL, &buffer, 10);
 735         stonith_api_delete(st);
 736         if (rc || !buffer) {
 737             crm_err("Could not retrieve metadata for fencing agent %s", agent);
 738             return EAGAIN;
 739         }
 740         g_hash_table_replace(metadata_cache, strdup(agent), buffer);
 741     }
 742 
 743     *metadata = string2xml(buffer);
 744     return pcmk_rc_ok;
 745 }
 746 
 747 static gboolean
 748 is_nodeid_required(xmlNode * xml)
     /* [previous][next][first][last][top][bottom][index][help] */
 749 {
 750     xmlXPathObjectPtr xpath = NULL;
 751 
 752     if (stand_alone) {
 753         return FALSE;
 754     }
 755 
 756     if (!xml) {
 757         return FALSE;
 758     }
 759 
 760     xpath = xpath_search(xml, "//parameter[@name='nodeid']");
 761     if (numXpathResults(xpath)  <= 0) {
 762         freeXpathObject(xpath);
 763         return FALSE;
 764     }
 765 
 766     freeXpathObject(xpath);
 767     return TRUE;
 768 }
 769 
 770 #define MAX_ACTION_LEN 256
 771 
 772 static char *
 773 add_action(char *actions, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
 774 {
 775     int offset = 0;
 776 
 777     if (actions == NULL) {
 778         actions = calloc(1, MAX_ACTION_LEN);
 779     } else {
 780         offset = strlen(actions);
 781     }
 782 
 783     if (offset > 0) {
 784         offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " ");
 785     }
 786     offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action);
 787 
 788     return actions;
 789 }
 790 
 791 static void
 792 read_action_metadata(stonith_device_t *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 793 {
 794     xmlXPathObjectPtr xpath = NULL;
 795     int max = 0;
 796     int lpc = 0;
 797 
 798     if (device->agent_metadata == NULL) {
 799         return;
 800     }
 801 
 802     xpath = xpath_search(device->agent_metadata, "//action");
 803     max = numXpathResults(xpath);
 804 
 805     if (max <= 0) {
 806         freeXpathObject(xpath);
 807         return;
 808     }
 809 
 810     for (lpc = 0; lpc < max; lpc++) {
 811         const char *on_target = NULL;
 812         const char *action = NULL;
 813         xmlNode *match = getXpathResult(xpath, lpc);
 814 
 815         CRM_LOG_ASSERT(match != NULL);
 816         if(match == NULL) { continue; };
 817 
 818         on_target = crm_element_value(match, "on_target");
 819         action = crm_element_value(match, "name");
 820 
 821         if(pcmk__str_eq(action, "list", pcmk__str_casei)) {
 822             stonith__set_device_flags(device->flags, device->id,
 823                                       st_device_supports_list);
 824         } else if(pcmk__str_eq(action, "status", pcmk__str_casei)) {
 825             stonith__set_device_flags(device->flags, device->id,
 826                                       st_device_supports_status);
 827         } else if(pcmk__str_eq(action, "reboot", pcmk__str_casei)) {
 828             stonith__set_device_flags(device->flags, device->id,
 829                                       st_device_supports_reboot);
 830         } else if (pcmk__str_eq(action, "on", pcmk__str_casei)) {
 831             /* "automatic" means the cluster will unfence node when it joins */
 832             const char *automatic = crm_element_value(match, "automatic");
 833 
 834             /* "required" is a deprecated synonym for "automatic" */
 835             const char *required = crm_element_value(match, "required");
 836 
 837             if (crm_is_true(automatic) || crm_is_true(required)) {
 838                 device->automatic_unfencing = TRUE;
 839             }
 840         }
 841 
 842         if (action && crm_is_true(on_target)) {
 843             device->on_target_actions = add_action(device->on_target_actions, action);
 844         }
 845     }
 846 
 847     freeXpathObject(xpath);
 848 }
 849 
 850 /*!
 851  * \internal
 852  * \brief Set a pcmk_*_action parameter if not already set
 853  *
 854  * \param[in,out] params  Device parameters
 855  * \param[in]     action  Name of action
 856  * \param[in]     value   Value to use if action is not already set
 857  */
 858 static void
 859 map_action(GHashTable *params, const char *action, const char *value)
     /* [previous][next][first][last][top][bottom][index][help] */
 860 {
 861     char *key = crm_strdup_printf("pcmk_%s_action", action);
 862 
 863     if (g_hash_table_lookup(params, key)) {
 864         crm_warn("Ignoring %s='%s', see %s instead",
 865                  STONITH_ATTR_ACTION_OP, value, key);
 866         free(key);
 867     } else {
 868         crm_warn("Mapping %s='%s' to %s='%s'",
 869                  STONITH_ATTR_ACTION_OP, value, key, value);
 870         g_hash_table_insert(params, key, strdup(value));
 871     }
 872 }
 873 
 874 /*!
 875  * \internal
 876  * \brief Create device parameter table from XML
 877  *
 878  * \param[in]     name    Device name (used for logging only)
 879  * \param[in,out] params  Device parameters
 880  */
 881 static GHashTable *
 882 xml2device_params(const char *name, xmlNode *dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 883 {
 884     GHashTable *params = xml2list(dev);
 885     const char *value;
 886 
 887     /* Action should never be specified in the device configuration,
 888      * but we support it for users who are familiar with other software
 889      * that worked that way.
 890      */
 891     value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP);
 892     if (value != NULL) {
 893         crm_warn("%s has '%s' parameter, which should never be specified in configuration",
 894                  name, STONITH_ATTR_ACTION_OP);
 895 
 896         if (*value == '\0') {
 897             crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP);
 898 
 899         } else if (strcmp(value, "reboot") == 0) {
 900             crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)",
 901                      STONITH_ATTR_ACTION_OP);
 902 
 903         } else if (strcmp(value, "off") == 0) {
 904             map_action(params, "reboot", value);
 905 
 906         } else {
 907             map_action(params, "off", value);
 908             map_action(params, "reboot", value);
 909         }
 910 
 911         g_hash_table_remove(params, STONITH_ATTR_ACTION_OP);
 912     }
 913 
 914     return params;
 915 }
 916 
 917 static const char *
 918 target_list_type(stonith_device_t * dev)
     /* [previous][next][first][last][top][bottom][index][help] */
 919 {
 920     const char *check_type = NULL;
 921 
 922     check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK);
 923 
 924     if (check_type == NULL) {
 925 
 926         if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) {
 927             check_type = "static-list";
 928         } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) {
 929             check_type = "static-list";
 930         } else if (pcmk_is_set(dev->flags, st_device_supports_list)) {
 931             check_type = "dynamic-list";
 932         } else if (pcmk_is_set(dev->flags, st_device_supports_status)) {
 933             check_type = "status";
 934         } else {
 935             check_type = "none";
 936         }
 937     }
 938 
 939     return check_type;
 940 }
 941 
 942 static stonith_device_t *
 943 build_device_from_xml(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 944 {
 945     const char *value;
 946     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
 947     stonith_device_t *device = NULL;
 948     char *agent = crm_element_value_copy(dev, "agent");
 949 
 950     CRM_CHECK(agent != NULL, return device);
 951 
 952     device = calloc(1, sizeof(stonith_device_t));
 953 
 954     CRM_CHECK(device != NULL, {free(agent); return device;});
 955 
 956     device->id = crm_element_value_copy(dev, XML_ATTR_ID);
 957     device->agent = agent;
 958     device->namespace = crm_element_value_copy(dev, "namespace");
 959     device->params = xml2device_params(device->id, dev);
 960 
 961     value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST);
 962     if (value) {
 963         device->targets = stonith__parse_targets(value);
 964     }
 965 
 966     value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP);
 967     device->aliases = build_port_aliases(value, &(device->targets));
 968 
 969     value = target_list_type(device);
 970     if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) {
 971         /* Other than "static-list", dev-> targets is unnecessary. */
 972         g_list_free_full(device->targets, free);
 973         device->targets = NULL;
 974     }
 975     switch (get_agent_metadata(device->agent, &device->agent_metadata)) {
 976         case pcmk_rc_ok:
 977             if (device->agent_metadata) {
 978                 read_action_metadata(device);
 979                 stonith__device_parameter_flags(&(device->flags), device->id,
 980                                                 device->agent_metadata);
 981             }
 982             break;
 983 
 984         case EAGAIN:
 985             if (device->timer == NULL) {
 986                 device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
 987                                            TRUE, get_agent_metadata_cb, device);
 988             }
 989             if (!mainloop_timer_running(device->timer)) {
 990                 mainloop_timer_start(device->timer);
 991             }
 992             break;
 993 
 994         default:
 995             break;
 996     }
 997 
 998     value = g_hash_table_lookup(device->params, "nodeid");
 999     if (!value) {
1000         device->include_nodeid = is_nodeid_required(device->agent_metadata);
1001     }
1002 
1003     value = crm_element_value(dev, "rsc_provides");
1004     if (pcmk__str_eq(value, "unfencing", pcmk__str_casei)) {
1005         device->automatic_unfencing = TRUE;
1006     }
1007 
1008     if (is_action_required("on", device)) {
1009         crm_info("Fencing device '%s' requires unfencing", device->id);
1010     }
1011 
1012     if (device->on_target_actions) {
1013         crm_info("Fencing device '%s' requires actions (%s) to be executed "
1014                  "on target", device->id, device->on_target_actions);
1015     }
1016 
1017     device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device);
1018     /* TODO: Hook up priority */
1019 
1020     return device;
1021 }
1022 
1023 static void
1024 schedule_internal_command(const char *origin,
     /* [previous][next][first][last][top][bottom][index][help] */
1025                           stonith_device_t * device,
1026                           const char *action,
1027                           const char *victim,
1028                           int timeout,
1029                           void *internal_user_data,
1030                           void (*done_cb) (GPid pid, int rc, const char *output,
1031                                            gpointer user_data))
1032 {
1033     async_command_t *cmd = NULL;
1034 
1035     cmd = calloc(1, sizeof(async_command_t));
1036 
1037     cmd->id = -1;
1038     cmd->default_timeout = timeout ? timeout : 60;
1039     cmd->timeout = cmd->default_timeout;
1040     cmd->action = strdup(action);
1041     cmd->victim = victim ? strdup(victim) : NULL;
1042     cmd->device = strdup(device->id);
1043     cmd->origin = strdup(origin);
1044     cmd->client = strdup(crm_system_name);
1045     cmd->client_name = strdup(crm_system_name);
1046 
1047     cmd->internal_user_data = internal_user_data;
1048     cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */
1049 
1050     schedule_stonith_command(cmd, device);
1051 }
1052 
1053 gboolean
1054 string_in_list(GList *list, const char *item)
     /* [previous][next][first][last][top][bottom][index][help] */
1055 {
1056     int lpc = 0;
1057     int max = g_list_length(list);
1058 
1059     for (lpc = 0; lpc < max; lpc++) {
1060         const char *value = g_list_nth_data(list, lpc);
1061 
1062         if (pcmk__str_eq(item, value, pcmk__str_casei)) {
1063             return TRUE;
1064         } else {
1065             crm_trace("%d: '%s' != '%s'", lpc, item, value);
1066         }
1067     }
1068     return FALSE;
1069 }
1070 
1071 static void
1072 status_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1073 {
1074     async_command_t *cmd = user_data;
1075     struct device_search_s *search = cmd->internal_user_data;
1076     stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
1077     gboolean can = FALSE;
1078 
1079     free_async_command(cmd);
1080 
1081     if (!dev) {
1082         search_devices_record_result(search, NULL, FALSE);
1083         return;
1084     }
1085 
1086     mainloop_set_trigger(dev->work);
1087 
1088     if (rc == 1 /* unknown */ ) {
1089         crm_trace("Host %s is not known by %s", search->host, dev->id);
1090 
1091     } else if (rc == 0 /* active */  || rc == 2 /* inactive */ ) {
1092         crm_trace("Host %s is known by %s", search->host, dev->id);
1093         can = TRUE;
1094 
1095     } else {
1096         crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host,
1097                    rc);
1098     }
1099     search_devices_record_result(search, dev->id, can);
1100 }
1101 
1102 static void
1103 dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1104 {
1105     async_command_t *cmd = user_data;
1106     struct device_search_s *search = cmd->internal_user_data;
1107     stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL;
1108     gboolean can_fence = FALSE;
1109 
1110     free_async_command(cmd);
1111 
1112     /* Host/alias must be in the list output to be eligible to be fenced
1113      *
1114      * Will cause problems if down'd nodes aren't listed or (for virtual nodes)
1115      *  if the guest is still listed despite being moved to another machine
1116      */
1117     if (!dev) {
1118         search_devices_record_result(search, NULL, FALSE);
1119         return;
1120     }
1121 
1122     mainloop_set_trigger(dev->work);
1123 
1124     /* If we successfully got the targets earlier, don't disable. */
1125     if (rc != 0 && !dev->targets) {
1126         if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) {
1127             /*
1128                 If the operation fails if the user does not explicitly specify "dynamic-list", it will fall back to "status".
1129             */
1130             crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output);
1131             g_hash_table_replace(dev->params,
1132                              strdup(PCMK_STONITH_HOST_CHECK), strdup("status"));
1133         }
1134     } else if (!rc) {
1135         crm_info("Refreshing port list for %s", dev->id);
1136         g_list_free_full(dev->targets, free);
1137         dev->targets = stonith__parse_targets(output);
1138         dev->targets_age = time(NULL);
1139     }
1140 
1141     if (dev->targets) {
1142         const char *alias = g_hash_table_lookup(dev->aliases, search->host);
1143 
1144         if (!alias) {
1145             alias = search->host;
1146         }
1147         if (string_in_list(dev->targets, alias)) {
1148             can_fence = TRUE;
1149         }
1150     }
1151     search_devices_record_result(search, dev->id, can_fence);
1152 }
1153 
1154 /*!
1155  * \internal
1156  * \brief Returns true if any key in first is not in second or second has a different value for key
1157  */
1158 static int
1159 device_params_diff(GHashTable *first, GHashTable *second) {
     /* [previous][next][first][last][top][bottom][index][help] */
1160     char *key = NULL;
1161     char *value = NULL;
1162     GHashTableIter gIter;
1163 
1164     g_hash_table_iter_init(&gIter, first);
1165     while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) {
1166 
1167         if(strstr(key, "CRM_meta") == key) {
1168             continue;
1169         } else if(strcmp(key, "crm_feature_set") == 0) {
1170             continue;
1171         } else {
1172             char *other_value = g_hash_table_lookup(second, key);
1173 
1174             if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) {
1175                 crm_trace("Different value for %s: %s != %s", key, other_value, value);
1176                 return 1;
1177             }
1178         }
1179     }
1180 
1181     return 0;
1182 }
1183 
1184 /*!
1185  * \internal
1186  * \brief Checks to see if an identical device already exists in the device_list
1187  */
1188 static stonith_device_t *
1189 device_has_duplicate(stonith_device_t * device)
     /* [previous][next][first][last][top][bottom][index][help] */
1190 {
1191     stonith_device_t *dup = g_hash_table_lookup(device_list, device->id);
1192 
1193     if (!dup) {
1194         crm_trace("No match for %s", device->id);
1195         return NULL;
1196 
1197     } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) {
1198         crm_trace("Different agent: %s != %s", dup->agent, device->agent);
1199         return NULL;
1200     }
1201 
1202     /* Use calculate_operation_digest() here? */
1203     if (device_params_diff(device->params, dup->params) ||
1204         device_params_diff(dup->params, device->params)) {
1205         return NULL;
1206     }
1207 
1208     crm_trace("Match");
1209     return dup;
1210 }
1211 
1212 int
1213 stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1214 {
1215     stonith_device_t *dup = NULL;
1216     stonith_device_t *device = build_device_from_xml(msg);
1217     guint ndevices = 0;
1218 
1219     CRM_CHECK(device != NULL, return -ENOMEM);
1220 
1221     dup = device_has_duplicate(device);
1222     if (dup) {
1223         ndevices = g_hash_table_size(device_list);
1224         crm_debug("Device '%s' already in device list (%d active device%s)",
1225                   device->id, ndevices, pcmk__plural_s(ndevices));
1226         free_device(device);
1227         device = dup;
1228         dup = g_hash_table_lookup(device_list, device->id);
1229         dup->dirty = FALSE;
1230 
1231     } else {
1232         stonith_device_t *old = g_hash_table_lookup(device_list, device->id);
1233 
1234         if (from_cib && old && old->api_registered) {
1235             /* If the cib is writing over an entry that is shared with a stonith client,
1236              * copy any pending ops that currently exist on the old entry to the new one.
1237              * Otherwise the pending ops will be reported as failures
1238              */
1239             crm_info("Overwriting existing entry for %s from CIB", device->id);
1240             device->pending_ops = old->pending_ops;
1241             device->api_registered = TRUE;
1242             old->pending_ops = NULL;
1243             if (device->pending_ops) {
1244                 mainloop_set_trigger(device->work);
1245             }
1246         }
1247         g_hash_table_replace(device_list, device->id, device);
1248 
1249         ndevices = g_hash_table_size(device_list);
1250         crm_notice("Added '%s' to device list (%d active device%s)",
1251                    device->id, ndevices, pcmk__plural_s(ndevices));
1252     }
1253     if (desc) {
1254         *desc = device->id;
1255     }
1256 
1257     if (from_cib) {
1258         device->cib_registered = TRUE;
1259     } else {
1260         device->api_registered = TRUE;
1261     }
1262 
1263     return pcmk_ok;
1264 }
1265 
1266 int
1267 stonith_device_remove(const char *id, gboolean from_cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1268 {
1269     stonith_device_t *device = g_hash_table_lookup(device_list, id);
1270     guint ndevices = 0;
1271 
1272     if (!device) {
1273         ndevices = g_hash_table_size(device_list);
1274         crm_info("Device '%s' not found (%d active device%s)",
1275                  id, ndevices, pcmk__plural_s(ndevices));
1276         return pcmk_ok;
1277     }
1278 
1279     if (from_cib) {
1280         device->cib_registered = FALSE;
1281     } else {
1282         device->verified = FALSE;
1283         device->api_registered = FALSE;
1284     }
1285 
1286     if (!device->cib_registered && !device->api_registered) {
1287         g_hash_table_remove(device_list, id);
1288         ndevices = g_hash_table_size(device_list);
1289         crm_info("Removed '%s' from device list (%d active device%s)",
1290                  id, ndevices, pcmk__plural_s(ndevices));
1291     } else {
1292         crm_trace("Not removing '%s' from device list (%d active) because "
1293                   "still registered via:%s%s",
1294                   id, g_hash_table_size(device_list),
1295                   (device->cib_registered? " cib" : ""),
1296                   (device->api_registered? " api" : ""));
1297     }
1298     return pcmk_ok;
1299 }
1300 
1301 /*!
1302  * \internal
1303  * \brief Return the number of stonith levels registered for a node
1304  *
1305  * \param[in] tp  Node's topology table entry
1306  *
1307  * \return Number of non-NULL levels in topology entry
1308  * \note This function is used only for log messages.
1309  */
1310 static int
1311 count_active_levels(stonith_topology_t * tp)
     /* [previous][next][first][last][top][bottom][index][help] */
1312 {
1313     int lpc = 0;
1314     int count = 0;
1315 
1316     for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
1317         if (tp->levels[lpc] != NULL) {
1318             count++;
1319         }
1320     }
1321     return count;
1322 }
1323 
1324 static void
1325 free_topology_entry(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
1326 {
1327     stonith_topology_t *tp = data;
1328 
1329     int lpc = 0;
1330 
1331     for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) {
1332         if (tp->levels[lpc] != NULL) {
1333             g_list_free_full(tp->levels[lpc], free);
1334         }
1335     }
1336     free(tp->target);
1337     free(tp->target_value);
1338     free(tp->target_pattern);
1339     free(tp->target_attribute);
1340     free(tp);
1341 }
1342 
1343 void
1344 free_topology_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1345 {
1346     if (topology != NULL) {
1347         g_hash_table_destroy(topology);
1348         topology = NULL;
1349     }
1350 }
1351 
1352 void
1353 init_topology_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1354 {
1355     if (topology == NULL) {
1356         topology = pcmk__strkey_table(NULL, free_topology_entry);
1357     }
1358 }
1359 
1360 char *stonith_level_key(xmlNode *level, int mode)
     /* [previous][next][first][last][top][bottom][index][help] */
1361 {
1362     if(mode == -1) {
1363         mode = stonith_level_kind(level);
1364     }
1365 
1366     switch(mode) {
1367         case 0:
1368             return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET);
1369         case 1:
1370             return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
1371         case 2:
1372             {
1373                 const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
1374                 const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE);
1375 
1376                 if(name && value) {
1377                     return crm_strdup_printf("%s=%s", name, value);
1378                 }
1379             }
1380         default:
1381             return crm_strdup_printf("Unknown-%d-%s", mode, ID(level));
1382     }
1383 }
1384 
1385 int stonith_level_kind(xmlNode * level)
     /* [previous][next][first][last][top][bottom][index][help] */
1386 {
1387     int mode = 0;
1388     const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET);
1389 
1390     if(target == NULL) {
1391         mode++;
1392         target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN);
1393     }
1394 
1395     if(stand_alone == FALSE && target == NULL) {
1396 
1397         mode++;
1398 
1399         if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) {
1400             mode++;
1401 
1402         } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) {
1403             mode++;
1404         }
1405     }
1406 
1407     return mode;
1408 }
1409 
1410 static stonith_key_value_t *
1411 parse_device_list(const char *devices)
     /* [previous][next][first][last][top][bottom][index][help] */
1412 {
1413     int lpc = 0;
1414     int max = 0;
1415     int last = 0;
1416     stonith_key_value_t *output = NULL;
1417 
1418     if (devices == NULL) {
1419         return output;
1420     }
1421 
1422     max = strlen(devices);
1423     for (lpc = 0; lpc <= max; lpc++) {
1424         if (devices[lpc] == ',' || devices[lpc] == 0) {
1425             char *line = strndup(devices + last, lpc - last);
1426 
1427             output = stonith_key_value_add(output, NULL, line);
1428             free(line);
1429 
1430             last = lpc + 1;
1431         }
1432     }
1433 
1434     return output;
1435 }
1436 
1437 /*!
1438  * \internal
1439  * \brief Register a STONITH level for a target
1440  *
1441  * Given an XML request specifying the target name, level index, and device IDs
1442  * for the level, this will create an entry for the target in the global topology
1443  * table if one does not already exist, then append the specified device IDs to
1444  * the entry's device list for the specified level.
1445  *
1446  * \param[in]  msg   XML request for STONITH level registration
1447  * \param[out] desc  If not NULL, will be set to string representation ("TARGET[LEVEL]")
1448  *
1449  * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index
1450  */
1451 int
1452 stonith_level_register(xmlNode *msg, char **desc)
     /* [previous][next][first][last][top][bottom][index][help] */
1453 {
1454     int id = 0;
1455     xmlNode *level;
1456     int mode;
1457     char *target;
1458 
1459     stonith_topology_t *tp;
1460     stonith_key_value_t *dIter = NULL;
1461     stonith_key_value_t *devices = NULL;
1462 
1463     /* Allow the XML here to point to the level tag directly, or wrapped in
1464      * another tag. If directly, don't search by xpath, because it might give
1465      * multiple hits (e.g. if the XML is the CIB).
1466      */
1467     if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) {
1468         level = msg;
1469     } else {
1470         level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
1471     }
1472     CRM_CHECK(level != NULL, return -EINVAL);
1473 
1474     mode = stonith_level_kind(level);
1475     target = stonith_level_key(level, mode);
1476     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
1477 
1478     if (desc) {
1479         *desc = crm_strdup_printf("%s[%d]", target, id);
1480     }
1481 
1482     /* Sanity-check arguments */
1483     if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) {
1484         crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology));
1485         free(target);
1486         crm_log_xml_err(level, "Bad topology");
1487         return -EINVAL;
1488     }
1489 
1490     /* Find or create topology table entry */
1491     tp = g_hash_table_lookup(topology, target);
1492     if (tp == NULL) {
1493         tp = calloc(1, sizeof(stonith_topology_t));
1494         tp->kind = mode;
1495         tp->target = target;
1496         tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE);
1497         tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN);
1498         tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE);
1499 
1500         g_hash_table_replace(topology, tp->target, tp);
1501         crm_trace("Added %s (%d) to the topology (%d active entries)",
1502                   target, mode, g_hash_table_size(topology));
1503     } else {
1504         free(target);
1505     }
1506 
1507     if (tp->levels[id] != NULL) {
1508         crm_info("Adding to the existing %s[%d] topology entry",
1509                  tp->target, id);
1510     }
1511 
1512     devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES));
1513     for (dIter = devices; dIter; dIter = dIter->next) {
1514         const char *device = dIter->value;
1515 
1516         crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id);
1517         tp->levels[id] = g_list_append(tp->levels[id], strdup(device));
1518     }
1519     stonith_key_value_freeall(devices, 1, 1);
1520 
1521     {
1522         int nlevels = count_active_levels(tp);
1523 
1524         crm_info("Target %s has %d active fencing level%s",
1525                  tp->target, nlevels, pcmk__plural_s(nlevels));
1526     }
1527     return pcmk_ok;
1528 }
1529 
1530 int
1531 stonith_level_remove(xmlNode *msg, char **desc)
     /* [previous][next][first][last][top][bottom][index][help] */
1532 {
1533     int id = 0;
1534     stonith_topology_t *tp;
1535     char *target;
1536 
1537     /* Unlike additions, removal requests should always have one level tag */
1538     xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR);
1539 
1540     CRM_CHECK(level != NULL, return -EINVAL);
1541 
1542     target = stonith_level_key(level, -1);
1543     crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id);
1544     if (desc) {
1545         *desc = crm_strdup_printf("%s[%d]", target, id);
1546     }
1547 
1548     /* Sanity-check arguments */
1549     if (id >= ST_LEVEL_MAX) {
1550         free(target);
1551         return -EINVAL;
1552     }
1553 
1554     tp = g_hash_table_lookup(topology, target);
1555     if (tp == NULL) {
1556         guint nentries = g_hash_table_size(topology);
1557 
1558         crm_info("No fencing topology found for %s (%d active %s)",
1559                  target, nentries,
1560                  pcmk__plural_alt(nentries, "entry", "entries"));
1561 
1562     } else if (id == 0 && g_hash_table_remove(topology, target)) {
1563         guint nentries = g_hash_table_size(topology);
1564 
1565         crm_info("Removed all fencing topology entries related to %s "
1566                  "(%d active %s remaining)", target, nentries,
1567                  pcmk__plural_alt(nentries, "entry", "entries"));
1568 
1569     } else if (id > 0 && tp->levels[id] != NULL) {
1570         guint nlevels;
1571 
1572         g_list_free_full(tp->levels[id], free);
1573         tp->levels[id] = NULL;
1574 
1575         nlevels = count_active_levels(tp);
1576         crm_info("Removed level %d from fencing topology for %s "
1577                  "(%d active level%s remaining)",
1578                  id, target, nlevels, pcmk__plural_s(nlevels));
1579     }
1580 
1581     free(target);
1582     return pcmk_ok;
1583 }
1584 
1585 /*!
1586  * \internal
1587  * \brief Schedule an (asynchronous) action directly on a stonith device
1588  *
1589  * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action
1590  * directly on a specified device. Only list, monitor, and status actions are
1591  * expected to use this call, though it should work with any agent command.
1592  *
1593  * \param[in]  msg     API message XML with desired action
1594  * \param[out] output  Unused
1595  *
1596  * \return -EINPROGRESS on success, -errno otherwise
1597  * \note If the action is monitor, the device must be registered via the API
1598  *       (CIB registration is not sufficient), because monitor should not be
1599  *       possible unless the device is "started" (API registered).
1600  */
1601 static int
1602 stonith_device_action(xmlNode * msg, char **output)
     /* [previous][next][first][last][top][bottom][index][help] */
1603 {
1604     xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR);
1605     xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR);
1606     const char *id = crm_element_value(dev, F_STONITH_DEVICE);
1607     const char *action = crm_element_value(op, F_STONITH_ACTION);
1608     async_command_t *cmd = NULL;
1609     stonith_device_t *device = NULL;
1610 
1611     if ((id == NULL) || (action == NULL)) {
1612         crm_info("Malformed API action request: device %s, action %s",
1613                  (id? id : "not specified"),
1614                  (action? action : "not specified"));
1615         return -EPROTO;
1616     }
1617 
1618     device = g_hash_table_lookup(device_list, id);
1619     if ((device == NULL)
1620         || (!device->api_registered && !strcmp(action, "monitor"))) {
1621 
1622         // Monitors may run only on "started" (API-registered) devices
1623         crm_info("Ignoring API '%s' action request because device %s not found",
1624                  action, id);
1625         return -ENODEV;
1626     }
1627 
1628     cmd = create_async_command(msg);
1629     if (cmd == NULL) {
1630         return -EPROTO;
1631     }
1632 
1633     schedule_stonith_command(cmd, device);
1634     return -EINPROGRESS;
1635 }
1636 
1637 static void
1638 search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence)
     /* [previous][next][first][last][top][bottom][index][help] */
1639 {
1640     search->replies_received++;
1641 
1642     if (can_fence && device) {
1643         search->capable = g_list_append(search->capable, strdup(device));
1644     }
1645 
1646     if (search->replies_needed == search->replies_received) {
1647 
1648         guint ndevices = g_list_length(search->capable);
1649 
1650         crm_debug("Search found %d device%s that can perform '%s' targeting %s",
1651                   ndevices, pcmk__plural_s(ndevices),
1652                   (search->action? search->action : "unknown action"),
1653                   (search->host? search->host : "any node"));
1654 
1655         search->callback(search->capable, search->user_data);
1656         free(search->host);
1657         free(search->action);
1658         free(search);
1659     }
1660 }
1661 
1662 /*!
1663  * \internal
1664  * \brief Check whether the local host is allowed to execute a fencing action
1665  *
1666  * \param[in] device         Fence device to check
1667  * \param[in] action         Fence action to check
1668  * \param[in] target         Hostname of fence target
1669  * \param[in] allow_suicide  Whether self-fencing is allowed for this operation
1670  *
1671  * \return TRUE if local host is allowed to execute action, FALSE otherwise
1672  */
1673 static gboolean
1674 localhost_is_eligible(const stonith_device_t *device, const char *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1675                       const char *target, gboolean allow_suicide)
1676 {
1677     gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname,
1678                                                 pcmk__str_casei);
1679 
1680     if (device && action && device->on_target_actions
1681         && strstr(device->on_target_actions, action)) {
1682         if (!localhost_is_target) {
1683             crm_trace("Operation '%s' using %s can only be executed for "
1684                       "local host, not %s", action, device->id, target);
1685             return FALSE;
1686         }
1687 
1688     } else if (localhost_is_target && !allow_suicide) {
1689         crm_trace("'%s' operation does not support self-fencing", action);
1690         return FALSE;
1691     }
1692     return TRUE;
1693 }
1694 
1695 static void
1696 can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search)
     /* [previous][next][first][last][top][bottom][index][help] */
1697 {
1698     gboolean can = FALSE;
1699     const char *check_type = NULL;
1700     const char *host = search->host;
1701     const char *alias = NULL;
1702 
1703     CRM_LOG_ASSERT(dev != NULL);
1704 
1705     if (dev == NULL) {
1706         goto search_report_results;
1707     } else if (host == NULL) {
1708         can = TRUE;
1709         goto search_report_results;
1710     }
1711 
1712     /* Short-circuit query if this host is not allowed to perform the action */
1713     if (pcmk__str_eq(search->action, "reboot", pcmk__str_casei)) {
1714         /* A "reboot" *might* get remapped to "off" then "on", so short-circuit
1715          * only if all three are disallowed. If only one or two are disallowed,
1716          * we'll report that with the results. We never allow suicide for
1717          * remapped "on" operations because the host is off at that point.
1718          */
1719         if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide)
1720             && !localhost_is_eligible(dev, "off", host, search->allow_suicide)
1721             && !localhost_is_eligible(dev, "on", host, FALSE)) {
1722             goto search_report_results;
1723         }
1724     } else if (!localhost_is_eligible(dev, search->action, host,
1725                                       search->allow_suicide)) {
1726         goto search_report_results;
1727     }
1728 
1729     alias = g_hash_table_lookup(dev->aliases, host);
1730     if (alias == NULL) {
1731         alias = host;
1732     }
1733 
1734     check_type = target_list_type(dev);
1735 
1736     if (pcmk__str_eq(check_type, "none", pcmk__str_casei)) {
1737         can = TRUE;
1738 
1739     } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) {
1740 
1741         /* Presence in the hostmap is sufficient
1742          * Only use if all hosts on which the device can be active can always fence all listed hosts
1743          */
1744 
1745         if (string_in_list(dev->targets, host)) {
1746             can = TRUE;
1747         } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)
1748                    && g_hash_table_lookup(dev->aliases, host)) {
1749             can = TRUE;
1750         }
1751 
1752     } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) {
1753         time_t now = time(NULL);
1754 
1755         if (dev->targets == NULL || dev->targets_age + 60 < now) {
1756             crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
1757                       check_type, dev->id, search->host, search->action);
1758 
1759             schedule_internal_command(__func__, dev, "list", NULL,
1760                                       search->per_device_timeout, search, dynamic_list_search_cb);
1761 
1762             /* we'll respond to this search request async in the cb */
1763             return;
1764         }
1765 
1766         if (string_in_list(dev->targets, alias)) {
1767             can = TRUE;
1768         }
1769 
1770     } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) {
1771         crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)",
1772                   check_type, dev->id, search->host, search->action);
1773         schedule_internal_command(__func__, dev, "status", search->host,
1774                                   search->per_device_timeout, search, status_search_cb);
1775         /* we'll respond to this search request async in the cb */
1776         return;
1777     } else {
1778         crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type);
1779         check_type = "Invalid " PCMK_STONITH_HOST_CHECK;
1780     }
1781 
1782     if (pcmk__str_eq(host, alias, pcmk__str_casei)) {
1783         crm_notice("%s is%s eligible to fence (%s) %s: %s",
1784                    dev->id, (can? "" : " not"), search->action, host,
1785                    check_type);
1786     } else {
1787         crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s",
1788                    dev->id, (can? "" : " not"), search->action, host, alias,
1789                    check_type);
1790     }
1791 
1792   search_report_results:
1793     search_devices_record_result(search, dev ? dev->id : NULL, can);
1794 }
1795 
1796 static void
1797 search_devices(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1798 {
1799     stonith_device_t *dev = value;
1800     struct device_search_s *search = user_data;
1801 
1802     can_fence_host_with_device(dev, search);
1803 }
1804 
1805 #define DEFAULT_QUERY_TIMEOUT 20
1806 static void
1807 get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data,
     /* [previous][next][first][last][top][bottom][index][help] */
1808                     void (*callback) (GList * devices, void *user_data))
1809 {
1810     struct device_search_s *search;
1811     int per_device_timeout = DEFAULT_QUERY_TIMEOUT;
1812     int devices_needing_async_query = 0;
1813     char *key = NULL;
1814     const char *check_type = NULL;
1815     GHashTableIter gIter;
1816     stonith_device_t *device = NULL;
1817     guint ndevices = g_hash_table_size(device_list);
1818 
1819     if (ndevices == 0) {
1820         callback(NULL, user_data);
1821         return;
1822     }
1823 
1824     search = calloc(1, sizeof(struct device_search_s));
1825     if (!search) {
1826         callback(NULL, user_data);
1827         return;
1828     }
1829 
1830     g_hash_table_iter_init(&gIter, device_list);
1831     while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) {
1832         check_type = target_list_type(device);
1833         if (pcmk__strcase_any_of(check_type, "status", "dynamic-list", NULL)) {
1834             devices_needing_async_query++;
1835         }
1836     }
1837 
1838     /* If we have devices that require an async event in order to know what
1839      * nodes they can fence, we have to give the events a timeout. The total
1840      * query timeout is divided among those events. */
1841     if (devices_needing_async_query) {
1842         per_device_timeout = timeout / devices_needing_async_query;
1843         if (!per_device_timeout) {
1844             crm_err("Fencing timeout %ds is too low; using %ds, "
1845                     "but consider raising to at least %ds",
1846                     timeout, DEFAULT_QUERY_TIMEOUT,
1847                     DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
1848             per_device_timeout = DEFAULT_QUERY_TIMEOUT;
1849         } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) {
1850             crm_notice("Fencing timeout %ds is low for the current "
1851                        "configuration; consider raising to at least %ds",
1852                        timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query);
1853         }
1854     }
1855 
1856     search->host = host ? strdup(host) : NULL;
1857     search->action = action ? strdup(action) : NULL;
1858     search->per_device_timeout = per_device_timeout;
1859     /* We are guaranteed this many replies. Even if a device gets
1860      * unregistered some how during the async search, we will get
1861      * the correct number of replies. */
1862     search->replies_needed = ndevices;
1863     search->allow_suicide = suicide;
1864     search->callback = callback;
1865     search->user_data = user_data;
1866     /* kick off the search */
1867 
1868     crm_debug("Searching %d device%s to see which can execute '%s' targeting %s",
1869               ndevices, pcmk__plural_s(ndevices),
1870               (search->action? search->action : "unknown action"),
1871               (search->host? search->host : "any node"));
1872     g_hash_table_foreach(device_list, search_devices, search);
1873 }
1874 
1875 struct st_query_data {
1876     xmlNode *reply;
1877     char *remote_peer;
1878     char *client_id;
1879     char *target;
1880     char *action;
1881     int call_options;
1882 };
1883 
1884 /*!
1885  * \internal
1886  * \brief Add action-specific attributes to query reply XML
1887  *
1888  * \param[in,out] xml     XML to add attributes to
1889  * \param[in]     action  Fence action
1890  * \param[in]     device  Fence device
1891  */
1892 static void
1893 add_action_specific_attributes(xmlNode *xml, const char *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1894                                stonith_device_t *device)
1895 {
1896     int action_specific_timeout;
1897     int delay_max;
1898     int delay_base;
1899 
1900     CRM_CHECK(xml && action && device, return);
1901 
1902     if (is_action_required(action, device)) {
1903         crm_trace("Action '%s' is required using %s", action, device->id);
1904         crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1);
1905     }
1906 
1907     action_specific_timeout = get_action_timeout(device, action, 0);
1908     if (action_specific_timeout) {
1909         crm_trace("Action '%s' has timeout %dms using %s",
1910                   action, action_specific_timeout, device->id);
1911         crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout);
1912     }
1913 
1914     delay_max = get_action_delay_max(device, action);
1915     if (delay_max > 0) {
1916         crm_trace("Action '%s' has maximum random delay %dms using %s",
1917                   action, delay_max, device->id);
1918         crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000);
1919     }
1920 
1921     delay_base = get_action_delay_base(device, action);
1922     if (delay_base > 0) {
1923         crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000);
1924     }
1925 
1926     if ((delay_max > 0) && (delay_base == 0)) {
1927         crm_trace("Action '%s' has maximum random delay %dms using %s",
1928                   action, delay_max, device->id);
1929     } else if ((delay_max == 0) && (delay_base > 0)) {
1930         crm_trace("Action '%s' has a static delay of %dms using %s",
1931                   action, delay_base, device->id);
1932     } else if ((delay_max > 0) && (delay_base > 0)) {
1933         crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen "
1934                   "maximum delay of %dms using %s",
1935                   action, delay_base, delay_max, device->id);
1936     }
1937 }
1938 
1939 /*!
1940  * \internal
1941  * \brief Add "disallowed" attribute to query reply XML if appropriate
1942  *
1943  * \param[in,out] xml            XML to add attribute to
1944  * \param[in]     action         Fence action
1945  * \param[in]     device         Fence device
1946  * \param[in]     target         Fence target
1947  * \param[in]     allow_suicide  Whether self-fencing is allowed
1948  */
1949 static void
1950 add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1951                const char *target, gboolean allow_suicide)
1952 {
1953     if (!localhost_is_eligible(device, action, target, allow_suicide)) {
1954         crm_trace("Action '%s' using %s is disallowed for local host",
1955                   action, device->id);
1956         crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE);
1957     }
1958 }
1959 
1960 /*!
1961  * \internal
1962  * \brief Add child element with action-specific values to query reply XML
1963  *
1964  * \param[in,out] xml            XML to add attribute to
1965  * \param[in]     action         Fence action
1966  * \param[in]     device         Fence device
1967  * \param[in]     target         Fence target
1968  * \param[in]     allow_suicide  Whether self-fencing is allowed
1969  */
1970 static void
1971 add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1972                const char *target, gboolean allow_suicide)
1973 {
1974     xmlNode *child = create_xml_node(xml, F_STONITH_ACTION);
1975 
1976     crm_xml_add(child, XML_ATTR_ID, action);
1977     add_action_specific_attributes(child, action, device);
1978     add_disallowed(child, action, device, target, allow_suicide);
1979 }
1980 
1981 static void
1982 stonith_query_capable_device_cb(GList * devices, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1983 {
1984     struct st_query_data *query = user_data;
1985     int available_devices = 0;
1986     xmlNode *dev = NULL;
1987     xmlNode *list = NULL;
1988     GList *lpc = NULL;
1989 
1990     /* Pack the results into XML */
1991     list = create_xml_node(NULL, __func__);
1992     crm_xml_add(list, F_STONITH_TARGET, query->target);
1993     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
1994         stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
1995         const char *action = query->action;
1996 
1997         if (!device) {
1998             /* It is possible the device got unregistered while
1999              * determining who can fence the target */
2000             continue;
2001         }
2002 
2003         available_devices++;
2004 
2005         dev = create_xml_node(list, F_STONITH_DEVICE);
2006         crm_xml_add(dev, XML_ATTR_ID, device->id);
2007         crm_xml_add(dev, "namespace", device->namespace);
2008         crm_xml_add(dev, "agent", device->agent);
2009         crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified);
2010 
2011         /* If the originating fencer wants to reboot the node, and we have a
2012          * capable device that doesn't support "reboot", remap to "off" instead.
2013          */
2014         if (!pcmk_is_set(device->flags, st_device_supports_reboot)
2015             && pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) {
2016             crm_trace("%s doesn't support reboot, using values for off instead",
2017                       device->id);
2018             action = "off";
2019         }
2020 
2021         /* Add action-specific values if available */
2022         add_action_specific_attributes(dev, action, device);
2023         if (pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) {
2024             /* A "reboot" *might* get remapped to "off" then "on", so after
2025              * sending the "reboot"-specific values in the main element, we add
2026              * sub-elements for "off" and "on" values.
2027              *
2028              * We short-circuited earlier if "reboot", "off" and "on" are all
2029              * disallowed for the local host. However if only one or two are
2030              * disallowed, we send back the results and mark which ones are
2031              * disallowed. If "reboot" is disallowed, this might cause problems
2032              * with older fencer versions, which won't check for it. Older
2033              * versions will ignore "off" and "on", so they are not a problem.
2034              */
2035             add_disallowed(dev, action, device, query->target,
2036                            pcmk_is_set(query->call_options, st_opt_allow_suicide));
2037             add_action_reply(dev, "off", device, query->target,
2038                              pcmk_is_set(query->call_options, st_opt_allow_suicide));
2039             add_action_reply(dev, "on", device, query->target, FALSE);
2040         }
2041 
2042         /* A query without a target wants device parameters */
2043         if (query->target == NULL) {
2044             xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS);
2045 
2046             g_hash_table_foreach(device->params, hash2field, attrs);
2047         }
2048     }
2049 
2050     crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices);
2051     if (query->target) {
2052         crm_debug("Found %d matching device%s for target '%s'",
2053                   available_devices, pcmk__plural_s(available_devices),
2054                   query->target);
2055     } else {
2056         crm_debug("%d device%s installed",
2057                   available_devices, pcmk__plural_s(available_devices));
2058     }
2059 
2060     if (list != NULL) {
2061         crm_log_xml_trace(list, "Add query results");
2062         add_message_xml(query->reply, F_STONITH_CALLDATA, list);
2063     }
2064     stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id);
2065 
2066     free_xml(query->reply);
2067     free(query->remote_peer);
2068     free(query->client_id);
2069     free(query->target);
2070     free(query->action);
2071     free(query);
2072     free_xml(list);
2073     g_list_free_full(devices, free);
2074 }
2075 
2076 static void
2077 stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options)
     /* [previous][next][first][last][top][bottom][index][help] */
2078 {
2079     struct st_query_data *query = NULL;
2080     const char *action = NULL;
2081     const char *target = NULL;
2082     int timeout = 0;
2083     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER);
2084 
2085     crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout);
2086     if (dev) {
2087         const char *device = crm_element_value(dev, F_STONITH_DEVICE);
2088 
2089         target = crm_element_value(dev, F_STONITH_TARGET);
2090         action = crm_element_value(dev, F_STONITH_ACTION);
2091         if (device && pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
2092             /* No query or reply necessary */
2093             return;
2094         }
2095     }
2096 
2097     crm_log_xml_debug(msg, "Query");
2098     query = calloc(1, sizeof(struct st_query_data));
2099 
2100     query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok);
2101     query->remote_peer = remote_peer ? strdup(remote_peer) : NULL;
2102     query->client_id = client_id ? strdup(client_id) : NULL;
2103     query->target = target ? strdup(target) : NULL;
2104     query->action = action ? strdup(action) : NULL;
2105     query->call_options = call_options;
2106 
2107     get_capable_devices(target, action, timeout,
2108                         pcmk_is_set(call_options, st_opt_allow_suicide),
2109                         query, stonith_query_capable_device_cb);
2110 }
2111 
2112 #define ST_LOG_OUTPUT_MAX 512
2113 static void
2114 log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output, gboolean op_merged)
     /* [previous][next][first][last][top][bottom][index][help] */
2115 {
2116     if (rc == 0) {
2117         next = NULL;
2118     }
2119 
2120     if (cmd->victim != NULL) {
2121         do_crm_log(((rc == 0)? LOG_NOTICE : LOG_ERR),
2122                    "Operation '%s' [%d] (%scall %d from %s) targeting %s "
2123                    "using %s returned %d (%s)%s%s",
2124                    cmd->action, pid, (op_merged? "merged " : ""), cmd->id,
2125                    cmd->client_name, cmd->victim,
2126                    cmd->device, rc, pcmk_strerror(rc),
2127                    (next? ", retrying with " : ""), (next ? next : ""));
2128     } else {
2129         do_crm_log_unlikely(((rc == 0)? LOG_DEBUG : LOG_NOTICE),
2130                             "Operation '%s' [%d]%s using %s returned %d (%s)%s%s",
2131                             cmd->action, pid, (op_merged? " (merged)" : ""),
2132                             cmd->device, rc, pcmk_strerror(rc),
2133                             (next? ", retrying with " : ""), (next ? next : ""));
2134     }
2135 
2136     if (output) {
2137         // Output may have multiple lines
2138         char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
2139 
2140         crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output);
2141         free(prefix);
2142     }
2143 }
2144 
2145 static void
2146 stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid, int options)
     /* [previous][next][first][last][top][bottom][index][help] */
2147 {
2148     xmlNode *reply = NULL;
2149     gboolean bcast = FALSE;
2150 
2151     reply = stonith_construct_async_reply(cmd, output, NULL, rc);
2152 
2153     if (pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
2154         /* Too verbose to log */
2155         crm_trace("Metadata query for %s", cmd->device);
2156         output = NULL;
2157 
2158     } else if (pcmk__str_any_of(cmd->action, "monitor", "list", "status", NULL)) {
2159         crm_trace("Never broadcast '%s' replies", cmd->action);
2160 
2161     } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) {
2162         crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim);
2163         crm_xml_add(reply, F_SUBTYPE, "broadcast");
2164         bcast = TRUE;
2165     }
2166 
2167     log_operation(cmd, rc, pid, NULL, output, (options & st_reply_opt_merged ? TRUE : FALSE));
2168     crm_log_xml_trace(reply, "Reply");
2169 
2170     if (options & st_reply_opt_merged) {
2171         crm_xml_add(reply, F_STONITH_MERGED, "true");
2172     }
2173 
2174     if (bcast) {
2175         crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY);
2176         send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE);
2177 
2178     } else if (cmd->origin) {
2179         crm_trace("Directed reply to %s", cmd->origin);
2180         send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE);
2181 
2182     } else {
2183         crm_trace("Directed local %ssync reply to %s",
2184                   (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name);
2185         do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE);
2186     }
2187 
2188     if (stand_alone) {
2189         /* Do notification with a clean data object */
2190         xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
2191 
2192         crm_xml_add_int(notify_data, F_STONITH_RC, rc);
2193         crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
2194         crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op);
2195         crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost");
2196         crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device);
2197         crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
2198         crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client);
2199 
2200         do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data);
2201         do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL);
2202     }
2203 
2204     free_xml(reply);
2205 }
2206 
2207 static void
2208 cancel_stonith_command(async_command_t * cmd)
     /* [previous][next][first][last][top][bottom][index][help] */
2209 {
2210     stonith_device_t *device;
2211 
2212     CRM_CHECK(cmd != NULL, return);
2213 
2214     if (!cmd->device) {
2215         return;
2216     }
2217 
2218     device = g_hash_table_lookup(device_list, cmd->device);
2219 
2220     if (device) {
2221         crm_trace("Cancel scheduled '%s' action using %s",
2222                   cmd->action, device->id);
2223         device->pending_ops = g_list_remove(device->pending_ops, cmd);
2224     }
2225 }
2226 
2227 static void
2228 st_child_done(GPid pid, int rc, const char *output, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2229 {
2230     stonith_device_t *device = NULL;
2231     stonith_device_t *next_device = NULL;
2232     async_command_t *cmd = user_data;
2233 
2234     GList *gIter = NULL;
2235     GList *gIterNext = NULL;
2236 
2237     CRM_CHECK(cmd != NULL, return);
2238 
2239     cmd->active_on = NULL;
2240 
2241     /* The device is ready to do something else now */
2242     device = g_hash_table_lookup(device_list, cmd->device);
2243     if (device) {
2244         if (!device->verified && (rc == pcmk_ok) &&
2245             (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
2246 
2247             device->verified = TRUE;
2248         }
2249 
2250         mainloop_set_trigger(device->work);
2251     }
2252 
2253     crm_debug("Operation '%s' using %s returned %d (%d devices remaining)",
2254               cmd->action, cmd->device, rc, g_list_length(cmd->device_next));
2255 
2256     if (rc == 0) {
2257         GList *iter;
2258         /* see if there are any required devices left to execute for this op */
2259         for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
2260             next_device = g_hash_table_lookup(device_list, iter->data);
2261 
2262             if (next_device != NULL && is_action_required(cmd->action, next_device)) {
2263                 cmd->device_next = iter->next;
2264                 break;
2265             }
2266             next_device = NULL;
2267         }
2268 
2269     } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
2270         /* if this device didn't work out, see if there are any others we can try.
2271          * if the failed device was 'required', we can't pick another device. */
2272         next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
2273         cmd->device_next = cmd->device_next->next;
2274     }
2275 
2276     /* this operation requires more fencing, hooray! */
2277     if (next_device) {
2278         log_operation(cmd, rc, pid, next_device->id, output, FALSE);
2279 
2280         schedule_stonith_command(cmd, next_device);
2281         /* Prevent cmd from being freed */
2282         cmd = NULL;
2283         goto done;
2284     }
2285 
2286     stonith_send_async_reply(cmd, output, rc, pid, st_reply_opt_none);
2287 
2288     if (rc != 0) {
2289         goto done;
2290     }
2291 
2292     /* Check to see if any operations are scheduled to do the exact
2293      * same thing that just completed.  If so, rather than
2294      * performing the same fencing operation twice, return the result
2295      * of this operation for all pending commands it matches. */
2296     for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) {
2297         async_command_t *cmd_other = gIter->data;
2298 
2299         gIterNext = gIter->next;
2300 
2301         if (cmd == cmd_other) {
2302             continue;
2303         }
2304 
2305         /* A pending scheduled command matches the command that just finished if.
2306          * 1. The client connections are different.
2307          * 2. The node victim is the same.
2308          * 3. The fencing action is the same.
2309          * 4. The device scheduled to execute the action is the same.
2310          */
2311         if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
2312             !pcmk__str_eq(cmd->victim, cmd_other->victim, pcmk__str_casei) ||
2313             !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_casei) ||
2314             !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {
2315 
2316             continue;
2317         }
2318 
2319         /* Duplicate merging will do the right thing for either type of remapped
2320          * reboot. If the executing fencer remapped an unsupported reboot to
2321          * off, then cmd->action will be reboot and will be merged with any
2322          * other reboot requests. If the originating fencer remapped a
2323          * topology reboot to off then on, we will get here once with
2324          * cmd->action "off" and once with "on", and they will be merged
2325          * separately with similar requests.
2326          */
2327         crm_notice("Merging fencing action '%s' targeting %s originating from "
2328                    "client %s with identical fencing request from client %s",
2329                    cmd_other->action, cmd_other->victim, cmd_other->client_name,
2330                    cmd->client_name);
2331 
2332         cmd_list = g_list_remove_link(cmd_list, gIter);
2333 
2334         stonith_send_async_reply(cmd_other, output, rc, pid, st_reply_opt_merged);
2335         cancel_stonith_command(cmd_other);
2336 
2337         free_async_command(cmd_other);
2338         g_list_free_1(gIter);
2339     }
2340 
2341   done:
2342     free_async_command(cmd);
2343 }
2344 
2345 static gint
2346 sort_device_priority(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
2347 {
2348     const stonith_device_t *dev_a = a;
2349     const stonith_device_t *dev_b = b;
2350 
2351     if (dev_a->priority > dev_b->priority) {
2352         return -1;
2353     } else if (dev_a->priority < dev_b->priority) {
2354         return 1;
2355     }
2356     return 0;
2357 }
2358 
2359 static void
2360 stonith_fence_get_devices_cb(GList * devices, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
2361 {
2362     async_command_t *cmd = user_data;
2363     stonith_device_t *device = NULL;
2364     guint ndevices = g_list_length(devices);
2365 
2366     crm_info("Found %d matching device%s for target '%s'",
2367              ndevices, pcmk__plural_s(ndevices), cmd->victim);
2368 
2369     if (devices != NULL) {
2370         /* Order based on priority */
2371         devices = g_list_sort(devices, sort_device_priority);
2372         device = g_hash_table_lookup(device_list, devices->data);
2373 
2374         if (device) {
2375             cmd->device_list = devices;
2376             cmd->device_next = devices->next;
2377             devices = NULL;     /* list owned by cmd now */
2378         }
2379     }
2380 
2381     /* we have a device, schedule it for fencing. */
2382     if (device) {
2383         schedule_stonith_command(cmd, device);
2384         /* in progress */
2385         return;
2386     }
2387 
2388     /* no device found! */
2389     stonith_send_async_reply(cmd, NULL, -ENODEV, 0, st_reply_opt_none);
2390 
2391     free_async_command(cmd);
2392     g_list_free_full(devices, free);
2393 }
2394 
2395 static int
2396 stonith_fence(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
2397 {
2398     const char *device_id = NULL;
2399     stonith_device_t *device = NULL;
2400     async_command_t *cmd = create_async_command(msg);
2401     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
2402 
2403     if (cmd == NULL) {
2404         return -EPROTO;
2405     }
2406 
2407     device_id = crm_element_value(dev, F_STONITH_DEVICE);
2408     if (device_id) {
2409         device = g_hash_table_lookup(device_list, device_id);
2410         if (device == NULL) {
2411             crm_err("Requested device '%s' is not available", device_id);
2412             return -ENODEV;
2413         }
2414         schedule_stonith_command(cmd, device);
2415 
2416     } else {
2417         const char *host = crm_element_value(dev, F_STONITH_TARGET);
2418 
2419         if (cmd->options & st_opt_cs_nodeid) {
2420             int nodeid;
2421             crm_node_t *node;
2422 
2423             pcmk__scan_min_int(host, &nodeid, 0);
2424             node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);
2425             if (node) {
2426                 host = node->uname;
2427             }
2428         }
2429 
2430         /* If we get to here, then self-fencing is implicitly allowed */
2431         get_capable_devices(host, cmd->action, cmd->default_timeout,
2432                             TRUE, cmd, stonith_fence_get_devices_cb);
2433     }
2434 
2435     return -EINPROGRESS;
2436 }
2437 
2438 xmlNode *
2439 stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
2440 {
2441     xmlNode *reply = NULL;
2442 
2443     reply = create_xml_node(NULL, T_STONITH_REPLY);
2444 
2445     crm_xml_add(reply, "st_origin", __func__);
2446     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
2447     crm_xml_add(reply, "st_output", output);
2448     crm_xml_add_int(reply, F_STONITH_RC, rc);
2449 
2450     if (request == NULL) {
2451         /* Most likely, this is the result of a stonith operation that was
2452          * initiated before we came up. Unfortunately that means we lack enough
2453          * information to provide clients with a full result.
2454          *
2455          * @TODO Maybe synchronize this information at start-up?
2456          */
2457         crm_warn("Missing request information for client notifications for "
2458                  "operation with result %d (initiated before we came up?)", rc);
2459 
2460     } else {
2461         const char *name = NULL;
2462         const char *value = NULL;
2463 
2464         const char *names[] = {
2465             F_STONITH_OPERATION,
2466             F_STONITH_CALLID,
2467             F_STONITH_CLIENTID,
2468             F_STONITH_CLIENTNAME,
2469             F_STONITH_REMOTE_OP_ID,
2470             F_STONITH_CALLOPTS
2471         };
2472 
2473         crm_trace("Creating a result reply with%s reply output (rc=%d)",
2474                   (data? "" : "out"), rc);
2475         for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) {
2476             name = names[lpc];
2477             value = crm_element_value(request, name);
2478             crm_xml_add(reply, name, value);
2479         }
2480         if (data != NULL) {
2481             add_message_xml(reply, F_STONITH_CALLDATA, data);
2482         }
2483     }
2484     return reply;
2485 }
2486 
2487 static xmlNode *
2488 stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
2489 {
2490     xmlNode *reply = NULL;
2491 
2492     crm_trace("Creating a basic reply");
2493     reply = create_xml_node(NULL, T_STONITH_REPLY);
2494 
2495     crm_xml_add(reply, "st_origin", __func__);
2496     crm_xml_add(reply, F_TYPE, T_STONITH_NG);
2497 
2498     crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
2499     crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
2500     crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
2501     crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client);
2502     crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name);
2503     crm_xml_add(reply, F_STONITH_TARGET, cmd->victim);
2504     crm_xml_add(reply, F_STONITH_ACTION, cmd->op);
2505     crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
2506     crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
2507     crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
2508 
2509     crm_xml_add_int(reply, F_STONITH_RC, rc);
2510 
2511     crm_xml_add(reply, "st_output", output);
2512 
2513     if (data != NULL) {
2514         crm_info("Attaching reply output");
2515         add_message_xml(reply, F_STONITH_CALLDATA, data);
2516     }
2517     return reply;
2518 }
2519 
2520 bool fencing_peer_active(crm_node_t *peer)
     /* [previous][next][first][last][top][bottom][index][help] */
2521 {
2522     if (peer == NULL) {
2523         return FALSE;
2524     } else if (peer->uname == NULL) {
2525         return FALSE;
2526     } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
2527         return TRUE;
2528     }
2529     return FALSE;
2530 }
2531 
2532 /*!
2533  * \internal
2534  * \brief Determine if we need to use an alternate node to
2535  * fence the target. If so return that node's uname
2536  *
2537  * \retval NULL, no alternate host
2538  * \retval uname, uname of alternate host to use
2539  */
2540 static const char *
2541 check_alternate_host(const char *target)
     /* [previous][next][first][last][top][bottom][index][help] */
2542 {
2543     const char *alternate_host = NULL;
2544 
2545     crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target);
2546     if (find_topology_for_host(target) && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
2547         GHashTableIter gIter;
2548         crm_node_t *entry = NULL;
2549 
2550         g_hash_table_iter_init(&gIter, crm_peer_cache);
2551         while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
2552             crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target);
2553             if (fencing_peer_active(entry)
2554                 && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) {
2555                 alternate_host = entry->uname;
2556                 break;
2557             }
2558         }
2559         if (alternate_host == NULL) {
2560             crm_err("No alternate host available to handle request "
2561                     "for self-fencing with topology");
2562             g_hash_table_iter_init(&gIter, crm_peer_cache);
2563             while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
2564                 crm_notice("Peer[%d] %s", entry->id, entry->uname);
2565             }
2566         }
2567     }
2568 
2569     return alternate_host;
2570 }
2571 
2572 static void
2573 stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
     /* [previous][next][first][last][top][bottom][index][help] */
2574                    const char *client_id)
2575 {
2576     if (remote_peer) {
2577         send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE);
2578     } else {
2579         do_local_reply(reply, client_id,
2580                        pcmk_is_set(call_options, st_opt_sync_call),
2581                        (remote_peer != NULL));
2582     }
2583 }
2584 
2585 static void 
2586 remove_relay_op(xmlNode * request)
     /* [previous][next][first][last][top][bottom][index][help] */
2587 {
2588     xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE);
2589     const char *relay_op_id = NULL; 
2590     const char *op_id = NULL;
2591     const char *client_name = NULL;
2592     const char *target = NULL; 
2593     remote_fencing_op_t *relay_op = NULL; 
2594 
2595     if (dev) { 
2596         target = crm_element_value(dev, F_STONITH_TARGET); 
2597     }
2598 
2599     relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY);
2600     op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
2601     client_name = crm_element_value(request, F_STONITH_CLIENTNAME);
2602 
2603     /* Delete RELAY operation. */
2604     if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) {
2605         relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id);
2606 
2607         if (relay_op) {
2608             GHashTableIter iter;
2609             remote_fencing_op_t *list_op = NULL; 
2610             g_hash_table_iter_init(&iter, stonith_remote_op_list);
2611 
2612             /* If the operation to be deleted is registered as a duplicate, delete the registration. */
2613             while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) {
2614                 GList *dup_iter = NULL;
2615                 if (list_op != relay_op) {
2616                     for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) {
2617                         remote_fencing_op_t *other = dup_iter->data;
2618                         if (other == relay_op) {
2619                             other->duplicates = g_list_remove(other->duplicates, relay_op);
2620                             break;
2621                         }
2622                     }
2623                 }
2624             }
2625             crm_debug("Deleting relay op %s ('%s' targeting %s for %s), "
2626                       "replaced by op %s ('%s' targeting %s for %s)",
2627                       relay_op->id, relay_op->action, relay_op->target,
2628                       relay_op->client_name, op_id, relay_op->action, target,
2629                       client_name);
2630 
2631             g_hash_table_remove(stonith_remote_op_list, relay_op_id);
2632         }
2633     }
2634 }
2635 
2636 static int
2637 handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags,
     /* [previous][next][first][last][top][bottom][index][help] */
2638                xmlNode *request, const char *remote_peer)
2639 {
2640     int call_options = 0;
2641     int rc = -EOPNOTSUPP;
2642 
2643     xmlNode *data = NULL;
2644     xmlNode *reply = NULL;
2645 
2646     char *output = NULL;
2647     const char *op = crm_element_value(request, F_STONITH_OPERATION);
2648     const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
2649 
2650     /* IPC commands related to fencing configuration may be done only by
2651      * privileged users (i.e. root or hacluster), because all other users should
2652      * go through the CIB to have ACLs applied.
2653      *
2654      * If no client was given, this is a peer request, which is always allowed.
2655      */
2656     bool allowed = (client == NULL)
2657                    || pcmk_is_set(client->flags, pcmk__client_privileged);
2658 
2659     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
2660 
2661     if (pcmk_is_set(call_options, st_opt_sync_call)) {
2662         CRM_ASSERT(client == NULL || client->request_id == id);
2663     }
2664 
2665     if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) {
2666         xmlNode *reply = create_xml_node(NULL, "reply");
2667 
2668         CRM_ASSERT(client);
2669         crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER);
2670         crm_xml_add(reply, F_STONITH_CLIENTID, client->id);
2671         pcmk__ipc_send_xml(client, id, reply, flags);
2672         client->request_id = 0;
2673         free_xml(reply);
2674         return 0;
2675 
2676     } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) {
2677         rc = stonith_device_action(request, &output);
2678 
2679     } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) {
2680         const char *call_id = crm_element_value(request, F_STONITH_CALLID);
2681         const char *client_id = crm_element_value(request, F_STONITH_CLIENTID);
2682         int op_timeout = 0;
2683 
2684         crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout);
2685         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
2686         return 0;
2687 
2688     } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
2689         if (remote_peer) {
2690             create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */
2691         }
2692 
2693         /* Delete the DC node RELAY operation. */
2694         remove_relay_op(request);
2695 
2696         stonith_query(request, remote_peer, client_id, call_options);
2697         return 0;
2698 
2699     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
2700         const char *flag_name = NULL;
2701 
2702         CRM_ASSERT(client);
2703         flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE);
2704         if (flag_name) {
2705             crm_debug("Enabling %s callbacks for client %s",
2706                       flag_name, pcmk__client_name(client));
2707             pcmk__set_client_flags(client, get_stonith_flag(flag_name));
2708         }
2709 
2710         flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE);
2711         if (flag_name) {
2712             crm_debug("Disabling %s callbacks for client %s",
2713                       flag_name, pcmk__client_name(client));
2714             pcmk__clear_client_flags(client, get_stonith_flag(flag_name));
2715         }
2716 
2717         pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK);
2718         return 0;
2719 
2720     } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) {
2721         xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2722 
2723         crm_notice("Received forwarded fencing request from "
2724                    "%s %s to fence (%s) peer %s",
2725                    ((client == NULL)? "peer" : "client"),
2726                    ((client == NULL)? remote_peer : pcmk__client_name(client)),
2727                    crm_element_value(dev, F_STONITH_ACTION),
2728                    crm_element_value(dev, F_STONITH_TARGET));
2729 
2730         if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) {
2731             rc = -EINPROGRESS;
2732         }
2733 
2734     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
2735 
2736         if (remote_peer || stand_alone) {
2737             rc = stonith_fence(request);
2738 
2739         } else if (call_options & st_opt_manual_ack) {
2740             remote_fencing_op_t *rop = NULL;
2741             xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2742             const char *target = crm_element_value(dev, F_STONITH_TARGET);
2743 
2744             crm_notice("Received manual confirmation that %s is fenced", target);
2745             rop = initiate_remote_stonith_op(client, request, TRUE);
2746             rc = stonith_manual_ack(request, rop);
2747 
2748         } else {
2749             const char *alternate_host = NULL;
2750             xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE);
2751             const char *target = crm_element_value(dev, F_STONITH_TARGET);
2752             const char *action = crm_element_value(dev, F_STONITH_ACTION);
2753             const char *device = crm_element_value(dev, F_STONITH_DEVICE);
2754 
2755             if (client) {
2756                 int tolerance = 0;
2757 
2758                 crm_notice("Client %s wants to fence (%s) %s using %s",
2759                            pcmk__client_name(client), action,
2760                            target, (device? device : "any device"));
2761 
2762                 crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance);
2763 
2764                 if (stonith_check_fence_tolerance(tolerance, target, action)) {
2765                     rc = 0;
2766                     goto done;
2767                 }
2768 
2769             } else {
2770                 crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'",
2771                            remote_peer, action, target, device ? device : "(any)");
2772             }
2773 
2774             alternate_host = check_alternate_host(target);
2775 
2776             if (alternate_host && client) {
2777                 const char *client_id = NULL;
2778                 remote_fencing_op_t *op = NULL;
2779 
2780                 crm_notice("Forwarding self-fencing request to peer %s"
2781                            "due to topology", alternate_host);
2782 
2783                 if (client->id) {
2784                     client_id = client->id;
2785                 } else {
2786                     client_id = crm_element_value(request, F_STONITH_CLIENTID);
2787                 }
2788 
2789                 /* Create an operation for RELAY and send the ID in the RELAY message. */
2790                 /* When a QUERY response is received, delete the RELAY operation to avoid the existence of duplicate operations. */
2791                 op = create_remote_stonith_op(client_id, request, FALSE);
2792 
2793                 crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY);
2794                 crm_xml_add(request, F_STONITH_CLIENTID, client->id);
2795                 crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id);
2796                 send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request,
2797                                      FALSE);
2798                 rc = -EINPROGRESS;
2799 
2800             } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) {
2801                 rc = -EINPROGRESS;
2802             }
2803         }
2804 
2805     } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) {
2806         rc = stonith_fence_history(request, &data, remote_peer, call_options);
2807         if (call_options & st_opt_discard_reply) {
2808             /* we don't expect answers to the broadcast
2809              * we might have sent out
2810              */
2811             free_xml(data);
2812             return pcmk_ok;
2813         }
2814 
2815     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
2816         const char *device_id = NULL;
2817 
2818         if (allowed) {
2819             rc = stonith_device_register(request, &device_id, FALSE);
2820         } else {
2821             rc = -EACCES;
2822         }
2823         do_stonith_notify_device(call_options, op, rc, device_id);
2824 
2825     } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
2826         xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR);
2827         const char *device_id = crm_element_value(dev, XML_ATTR_ID);
2828 
2829         if (allowed) {
2830             rc = stonith_device_remove(device_id, FALSE);
2831         } else {
2832             rc = -EACCES;
2833         }
2834         do_stonith_notify_device(call_options, op, rc, device_id);
2835 
2836     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) {
2837         char *device_id = NULL;
2838 
2839         if (allowed) {
2840             rc = stonith_level_register(request, &device_id);
2841         } else {
2842             rc = -EACCES;
2843         }
2844         do_stonith_notify_level(call_options, op, rc, device_id);
2845         free(device_id);
2846 
2847     } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) {
2848         char *device_id = NULL;
2849 
2850         if (allowed) {
2851             rc = stonith_level_remove(request, &device_id);
2852         } else {
2853             rc = -EACCES;
2854         }
2855         do_stonith_notify_level(call_options, op, rc, device_id);
2856 
2857     } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
2858         int node_id = 0;
2859         const char *name = NULL;
2860 
2861         crm_element_value_int(request, XML_ATTR_ID, &node_id);
2862         name = crm_element_value(request, XML_ATTR_UNAME);
2863         reap_crm_member(node_id, name);
2864 
2865         return pcmk_ok;
2866 
2867     } else {
2868         crm_err("Unknown IPC request %s from %s %s", op,
2869                 ((client == NULL)? "peer" : "client"),
2870                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
2871     }
2872 
2873   done:
2874 
2875     if (rc == -EACCES) {
2876         crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
2877                  crm_str(op), pcmk__client_name(client));
2878     }
2879 
2880     /* Always reply unless the request is in process still.
2881      * If in progress, a reply will happen async after the request
2882      * processing is finished */
2883     if (rc != -EINPROGRESS) {
2884         crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0,
2885                   id, pcmk_is_set(call_options, st_opt_sync_call), call_options,
2886                   crm_element_value(request, F_STONITH_CALLOPTS));
2887 
2888         if (pcmk_is_set(call_options, st_opt_sync_call)) {
2889             CRM_ASSERT(client == NULL || client->request_id == id);
2890         }
2891         reply = stonith_construct_reply(request, output, data, rc);
2892         stonith_send_reply(reply, call_options, remote_peer, client_id);
2893     }
2894 
2895     free(output);
2896     free_xml(data);
2897     free_xml(reply);
2898 
2899     return rc;
2900 }
2901 
2902 static void
2903 handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer)
     /* [previous][next][first][last][top][bottom][index][help] */
2904 {
2905     const char *op = crm_element_value(request, F_STONITH_OPERATION);
2906 
2907     if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) {
2908         process_remote_stonith_query(request);
2909     } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) {
2910         process_remote_stonith_exec(request);
2911     } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) {
2912         /* Reply to a complex fencing op */
2913         process_remote_stonith_exec(request);
2914     } else {
2915         crm_err("Unknown %s reply from %s %s", op,
2916                 ((client == NULL)? "peer" : "client"),
2917                 ((client == NULL)? remote_peer : pcmk__client_name(client)));
2918         crm_log_xml_warn(request, "UnknownOp");
2919     }
2920 }
2921 
2922 void
2923 stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
     /* [previous][next][first][last][top][bottom][index][help] */
2924                 xmlNode *request, const char *remote_peer)
2925 {
2926     int call_options = 0;
2927     int rc = 0;
2928     gboolean is_reply = FALSE;
2929 
2930     /* Copy op for reporting. The original might get freed by handle_reply()
2931      * before we use it in crm_debug():
2932      *     handle_reply()
2933      *     |- process_remote_stonith_exec()
2934      *     |-- remote_op_done()
2935      *     |--- handle_local_reply_and_notify()
2936      *     |---- crm_xml_add(...F_STONITH_OPERATION...)
2937      *     |--- free_xml(op->request)
2938      */
2939     char *op = crm_element_value_copy(request, F_STONITH_OPERATION);
2940 
2941     if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) {
2942         is_reply = TRUE;
2943     }
2944 
2945     crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
2946     crm_debug("Processing %s%s %u from %s %s with call options 0x%08x",
2947               op, (is_reply? " reply" : ""), id,
2948               ((client == NULL)? "peer" : "client"),
2949               ((client == NULL)? remote_peer : pcmk__client_name(client)),
2950               call_options);
2951 
2952     if (pcmk_is_set(call_options, st_opt_sync_call)) {
2953         CRM_ASSERT(client == NULL || client->request_id == id);
2954     }
2955 
2956     if (is_reply) {
2957         handle_reply(client, request, remote_peer);
2958     } else {
2959         rc = handle_request(client, id, flags, request, remote_peer);
2960     }
2961 
2962     crm_debug("Processed %s%s from %s %s: %s (rc=%d)",
2963               op, (is_reply? " reply" : ""),
2964               ((client == NULL)? "peer" : "client"),
2965               ((client == NULL)? remote_peer : pcmk__client_name(client)),
2966               ((rc > 0)? "" : pcmk_strerror(rc)), rc);
2967     free(op);
2968 }

/* [previous][next][first][last][top][bottom][index][help] */