root/daemons/fenced/fenced_remote.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sort_strings
  2. free_remote_query
  3. free_stonith_remote_op_list
  4. count_peer_device
  5. count_peer_devices
  6. find_peer_device
  7. grab_peer_device
  8. clear_remote_op_timers
  9. free_remote_op
  10. init_stonith_remote_op_hash_table
  11. op_requested_action
  12. op_phase_off
  13. op_phase_on
  14. undo_op_remap
  15. fencing_result2xml
  16. fenced_broadcast_op_result
  17. handle_local_reply_and_notify
  18. finalize_op_duplicates
  19. delegate_from_xml
  20. finalize_op
  21. remote_op_watchdog_done
  22. remote_op_timeout_one
  23. finalize_timed_out_op
  24. remote_op_timeout
  25. remote_op_query_timeout
  26. topology_is_empty
  27. add_required_device
  28. remove_required_device
  29. set_op_device_list
  30. topology_matches
  31. find_topology_for_host
  32. advance_topology_level
  33. merge_duplicates
  34. fencing_active_peers
  35. fenced_handle_manual_confirmation
  36. create_remote_stonith_op
  37. initiate_remote_stonith_op
  38. is_watchdog_fencing
  39. find_best_peer
  40. stonith_choose_peer
  41. valid_fencing_timeout
  42. get_device_timeout
  43. add_device_timeout
  44. get_peer_timeout
  45. get_op_total_timeout
  46. report_timeout_period
  47. advance_topology_device_in_level
  48. check_watchdog_fencing_and_wait
  49. request_peer_fencing
  50. sort_peers
  51. all_topology_devices_found
  52. parse_action_specific
  53. add_device_properties
  54. add_result
  55. process_remote_stonith_query
  56. fenced_process_fencing_reply
  57. stonith_check_fence_tolerance

   1 /*
   2  * Copyright 2009-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <stdio.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 #include <sys/stat.h>
  17 #include <unistd.h>
  18 #include <sys/utsname.h>
  19 
  20 #include <stdlib.h>
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <ctype.h>
  24 #include <regex.h>
  25 
  26 #include <crm/crm.h>
  27 #include <crm/common/ipc.h>
  28 #include <crm/common/ipc_internal.h>
  29 #include <crm/cluster/internal.h>
  30 
  31 #include <crm/stonith-ng.h>
  32 #include <crm/fencing/internal.h>
  33 #include <crm/common/xml.h>
  34 #include <crm/common/xml_internal.h>
  35 
  36 #include <crm/common/util.h>
  37 #include <pacemaker-fenced.h>
  38 
  39 #define TIMEOUT_MULTIPLY_FACTOR 1.2
  40 
  41 /* When one fencer queries its peers for devices able to handle a fencing
  42  * request, each peer will reply with a list of such devices available to it.
  43  * Each reply will be parsed into a peer_device_info_t, with each device's
  44  * information kept in a device_properties_t.
  45  */
  46 
  47 typedef struct device_properties_s {
  48     /* Whether access to this device has been verified */
  49     gboolean verified;
  50 
  51     /* The remaining members are indexed by the operation's "phase" */
  52 
  53     /* Whether this device has been executed in each phase */
  54     gboolean executed[st_phase_max];
  55     /* Whether this device is disallowed from executing in each phase */
  56     gboolean disallowed[st_phase_max];
  57     /* Action-specific timeout for each phase */
  58     int custom_action_timeout[st_phase_max];
  59     /* Action-specific maximum random delay for each phase */
  60     int delay_max[st_phase_max];
  61     /* Action-specific base delay for each phase */
  62     int delay_base[st_phase_max];
  63     /* Group of enum st_device_flags */
  64     uint32_t device_support_flags;
  65 } device_properties_t;
  66 
  67 typedef struct {
  68     /* Name of peer that sent this result */
  69     char *host;
  70     /* Only try peers for non-topology based operations once */
  71     gboolean tried;
  72     /* Number of entries in the devices table */
  73     int ndevices;
  74     /* Devices available to this host that are capable of fencing the target */
  75     GHashTable *devices;
  76 } peer_device_info_t;
  77 
  78 GHashTable *stonith_remote_op_list = NULL;
  79 
  80 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
  81                                   int call_options);
  82 
  83 static void request_peer_fencing(remote_fencing_op_t *op,
  84                                  peer_device_info_t *peer);
  85 static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
  86 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
  87 static int get_op_total_timeout(const remote_fencing_op_t *op,
  88                                 const peer_device_info_t *chosen_peer);
  89 
  90 static gint
  91 sort_strings(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
  93     return strcmp(a, b);
  94 }
  95 
  96 static void
  97 free_remote_query(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
  98 {
  99     if (data != NULL) {
 100         peer_device_info_t *peer = data;
 101 
 102         g_hash_table_destroy(peer->devices);
 103         free(peer->host);
 104         free(peer);
 105     }
 106 }
 107 
 108 void
 109 free_stonith_remote_op_list(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 110 {
 111     if (stonith_remote_op_list != NULL) {
 112         g_hash_table_destroy(stonith_remote_op_list);
 113         stonith_remote_op_list = NULL;
 114     }
 115 }
 116 
 117 struct peer_count_data {
 118     const remote_fencing_op_t *op;
 119     gboolean verified_only;
 120     uint32_t support_action_only;
 121     int count;
 122 };
 123 
 124 /*!
 125  * \internal
 126  * \brief Increment a counter if a device has not been executed yet
 127  *
 128  * \param[in]     key        Device ID (ignored)
 129  * \param[in]     value      Device properties
 130  * \param[in,out] user_data  Peer count data
 131  */
 132 static void
 133 count_peer_device(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 134 {
 135     device_properties_t *props = (device_properties_t*)value;
 136     struct peer_count_data *data = user_data;
 137 
 138     if (!props->executed[data->op->phase]
 139         && (!data->verified_only || props->verified)
 140         && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) {
 141         ++(data->count);
 142     }
 143 }
 144 
 145 /*!
 146  * \internal
 147  * \brief Check the number of available devices in a peer's query results
 148  *
 149  * \param[in] op             Operation that results are for
 150  * \param[in] peer           Peer to count
 151  * \param[in] verified_only  Whether to count only verified devices
 152  * \param[in] support_action_only Whether to count only devices that support action
 153  *
 154  * \return Number of devices available to peer that were not already executed
 155  */
 156 static int
 157 count_peer_devices(const remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
 158                    const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only)
 159 {
 160     struct peer_count_data data;
 161 
 162     data.op = op;
 163     data.verified_only = verified_only;
 164     data.support_action_only = support_on_action_only;
 165     data.count = 0;
 166     if (peer) {
 167         g_hash_table_foreach(peer->devices, count_peer_device, &data);
 168     }
 169     return data.count;
 170 }
 171 
 172 /*!
 173  * \internal
 174  * \brief Search for a device in a query result
 175  *
 176  * \param[in] op      Operation that result is for
 177  * \param[in] peer    Query result for a peer
 178  * \param[in] device  Device ID to search for
 179  *
 180  * \return Device properties if found, NULL otherwise
 181  */
 182 static device_properties_t *
 183 find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
     /* [previous][next][first][last][top][bottom][index][help] */
 184                  const char *device, uint32_t support_action_only)
 185 {
 186     device_properties_t *props = g_hash_table_lookup(peer->devices, device);
 187 
 188     if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) {
 189         return NULL;
 190     }
 191     return (props && !props->executed[op->phase]
 192            && !props->disallowed[op->phase])? props : NULL;
 193 }
 194 
 195 /*!
 196  * \internal
 197  * \brief Find a device in a peer's device list and mark it as executed
 198  *
 199  * \param[in]     op                     Operation that peer result is for
 200  * \param[in,out] peer                   Peer with results to search
 201  * \param[in]     device                 ID of device to mark as done
 202  * \param[in]     verified_devices_only  Only consider verified devices
 203  *
 204  * \return TRUE if device was found and marked, FALSE otherwise
 205  */
 206 static gboolean
 207 grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
     /* [previous][next][first][last][top][bottom][index][help] */
 208                  const char *device, gboolean verified_devices_only)
 209 {
 210     device_properties_t *props = find_peer_device(op, peer, device,
 211                                                   fenced_support_flag(op->action));
 212 
 213     if ((props == NULL) || (verified_devices_only && !props->verified)) {
 214         return FALSE;
 215     }
 216 
 217     crm_trace("Removing %s from %s (%d remaining)",
 218               device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none));
 219     props->executed[op->phase] = TRUE;
 220     return TRUE;
 221 }
 222 
 223 static void
 224 clear_remote_op_timers(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 225 {
 226     if (op->query_timer) {
 227         g_source_remove(op->query_timer);
 228         op->query_timer = 0;
 229     }
 230     if (op->op_timer_total) {
 231         g_source_remove(op->op_timer_total);
 232         op->op_timer_total = 0;
 233     }
 234     if (op->op_timer_one) {
 235         g_source_remove(op->op_timer_one);
 236         op->op_timer_one = 0;
 237     }
 238 }
 239 
 240 static void
 241 free_remote_op(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 242 {
 243     remote_fencing_op_t *op = data;
 244 
 245     crm_log_xml_debug(op->request, "Destroying");
 246 
 247     clear_remote_op_timers(op);
 248 
 249     free(op->id);
 250     free(op->action);
 251     free(op->delegate);
 252     free(op->target);
 253     free(op->client_id);
 254     free(op->client_name);
 255     free(op->originator);
 256 
 257     if (op->query_results) {
 258         g_list_free_full(op->query_results, free_remote_query);
 259     }
 260     if (op->request) {
 261         free_xml(op->request);
 262         op->request = NULL;
 263     }
 264     if (op->devices_list) {
 265         g_list_free_full(op->devices_list, free);
 266         op->devices_list = NULL;
 267     }
 268     g_list_free_full(op->automatic_list, free);
 269     g_list_free(op->duplicates);
 270 
 271     pcmk__reset_result(&op->result);
 272     free(op);
 273 }
 274 
 275 void
 276 init_stonith_remote_op_hash_table(GHashTable **table)
     /* [previous][next][first][last][top][bottom][index][help] */
 277 {
 278     if (*table == NULL) {
 279         *table = pcmk__strkey_table(NULL, free_remote_op);
 280     }
 281 }
 282 
 283 /*!
 284  * \internal
 285  * \brief Return an operation's originally requested action (before any remap)
 286  *
 287  * \param[in] op  Operation to check
 288  *
 289  * \return Operation's original action
 290  */
 291 static const char *
 292 op_requested_action(const remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 293 {
 294     return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action);
 295 }
 296 
 297 /*!
 298  * \internal
 299  * \brief Remap a "reboot" operation to the "off" phase
 300  *
 301  * \param[in,out] op      Operation to remap
 302  */
 303 static void
 304 op_phase_off(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 305 {
 306     crm_info("Remapping multiple-device reboot targeting %s to 'off' "
 307              CRM_XS " id=%.8s", op->target, op->id);
 308     op->phase = st_phase_off;
 309 
 310     /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
 311      * memory allocation at each phase.
 312      */
 313     strcpy(op->action, PCMK_ACTION_OFF);
 314 }
 315 
 316 /*!
 317  * \internal
 318  * \brief Advance a remapped reboot operation to the "on" phase
 319  *
 320  * \param[in,out] op  Operation to remap
 321  */
 322 static void
 323 op_phase_on(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 324 {
 325     GList *iter = NULL;
 326 
 327     crm_info("Remapped 'off' targeting %s complete, "
 328              "remapping to 'on' for %s " CRM_XS " id=%.8s",
 329              op->target, op->client_name, op->id);
 330     op->phase = st_phase_on;
 331     strcpy(op->action, PCMK_ACTION_ON);
 332 
 333     /* Skip devices with automatic unfencing, because the cluster will handle it
 334      * when the node rejoins.
 335      */
 336     for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
 337         GList *match = g_list_find_custom(op->devices_list, iter->data,
 338                                             sort_strings);
 339 
 340         if (match) {
 341             op->devices_list = g_list_remove(op->devices_list, match->data);
 342         }
 343     }
 344     g_list_free_full(op->automatic_list, free);
 345     op->automatic_list = NULL;
 346 
 347     /* Rewind device list pointer */
 348     op->devices = op->devices_list;
 349 }
 350 
 351 /*!
 352  * \internal
 353  * \brief Reset a remapped reboot operation
 354  *
 355  * \param[in,out] op  Operation to reset
 356  */
 357 static void
 358 undo_op_remap(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 359 {
 360     if (op->phase > 0) {
 361         crm_info("Undoing remap of reboot targeting %s for %s "
 362                  CRM_XS " id=%.8s", op->target, op->client_name, op->id);
 363         op->phase = st_phase_requested;
 364         strcpy(op->action, PCMK_ACTION_REBOOT);
 365     }
 366 }
 367 
 368 /*!
 369  * \internal
 370  * \brief Create notification data XML for a fencing operation result
 371  *
 372  * \param[in,out] parent  Parent XML element for newly created element
 373  * \param[in]     op      Fencer operation that completed
 374  *
 375  * \return Newly created XML to add as notification data
 376  * \note The caller is responsible for freeing the result.
 377  */
 378 static xmlNode *
 379 fencing_result2xml(xmlNode *parent, const remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 380 {
 381     xmlNode *notify_data = pcmk__xe_create(parent, PCMK__XE_ST_NOTIFY_FENCE);
 382 
 383     crm_xml_add_int(notify_data, PCMK_XA_STATE, op->state);
 384     crm_xml_add(notify_data, PCMK__XA_ST_TARGET, op->target);
 385     crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ACTION, op->action);
 386     crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, op->delegate);
 387     crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, op->id);
 388     crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, op->originator);
 389     crm_xml_add(notify_data, PCMK__XA_ST_CLIENTID, op->client_id);
 390     crm_xml_add(notify_data, PCMK__XA_ST_CLIENTNAME, op->client_name);
 391 
 392     return notify_data;
 393 }
 394 
 395 /*!
 396  * \internal
 397  * \brief Broadcast a fence result notification to all CPG peers
 398  *
 399  * \param[in] op         Fencer operation that completed
 400  * \param[in] op_merged  Whether this operation is a duplicate of another
 401  */
 402 void
 403 fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged)
     /* [previous][next][first][last][top][bottom][index][help] */
 404 {
 405     static int count = 0;
 406     xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY);
 407     xmlNode *wrapper = NULL;
 408     xmlNode *notify_data = NULL;
 409 
 410     count++;
 411     crm_trace("Broadcasting result to peers");
 412     crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
 413     crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
 414     crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
 415     crm_xml_add_int(bcast, PCMK_XA_COUNT, count);
 416 
 417     if (op_merged) {
 418         pcmk__xe_set_bool_attr(bcast, PCMK__XA_ST_OP_MERGED, true);
 419     }
 420 
 421     wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA);
 422     notify_data = fencing_result2xml(wrapper, op);
 423     stonith__xe_set_result(notify_data, &op->result);
 424 
 425     pcmk__cluster_send_message(NULL, crm_msg_stonith_ng, bcast);
 426     free_xml(bcast);
 427 
 428     return;
 429 }
 430 
 431 /*!
 432  * \internal
 433  * \brief Reply to a local request originator and notify all subscribed clients
 434  *
 435  * \param[in,out] op    Fencer operation that completed
 436  * \param[in,out] data  Top-level XML to add notification to
 437  */
 438 static void
 439 handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
     /* [previous][next][first][last][top][bottom][index][help] */
 440 {
 441     xmlNode *notify_data = NULL;
 442     xmlNode *reply = NULL;
 443     pcmk__client_t *client = NULL;
 444 
 445     if (op->notify_sent == TRUE) {
 446         /* nothing to do */
 447         return;
 448     }
 449 
 450     /* Do notification with a clean data object */
 451     crm_xml_add_int(data, PCMK_XA_STATE, op->state);
 452     crm_xml_add(data, PCMK__XA_ST_TARGET, op->target);
 453     crm_xml_add(data, PCMK__XA_ST_OP, op->action);
 454 
 455     reply = fenced_construct_reply(op->request, data, &op->result);
 456     crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate);
 457 
 458     /* Send fencing OP reply to local client that initiated fencing */
 459     client = pcmk__find_client_by_id(op->client_id);
 460     if (client == NULL) {
 461         crm_trace("Skipping reply to %s: no longer a client", op->client_id);
 462     } else {
 463         do_local_reply(reply, client, op->call_options);
 464     }
 465 
 466     /* bcast to all local clients that the fencing operation happend */
 467     notify_data = fencing_result2xml(NULL, op);
 468     fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result,
 469                              notify_data);
 470     free_xml(notify_data);
 471     fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
 472 
 473     /* mark this op as having notify's already sent */
 474     op->notify_sent = TRUE;
 475     free_xml(reply);
 476 }
 477 
 478 /*!
 479  * \internal
 480  * \brief Finalize all duplicates of a given fencer operation
 481  *
 482  * \param[in,out] op    Fencer operation that completed
 483  * \param[in,out] data  Top-level XML to add notification to
 484  */
 485 static void
 486 finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
     /* [previous][next][first][last][top][bottom][index][help] */
 487 {
 488     for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
 489         remote_fencing_op_t *other = iter->data;
 490 
 491         if (other->state == st_duplicate) {
 492             other->state = op->state;
 493             crm_debug("Performing duplicate notification for %s@%s: %s "
 494                       CRM_XS " id=%.8s",
 495                       other->client_name, other->originator,
 496                       pcmk_exec_status_str(op->result.execution_status),
 497                       other->id);
 498             pcmk__copy_result(&op->result, &other->result);
 499             finalize_op(other, data, true);
 500 
 501         } else {
 502             // Possible if (for example) it timed out already
 503             crm_err("Skipping duplicate notification for %s@%s "
 504                     CRM_XS " state=%s id=%.8s",
 505                     other->client_name, other->originator,
 506                     stonith_op_state_str(other->state), other->id);
 507         }
 508     }
 509 }
 510 
 511 static char *
 512 delegate_from_xml(xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
 513 {
 514     xmlNode *match = get_xpath_object("//@" PCMK__XA_ST_DELEGATE, xml,
 515                                       LOG_NEVER);
 516 
 517     if (match == NULL) {
 518         return crm_element_value_copy(xml, PCMK__XA_SRC);
 519     } else {
 520         return crm_element_value_copy(match, PCMK__XA_ST_DELEGATE);
 521     }
 522 }
 523 
 524 /*!
 525  * \internal
 526  * \brief Finalize a peer fencing operation
 527  *
 528  * Clean up after a fencing operation completes. This function has two code
 529  * paths: the executioner uses it to broadcast the result to CPG peers, and then
 530  * each peer (including the executioner) uses it to process that broadcast and
 531  * notify its IPC clients of the result.
 532  *
 533  * \param[in,out] op      Fencer operation that completed
 534  * \param[in,out] data    If not NULL, XML reply of last delegated operation
 535  * \param[in]     dup     Whether this operation is a duplicate of another
 536  *                        (in which case, do not broadcast the result)
 537  *
 538  *  \note The operation result should be set before calling this function.
 539  */
 540 static void
 541 finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
     /* [previous][next][first][last][top][bottom][index][help] */
 542 {
 543     int level = LOG_ERR;
 544     const char *subt = NULL;
 545     xmlNode *local_data = NULL;
 546     gboolean op_merged = FALSE;
 547 
 548     CRM_CHECK((op != NULL), return);
 549 
 550     // This is a no-op if timers have already been cleared
 551     clear_remote_op_timers(op);
 552 
 553     if (op->notify_sent) {
 554         // Most likely, this is a timed-out action that eventually completed
 555         crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
 556                    "Result arrived too late " CRM_XS " id=%.8s",
 557                    op->action, (op->target? " targeting " : ""),
 558                    (op->target? op->target : ""),
 559                    (op->delegate? op->delegate : "unknown node"),
 560                    op->client_name, op->originator,
 561                    (op_merged? " (merged)" : ""),
 562                    op->id);
 563         return;
 564     }
 565 
 566     set_fencing_completed(op);
 567     undo_op_remap(op);
 568 
 569     if (data == NULL) {
 570         data = pcmk__xe_create(NULL, "remote-op");
 571         local_data = data;
 572 
 573     } else if (op->delegate == NULL) {
 574         switch (op->result.execution_status) {
 575             case PCMK_EXEC_NO_FENCE_DEVICE:
 576                 break;
 577 
 578             case PCMK_EXEC_INVALID:
 579                 if (op->result.exit_status != CRM_EX_EXPIRED) {
 580                     op->delegate = delegate_from_xml(data);
 581                 }
 582                 break;
 583 
 584             default:
 585                 op->delegate = delegate_from_xml(data);
 586                 break;
 587         }
 588     }
 589 
 590     if (dup || (crm_element_value(data, PCMK__XA_ST_OP_MERGED) != NULL)) {
 591         op_merged = true;
 592     }
 593 
 594     /* Tell everyone the operation is done, we will continue
 595      * with doing the local notifications once we receive
 596      * the broadcast back. */
 597     subt = crm_element_value(data, PCMK__XA_SUBT);
 598     if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) {
 599         /* Defer notification until the bcast message arrives */
 600         fenced_broadcast_op_result(op, op_merged);
 601         free_xml(local_data);
 602         return;
 603     }
 604 
 605     if (pcmk__result_ok(&op->result) || dup
 606         || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
 607         level = LOG_NOTICE;
 608     }
 609     do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
 610                CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
 611                (op->target? op->target : ""),
 612                (op->delegate? op->delegate : "unknown node"),
 613                op->client_name, op->originator,
 614                (op_merged? " (merged)" : ""),
 615                crm_exit_str(op->result.exit_status),
 616                pcmk_exec_status_str(op->result.execution_status),
 617                ((op->result.exit_reason == NULL)? "" : ": "),
 618                ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
 619                op->id);
 620 
 621     handle_local_reply_and_notify(op, data);
 622 
 623     if (!dup) {
 624         finalize_op_duplicates(op, data);
 625     }
 626 
 627     /* Free non-essential parts of the record
 628      * Keep the record around so we can query the history
 629      */
 630     if (op->query_results) {
 631         g_list_free_full(op->query_results, free_remote_query);
 632         op->query_results = NULL;
 633     }
 634     if (op->request) {
 635         free_xml(op->request);
 636         op->request = NULL;
 637     }
 638 
 639     free_xml(local_data);
 640 }
 641 
 642 /*!
 643  * \internal
 644  * \brief Finalize a watchdog fencer op after the waiting time expires
 645  *
 646  * \param[in,out] userdata  Fencer operation that completed
 647  *
 648  * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 649  */
 650 static gboolean
 651 remote_op_watchdog_done(gpointer userdata)
     /* [previous][next][first][last][top][bottom][index][help] */
 652 {
 653     remote_fencing_op_t *op = userdata;
 654 
 655     op->op_timer_one = 0;
 656 
 657     crm_notice("Self-fencing (%s) by %s for %s assumed complete "
 658                CRM_XS " id=%.8s",
 659                op->action, op->target, op->client_name, op->id);
 660     op->state = st_done;
 661     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 662     finalize_op(op, NULL, false);
 663     return G_SOURCE_REMOVE;
 664 }
 665 
 666 static gboolean
 667 remote_op_timeout_one(gpointer userdata)
     /* [previous][next][first][last][top][bottom][index][help] */
 668 {
 669     remote_fencing_op_t *op = userdata;
 670 
 671     op->op_timer_one = 0;
 672 
 673     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
 674                " id=%.8s", op->action, op->target, op->client_name, op->id);
 675     pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
 676                      "Peer did not return fence result within timeout");
 677 
 678     // The requested delay has been applied for the first device
 679     if (op->client_delay > 0) {
 680         op->client_delay = 0;
 681         crm_trace("Try another device for '%s' action targeting %s "
 682                   "for client %s without delay " CRM_XS " id=%.8s",
 683                   op->action, op->target, op->client_name, op->id);
 684     }
 685 
 686     // Try another device, if appropriate
 687     request_peer_fencing(op, NULL);
 688     return G_SOURCE_REMOVE;
 689 }
 690 
 691 /*!
 692  * \internal
 693  * \brief Finalize a remote fencer operation that timed out
 694  *
 695  * \param[in,out] op      Fencer operation that timed out
 696  * \param[in]     reason  Readable description of what step timed out
 697  */
 698 static void
 699 finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
     /* [previous][next][first][last][top][bottom][index][help] */
 700 {
 701     crm_debug("Action '%s' targeting %s for client %s timed out "
 702               CRM_XS " id=%.8s",
 703               op->action, op->target, op->client_name, op->id);
 704 
 705     if (op->phase == st_phase_on) {
 706         /* A remapped reboot operation timed out in the "on" phase, but the
 707          * "off" phase completed successfully, so quit trying any further
 708          * devices, and return success.
 709          */
 710         op->state = st_done;
 711         pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 712     } else {
 713         op->state = st_failed;
 714         pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
 715     }
 716     finalize_op(op, NULL, false);
 717 }
 718 
 719 /*!
 720  * \internal
 721  * \brief Finalize a remote fencer operation that timed out
 722  *
 723  * \param[in,out] userdata  Fencer operation that timed out
 724  *
 725  * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 726  */
 727 static gboolean
 728 remote_op_timeout(gpointer userdata)
     /* [previous][next][first][last][top][bottom][index][help] */
 729 {
 730     remote_fencing_op_t *op = userdata;
 731 
 732     op->op_timer_total = 0;
 733 
 734     if (op->state == st_done) {
 735         crm_debug("Action '%s' targeting %s for client %s already completed "
 736                   CRM_XS " id=%.8s",
 737                   op->action, op->target, op->client_name, op->id);
 738     } else {
 739         finalize_timed_out_op(userdata, "Fencing did not complete within a "
 740                                         "total timeout based on the "
 741                                         "configured timeout and retries for "
 742                                         "any devices attempted");
 743     }
 744     return G_SOURCE_REMOVE;
 745 }
 746 
 747 static gboolean
 748 remote_op_query_timeout(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 749 {
 750     remote_fencing_op_t *op = data;
 751 
 752     op->query_timer = 0;
 753 
 754     if (op->state == st_done) {
 755         crm_debug("Operation %.8s targeting %s already completed",
 756                   op->id, op->target);
 757     } else if (op->state == st_exec) {
 758         crm_debug("Operation %.8s targeting %s already in progress",
 759                   op->id, op->target);
 760     } else if (op->query_results) {
 761         // Query succeeded, so attempt the actual fencing
 762         crm_debug("Query %.8s targeting %s complete (state=%s)",
 763                   op->id, op->target, stonith_op_state_str(op->state));
 764         request_peer_fencing(op, NULL);
 765     } else {
 766         crm_debug("Query %.8s targeting %s timed out (state=%s)",
 767                   op->id, op->target, stonith_op_state_str(op->state));
 768         finalize_timed_out_op(op, "No capable peers replied to device query "
 769                                   "within timeout");
 770     }
 771 
 772     return G_SOURCE_REMOVE;
 773 }
 774 
 775 static gboolean
 776 topology_is_empty(stonith_topology_t *tp)
     /* [previous][next][first][last][top][bottom][index][help] */
 777 {
 778     int i;
 779 
 780     if (tp == NULL) {
 781         return TRUE;
 782     }
 783 
 784     for (i = 0; i < ST__LEVEL_COUNT; i++) {
 785         if (tp->levels[i] != NULL) {
 786             return FALSE;
 787         }
 788     }
 789     return TRUE;
 790 }
 791 
 792 /*!
 793  * \internal
 794  * \brief Add a device to an operation's automatic unfencing list
 795  *
 796  * \param[in,out] op      Operation to modify
 797  * \param[in]     device  Device ID to add
 798  */
 799 static void
 800 add_required_device(remote_fencing_op_t *op, const char *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 801 {
 802     GList *match  = g_list_find_custom(op->automatic_list, device,
 803                                          sort_strings);
 804 
 805     if (!match) {
 806         op->automatic_list = g_list_prepend(op->automatic_list,
 807                                             pcmk__str_copy(device));
 808     }
 809 }
 810 
 811 /*!
 812  * \internal
 813  * \brief Remove a device from the automatic unfencing list
 814  *
 815  * \param[in,out] op      Operation to modify
 816  * \param[in]     device  Device ID to remove
 817  */
 818 static void
 819 remove_required_device(remote_fencing_op_t *op, const char *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 820 {
 821     GList *match = g_list_find_custom(op->automatic_list, device,
 822                                         sort_strings);
 823 
 824     if (match) {
 825         op->automatic_list = g_list_remove(op->automatic_list, match->data);
 826     }
 827 }
 828 
 829 /* deep copy the device list */
 830 static void
 831 set_op_device_list(remote_fencing_op_t * op, GList *devices)
     /* [previous][next][first][last][top][bottom][index][help] */
 832 {
 833     GList *lpc = NULL;
 834 
 835     if (op->devices_list) {
 836         g_list_free_full(op->devices_list, free);
 837         op->devices_list = NULL;
 838     }
 839     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
 840         const char *device = lpc->data;
 841 
 842         op->devices_list = g_list_append(op->devices_list,
 843                                          pcmk__str_copy(device));
 844     }
 845     op->devices = op->devices_list;
 846 }
 847 
 848 /*!
 849  * \internal
 850  * \brief Check whether a node matches a topology target
 851  *
 852  * \param[in] tp    Topology table entry to check
 853  * \param[in] node  Name of node to check
 854  *
 855  * \return TRUE if node matches topology target
 856  */
 857 static gboolean
 858 topology_matches(const stonith_topology_t *tp, const char *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 859 {
 860     regex_t r_patt;
 861 
 862     CRM_CHECK(node && tp && tp->target, return FALSE);
 863     switch (tp->kind) {
 864         case fenced_target_by_attribute:
 865             /* This level targets by attribute, so tp->target is a NAME=VALUE pair
 866              * of a permanent attribute applied to targeted nodes. The test below
 867              * relies on the locally cached copy of the CIB, so if fencing needs to
 868              * be done before the initial CIB is received or after a malformed CIB
 869              * is received, then the topology will be unable to be used.
 870              */
 871             if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
 872                 crm_notice("Matched %s with %s by attribute", node, tp->target);
 873                 return TRUE;
 874             }
 875             break;
 876 
 877         case fenced_target_by_pattern:
 878             /* This level targets node names matching a pattern, so tp->target
 879              * (and tp->target_pattern) is a regular expression.
 880              */
 881             if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
 882                 crm_info("Bad regex '%s' for fencing level", tp->target);
 883             } else {
 884                 int status = regexec(&r_patt, node, 0, NULL, 0);
 885 
 886                 regfree(&r_patt);
 887                 if (status == 0) {
 888                     crm_notice("Matched %s with %s by name", node, tp->target);
 889                     return TRUE;
 890                 }
 891             }
 892             break;
 893 
 894         case fenced_target_by_name:
 895             crm_trace("Testing %s against %s", node, tp->target);
 896             return pcmk__str_eq(tp->target, node, pcmk__str_casei);
 897 
 898         default:
 899             break;
 900     }
 901     crm_trace("No match for %s with %s", node, tp->target);
 902     return FALSE;
 903 }
 904 
 905 stonith_topology_t *
 906 find_topology_for_host(const char *host) 
     /* [previous][next][first][last][top][bottom][index][help] */
 907 {
 908     GHashTableIter tIter;
 909     stonith_topology_t *tp = g_hash_table_lookup(topology, host);
 910 
 911     if(tp != NULL) {
 912         crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
 913         return tp;
 914     }
 915 
 916     g_hash_table_iter_init(&tIter, topology);
 917     while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
 918         if (topology_matches(tp, host)) {
 919             crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
 920             return tp;
 921         }
 922     }
 923 
 924     crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
 925     return NULL;
 926 }
 927 
 928 /*!
 929  * \internal
 930  * \brief Set fencing operation's device list to target's next topology level
 931  *
 932  * \param[in,out] op        Remote fencing operation to modify
 933  * \param[in]     empty_ok  If true, an operation without a target (i.e.
 934  *                          queries) or a target without a topology will get a
 935  *                          pcmk_rc_ok return value instead of ENODEV
 936  *
 937  * \return Standard Pacemaker return value
 938  */
 939 static int
 940 advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
     /* [previous][next][first][last][top][bottom][index][help] */
 941 {
 942     stonith_topology_t *tp = NULL;
 943 
 944     if (op->target) {
 945         tp = find_topology_for_host(op->target);
 946     }
 947     if (topology_is_empty(tp)) {
 948         return empty_ok? pcmk_rc_ok : ENODEV;
 949     }
 950 
 951     CRM_ASSERT(tp->levels != NULL);
 952 
 953     stonith__set_call_options(op->call_options, op->id, st_opt_topology);
 954 
 955     /* This is a new level, so undo any remapping left over from previous */
 956     undo_op_remap(op);
 957 
 958     do {
 959         op->level++;
 960 
 961     } while (op->level < ST__LEVEL_COUNT && tp->levels[op->level] == NULL);
 962 
 963     if (op->level < ST__LEVEL_COUNT) {
 964         crm_trace("Attempting fencing level %d targeting %s (%d devices) "
 965                   "for client %s@%s (id=%.8s)",
 966                   op->level, op->target, g_list_length(tp->levels[op->level]),
 967                   op->client_name, op->originator, op->id);
 968         set_op_device_list(op, tp->levels[op->level]);
 969 
 970         // The requested delay has been applied for the first fencing level
 971         if ((op->level > 1) && (op->client_delay > 0)) {
 972             op->client_delay = 0;
 973         }
 974 
 975         if ((g_list_next(op->devices_list) != NULL)
 976             && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
 977             /* A reboot has been requested for a topology level with multiple
 978              * devices. Instead of rebooting the devices sequentially, we will
 979              * turn them all off, then turn them all on again. (Think about
 980              * switched power outlets for redundant power supplies.)
 981              */
 982             op_phase_off(op);
 983         }
 984         return pcmk_rc_ok;
 985     }
 986 
 987     crm_info("All %sfencing options targeting %s for client %s@%s failed "
 988              CRM_XS " id=%.8s",
 989              (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
 990              op->target, op->client_name, op->originator, op->id);
 991     return ENODEV;
 992 }
 993 
 994 /*!
 995  * \internal
 996  * \brief If fencing operation is a duplicate, merge it into the other one
 997  *
 998  * \param[in,out] op  Fencing operation to check
 999  */
1000 static void
1001 merge_duplicates(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
1002 {
1003     GHashTableIter iter;
1004     remote_fencing_op_t *other = NULL;
1005 
1006     time_t now = time(NULL);
1007 
1008     g_hash_table_iter_init(&iter, stonith_remote_op_list);
1009     while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
1010         const char *other_action = op_requested_action(other);
1011         crm_node_t *node = NULL;
1012 
1013         if (!strcmp(op->id, other->id)) {
1014             continue; // Don't compare against self
1015         }
1016         if (other->state > st_exec) {
1017             crm_trace("%.8s not duplicate of %.8s: not in progress",
1018                       op->id, other->id);
1019             continue;
1020         }
1021         if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
1022             crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
1023                       op->id, other->id, op->target, other->target);
1024             continue;
1025         }
1026         if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) {
1027             crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
1028                       op->id, other->id, op->action, other_action);
1029             continue;
1030         }
1031         if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
1032             crm_trace("%.8s not duplicate of %.8s: same client %s",
1033                       op->id, other->id, op->client_name);
1034             continue;
1035         }
1036         if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
1037             crm_trace("%.8s not duplicate of %.8s: suicide for %s",
1038                       op->id, other->id, other->target);
1039             continue;
1040         }
1041 
1042         node = pcmk__get_node(0, other->originator, NULL,
1043                               pcmk__node_search_cluster_member);
1044 
1045         if (!fencing_peer_active(node)) {
1046             crm_notice("Failing action '%s' targeting %s originating from "
1047                        "client %s@%s: Originator is dead " CRM_XS " id=%.8s",
1048                        other->action, other->target, other->client_name,
1049                        other->originator, other->id);
1050             crm_trace("%.8s not duplicate of %.8s: originator dead",
1051                       op->id, other->id);
1052             other->state = st_failed;
1053             continue;
1054         }
1055         if ((other->total_timeout > 0)
1056             && (now > (other->total_timeout + other->created))) {
1057             crm_trace("%.8s not duplicate of %.8s: old (%lld vs. %lld + %ds)",
1058                       op->id, other->id, (long long)now, (long long)other->created,
1059                       other->total_timeout);
1060             continue;
1061         }
1062 
1063         /* There is another in-flight request to fence the same host
1064          * Piggyback on that instead.  If it fails, so do we.
1065          */
1066         other->duplicates = g_list_append(other->duplicates, op);
1067         if (other->total_timeout == 0) {
1068             other->total_timeout = op->total_timeout =
1069                 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
1070             crm_trace("Best guess as to timeout used for %.8s: %ds",
1071                       other->id, other->total_timeout);
1072         }
1073         crm_notice("Merging fencing action '%s' targeting %s originating from "
1074                    "client %s with identical request from %s@%s "
1075                    CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
1076                    op->action, op->target, op->client_name,
1077                    other->client_name, other->originator,
1078                    op->id, other->id, other->total_timeout);
1079         report_timeout_period(op, other->total_timeout);
1080         op->state = st_duplicate;
1081     }
1082 }
1083 
1084 static uint32_t fencing_active_peers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1085 {
1086     uint32_t count = 0;
1087     crm_node_t *entry;
1088     GHashTableIter gIter;
1089 
1090     g_hash_table_iter_init(&gIter, crm_peer_cache);
1091     while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1092         if(fencing_peer_active(entry)) {
1093             count++;
1094         }
1095     }
1096     return count;
1097 }
1098 
1099 /*!
1100  * \internal
1101  * \brief Process a manual confirmation of a pending fence action
1102  *
1103  * \param[in]     client  IPC client that sent confirmation
1104  * \param[in,out] msg     Request XML with manual confirmation
1105  *
1106  * \return Standard Pacemaker return code
1107  */
1108 int
1109 fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg)
     /* [previous][next][first][last][top][bottom][index][help] */
1110 {
1111     remote_fencing_op_t *op = NULL;
1112     xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR);
1113 
1114     CRM_CHECK(dev != NULL, return EPROTO);
1115 
1116     crm_notice("Received manual confirmation that %s has been fenced",
1117                pcmk__s(crm_element_value(dev, PCMK__XA_ST_TARGET),
1118                        "unknown target"));
1119     op = initiate_remote_stonith_op(client, msg, TRUE);
1120     if (op == NULL) {
1121         return EPROTO;
1122     }
1123     op->state = st_done;
1124     set_fencing_completed(op);
1125     op->delegate = pcmk__str_copy("a human");
1126 
1127     // For the fencer's purposes, the fencing operation is done
1128     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1129     finalize_op(op, msg, false);
1130 
1131     /* For the requester's purposes, the operation is still pending. The
1132      * actual result will be sent asynchronously via the operation's done_cb().
1133      */
1134     return EINPROGRESS;
1135 }
1136 
1137 /*!
1138  * \internal
1139  * \brief Create a new remote stonith operation
1140  *
1141  * \param[in] client   ID of local stonith client that initiated the operation
1142  * \param[in] request  The request from the client that started the operation
1143  * \param[in] peer     TRUE if this operation is owned by another stonith peer
1144  *                     (an operation owned by one peer is stored on all peers,
1145  *                     but only the owner executes it; all nodes get the results
1146  *                     once the owner finishes execution)
1147  */
1148 void *
1149 create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer)
     /* [previous][next][first][last][top][bottom][index][help] */
1150 {
1151     remote_fencing_op_t *op = NULL;
1152     xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request,
1153                                     LOG_NEVER);
1154     int call_options = 0;
1155     const char *operation = NULL;
1156 
1157     init_stonith_remote_op_hash_table(&stonith_remote_op_list);
1158 
1159     /* If this operation is owned by another node, check to make
1160      * sure we haven't already created this operation. */
1161     if (peer && dev) {
1162         const char *op_id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
1163 
1164         CRM_CHECK(op_id != NULL, return NULL);
1165 
1166         op = g_hash_table_lookup(stonith_remote_op_list, op_id);
1167         if (op) {
1168             crm_debug("Reusing existing remote fencing op %.8s for %s",
1169                       op_id, ((client == NULL)? "unknown client" : client));
1170             return op;
1171         }
1172     }
1173 
1174     op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t));
1175 
1176     crm_element_value_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout));
1177     // Value -1 means disable any static/random fencing delays
1178     crm_element_value_int(request, PCMK__XA_ST_DELAY, &(op->client_delay));
1179 
1180     if (peer && dev) {
1181         op->id = crm_element_value_copy(dev, PCMK__XA_ST_REMOTE_OP);
1182     } else {
1183         op->id = crm_generate_uuid();
1184     }
1185 
1186     g_hash_table_replace(stonith_remote_op_list, op->id, op);
1187 
1188     op->state = st_query;
1189     op->replies_expected = fencing_active_peers();
1190     op->action = crm_element_value_copy(dev, PCMK__XA_ST_DEVICE_ACTION);
1191 
1192     /* The node initiating the stonith operation. If an operation is relayed,
1193      * this is the last node the operation lands on. When in standalone mode,
1194      * origin is the ID of the client that originated the operation.
1195      *
1196      * Or may be the name of the function that created the operation.
1197      */
1198     op->originator = crm_element_value_copy(dev, PCMK__XA_ST_ORIGIN);
1199     if (op->originator == NULL) {
1200         /* Local or relayed request */
1201         op->originator = pcmk__str_copy(stonith_our_uname);
1202     }
1203 
1204     // Delegate may not be set
1205     op->delegate = crm_element_value_copy(dev, PCMK__XA_ST_DELEGATE);
1206     op->created = time(NULL);
1207 
1208     CRM_LOG_ASSERT(client != NULL);
1209     op->client_id = pcmk__str_copy(client);
1210 
1211     /* For a RELAY operation, set fenced on the client. */
1212     operation = crm_element_value(request, PCMK__XA_ST_OP);
1213 
1214     if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1215         op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
1216                                          (unsigned long) getpid());
1217     } else {
1218         op->client_name = crm_element_value_copy(request,
1219                                                  PCMK__XA_ST_CLIENTNAME);
1220     }
1221 
1222     op->target = crm_element_value_copy(dev, PCMK__XA_ST_TARGET);
1223 
1224     // @TODO Figure out how to avoid copying XML here
1225     op->request = pcmk__xml_copy(NULL, request);
1226     crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options);
1227     op->call_options = call_options;
1228 
1229     crm_element_value_int(request, PCMK__XA_ST_CALLID, &(op->client_callid));
1230 
1231     crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
1232               "base timeout %ds, %u %s expected)",
1233               (peer && dev)? "Recorded" : "Generated", op->id, op->action,
1234               op->target, op->client_name, op->base_timeout,
1235               op->replies_expected,
1236               pcmk__plural_alt(op->replies_expected, "reply", "replies"));
1237 
1238     if (op->call_options & st_opt_cs_nodeid) {
1239         int nodeid;
1240         crm_node_t *node;
1241 
1242         pcmk__scan_min_int(op->target, &nodeid, 0);
1243         node = pcmk__search_node_caches(nodeid, NULL,
1244                                         pcmk__node_search_any
1245                                         |pcmk__node_search_cluster_cib);
1246 
1247         /* Ensure the conversion only happens once */
1248         stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);
1249 
1250         if (node && node->uname) {
1251             pcmk__str_update(&(op->target), node->uname);
1252 
1253         } else {
1254             crm_warn("Could not expand nodeid '%s' into a host name", op->target);
1255         }
1256     }
1257 
1258     /* check to see if this is a duplicate operation of another in-flight operation */
1259     merge_duplicates(op);
1260 
1261     if (op->state != st_duplicate) {
1262         /* kick history readers */
1263         fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
1264     }
1265 
1266     /* safe to trim as long as that doesn't touch pending ops */
1267     stonith_fence_history_trim();
1268 
1269     return op;
1270 }
1271 
1272 /*!
1273  * \internal
1274  * \brief Create a peer fencing operation from a request, and initiate it
1275  *
1276  * \param[in] client     IPC client that made request (NULL to get from request)
1277  * \param[in] request    Request XML
1278  * \param[in] manual_ack Whether this is a manual action confirmation
1279  *
1280  * \return Newly created operation on success, otherwise NULL
1281  */
1282 remote_fencing_op_t *
1283 initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request,
     /* [previous][next][first][last][top][bottom][index][help] */
1284                            gboolean manual_ack)
1285 {
1286     int query_timeout = 0;
1287     xmlNode *query = NULL;
1288     const char *client_id = NULL;
1289     remote_fencing_op_t *op = NULL;
1290     const char *relay_op_id = NULL;
1291     const char *operation = NULL;
1292 
1293     if (client) {
1294         client_id = client->id;
1295     } else {
1296         client_id = crm_element_value(request, PCMK__XA_ST_CLIENTID);
1297     }
1298 
1299     CRM_LOG_ASSERT(client_id != NULL);
1300     op = create_remote_stonith_op(client_id, request, FALSE);
1301     op->owner = TRUE;
1302     if (manual_ack) {
1303         return op;
1304     }
1305 
1306     CRM_CHECK(op->action, return NULL);
1307 
1308     if (advance_topology_level(op, true) != pcmk_rc_ok) {
1309         op->state = st_failed;
1310     }
1311 
1312     switch (op->state) {
1313         case st_failed:
1314             // advance_topology_level() exhausted levels
1315             pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
1316                              "All topology levels failed");
1317             crm_warn("Could not request peer fencing (%s) targeting %s "
1318                      CRM_XS " id=%.8s", op->action, op->target, op->id);
1319             finalize_op(op, NULL, false);
1320             return op;
1321 
1322         case st_duplicate:
1323             crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
1324                      CRM_XS " id=%.8s", op->action, op->target, op->id);
1325             return op;
1326 
1327         default:
1328             crm_notice("Requesting peer fencing (%s) targeting %s "
1329                        CRM_XS " id=%.8s state=%s base_timeout=%ds",
1330                        op->action, op->target, op->id,
1331                        stonith_op_state_str(op->state), op->base_timeout);
1332     }
1333 
1334     query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
1335                               NULL, op->call_options);
1336 
1337     crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id);
1338     crm_xml_add(query, PCMK__XA_ST_TARGET, op->target);
1339     crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op));
1340     crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator);
1341     crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id);
1342     crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name);
1343     crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout);
1344 
1345     /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */
1346     operation = crm_element_value(request, PCMK__XA_ST_OP);
1347     if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
1348         relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP);
1349         if (relay_op_id) {
1350             crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id);
1351         }
1352     }
1353 
1354     pcmk__cluster_send_message(NULL, crm_msg_stonith_ng, query);
1355     free_xml(query);
1356 
1357     query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
1358     op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);
1359 
1360     return op;
1361 }
1362 
/*!
 * \internal
 * \brief Bit flags controlling which peers find_best_peer() will consider
 */
enum find_best_peer_options {
    /*! Skip checking the target peer for capable fencing devices */
    FIND_PEER_SKIP_TARGET   = (1 << 0),

    /*! Only check the target peer for capable fencing devices */
    FIND_PEER_TARGET_ONLY   = (1 << 1),

    /*! Skip peers and devices that are not verified */
    FIND_PEER_VERIFIED_ONLY = (1 << 2),
};
1371 
1372 static bool
1373 is_watchdog_fencing(const remote_fencing_op_t *op, const char *device)
     /* [previous][next][first][last][top][bottom][index][help] */
1374 {
1375     return (stonith_watchdog_timeout_ms > 0
1376             // Only an explicit mismatch is considered not a watchdog fencing.
1377             && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches)
1378             && pcmk__is_fencing_action(op->action)
1379             && node_does_watchdog_fencing(op->target));
1380 }
1381 
1382 static peer_device_info_t *
1383 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
     /* [previous][next][first][last][top][bottom][index][help] */
1384 {
1385     GList *iter = NULL;
1386     gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
1387 
1388     if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
1389         return NULL;
1390     }
1391 
1392     for (iter = op->query_results; iter != NULL; iter = iter->next) {
1393         peer_device_info_t *peer = iter->data;
1394 
1395         crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
1396                   peer->host, op->target, peer->ndevices,
1397                   pcmk__plural_s(peer->ndevices), peer->tried, options);
1398         if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1399             continue;
1400         }
1401         if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1402             continue;
1403         }
1404 
1405         if (pcmk_is_set(op->call_options, st_opt_topology)) {
1406 
1407             if (grab_peer_device(op, peer, device, verified_devices_only)) {
1408                 return peer;
1409             }
1410 
1411         } else if (!peer->tried
1412                    && count_peer_devices(op, peer, verified_devices_only,
1413                                          fenced_support_flag(op->action))) {
1414             /* No topology: Use the current best peer */
1415             crm_trace("Simple fencing");
1416             return peer;
1417         }
1418     }
1419 
1420     return NULL;
1421 }
1422 
1423 static peer_device_info_t *
1424 stonith_choose_peer(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
1425 {
1426     const char *device = NULL;
1427     peer_device_info_t *peer = NULL;
1428     uint32_t active = fencing_active_peers();
1429 
1430     do {
1431         if (op->devices) {
1432             device = op->devices->data;
1433             crm_trace("Checking for someone to fence (%s) %s using %s",
1434                       op->action, op->target, device);
1435         } else {
1436             crm_trace("Checking for someone to fence (%s) %s",
1437                       op->action, op->target);
1438         }
1439 
1440         /* Best choice is a peer other than the target with verified access */
1441         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
1442         if (peer) {
1443             crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
1444             return peer;
1445         }
1446 
1447         if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
1448             crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
1449             return NULL;
1450         }
1451 
1452         /* If no other peer has verified access, next best is unverified access */
1453         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
1454         if (peer) {
1455             crm_trace("Found best unverified peer %s", peer->host);
1456             return peer;
1457         }
1458 
1459         /* If no other peer can do it, last option is self-fencing
1460          * (which is never allowed for the "on" phase of a remapped reboot)
1461          */
1462         if (op->phase != st_phase_on) {
1463             peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
1464             if (peer) {
1465                 crm_trace("%s will fence itself", peer->host);
1466                 return peer;
1467             }
1468         }
1469 
1470         /* Try the next fencing level if there is one (unless we're in the "on"
1471          * phase of a remapped "reboot", because we ignore errors in that case)
1472          */
1473     } while ((op->phase != st_phase_on)
1474              && pcmk_is_set(op->call_options, st_opt_topology)
1475              && (advance_topology_level(op, false) == pcmk_rc_ok));
1476 
1477     /* With a simple watchdog fencing configuration without a topology,
1478      * "device" is NULL here. Consider it should be done with watchdog fencing.
1479      */
1480     if (is_watchdog_fencing(op, device)) {
1481         crm_info("Couldn't contact watchdog-fencing target-node (%s)",
1482                  op->target);
1483         /* check_watchdog_fencing_and_wait will log additional info */
1484     } else {
1485         crm_notice("Couldn't find anyone to fence (%s) %s using %s",
1486                    op->action, op->target, (device? device : "any device"));
1487     }
1488     return NULL;
1489 }
1490 
1491 static int
1492 valid_fencing_timeout(int specified_timeout, bool action_specific,
     /* [previous][next][first][last][top][bottom][index][help] */
1493                       const remote_fencing_op_t *op, const char *device)
1494 {
1495     int timeout = specified_timeout;
1496 
1497     if (!is_watchdog_fencing(op, device)) {
1498         return timeout;
1499     }
1500 
1501     timeout = (int) QB_MIN(QB_MAX(specified_timeout,
1502                                   stonith_watchdog_timeout_ms / 1000), INT_MAX);
1503 
1504     if (timeout > specified_timeout) {
1505         if (action_specific) {
1506             crm_warn("pcmk_%s_timeout %ds for %s is too short (must be >= "
1507                      PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
1508                      "instead",
1509                      op->action, specified_timeout, device? device : "watchdog",
1510                      timeout, timeout);
1511 
1512         } else {
1513             crm_warn("Fencing timeout %ds is too short (must be >= "
1514                      PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds "
1515                      "instead",
1516                      specified_timeout, timeout, timeout);
1517         }
1518     }
1519 
1520     return timeout;
1521 }
1522 
1523 static int
1524 get_device_timeout(const remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
1525                    const peer_device_info_t *peer, const char *device,
1526                    bool with_delay)
1527 {
1528     int timeout = op->base_timeout;
1529     device_properties_t *props;
1530 
1531     timeout = valid_fencing_timeout(op->base_timeout, false, op, device);
1532 
1533     if (!peer || !device) {
1534         return timeout;
1535     }
1536 
1537     props = g_hash_table_lookup(peer->devices, device);
1538     if (!props) {
1539         return timeout;
1540     }
1541 
1542     if (props->custom_action_timeout[op->phase]) {
1543         timeout = valid_fencing_timeout(props->custom_action_timeout[op->phase],
1544                                         true, op, device);
1545     }
1546 
1547     // op->client_delay < 0 means disable any static/random fencing delays
1548     if (with_delay && (op->client_delay >= 0)) {
1549         // delay_base is eventually limited by delay_max
1550         timeout += (props->delay_max[op->phase] > 0 ?
1551                     props->delay_max[op->phase] : props->delay_base[op->phase]);
1552     }
1553 
1554     return timeout;
1555 }
1556 
1557 struct timeout_data {
1558     const remote_fencing_op_t *op;
1559     const peer_device_info_t *peer;
1560     int total_timeout;
1561 };
1562 
1563 /*!
1564  * \internal
1565  * \brief Add timeout to a total if device has not been executed yet
1566  *
1567  * \param[in]     key        GHashTable key (device ID)
1568  * \param[in]     value      GHashTable value (device properties)
1569  * \param[in,out] user_data  Timeout data
1570  */
1571 static void
1572 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1573 {
1574     const char *device_id = key;
1575     device_properties_t *props = value;
1576     struct timeout_data *timeout = user_data;
1577 
1578     if (!props->executed[timeout->op->phase]
1579         && !props->disallowed[timeout->op->phase]) {
1580         timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer,
1581                                                      device_id, true);
1582     }
1583 }
1584 
1585 static int
1586 get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
     /* [previous][next][first][last][top][bottom][index][help] */
1587 {
1588     struct timeout_data timeout;
1589 
1590     timeout.op = op;
1591     timeout.peer = peer;
1592     timeout.total_timeout = 0;
1593 
1594     g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
1595 
1596     return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
1597 }
1598 
1599 static int
1600 get_op_total_timeout(const remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
1601                      const peer_device_info_t *chosen_peer)
1602 {
1603     long long total_timeout = 0;
1604     stonith_topology_t *tp = find_topology_for_host(op->target);
1605 
1606     if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
1607         int i;
1608         GList *device_list = NULL;
1609         GList *iter = NULL;
1610         GList *auto_list = NULL;
1611 
1612         if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)
1613             && (op->automatic_list != NULL)) {
1614             auto_list = g_list_copy(op->automatic_list);
1615         }
1616 
1617         /* Yep, this looks scary, nested loops all over the place.
1618          * Here is what is going on.
1619          * Loop1: Iterate through fencing levels.
1620          * Loop2: If a fencing level has devices, loop through each device
1621          * Loop3: For each device in a fencing level, see what peer owns it
1622          *        and what that peer has reported the timeout is for the device.
1623          */
1624         for (i = 0; i < ST__LEVEL_COUNT; i++) {
1625             if (!tp->levels[i]) {
1626                 continue;
1627             }
1628             for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
1629                 bool found = false;
1630 
1631                 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1632                     const peer_device_info_t *peer = iter->data;
1633 
1634                     if (auto_list) {
1635                         GList *match = g_list_find_custom(auto_list, device_list->data,
1636                                         sort_strings);
1637                         if (match) {
1638                             auto_list = g_list_remove(auto_list, match->data);
1639                         }
1640                     }
1641 
1642                     if (find_peer_device(op, peer, device_list->data,
1643                                          fenced_support_flag(op->action))) {
1644                         total_timeout += get_device_timeout(op, peer,
1645                                                             device_list->data,
1646                                                             true);
1647                         found = true;
1648                         break;
1649                     }
1650                 }               /* End Loop3: match device with peer that owns device, find device's timeout period */
1651 
1652                 /* in case of watchdog-device we add the timeout to the budget
1653                    if didn't get a reply
1654                  */
1655                 if (!found && is_watchdog_fencing(op, device_list->data)) {
1656                     total_timeout += stonith_watchdog_timeout_ms / 1000;
1657                 }
1658             }                   /* End Loop2: iterate through devices at a specific level */
1659         }                       /*End Loop1: iterate through fencing levels */
1660 
1661         //Add only exists automatic_list device timeout
1662         if (auto_list) {
1663             for (iter = auto_list; iter != NULL; iter = iter->next) {
1664                 GList *iter2 = NULL;
1665 
1666                 for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) {
1667                     peer_device_info_t *peer = iter2->data;
1668                     if (find_peer_device(op, peer, iter->data, st_device_supports_on)) {
1669                         total_timeout += get_device_timeout(op, peer,
1670                                                             iter->data, true);
1671                         break;
1672                     }
1673                 }
1674             }
1675         }
1676 
1677         g_list_free(auto_list);
1678 
1679     } else if (chosen_peer) {
1680         total_timeout = get_peer_timeout(op, chosen_peer);
1681 
1682     } else {
1683         total_timeout = valid_fencing_timeout(op->base_timeout, false, op,
1684                                               NULL);
1685     }
1686 
1687     if (total_timeout <= 0) {
1688         total_timeout = op->base_timeout;
1689     }
1690 
1691     /* Take any requested fencing delay into account to prevent it from eating
1692      * up the total timeout.
1693      */
1694     if (op->client_delay > 0) {
1695         total_timeout += op->client_delay;
1696     }
1697     return (int) QB_MIN(total_timeout, INT_MAX);
1698 }
1699 
1700 static void
1701 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
1702 {
1703     GList *iter = NULL;
1704     xmlNode *update = NULL;
1705     const char *client_node = NULL;
1706     const char *client_id = NULL;
1707     const char *call_id = NULL;
1708 
1709     if (op->call_options & st_opt_sync_call) {
1710         /* There is no reason to report the timeout for a synchronous call. It
1711          * is impossible to use the reported timeout to do anything when the client
1712          * is blocking for the response.  This update is only important for
1713          * async calls that require a callback to report the results in. */
1714         return;
1715     } else if (!op->request) {
1716         return;
1717     }
1718 
1719     crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
1720     client_node = crm_element_value(op->request, PCMK__XA_ST_CLIENTNODE);
1721     call_id = crm_element_value(op->request, PCMK__XA_ST_CALLID);
1722     client_id = crm_element_value(op->request, PCMK__XA_ST_CLIENTID);
1723     if (!client_node || !call_id || !client_id) {
1724         return;
1725     }
1726 
1727     if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
1728         // Client is connected to this node, so send update directly to them
1729         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
1730         return;
1731     }
1732 
1733     /* The client is connected to another node, relay this update to them */
1734     update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
1735     crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id);
1736     crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id);
1737     crm_xml_add(update, PCMK__XA_ST_CALLID, call_id);
1738     crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout);
1739 
1740     pcmk__cluster_send_message(pcmk__get_node(0, client_node, NULL,
1741                                               pcmk__node_search_cluster_member),
1742                                crm_msg_stonith_ng, update);
1743 
1744     free_xml(update);
1745 
1746     for (iter = op->duplicates; iter != NULL; iter = iter->next) {
1747         remote_fencing_op_t *dup = iter->data;
1748 
1749         crm_trace("Reporting timeout for duplicate %.8s to client %s",
1750                   dup->id, dup->client_name);
1751         report_timeout_period(iter->data, op_timeout);
1752     }
1753 }
1754 
1755 /*!
1756  * \internal
1757  * \brief Advance an operation to the next device in its topology
1758  *
1759  * \param[in,out] op      Fencer operation to advance
1760  * \param[in]     device  ID of device that just completed
1761  * \param[in,out] msg     If not NULL, XML reply of last delegated operation
1762  */
1763 static void
1764 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1765                                  xmlNode *msg)
1766 {
1767     /* Advance to the next device at this topology level, if any */
1768     if (op->devices) {
1769         op->devices = op->devices->next;
1770     }
1771 
1772     /* Handle automatic unfencing if an "on" action was requested */
1773     if ((op->phase == st_phase_requested)
1774         && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) {
1775         /* If the device we just executed was required, it's not anymore */
1776         remove_required_device(op, device);
1777 
1778         /* If there are no more devices at this topology level, run through any
1779          * remaining devices with automatic unfencing
1780          */
1781         if (op->devices == NULL) {
1782             op->devices = op->automatic_list;
1783         }
1784     }
1785 
1786     if ((op->devices == NULL) && (op->phase == st_phase_off)) {
1787         /* We're done with this level and with required devices, but we had
1788          * remapped "reboot" to "off", so start over with "on". If any devices
1789          * need to be turned back on, op->devices will be non-NULL after this.
1790          */
1791         op_phase_on(op);
1792     }
1793 
1794     // This function is only called if the previous device succeeded
1795     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1796 
1797     if (op->devices) {
1798         /* Necessary devices remain, so execute the next one */
1799         crm_trace("Next targeting %s on behalf of %s@%s",
1800                   op->target, op->client_name, op->originator);
1801 
1802         // The requested delay has been applied for the first device
1803         if (op->client_delay > 0) {
1804             op->client_delay = 0;
1805         }
1806 
1807         request_peer_fencing(op, NULL);
1808     } else {
1809         /* We're done with all devices and phases, so finalize operation */
1810         crm_trace("Marking complex fencing op targeting %s as complete",
1811                   op->target);
1812         op->state = st_done;
1813         finalize_op(op, msg, false);
1814     }
1815 }
1816 
1817 static gboolean
1818 check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
1819 {
1820     if (node_does_watchdog_fencing(op->target)) {
1821         guint timeout_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX);
1822 
1823         crm_notice("Waiting %s for %s to self-fence (%s) for "
1824                    "client %s " CRM_XS " id=%.8s",
1825                    pcmk__readable_interval(timeout_ms), op->target, op->action,
1826                    op->client_name, op->id);
1827 
1828         if (op->op_timer_one) {
1829             g_source_remove(op->op_timer_one);
1830         }
1831         op->op_timer_one = g_timeout_add(timeout_ms, remote_op_watchdog_done,
1832                                          op);
1833         return TRUE;
1834     } else {
1835         crm_debug("Skipping fallback to watchdog-fencing as %s is "
1836                  "not in host-list", op->target);
1837     }
1838     return FALSE;
1839 }
1840 
1841 /*!
1842  * \internal
1843  * \brief Ask a peer to execute a fencing operation
1844  *
1845  * \param[in,out] op      Fencing operation to be executed
1846  * \param[in,out] peer    If NULL or topology is in use, choose best peer to
1847  *                        execute the fencing, otherwise use this peer
1848  */
1849 static void
1850 request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
     /* [previous][next][first][last][top][bottom][index][help] */
1851 {
1852     const char *device = NULL;
1853     int timeout;
1854 
1855     CRM_CHECK(op != NULL, return);
1856 
1857     crm_trace("Action %.8s targeting %s for %s is %s",
1858               op->id, op->target, op->client_name,
1859               stonith_op_state_str(op->state));
1860 
1861     if ((op->phase == st_phase_on) && (op->devices != NULL)) {
1862         /* We are in the "on" phase of a remapped topology reboot. If this
1863          * device has pcmk_reboot_action="off", or doesn't support the "on"
1864          * action, skip it.
1865          *
1866          * We can't check device properties at this point because we haven't
1867          * chosen a peer for this stage yet. Instead, we check the local node's
1868          * knowledge about the device. If different versions of the fence agent
1869          * are installed on different nodes, there's a chance this could be
1870          * mistaken, but the worst that could happen is we don't try turning the
1871          * node back on when we should.
1872          */
1873         device = op->devices->data;
1874         if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF,
1875                          pcmk__str_none)) {
1876             crm_info("Not turning %s back on using %s because the device is "
1877                      "configured to stay off (pcmk_reboot_action='off')",
1878                      op->target, device);
1879             advance_topology_device_in_level(op, device, NULL);
1880             return;
1881         }
1882         if (!fenced_device_supports_on(device)) {
1883             crm_info("Not turning %s back on using %s because the agent "
1884                      "doesn't support 'on'", op->target, device);
1885             advance_topology_device_in_level(op, device, NULL);
1886             return;
1887         }
1888     }
1889 
1890     timeout = op->base_timeout;
1891     if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
1892         peer = stonith_choose_peer(op);
1893     }
1894 
1895     if (!op->op_timer_total) {
1896         op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
1897         op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
1898         report_timeout_period(op, op->total_timeout);
1899         crm_info("Total timeout set to %ds for peer's fencing targeting %s for %s"
1900                  CRM_XS "id=%.8s",
1901                  op->total_timeout, op->target, op->client_name, op->id);
1902     }
1903 
1904     if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
1905         /* Ignore the caller's peer preference if topology is in use, because
1906          * that peer might not have access to the required device. With
1907          * topology, stonith_choose_peer() removes the device from further
1908          * consideration, so the timeout must be calculated beforehand.
1909          *
1910          * @TODO Basing the total timeout on the caller's preferred peer (above)
1911          *       is less than ideal.
1912          */
1913         peer = stonith_choose_peer(op);
1914 
1915         device = op->devices->data;
1916         /* Fencing timeout sent to peer takes no delay into account.
1917          * The peer will add a dedicated timer for any delay upon
1918          * schedule_stonith_command().
1919          */
1920         timeout = get_device_timeout(op, peer, device, false);
1921     }
1922 
1923     if (peer) {
1924         int timeout_one = 0;
1925         xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
1926         const crm_node_t *peer_node =
1927             pcmk__get_node(0, peer->host, NULL,
1928                            pcmk__node_search_cluster_member);
1929 
1930         if (op->client_delay > 0) {
1931            /* Take requested fencing delay into account to prevent it from
1932             * eating up the timeout.
1933             */
1934             timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay;
1935         }
1936 
1937         crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id);
1938         crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target);
1939         crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action);
1940         crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator);
1941         crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id);
1942         crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name);
1943         crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout);
1944         crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options);
1945         crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay);
1946 
1947         if (device) {
1948             timeout_one += TIMEOUT_MULTIPLY_FACTOR *
1949                            get_device_timeout(op, peer, device, true);
1950             crm_notice("Requesting that %s perform '%s' action targeting %s "
1951                        "using %s " CRM_XS " for client %s (%ds)",
1952                        peer->host, op->action, op->target, device,
1953                        op->client_name, timeout_one);
1954             crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device);
1955 
1956         } else {
1957             timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
1958             crm_notice("Requesting that %s perform '%s' action targeting %s "
1959                        CRM_XS " for client %s (%ds, %s)",
1960                        peer->host, op->action, op->target, op->client_name,
1961                        timeout_one,
1962                        pcmk__readable_interval(stonith_watchdog_timeout_ms));
1963         }
1964 
1965         op->state = st_exec;
1966         if (op->op_timer_one) {
1967             g_source_remove(op->op_timer_one);
1968             op->op_timer_one = 0;
1969         }
1970 
1971         if (!is_watchdog_fencing(op, device)
1972             || !check_watchdog_fencing_and_wait(op)) {
1973 
1974             /* Some thoughts about self-fencing cases reaching this point:
1975                - Actually check in check_watchdog_fencing_and_wait
1976                  shouldn't fail if STONITH_WATCHDOG_ID is
1977                  chosen as fencing-device and it being present implies
1978                  watchdog-fencing is enabled anyway
1979                - If watchdog-fencing is disabled either in general or for
1980                  a specific target - detected in check_watchdog_fencing_and_wait -
1981                  for some other kind of self-fencing we can't expect
1982                  a success answer but timeout is fine if the node doesn't
1983                  come back in between
1984                - Delicate might be the case where we have watchdog-fencing
1985                  enabled for a node but the watchdog-fencing-device isn't
1986                  explicitly chosen for suicide. Local pe-execution in sbd
1987                  may detect the node as unclean and lead to timely suicide.
1988                  Otherwise the selection of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT
1989                  at least is questionable.
1990              */
1991 
1992             /* coming here we're not waiting for watchdog timeout -
1993                thus engage timer with timout evaluated before */
1994             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
1995         }
1996 
1997         pcmk__cluster_send_message(peer_node, crm_msg_stonith_ng, remote_op);
1998         peer->tried = TRUE;
1999         free_xml(remote_op);
2000         return;
2001 
2002     } else if (op->phase == st_phase_on) {
2003         /* A remapped "on" cannot be executed, but the node was already
2004          * turned off successfully, so ignore the error and continue.
2005          */
2006         crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
2007                  "after successful 'off'", device, op->target);
2008         advance_topology_device_in_level(op, device, NULL);
2009         return;
2010 
2011     } else if (op->owner == FALSE) {
2012         crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
2013                 op->action, op->target, op->client_name);
2014 
2015     } else if (op->query_timer == 0) {
2016         /* We've exhausted all available peers */
2017         crm_info("No remaining peers capable of fencing (%s) %s for client %s "
2018                  CRM_XS " state=%s", op->action, op->target, op->client_name,
2019                  stonith_op_state_str(op->state));
2020         CRM_CHECK(op->state < st_done, return);
2021         finalize_timed_out_op(op, "All nodes failed, or are unable, to "
2022                                   "fence target");
2023 
2024     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
2025         /* if the operation never left the query state,
2026          * but we have all the expected replies, then no devices
2027          * are available to execute the fencing operation. */
2028 
2029         if (is_watchdog_fencing(op, device)
2030             && check_watchdog_fencing_and_wait(op)) {
2031             /* Consider a watchdog fencing targeting an offline node executing
2032              * once it starts waiting for the target to self-fence. So that when
2033              * the query timer pops, remote_op_query_timeout() considers the
2034              * fencing already in progress.
2035              */
2036             op->state = st_exec;
2037             return;
2038         }
2039 
2040         if (op->state == st_query) {
2041             crm_info("No peers (out of %d) have devices capable of fencing "
2042                      "(%s) %s for client %s " CRM_XS " state=%s",
2043                      op->replies, op->action, op->target, op->client_name,
2044                      stonith_op_state_str(op->state));
2045 
2046             pcmk__reset_result(&op->result);
2047             pcmk__set_result(&op->result, CRM_EX_ERROR,
2048                              PCMK_EXEC_NO_FENCE_DEVICE, NULL);
2049         } else {
2050             if (pcmk_is_set(op->call_options, st_opt_topology)) {
2051                 pcmk__reset_result(&op->result);
2052                 pcmk__set_result(&op->result, CRM_EX_ERROR,
2053                                  PCMK_EXEC_NO_FENCE_DEVICE, NULL);
2054             }
2055             /* ... else use existing result from previous failed attempt
2056              * (topology is not in use, and no devices remain to be attempted).
2057              * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
2058              * prevent finalize_op() from setting the correct delegate if
2059              * needed.
2060              */
2061 
2062             crm_info("No peers (out of %d) are capable of fencing (%s) %s "
2063                      "for client %s " CRM_XS " state=%s",
2064                      op->replies, op->action, op->target, op->client_name,
2065                      stonith_op_state_str(op->state));
2066         }
2067 
2068         op->state = st_failed;
2069         finalize_op(op, NULL, false);
2070 
2071     } else {
2072         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
2073                  "for client %s " CRM_XS " id=%.8s",
2074                  op->action, op->target, (device? " using " : ""),
2075                  (device? device : ""), op->client_name, op->id);
2076     }
2077 }
2078 
2079 /*!
2080  * \internal
2081  * \brief Comparison function for sorting query results
2082  *
2083  * \param[in] a  GList item to compare
2084  * \param[in] b  GList item to compare
2085  *
2086  * \return Per the glib documentation, "a negative integer if the first value
2087  *         comes before the second, 0 if they are equal, or a positive integer
2088  *         if the first value comes after the second."
2089  */
2090 static gint
2091 sort_peers(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
2092 {
2093     const peer_device_info_t *peer_a = a;
2094     const peer_device_info_t *peer_b = b;
2095 
2096     return (peer_b->ndevices - peer_a->ndevices);
2097 }
2098 
2099 /*!
2100  * \internal
2101  * \brief Determine if all the devices in the topology are found or not
2102  *
2103  * \param[in] op  Fencing operation with topology to check
2104  */
2105 static gboolean
2106 all_topology_devices_found(const remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
2107 {
2108     GList *device = NULL;
2109     GList *iter = NULL;
2110     device_properties_t *match = NULL;
2111     stonith_topology_t *tp = NULL;
2112     gboolean skip_target = FALSE;
2113     int i;
2114 
2115     tp = find_topology_for_host(op->target);
2116     if (!tp) {
2117         return FALSE;
2118     }
2119     if (pcmk__is_fencing_action(op->action)) {
2120         /* Don't count the devices on the target node if we are killing
2121          * the target node. */
2122         skip_target = TRUE;
2123     }
2124 
2125     for (i = 0; i < ST__LEVEL_COUNT; i++) {
2126         for (device = tp->levels[i]; device; device = device->next) {
2127             match = NULL;
2128             for (iter = op->query_results; iter && !match; iter = iter->next) {
2129                 peer_device_info_t *peer = iter->data;
2130 
2131                 if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
2132                     continue;
2133                 }
2134                 match = find_peer_device(op, peer, device->data, st_device_supports_none);
2135             }
2136             if (!match) {
2137                 return FALSE;
2138             }
2139         }
2140     }
2141 
2142     return TRUE;
2143 }
2144 
2145 /*!
2146  * \internal
2147  * \brief Parse action-specific device properties from XML
2148  *
2149  * \param[in]     xml     XML element containing the properties
2150  * \param[in]     peer    Name of peer that sent XML (for logs)
2151  * \param[in]     device  Device ID (for logs)
2152  * \param[in]     action  Action the properties relate to (for logs)
2153  * \param[in,out] op      Fencing operation that properties are being parsed for
2154  * \param[in]     phase   Phase the properties relate to
2155  * \param[in,out] props   Device properties to update
2156  */
2157 static void
2158 parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
     /* [previous][next][first][last][top][bottom][index][help] */
2159                       const char *action, remote_fencing_op_t *op,
2160                       enum st_remap_phase phase, device_properties_t *props)
2161 {
2162     props->custom_action_timeout[phase] = 0;
2163     crm_element_value_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
2164                           &props->custom_action_timeout[phase]);
2165     if (props->custom_action_timeout[phase]) {
2166         crm_trace("Peer %s with device %s returned %s action timeout %ds",
2167                   peer, device, action, props->custom_action_timeout[phase]);
2168     }
2169 
2170     props->delay_max[phase] = 0;
2171     crm_element_value_int(xml, PCMK__XA_ST_DELAY_MAX, &props->delay_max[phase]);
2172     if (props->delay_max[phase]) {
2173         crm_trace("Peer %s with device %s returned maximum of random delay %ds for %s",
2174                   peer, device, props->delay_max[phase], action);
2175     }
2176 
2177     props->delay_base[phase] = 0;
2178     crm_element_value_int(xml, PCMK__XA_ST_DELAY_BASE,
2179                           &props->delay_base[phase]);
2180     if (props->delay_base[phase]) {
2181         crm_trace("Peer %s with device %s returned base delay %ds for %s",
2182                   peer, device, props->delay_base[phase], action);
2183     }
2184 
2185     /* Handle devices with automatic unfencing */
2186     if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
2187         int required = 0;
2188 
2189         crm_element_value_int(xml, PCMK__XA_ST_REQUIRED, &required);
2190         if (required) {
2191             crm_trace("Peer %s requires device %s to execute for action %s",
2192                       peer, device, action);
2193             add_required_device(op, device);
2194         }
2195     }
2196 
2197     /* If a reboot is remapped to off+on, it's possible that a node is allowed
2198      * to perform one action but not another.
2199      */
2200     if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) {
2201         props->disallowed[phase] = TRUE;
2202         crm_trace("Peer %s is disallowed from executing %s for device %s",
2203                   peer, action, device);
2204     }
2205 }
2206 
2207 /*!
2208  * \internal
2209  * \brief Parse one device's properties from peer's XML query reply
2210  *
2211  * \param[in]     xml       XML node containing device properties
2212  * \param[in,out] op        Operation that query and reply relate to
2213  * \param[in,out] peer      Peer's device information
2214  * \param[in]     device    ID of device being parsed
2215  */
2216 static void
2217 add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
2218                       peer_device_info_t *peer, const char *device)
2219 {
2220     xmlNode *child;
2221     int verified = 0;
2222     device_properties_t *props =
2223         pcmk__assert_alloc(1, sizeof(device_properties_t));
2224     int flags = st_device_supports_on; /* Old nodes that don't set the flag assume they support the on action */
2225 
2226     /* Add a new entry to this peer's devices list */
2227     g_hash_table_insert(peer->devices, pcmk__str_copy(device), props);
2228 
2229     /* Peers with verified (monitored) access will be preferred */
2230     crm_element_value_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified);
2231     if (verified) {
2232         crm_trace("Peer %s has confirmed a verified device %s",
2233                   peer->host, device);
2234         props->verified = TRUE;
2235     }
2236 
2237     crm_element_value_int(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, &flags);
2238     props->device_support_flags = flags;
2239 
2240     /* Parse action-specific device properties */
2241     parse_action_specific(xml, peer->host, device, op_requested_action(op),
2242                           op, st_phase_requested, props);
2243     for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
2244          child = pcmk__xe_next(child)) {
2245         /* Replies for "reboot" operations will include the action-specific
2246          * values for "off" and "on" in child elements, just in case the reboot
2247          * winds up getting remapped.
2248          */
2249         if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) {
2250             parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF,
2251                                   op, st_phase_off, props);
2252 
2253         } else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON,
2254                                 pcmk__str_none)) {
2255             parse_action_specific(child, peer->host, device, PCMK_ACTION_ON,
2256                                   op, st_phase_on, props);
2257         }
2258     }
2259 }
2260 
2261 /*!
2262  * \internal
2263  * \brief Parse a peer's XML query reply and add it to operation's results
2264  *
2265  * \param[in,out] op        Operation that query and reply relate to
2266  * \param[in]     host      Name of peer that sent this reply
2267  * \param[in]     ndevices  Number of devices expected in reply
2268  * \param[in]     xml       XML node containing device list
2269  *
2270  * \return Newly allocated result structure with parsed reply
2271  */
2272 static peer_device_info_t *
2273 add_result(remote_fencing_op_t *op, const char *host, int ndevices,
     /* [previous][next][first][last][top][bottom][index][help] */
2274            const xmlNode *xml)
2275 {
2276     peer_device_info_t *peer = pcmk__assert_alloc(1,
2277                                                   sizeof(peer_device_info_t));
2278     xmlNode *child;
2279 
2280     peer->host = pcmk__str_copy(host);
2281     peer->devices = pcmk__strkey_table(free, free);
2282 
2283     /* Each child element describes one capable device available to the peer */
2284     for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
2285          child = pcmk__xe_next(child)) {
2286         const char *device = pcmk__xe_id(child);
2287 
2288         if (device) {
2289             add_device_properties(child, op, peer, device);
2290         }
2291     }
2292 
2293     peer->ndevices = g_hash_table_size(peer->devices);
2294     CRM_CHECK(ndevices == peer->ndevices,
2295               crm_err("Query claimed to have %d device%s but %d found",
2296                       ndevices, pcmk__plural_s(ndevices), peer->ndevices));
2297 
2298     op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
2299     return peer;
2300 }
2301 
2302 /*!
2303  * \internal
2304  * \brief Handle a peer's reply to our fencing query
2305  *
2306  * Parse a query result from XML and store it in the remote operation
2307  * table, and when enough replies have been received, issue a fencing request.
2308  *
2309  * \param[in] msg  XML reply received
2310  *
2311  * \return pcmk_ok on success, -errno on error
2312  *
2313  * \note See initiate_remote_stonith_op() for how the XML query was initially
2314  *       formed, and stonith_query() for how the peer formed its XML reply.
2315  */
2316 int
2317 process_remote_stonith_query(xmlNode *msg)
     /* [previous][next][first][last][top][bottom][index][help] */
2318 {
2319     int ndevices = 0;
2320     gboolean host_is_target = FALSE;
2321     gboolean have_all_replies = FALSE;
2322     const char *id = NULL;
2323     const char *host = NULL;
2324     remote_fencing_op_t *op = NULL;
2325     peer_device_info_t *peer = NULL;
2326     uint32_t replies_expected;
2327     xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
2328 
2329     CRM_CHECK(dev != NULL, return -EPROTO);
2330 
2331     id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
2332     CRM_CHECK(id != NULL, return -EPROTO);
2333 
2334     dev = get_xpath_object("//@" PCMK__XA_ST_AVAILABLE_DEVICES, msg, LOG_ERR);
2335     CRM_CHECK(dev != NULL, return -EPROTO);
2336     crm_element_value_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices);
2337 
2338     op = g_hash_table_lookup(stonith_remote_op_list, id);
2339     if (op == NULL) {
2340         crm_debug("Received query reply for unknown or expired operation %s",
2341                   id);
2342         return -EOPNOTSUPP;
2343     }
2344 
2345     replies_expected = fencing_active_peers();
2346     if (op->replies_expected < replies_expected) {
2347         replies_expected = op->replies_expected;
2348     }
2349     if ((++op->replies >= replies_expected) && (op->state == st_query)) {
2350         have_all_replies = TRUE;
2351     }
2352     host = crm_element_value(msg, PCMK__XA_SRC);
2353     host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);
2354 
2355     crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
2356              op->replies, replies_expected, host,
2357              op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
2358     if (ndevices > 0) {
2359         peer = add_result(op, host, ndevices, dev);
2360     }
2361 
2362     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2363 
2364     if (pcmk_is_set(op->call_options, st_opt_topology)) {
2365         /* If we start the fencing before all the topology results are in,
2366          * it is possible fencing levels will be skipped because of the missing
2367          * query results. */
2368         if (op->state == st_query && all_topology_devices_found(op)) {
2369             /* All the query results are in for the topology, start the fencing ops. */
2370             crm_trace("All topology devices found");
2371             request_peer_fencing(op, peer);
2372 
2373         } else if (have_all_replies) {
2374             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
2375                      replies_expected, op->replies);
2376             request_peer_fencing(op, NULL);
2377         }
2378 
2379     } else if (op->state == st_query) {
2380         int nverified = count_peer_devices(op, peer, TRUE,
2381                                            fenced_support_flag(op->action));
2382 
2383         /* We have a result for a non-topology fencing op that looks promising,
2384          * go ahead and start fencing before query timeout */
2385         if ((peer != NULL) && !host_is_target && nverified) {
2386             /* we have a verified device living on a peer that is not the target */
2387             crm_trace("Found %d verified device%s",
2388                       nverified, pcmk__plural_s(nverified));
2389             request_peer_fencing(op, peer);
2390 
2391         } else if (have_all_replies) {
2392             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
2393                      replies_expected, op->replies);
2394             request_peer_fencing(op, NULL);
2395 
2396         } else {
2397             crm_trace("Waiting for more peer results before launching fencing operation");
2398         }
2399 
2400     } else if ((peer != NULL) && (op->state == st_done)) {
2401         crm_info("Discarding query result from %s (%d device%s): "
2402                  "Operation is %s", peer->host,
2403                  peer->ndevices, pcmk__plural_s(peer->ndevices),
2404                  stonith_op_state_str(op->state));
2405     }
2406 
2407     return pcmk_ok;
2408 }
2409 
2410 /*!
2411  * \internal
2412  * \brief Handle a peer's reply to a fencing request
2413  *
2414  * Parse a fencing reply from XML, and either finalize the operation
2415  * or attempt another device as appropriate.
2416  *
2417  * \param[in] msg  XML reply received
2418  */
2419 void
2420 fenced_process_fencing_reply(xmlNode *msg)
     /* [previous][next][first][last][top][bottom][index][help] */
2421 {
2422     const char *id = NULL;
2423     const char *device = NULL;
2424     remote_fencing_op_t *op = NULL;
2425     xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);
2426     pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
2427 
2428     CRM_CHECK(dev != NULL, return);
2429 
2430     id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
2431     CRM_CHECK(id != NULL, return);
2432 
2433     dev = stonith__find_xe_with_result(msg);
2434     CRM_CHECK(dev != NULL, return);
2435 
2436     stonith__xe_get_result(dev, &result);
2437 
2438     device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);
2439 
2440     if (stonith_remote_op_list) {
2441         op = g_hash_table_lookup(stonith_remote_op_list, id);
2442     }
2443 
2444     if ((op == NULL) && pcmk__result_ok(&result)) {
2445         /* Record successful fencing operations */
2446         const char *client_id = crm_element_value(dev, PCMK__XA_ST_CLIENTID);
2447 
2448         op = create_remote_stonith_op(client_id, dev, TRUE);
2449     }
2450 
2451     if (op == NULL) {
2452         /* Could be for an event that began before we started */
2453         /* TODO: Record the op for later querying */
2454         crm_info("Received peer result of unknown or expired operation %s", id);
2455         pcmk__reset_result(&result);
2456         return;
2457     }
2458 
2459     pcmk__reset_result(&op->result);
2460     op->result = result; // The operation takes ownership of the result
2461 
2462     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
2463         crm_err("Received outdated reply for device %s (instead of %s) to "
2464                 "fence (%s) %s. Operation already timed out at peer level.",
2465                 device, (const char *) op->devices->data, op->action, op->target);
2466         return;
2467     }
2468 
2469     if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT),
2470                      PCMK__VALUE_BROADCAST, pcmk__str_none)) {
2471 
2472         if (pcmk__result_ok(&op->result)) {
2473             op->state = st_done;
2474         } else {
2475             op->state = st_failed;
2476         }
2477         finalize_op(op, msg, false);
2478         return;
2479 
2480     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
2481         /* If this isn't a remote level broadcast, and we are not the
2482          * originator of the operation, we should not be receiving this msg. */
2483         crm_err("Received non-broadcast fencing result for operation %.8s "
2484                 "we do not own (device %s targeting %s)",
2485                 op->id, device, op->target);
2486         return;
2487     }
2488 
2489     if (pcmk_is_set(op->call_options, st_opt_topology)) {
2490         const char *device = NULL;
2491         const char *reason = op->result.exit_reason;
2492 
2493         /* We own the op, and it is complete. broadcast the result to all nodes
2494          * and notify our local clients. */
2495         if (op->state == st_done) {
2496             finalize_op(op, msg, false);
2497             return;
2498         }
2499 
2500         device = crm_element_value(msg, PCMK__XA_ST_DEVICE_ID);
2501 
2502         if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
2503             /* A remapped "on" failed, but the node was already turned off
2504              * successfully, so ignore the error and continue.
2505              */
2506             crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
2507                      "after successful 'off'",
2508                      device, pcmk_exec_status_str(op->result.execution_status),
2509                      (reason == NULL)? "" : ": ",
2510                      (reason == NULL)? "" : reason,
2511                      op->target);
2512             pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2513         } else {
2514             crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: "
2515                        "%s%s%s%s",
2516                        op->action, op->target,
2517                        ((device == NULL)? "" : " using "),
2518                        ((device == NULL)? "" : device),
2519                        op->client_name,
2520                        op->originator,
2521                        pcmk_exec_status_str(op->result.execution_status),
2522                        (reason == NULL)? "" : " (",
2523                        (reason == NULL)? "" : reason,
2524                        (reason == NULL)? "" : ")");
2525         }
2526 
2527         if (pcmk__result_ok(&op->result)) {
2528             /* An operation completed successfully. Try another device if
2529              * necessary, otherwise mark the operation as done. */
2530             advance_topology_device_in_level(op, device, msg);
2531             return;
2532         } else {
2533             /* This device failed, time to try another topology level. If no other
2534              * levels are available, mark this operation as failed and report results. */
2535             if (advance_topology_level(op, false) != pcmk_rc_ok) {
2536                 op->state = st_failed;
2537                 finalize_op(op, msg, false);
2538                 return;
2539             }
2540         }
2541 
2542     } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
2543         op->state = st_done;
2544         finalize_op(op, msg, false);
2545         return;
2546 
2547     } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
2548                && (op->devices == NULL)) {
2549         /* If the operation timed out don't bother retrying other peers. */
2550         op->state = st_failed;
2551         finalize_op(op, msg, false);
2552         return;
2553 
2554     } else {
2555         /* fall-through and attempt other fencing action using another peer */
2556     }
2557 
2558     /* Retry on failure */
2559     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
2560               op->target, op->originator, op->client_name,
2561               pcmk_exec_status_str(op->result.execution_status));
2562     request_peer_fencing(op, NULL);
2563 }
2564 
2565 gboolean
2566 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
2567 {
2568     GHashTableIter iter;
2569     time_t now = time(NULL);
2570     remote_fencing_op_t *rop = NULL;
2571 
2572     if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
2573         action == NULL) {
2574         return FALSE;
2575     }
2576 
2577     g_hash_table_iter_init(&iter, stonith_remote_op_list);
2578     while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
2579         if (strcmp(rop->target, target) != 0) {
2580             continue;
2581         } else if (rop->state != st_done) {
2582             continue;
2583         /* We don't have to worry about remapped reboots here
2584          * because if state is done, any remapping has been undone
2585          */
2586         } else if (strcmp(rop->action, action) != 0) {
2587             continue;
2588         } else if ((rop->completed + tolerance) < now) {
2589             continue;
2590         }
2591 
2592         crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
2593                    target, action, tolerance, rop->delegate, rop->originator);
2594         return TRUE;
2595     }
2596     return FALSE;
2597 }

/* [previous][next][first][last][top][bottom][index][help] */