root/daemons/fenced/fenced_remote.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sort_strings
  2. free_remote_query
  3. free_stonith_remote_op_list
  4. count_peer_device
  5. count_peer_devices
  6. find_peer_device
  7. grab_peer_device
  8. clear_remote_op_timers
  9. free_remote_op
  10. init_stonith_remote_op_hash_table
  11. op_requested_action
  12. op_phase_off
  13. op_phase_on
  14. undo_op_remap
  15. fencing_result2xml
  16. fenced_broadcast_op_result
  17. handle_local_reply_and_notify
  18. finalize_op_duplicates
  19. delegate_from_xml
  20. finalize_op
  21. remote_op_watchdog_done
  22. remote_op_timeout_one
  23. finalize_timed_out_op
  24. remote_op_timeout
  25. remote_op_query_timeout
  26. topology_is_empty
  27. add_required_device
  28. remove_required_device
  29. set_op_device_list
  30. topology_matches
  31. find_topology_for_host
  32. advance_topology_level
  33. merge_duplicates
  34. fencing_active_peers
  35. fenced_handle_manual_confirmation
  36. create_remote_stonith_op
  37. initiate_remote_stonith_op
  38. find_best_peer
  39. stonith_choose_peer
  40. get_device_timeout
  41. add_device_timeout
  42. get_peer_timeout
  43. get_op_total_timeout
  44. report_timeout_period
  45. advance_topology_device_in_level
  46. check_watchdog_fencing_and_wait
  47. request_peer_fencing
  48. sort_peers
  49. all_topology_devices_found
  50. parse_action_specific
  51. add_device_properties
  52. add_result
  53. process_remote_stonith_query
  54. fenced_process_fencing_reply
  55. stonith_check_fence_tolerance

   1 /*
   2  * Copyright 2009-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <stdio.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 #include <sys/stat.h>
  17 #include <unistd.h>
  18 #include <sys/utsname.h>
  19 
  20 #include <stdlib.h>
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <ctype.h>
  24 #include <regex.h>
  25 
  26 #include <crm/crm.h>
  27 #include <crm/msg_xml.h>
  28 #include <crm/common/ipc.h>
  29 #include <crm/common/ipc_internal.h>
  30 #include <crm/cluster/internal.h>
  31 
  32 #include <crm/stonith-ng.h>
  33 #include <crm/fencing/internal.h>
  34 #include <crm/common/xml.h>
  35 #include <crm/common/xml_internal.h>
  36 
  37 #include <crm/common/util.h>
  38 #include <pacemaker-fenced.h>
  39 
     /* Scaling factor for timeouts. Its point of use is outside this excerpt;
      * NOTE(review): presumably pads computed fencing timeouts by 20% -- confirm
      * against the timeout helpers later in the file.
      */
  40 #define TIMEOUT_MULTIPLY_FACTOR 1.2
  41 
  42 /* When one fencer queries its peers for devices able to handle a fencing
  43  * request, each peer will reply with a list of such devices available to it.
  44  * Each reply will be parsed into a peer_device_info_t, with each device's
  45  * information kept in a device_properties_t.
  46  */
  47 
     /* Properties of one fence device as known for a single peer. The per-phase
      * arrays are indexed with the operation's phase (op->phase) by the helpers
      * in this file.
      */
  48 typedef struct device_properties_s {
  49     /* Whether access to this device has been verified */
  50     gboolean verified;
  51 
  52     /* The remaining members are indexed by the operation's "phase" */
  53 
  54     /* Whether this device has been executed in each phase
      * (set to TRUE by grab_peer_device() for the current phase)
      */
  55     gboolean executed[st_phase_max];
  56     /* Whether this device is disallowed from executing in each phase
      * (find_peer_device() treats a disallowed device as not found)
      */
  57     gboolean disallowed[st_phase_max];
  58     /* Action-specific timeout for each phase */
  59     int custom_action_timeout[st_phase_max];
  60     /* Action-specific maximum random delay for each phase */
  61     int delay_max[st_phase_max];
  62     /* Action-specific base delay for each phase */
  63     int delay_base[st_phase_max];
  64 } device_properties_t;
  65 
     /* Query result for one peer; released by free_remote_query(), which
      * destroys the devices table and frees host.
      */
  66 typedef struct {
  67     /* Name of peer that sent this result */
  68     char *host;
  69     /* Only try peers for non-topology based operations once */
  70     gboolean tried;
  71     /* Number of entries in the devices table */
  72     int ndevices;
  73     /* Devices available to this host that are capable of fencing the target.
      * Looked up by device ID in find_peer_device(); values are
      * device_properties_t.
      */
  74     GHashTable *devices;
  75 } peer_device_info_t;
  76 
     /* Table of remote fencing operations; destroyed and reset to NULL by
      * free_stonith_remote_op_list().
      */
  77 GHashTable *stonith_remote_op_list = NULL;
  78 
     /* Defined outside this file */
  79 extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data,
  80                                   int call_options);
  81 
     /* Forward declarations for static functions used before their definitions */
  82 static void request_peer_fencing(remote_fencing_op_t *op,
  83                                  peer_device_info_t *peer);
  84 static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
  85 static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
  86 static int get_op_total_timeout(const remote_fencing_op_t *op,
  87                                 const peer_device_info_t *chosen_peer);
  88 
  89 static gint
  90 sort_strings(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
  91 {
  92     return strcmp(a, b);
  93 }
  94 
  95 static void
  96 free_remote_query(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
  97 {
  98     if (data != NULL) {
  99         peer_device_info_t *peer = data;
 100 
 101         g_hash_table_destroy(peer->devices);
 102         free(peer->host);
 103         free(peer);
 104     }
 105 }
 106 
 107 void
 108 free_stonith_remote_op_list()
     /* [previous][next][first][last][top][bottom][index][help] */
 109 {
 110     if (stonith_remote_op_list != NULL) {
 111         g_hash_table_destroy(stonith_remote_op_list);
 112         stonith_remote_op_list = NULL;
 113     }
 114 }
 115 
     /* Accumulator handed to count_peer_device() through g_hash_table_foreach() */
 116 struct peer_count_data {
     /* Operation whose current phase selects which "executed" flag is tested */
 117     const remote_fencing_op_t *op;
     /* If true, only devices flagged as verified are counted */
 118     gboolean verified_only;
     /* Running total of matching devices */
 119     int count;
 120 };
 121 
 122 /*!
 123  * \internal
 124  * \brief Increment a counter if a device has not been executed yet
 125  *
 126  * \param[in] key        Device ID (ignored)
 127  * \param[in] value      Device properties
 128  * \param[in] user_data  Peer count data
 129  */
 130 static void
 131 count_peer_device(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 132 {
 133     device_properties_t *props = (device_properties_t*)value;
 134     struct peer_count_data *data = user_data;
 135 
 136     if (!props->executed[data->op->phase]
 137         && (!data->verified_only || props->verified)) {
 138         ++(data->count);
 139     }
 140 }
 141 
 142 /*!
 143  * \internal
 144  * \brief Check the number of available devices in a peer's query results
 145  *
 146  * \param[in] op             Operation that results are for
 147  * \param[in] peer           Peer to count
 148  * \param[in] verified_only  Whether to count only verified devices
 149  *
 150  * \return Number of devices available to peer that were not already executed
 151  */
 152 static int
 153 count_peer_devices(const remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
 154                    const peer_device_info_t *peer, gboolean verified_only)
 155 {
 156     struct peer_count_data data;
 157 
 158     data.op = op;
 159     data.verified_only = verified_only;
 160     data.count = 0;
 161     if (peer) {
 162         g_hash_table_foreach(peer->devices, count_peer_device, &data);
 163     }
 164     return data.count;
 165 }
 166 
 167 /*!
 168  * \internal
 169  * \brief Search for a device in a query result
 170  *
 171  * \param[in] op      Operation that result is for
 172  * \param[in] peer    Query result for a peer
 173  * \param[in] device  Device ID to search for
 174  *
 175  * \return Device properties if found, NULL otherwise
 176  */
 177 static device_properties_t *
 178 find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
     /* [previous][next][first][last][top][bottom][index][help] */
 179                  const char *device)
 180 {
 181     device_properties_t *props = g_hash_table_lookup(peer->devices, device);
 182 
 183     return (props && !props->executed[op->phase]
 184            && !props->disallowed[op->phase])? props : NULL;
 185 }
 186 
 187 /*!
 188  * \internal
 189  * \brief Find a device in a peer's device list and mark it as executed
 190  *
 191  * \param[in]     op                     Operation that peer result is for
 192  * \param[in,out] peer                   Peer with results to search
 193  * \param[in]     device                 ID of device to mark as done
 194  * \param[in]     verified_devices_only  Only consider verified devices
 195  *
 196  * \return TRUE if device was found and marked, FALSE otherwise
 197  */
 198 static gboolean
 199 grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
     /* [previous][next][first][last][top][bottom][index][help] */
 200                  const char *device, gboolean verified_devices_only)
 201 {
 202     device_properties_t *props = find_peer_device(op, peer, device);
 203 
 204     if ((props == NULL) || (verified_devices_only && !props->verified)) {
 205         return FALSE;
 206     }
 207 
 208     crm_trace("Removing %s from %s (%d remaining)",
 209               device, peer->host, count_peer_devices(op, peer, FALSE));
 210     props->executed[op->phase] = TRUE;
 211     return TRUE;
 212 }
 213 
 214 static void
 215 clear_remote_op_timers(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 216 {
 217     if (op->query_timer) {
 218         g_source_remove(op->query_timer);
 219         op->query_timer = 0;
 220     }
 221     if (op->op_timer_total) {
 222         g_source_remove(op->op_timer_total);
 223         op->op_timer_total = 0;
 224     }
 225     if (op->op_timer_one) {
 226         g_source_remove(op->op_timer_one);
 227         op->op_timer_one = 0;
 228     }
 229 }
 230 
 231 static void
 232 free_remote_op(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 233 {
 234     remote_fencing_op_t *op = data;
 235 
 236     crm_log_xml_debug(op->request, "Destroying");
 237 
 238     clear_remote_op_timers(op);
 239 
 240     free(op->id);
 241     free(op->action);
 242     free(op->delegate);
 243     free(op->target);
 244     free(op->client_id);
 245     free(op->client_name);
 246     free(op->originator);
 247 
 248     if (op->query_results) {
 249         g_list_free_full(op->query_results, free_remote_query);
 250     }
 251     if (op->request) {
 252         free_xml(op->request);
 253         op->request = NULL;
 254     }
 255     if (op->devices_list) {
 256         g_list_free_full(op->devices_list, free);
 257         op->devices_list = NULL;
 258     }
 259     g_list_free_full(op->automatic_list, free);
 260     g_list_free(op->duplicates);
 261 
 262     pcmk__reset_result(&op->result);
 263     free(op);
 264 }
 265 
 266 void
 267 init_stonith_remote_op_hash_table(GHashTable **table)
     /* [previous][next][first][last][top][bottom][index][help] */
 268 {
 269     if (*table == NULL) {
 270         *table = pcmk__strkey_table(NULL, free_remote_op);
 271     }
 272 }
 273 
 274 /*!
 275  * \internal
 276  * \brief Return an operation's originally requested action (before any remap)
 277  *
 278  * \param[in] op  Operation to check
 279  *
 280  * \return Operation's original action
 281  */
 282 static const char *
 283 op_requested_action(const remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 284 {
 285     return ((op->phase > st_phase_requested)? "reboot" : op->action);
 286 }
 287 
 288 /*!
 289  * \internal
 290  * \brief Remap a "reboot" operation to the "off" phase
 291  *
 292  * \param[in,out] op      Operation to remap
 293  */
 294 static void
 295 op_phase_off(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 296 {
 297     crm_info("Remapping multiple-device reboot targeting %s to 'off' "
 298              CRM_XS " id=%.8s", op->target, op->id);
 299     op->phase = st_phase_off;
 300 
 301     /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
 302      * memory allocation at each phase.
 303      */
 304     strcpy(op->action, "off");
 305 }
 306 
 307 /*!
 308  * \internal
 309  * \brief Advance a remapped reboot operation to the "on" phase
 310  *
 311  * \param[in,out] op  Operation to remap
 312  */
 313 static void
 314 op_phase_on(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 315 {
 316     GList *iter = NULL;
 317 
 318     crm_info("Remapped 'off' targeting %s complete, "
 319              "remapping to 'on' for %s " CRM_XS " id=%.8s",
 320              op->target, op->client_name, op->id);
 321     op->phase = st_phase_on;
 322     strcpy(op->action, "on");
 323 
 324     /* Skip devices with automatic unfencing, because the cluster will handle it
 325      * when the node rejoins.
 326      */
 327     for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
 328         GList *match = g_list_find_custom(op->devices_list, iter->data,
 329                                             sort_strings);
 330 
 331         if (match) {
 332             op->devices_list = g_list_remove(op->devices_list, match->data);
 333         }
 334     }
 335     g_list_free_full(op->automatic_list, free);
 336     op->automatic_list = NULL;
 337 
 338     /* Rewind device list pointer */
 339     op->devices = op->devices_list;
 340 }
 341 
 342 /*!
 343  * \internal
 344  * \brief Reset a remapped reboot operation
 345  *
 346  * \param[in,out] op  Operation to reset
 347  */
 348 static void
 349 undo_op_remap(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 350 {
 351     if (op->phase > 0) {
 352         crm_info("Undoing remap of reboot targeting %s for %s "
 353                  CRM_XS " id=%.8s", op->target, op->client_name, op->id);
 354         op->phase = st_phase_requested;
 355         strcpy(op->action, "reboot");
 356     }
 357 }
 358 
 359 /*!
 360  * \internal
 361  * \brief Create notification data XML for a fencing operation result
 362  *
 363  * \param[in] op      Fencer operation that completed
 364  *
 365  * \return Newly created XML to add as notification data
 366  * \note The caller is responsible for freeing the result.
 367  */
 368 static xmlNode *
 369 fencing_result2xml(remote_fencing_op_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 370 {
 371     xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
 372 
 373     crm_xml_add_int(notify_data, "state", op->state);
 374     crm_xml_add(notify_data, F_STONITH_TARGET, op->target);
 375     crm_xml_add(notify_data, F_STONITH_ACTION, op->action);
 376     crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate);
 377     crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id);
 378     crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator);
 379     crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
 380     crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
 381 
 382     stonith__xe_set_result(notify_data, &op->result);
 383     return notify_data;
 384 }
 385 
 386 /*!
 387  * \internal
 388  * \brief Broadcast a fence result notification to all CPG peers
 389  *
 390  * \param[in] op         Fencer operation that completed
 391  * \param[in] op_merged  Whether this operation is a duplicate of another
 392  */
 393 void
 394 fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
     /* [previous][next][first][last][top][bottom][index][help] */
 395 {
 396     static int count = 0;
 397     xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
 398     xmlNode *notify_data = fencing_result2xml(op);
 399 
 400     count++;
 401     crm_trace("Broadcasting result to peers");
 402     crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY);
 403     crm_xml_add(bcast, F_SUBTYPE, "broadcast");
 404     crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY);
 405     crm_xml_add_int(bcast, "count", count);
 406 
 407     if (op_merged) {
 408         pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true);
 409     }
 410 
 411     add_message_xml(bcast, F_STONITH_CALLDATA, notify_data);
 412     send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE);
 413     free_xml(notify_data);
 414     free_xml(bcast);
 415 
 416     return;
 417 }
 418 
 419 /*!
 420  * \internal
 421  * \brief Reply to a local request originator and notify all subscribed clients
 422  *
 423  * \param[in] op         Fencer operation that completed
 424  * \param[in] data       Top-level XML to add notification to
 425  */
 426 static void
 427 handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
     /* [previous][next][first][last][top][bottom][index][help] */
 428 {
 429     xmlNode *notify_data = NULL;
 430     xmlNode *reply = NULL;
 431     pcmk__client_t *client = NULL;
 432 
 433     if (op->notify_sent == TRUE) {
 434         /* nothing to do */
 435         return;
 436     }
 437 
 438     /* Do notification with a clean data object */
 439     crm_xml_add_int(data, "state", op->state);
 440     crm_xml_add(data, F_STONITH_TARGET, op->target);
 441     crm_xml_add(data, F_STONITH_OPERATION, op->action);
 442 
 443     reply = fenced_construct_reply(op->request, data, &op->result);
 444     crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
 445 
 446     /* Send fencing OP reply to local client that initiated fencing */
 447     client = pcmk__find_client_by_id(op->client_id);
 448     if (client == NULL) {
 449         crm_trace("Skipping reply to %s: no longer a client", op->client_id);
 450     } else {
 451         do_local_reply(reply, client, op->call_options);
 452     }
 453 
 454     /* bcast to all local clients that the fencing operation happend */
 455     notify_data = fencing_result2xml(op);
 456     fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
 457     free_xml(notify_data);
 458     fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
 459 
 460     /* mark this op as having notify's already sent */
 461     op->notify_sent = TRUE;
 462     free_xml(reply);
 463 }
 464 
 465 /*!
 466  * \internal
 467  * \brief Finalize all duplicates of a given fencer operation
 468  *
 469  * \param[in] op         Fencer operation that completed
 470  * \param[in] data       Top-level XML to add notification to
 471  */
 472 static void
 473 finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
     /* [previous][next][first][last][top][bottom][index][help] */
 474 {
 475     for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
 476         remote_fencing_op_t *other = iter->data;
 477 
 478         if (other->state == st_duplicate) {
 479             other->state = op->state;
 480             crm_debug("Performing duplicate notification for %s@%s: %s "
 481                       CRM_XS " id=%.8s",
 482                       other->client_name, other->originator,
 483                       pcmk_exec_status_str(op->result.execution_status),
 484                       other->id);
 485             pcmk__copy_result(&op->result, &other->result);
 486             finalize_op(other, data, true);
 487 
 488         } else {
 489             // Possible if (for example) it timed out already
 490             crm_err("Skipping duplicate notification for %s@%s "
 491                     CRM_XS " state=%s id=%.8s",
 492                     other->client_name, other->originator,
 493                     stonith_op_state_str(other->state), other->id);
 494         }
 495     }
 496 }
 497 
 498 static char *
 499 delegate_from_xml(xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
 500 {
 501     xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER);
 502 
 503     if (match == NULL) {
 504         return crm_element_value_copy(xml, F_ORIG);
 505     } else {
 506         return crm_element_value_copy(match, F_STONITH_DELEGATE);
 507     }
 508 }
 509 
 510 /*!
 511  * \internal
 512  * \brief Finalize a peer fencing operation
 513  *
 514  * Clean up after a fencing operation completes. This function has two code
 515  * paths: the executioner uses it to broadcast the result to CPG peers, and then
 516  * each peer (including the executioner) uses it to process that broadcast and
 517  * notify its IPC clients of the result.
 518  *
 519  * \param[in] op      Fencer operation that completed
 520  * \param[in] data    If not NULL, XML reply of last delegated fencing operation
 521  * \param[in] dup     Whether this operation is a duplicate of another
 522  *                    (in which case, do not broadcast the result)
 523  *
 524  *  \note The operation result should be set before calling this function.
 525  */
 526 static void
 527 finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
     /* [previous][next][first][last][top][bottom][index][help] */
 528 {
 529     int level = LOG_ERR;
 530     const char *subt = NULL;
 531     xmlNode *local_data = NULL;
 532     gboolean op_merged = FALSE;
 533 
 534     CRM_CHECK((op != NULL), return);
 535 
 536     if (op->notify_sent) {
 537         // Most likely, this is a timed-out action that eventually completed
 538         crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
 539                    "Result arrived too late " CRM_XS " id=%.8s",
 540                    op->action, (op->target? " targeting " : ""),
 541                    (op->target? op->target : ""),
 542                    (op->delegate? op->delegate : "unknown node"),
 543                    op->client_name, op->originator,
 544                    (op_merged? " (merged)" : ""),
 545                    op->id);
 546         return;
 547     }
 548 
 549     set_fencing_completed(op);
 550     clear_remote_op_timers(op);
 551     undo_op_remap(op);
 552 
 553     if (data == NULL) {
 554         data = create_xml_node(NULL, "remote-op");
 555         local_data = data;
 556 
 557     } else if (op->delegate == NULL) {
 558         switch (op->result.execution_status) {
 559             case PCMK_EXEC_NO_FENCE_DEVICE:
 560                 break;
 561             case PCMK_EXEC_INVALID:
 562                 if (op->result.exit_status == CRM_EX_EXPIRED) {
 563                     break;
 564                 }
 565                 // else fall through
 566             default:
 567                 op->delegate = delegate_from_xml(data);
 568                 break;
 569         }
 570     }
 571 
 572     if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) {
 573         op_merged = true;
 574     }
 575 
 576     /* Tell everyone the operation is done, we will continue
 577      * with doing the local notifications once we receive
 578      * the broadcast back. */
 579     subt = crm_element_value(data, F_SUBTYPE);
 580     if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
 581         /* Defer notification until the bcast message arrives */
 582         fenced_broadcast_op_result(op, op_merged);
 583         free_xml(local_data);
 584         return;
 585     }
 586 
 587     if (pcmk__result_ok(&op->result) || dup
 588         || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
 589         level = LOG_NOTICE;
 590     }
 591     do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
 592                CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""),
 593                (op->target? op->target : ""),
 594                (op->delegate? op->delegate : "unknown node"),
 595                op->client_name, op->originator,
 596                (op_merged? " (merged)" : ""),
 597                crm_exit_str(op->result.exit_status),
 598                pcmk_exec_status_str(op->result.execution_status),
 599                ((op->result.exit_reason == NULL)? "" : ": "),
 600                ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
 601                op->id);
 602 
 603     handle_local_reply_and_notify(op, data);
 604 
 605     if (!dup) {
 606         finalize_op_duplicates(op, data);
 607     }
 608 
 609     /* Free non-essential parts of the record
 610      * Keep the record around so we can query the history
 611      */
 612     if (op->query_results) {
 613         g_list_free_full(op->query_results, free_remote_query);
 614         op->query_results = NULL;
 615     }
 616     if (op->request) {
 617         free_xml(op->request);
 618         op->request = NULL;
 619     }
 620 
 621     free_xml(local_data);
 622 }
 623 
 624 /*!
 625  * \internal
 626  * \brief Finalize a watchdog fencer op after the waiting time expires
 627  *
 628  * \param[in] userdata  Fencer operation that completed
 629  *
 630  * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 631  */
 632 static gboolean
 633 remote_op_watchdog_done(gpointer userdata)
     /* [previous][next][first][last][top][bottom][index][help] */
 634 {
 635     remote_fencing_op_t *op = userdata;
 636 
 637     op->op_timer_one = 0;
 638 
 639     crm_notice("Self-fencing (%s) by %s for %s assumed complete "
 640                CRM_XS " id=%.8s",
 641                op->action, op->target, op->client_name, op->id);
 642     op->state = st_done;
 643     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 644     finalize_op(op, NULL, false);
 645     return G_SOURCE_REMOVE;
 646 }
 647 
 648 static gboolean
 649 remote_op_timeout_one(gpointer userdata)
     /* [previous][next][first][last][top][bottom][index][help] */
 650 {
 651     remote_fencing_op_t *op = userdata;
 652 
 653     op->op_timer_one = 0;
 654 
 655     crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
 656                " id=%.8s", op->action, op->target, op->client_name, op->id);
 657     pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
 658                      "Peer did not return fence result within timeout");
 659 
 660     // Try another device, if appropriate
 661     request_peer_fencing(op, NULL);
 662     return FALSE;
 663 }
 664 
 665 /*!
 666  * \internal
 667  * \brief Finalize a remote fencer operation that timed out
 668  *
 669  * \param[in] op      Fencer operation that timed out
 670  * \param[in] reason  Readable description of what step timed out
 671  */
 672 static void
 673 finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
     /* [previous][next][first][last][top][bottom][index][help] */
 674 {
 675     op->op_timer_total = 0;
 676 
 677     crm_debug("Action '%s' targeting %s for client %s timed out "
 678               CRM_XS " id=%.8s",
 679               op->action, op->target, op->client_name, op->id);
 680 
 681     if (op->phase == st_phase_on) {
 682         /* A remapped reboot operation timed out in the "on" phase, but the
 683          * "off" phase completed successfully, so quit trying any further
 684          * devices, and return success.
 685          */
 686         op->state = st_done;
 687         pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
 688     } else {
 689         op->state = st_failed;
 690         pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
 691     }
 692     finalize_op(op, NULL, false);
 693 }
 694 
 695 /*!
 696  * \internal
 697  * \brief Finalize a remote fencer operation that timed out
 698  *
 699  * \param[in] userdata  Fencer operation that timed out
 700  *
 701  * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 702  */
 703 static gboolean
 704 remote_op_timeout(gpointer userdata)
     /* [previous][next][first][last][top][bottom][index][help] */
 705 {
 706     remote_fencing_op_t *op = userdata;
 707 
 708     if (op->state == st_done) {
 709         crm_debug("Action '%s' targeting %s for client %s already completed "
 710                   CRM_XS " id=%.8s",
 711                   op->action, op->target, op->client_name, op->id);
 712     } else {
 713         finalize_timed_out_op(userdata, "Fencing did not complete within a "
 714                                         "total timeout based on the "
 715                                         "configured timeout and retries for "
 716                                         "any devices attempted");
 717     }
 718     return G_SOURCE_REMOVE;
 719 }
 720 
 721 static gboolean
 722 remote_op_query_timeout(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 723 {
 724     remote_fencing_op_t *op = data;
 725 
 726     op->query_timer = 0;
 727     if (op->state == st_done) {
 728         crm_debug("Operation %.8s targeting %s already completed",
 729                   op->id, op->target);
 730     } else if (op->state == st_exec) {
 731         crm_debug("Operation %.8s targeting %s already in progress",
 732                   op->id, op->target);
 733     } else if (op->query_results) {
 734         // Query succeeded, so attempt the actual fencing
 735         crm_debug("Query %.8s targeting %s complete (state=%s)",
 736                   op->id, op->target, stonith_op_state_str(op->state));
 737         request_peer_fencing(op, NULL);
 738     } else {
 739         crm_debug("Query %.8s targeting %s timed out (state=%s)",
 740                   op->id, op->target, stonith_op_state_str(op->state));
 741         if (op->op_timer_total) {
 742             g_source_remove(op->op_timer_total);
 743             op->op_timer_total = 0;
 744         }
 745         finalize_timed_out_op(op, "No capable peers replied to device query "
 746                                   "within timeout");
 747     }
 748 
 749     return FALSE;
 750 }
 751 
 752 static gboolean
 753 topology_is_empty(stonith_topology_t *tp)
     /* [previous][next][first][last][top][bottom][index][help] */
 754 {
 755     int i;
 756 
 757     if (tp == NULL) {
 758         return TRUE;
 759     }
 760 
 761     for (i = 0; i < ST_LEVEL_MAX; i++) {
 762         if (tp->levels[i] != NULL) {
 763             return FALSE;
 764         }
 765     }
 766     return TRUE;
 767 }
 768 
 769 /*!
 770  * \internal
 771  * \brief Add a device to an operation's automatic unfencing list
 772  *
 773  * \param[in,out] op      Operation to modify
 774  * \param[in]     device  Device ID to add
 775  */
 776 static void
 777 add_required_device(remote_fencing_op_t *op, const char *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 778 {
 779     GList *match  = g_list_find_custom(op->automatic_list, device,
 780                                          sort_strings);
 781 
 782     if (!match) {
 783         op->automatic_list = g_list_prepend(op->automatic_list, strdup(device));
 784     }
 785 }
 786 
 787 /*!
 788  * \internal
 789  * \brief Remove a device from the automatic unfencing list
 790  *
 791  * \param[in,out] op      Operation to modify
 792  * \param[in]     device  Device ID to remove
 793  */
 794 static void
 795 remove_required_device(remote_fencing_op_t *op, const char *device)
     /* [previous][next][first][last][top][bottom][index][help] */
 796 {
 797     GList *match = g_list_find_custom(op->automatic_list, device,
 798                                         sort_strings);
 799 
 800     if (match) {
 801         op->automatic_list = g_list_remove(op->automatic_list, match->data);
 802     }
 803 }
 804 
 805 /* deep copy the device list */
 806 static void
 807 set_op_device_list(remote_fencing_op_t * op, GList *devices)
     /* [previous][next][first][last][top][bottom][index][help] */
 808 {
 809     GList *lpc = NULL;
 810 
 811     if (op->devices_list) {
 812         g_list_free_full(op->devices_list, free);
 813         op->devices_list = NULL;
 814     }
 815     for (lpc = devices; lpc != NULL; lpc = lpc->next) {
 816         op->devices_list = g_list_append(op->devices_list, strdup(lpc->data));
 817     }
 818     op->devices = op->devices_list;
 819 }
 820 
 821 /*!
 822  * \internal
 823  * \brief Check whether a node matches a topology target
 824  *
 825  * \param[in] tp    Topology table entry to check
 826  * \param[in] node  Name of node to check
 827  *
 828  * \return TRUE if node matches topology target
 829  */
 830 static gboolean
 831 topology_matches(const stonith_topology_t *tp, const char *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 832 {
 833     regex_t r_patt;
 834 
 835     CRM_CHECK(node && tp && tp->target, return FALSE);
 836     switch (tp->kind) {
 837         case fenced_target_by_attribute:
 838             /* This level targets by attribute, so tp->target is a NAME=VALUE pair
 839              * of a permanent attribute applied to targeted nodes. The test below
 840              * relies on the locally cached copy of the CIB, so if fencing needs to
 841              * be done before the initial CIB is received or after a malformed CIB
 842              * is received, then the topology will be unable to be used.
 843              */
 844             if (node_has_attr(node, tp->target_attribute, tp->target_value)) {
 845                 crm_notice("Matched %s with %s by attribute", node, tp->target);
 846                 return TRUE;
 847             }
 848             break;
 849 
 850         case fenced_target_by_pattern:
 851             /* This level targets node names matching a pattern, so tp->target
 852              * (and tp->target_pattern) is a regular expression.
 853              */
 854             if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) {
 855                 crm_info("Bad regex '%s' for fencing level", tp->target);
 856             } else {
 857                 int status = regexec(&r_patt, node, 0, NULL, 0);
 858 
 859                 regfree(&r_patt);
 860                 if (status == 0) {
 861                     crm_notice("Matched %s with %s by name", node, tp->target);
 862                     return TRUE;
 863                 }
 864             }
 865             break;
 866 
 867         case fenced_target_by_name:
 868             crm_trace("Testing %s against %s", node, tp->target);
 869             return pcmk__str_eq(tp->target, node, pcmk__str_casei);
 870 
 871         default:
 872             break;
 873     }
 874     crm_trace("No match for %s with %s", node, tp->target);
 875     return FALSE;
 876 }
 877 
 878 stonith_topology_t *
 879 find_topology_for_host(const char *host) 
     /* [previous][next][first][last][top][bottom][index][help] */
 880 {
 881     GHashTableIter tIter;
 882     stonith_topology_t *tp = g_hash_table_lookup(topology, host);
 883 
 884     if(tp != NULL) {
 885         crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
 886         return tp;
 887     }
 888 
 889     g_hash_table_iter_init(&tIter, topology);
 890     while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) {
 891         if (topology_matches(tp, host)) {
 892             crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology));
 893             return tp;
 894         }
 895     }
 896 
 897     crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology));
 898     return NULL;
 899 }
 900 
 901 /*!
 902  * \internal
 903  * \brief Set fencing operation's device list to target's next topology level
 904  *
 905  * \param[in,out] op        Remote fencing operation to modify
 906  * \param[in]     empty_ok  If true, an operation without a target (i.e.
 907  *                          queries) or a target without a topology will get a
 908  *                          pcmk_rc_ok return value instead of ENODEV
 909  *
 910  * \return Standard Pacemaker return value
 911  */
 912 static int
 913 advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
     /* [previous][next][first][last][top][bottom][index][help] */
 914 {
 915     stonith_topology_t *tp = NULL;
 916 
 917     if (op->target) {
 918         tp = find_topology_for_host(op->target);
 919     }
 920     if (topology_is_empty(tp)) {
 921         return empty_ok? pcmk_rc_ok : ENODEV;
 922     }
 923 
 924     CRM_ASSERT(tp->levels != NULL);
 925 
 926     stonith__set_call_options(op->call_options, op->id, st_opt_topology);
 927 
 928     /* This is a new level, so undo any remapping left over from previous */
 929     undo_op_remap(op);
 930 
 931     do {
 932         op->level++;
 933 
 934     } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL);
 935 
 936     if (op->level < ST_LEVEL_MAX) {
 937         crm_trace("Attempting fencing level %d targeting %s (%d devices) "
 938                   "for client %s@%s (id=%.8s)",
 939                   op->level, op->target, g_list_length(tp->levels[op->level]),
 940                   op->client_name, op->originator, op->id);
 941         set_op_device_list(op, tp->levels[op->level]);
 942 
 943         // The requested delay has been applied for the first fencing level
 944         if (op->level > 1 && op->delay > 0) {
 945             op->delay = 0;
 946         }
 947 
 948         if (g_list_next(op->devices_list) && pcmk__str_eq(op->action, "reboot", pcmk__str_casei)) {
 949             /* A reboot has been requested for a topology level with multiple
 950              * devices. Instead of rebooting the devices sequentially, we will
 951              * turn them all off, then turn them all on again. (Think about
 952              * switched power outlets for redundant power supplies.)
 953              */
 954             op_phase_off(op);
 955         }
 956         return pcmk_rc_ok;
 957     }
 958 
 959     crm_notice("All fencing options targeting %s for client %s@%s failed "
 960                CRM_XS " id=%.8s",
 961                op->target, op->client_name, op->originator, op->id);
 962     return ENODEV;
 963 }
 964 
 965 /*!
 966  * \brief Check to see if this operation is a duplicate of another in flight
 967  * operation. If so merge this operation into the inflight operation, and mark
 968  * it as a duplicate.
 969  */
 970 static void
 971 merge_duplicates(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 972 {
 973     GHashTableIter iter;
 974     remote_fencing_op_t *other = NULL;
 975 
 976     time_t now = time(NULL);
 977 
 978     g_hash_table_iter_init(&iter, stonith_remote_op_list);
 979     while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) {
 980         const char *other_action = op_requested_action(other);
 981 
 982         if (!strcmp(op->id, other->id)) {
 983             continue; // Don't compare against self
 984         }
 985         if (other->state > st_exec) {
 986             crm_trace("%.8s not duplicate of %.8s: not in progress",
 987                       op->id, other->id);
 988             continue;
 989         }
 990         if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) {
 991             crm_trace("%.8s not duplicate of %.8s: node %s vs. %s",
 992                       op->id, other->id, op->target, other->target);
 993             continue;
 994         }
 995         if (!pcmk__str_eq(op->action, other_action, pcmk__str_casei)) {
 996             crm_trace("%.8s not duplicate of %.8s: action %s vs. %s",
 997                       op->id, other->id, op->action, other_action);
 998             continue;
 999         }
1000         if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) {
1001             crm_trace("%.8s not duplicate of %.8s: same client %s",
1002                       op->id, other->id, op->client_name);
1003             continue;
1004         }
1005         if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) {
1006             crm_trace("%.8s not duplicate of %.8s: suicide for %s",
1007                       op->id, other->id, other->target);
1008             continue;
1009         }
1010         if (!fencing_peer_active(crm_get_peer(0, other->originator))) {
1011             crm_notice("Failing action '%s' targeting %s originating from "
1012                        "client %s@%s: Originator is dead " CRM_XS " id=%.8s",
1013                        other->action, other->target, other->client_name,
1014                        other->originator, other->id);
1015             crm_trace("%.8s not duplicate of %.8s: originator dead",
1016                       op->id, other->id);
1017             other->state = st_failed;
1018             continue;
1019         }
1020         if ((other->total_timeout > 0)
1021             && (now > (other->total_timeout + other->created))) {
1022             crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)",
1023                       op->id, other->id, now, other->created,
1024                       other->total_timeout);
1025             continue;
1026         }
1027 
1028         /* There is another in-flight request to fence the same host
1029          * Piggyback on that instead.  If it fails, so do we.
1030          */
1031         other->duplicates = g_list_append(other->duplicates, op);
1032         if (other->total_timeout == 0) {
1033             other->total_timeout = op->total_timeout =
1034                 TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL);
1035             crm_trace("Best guess as to timeout used for %.8s: %d",
1036                       other->id, other->total_timeout);
1037         }
1038         crm_notice("Merging fencing action '%s' targeting %s originating from "
1039                    "client %s with identical request from %s@%s "
1040                    CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds",
1041                    op->action, op->target, op->client_name,
1042                    other->client_name, other->originator,
1043                    op->id, other->id, other->total_timeout);
1044         report_timeout_period(op, other->total_timeout);
1045         op->state = st_duplicate;
1046     }
1047 }
1048 
1049 static uint32_t fencing_active_peers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
1050 {
1051     uint32_t count = 0;
1052     crm_node_t *entry;
1053     GHashTableIter gIter;
1054 
1055     g_hash_table_iter_init(&gIter, crm_peer_cache);
1056     while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1057         if(fencing_peer_active(entry)) {
1058             count++;
1059         }
1060     }
1061     return count;
1062 }
1063 
1064 /*!
1065  * \internal
1066  * \brief Process a manual confirmation of a pending fence action
1067  *
1068  * \param[in]  client  IPC client that sent confirmation
1069  * \param[in]  msg     Request XML with manual confirmation
1070  *
1071  * \return Standard Pacemaker return code
1072  */
1073 int
1074 fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
     /* [previous][next][first][last][top][bottom][index][help] */
1075 {
1076     remote_fencing_op_t *op = NULL;
1077     xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR);
1078 
1079     CRM_CHECK(dev != NULL, return EPROTO);
1080 
1081     crm_notice("Received manual confirmation that %s has been fenced",
1082                crm_str(crm_element_value(dev, F_STONITH_TARGET)));
1083     op = initiate_remote_stonith_op(client, msg, TRUE);
1084     if (op == NULL) {
1085         return EPROTO;
1086     }
1087     op->state = st_done;
1088     set_fencing_completed(op);
1089     op->delegate = strdup("a human");
1090 
1091     // For the fencer's purposes, the fencing operation is done
1092     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1093     finalize_op(op, msg, false);
1094 
1095     /* For the requester's purposes, the operation is still pending. The
1096      * actual result will be sent asynchronously via the operation's done_cb().
1097      */
1098     return EINPROGRESS;
1099 }
1100 
/*!
 * \internal
 * \brief Create a new remote stonith operation
 *
 * The new operation is registered in the remote operation table (keyed by its
 * ID), populated from the request, checked against in-flight operations for
 * duplicates, and announced to fencing history listeners unless it turned out
 * to be a duplicate.
 *
 * \param[in] client   ID of local stonith client that initiated the operation
 * \param[in] request  The request from the client that started the operation
 * \param[in] peer     TRUE if this operation is owned by another stonith peer
 *                     (an operation owned by one peer is stored on all peers,
 *                     but only the owner executes it; all nodes get the results
 *                     once the owner finishes execution)
 *
 * \return The already-recorded operation if \p peer is TRUE and an operation
 *         with the request's ID exists, NULL if \p peer is TRUE and the
 *         request's target element has no operation ID, otherwise the newly
 *         created operation
 */
void *
create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
{
    remote_fencing_op_t *op = NULL;
    xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER);
    int call_options = 0;
    const char *operation = NULL;

    init_stonith_remote_op_hash_table(&stonith_remote_op_list);

    /* If this operation is owned by another node, check to make
     * sure we haven't already created this operation. */
    if (peer && dev) {
        const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);

        CRM_CHECK(op_id != NULL, return NULL);

        op = g_hash_table_lookup(stonith_remote_op_list, op_id);
        if (op) {
            crm_debug("Reusing existing remote fencing op %.8s for %s",
                      op_id, ((client == NULL)? "unknown client" : client));
            return op;
        }
    }

    op = calloc(1, sizeof(remote_fencing_op_t));
    CRM_ASSERT(op != NULL);

    crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
    // Value -1 means disable any static/random fencing delays
    crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));

    // Peer-owned operations keep the owner's ID; local ones get a fresh UUID
    if (peer && dev) {
        op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
    } else {
        op->id = crm_generate_uuid();
    }

    // The table key is op->id itself, not a separate copy
    g_hash_table_replace(stonith_remote_op_list, op->id, op);

    op->state = st_query;
    // Expect one reply per peer that is active at creation time
    op->replies_expected = fencing_active_peers();
    op->action = crm_element_value_copy(dev, F_STONITH_ACTION);
    op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN);
    op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */
    op->created = time(NULL);

    if (op->originator == NULL) {
        /* Local or relayed request */
        op->originator = strdup(stonith_our_uname);
    }

    CRM_LOG_ASSERT(client != NULL);
    if (client) {
        op->client_id = strdup(client);
    }


    /* For a RELAY operation, the client name is built from this daemon's
     * system name and PID instead of being copied from the request.
     */
    operation = crm_element_value(request, F_STONITH_OPERATION);

    if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
        op->client_name = crm_strdup_printf("%s.%lu", crm_system_name,
                                         (unsigned long) getpid());
    } else {
        op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME);
    }

    op->target = crm_element_value_copy(dev, F_STONITH_TARGET);
    op->request = copy_xml(request);    /* TODO: Figure out how to avoid this */
    crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
    op->call_options = call_options;

    crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid));

    crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, "
              "base timeout %d, %u %s expected)",
              (peer && dev)? "Recorded" : "Generated", op->id, op->action,
              op->target, op->client_name, op->base_timeout,
              op->replies_expected,
              pcmk__plural_alt(op->replies_expected, "reply", "replies"));

    /* With st_opt_cs_nodeid set, the target is given as a numeric node ID;
     * replace it with the node's name if the node cache knows one.
     */
    if (op->call_options & st_opt_cs_nodeid) {
        int nodeid;
        crm_node_t *node;

        pcmk__scan_min_int(op->target, &nodeid, 0);
        node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY);

        /* Ensure the conversion only happens once */
        stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid);

        if (node && node->uname) {
            free(op->target);
            op->target = strdup(node->uname);

        } else {
            crm_warn("Could not expand nodeid '%s' into a host name", op->target);
        }
    }

    /* check to see if this is a duplicate operation of another in-flight operation */
    merge_duplicates(op);

    if (op->state != st_duplicate) {
        /* kick history readers */
        fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
    }

    /* safe to trim as long as that doesn't touch pending ops */
    stonith_fence_history_trim();

    return op;
}
1226 
/*!
 * \internal
 * \brief Create a peer fencing operation from a request, and initiate it
 *
 * Unless this is a manual confirmation, the operation is advanced to its
 * first topology level (if any), and a query is sent via the cluster layer
 * so that peers can report which devices they can use; a timer is started to
 * bound how long query replies are awaited.
 *
 * \param[in] client     IPC client that made request (NULL to get from request)
 * \param[in] request    Request XML
 * \param[in] manual_ack Whether this is a manual action confirmation
 *
 * \return Newly created operation (which may already have been finalized as
 *         failed, or marked as a duplicate of an in-flight operation), or NULL
 *         if the request does not specify an action
 */
remote_fencing_op_t *
initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
                           gboolean manual_ack)
{
    int query_timeout = 0;
    xmlNode *query = NULL;
    const char *client_id = NULL;
    remote_fencing_op_t *op = NULL;
    const char *relay_op_id = NULL;
    const char *operation = NULL;

    if (client) {
        client_id = client->id;
    } else {
        client_id = crm_element_value(request, F_STONITH_CLIENTID);
    }

    CRM_LOG_ASSERT(client_id != NULL);
    // peer=FALSE: this node owns the operation, so a new one is always created
    op = create_remote_stonith_op(client_id, request, FALSE);
    op->owner = TRUE;
    if (manual_ack) {
        // Manual confirmations need no topology handling or peer query
        return op;
    }

    /* NOTE(review): on this failure path the operation has already been added
     * to the operation table by create_remote_stonith_op() and is left there
     * in its initial state -- confirm that is intended.
     */
    CRM_CHECK(op->action, return NULL);

    if (advance_topology_level(op, true) != pcmk_rc_ok) {
        op->state = st_failed;
    }

    switch (op->state) {
        case st_failed:
            // advance_topology_level() exhausted levels
            pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
                             "All topology levels failed");
            crm_warn("Could not request peer fencing (%s) targeting %s "
                     CRM_XS " id=%.8s", op->action, op->target, op->id);
            finalize_op(op, NULL, false);
            return op;

        case st_duplicate:
            // Merged into an in-flight operation, so no query of our own
            crm_info("Requesting peer fencing (%s) targeting %s (duplicate) "
                     CRM_XS " id=%.8s", op->action, op->target, op->id);
            return op;

        default:
            crm_notice("Requesting peer fencing (%s) targeting %s "
                       CRM_XS " id=%.8s state=%s base_timeout=%d",
                       op->action, op->target, op->id,
                       stonith_op_state_str(op->state), op->base_timeout);
    }

    // Build the query describing this operation
    query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY,
                              NULL, op->call_options);

    crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id);
    crm_xml_add(query, F_STONITH_TARGET, op->target);
    crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op));
    crm_xml_add(query, F_STONITH_ORIGIN, op->originator);
    crm_xml_add(query, F_STONITH_CLIENTID, op->client_id);
    crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name);
    crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout);

    /* For a RELAY operation, the ID of the original (relayed) operation is
     * added to the query so that the original operation can be deleted.
     */
    operation = crm_element_value(request, F_STONITH_OPERATION);
    if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) {
        relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID);
        if (relay_op_id) {
            crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id);
        }
    }

    // NULL destination: presumably a broadcast to all peers -- TODO confirm
    send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
    free_xml(query);

    // Scaled base timeout in seconds, converted to milliseconds for GLib
    query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR;
    op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op);

    return op;
}
1317 
/*! Bit flags restricting which peers find_best_peer() will consider */
enum find_best_peer_options {
    /*! Skip checking the target peer for capable fencing devices */
    FIND_PEER_SKIP_TARGET   = (1 << 0),

    /*! Only check the target peer for capable fencing devices */
    FIND_PEER_TARGET_ONLY   = (1 << 1),

    /*! Skip peers and devices that are not verified */
    FIND_PEER_VERIFIED_ONLY = (1 << 2),
};
1326 
1327 static peer_device_info_t *
1328 find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options)
     /* [previous][next][first][last][top][bottom][index][help] */
1329 {
1330     GList *iter = NULL;
1331     gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE;
1332 
1333     if (!device && pcmk_is_set(op->call_options, st_opt_topology)) {
1334         return NULL;
1335     }
1336 
1337     for (iter = op->query_results; iter != NULL; iter = iter->next) {
1338         peer_device_info_t *peer = iter->data;
1339 
1340         crm_trace("Testing result from %s targeting %s with %d device%s: %d %x",
1341                   peer->host, op->target, peer->ndevices,
1342                   pcmk__plural_s(peer->ndevices), peer->tried, options);
1343         if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1344             continue;
1345         }
1346         if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1347             continue;
1348         }
1349 
1350         if (pcmk_is_set(op->call_options, st_opt_topology)) {
1351 
1352             if (grab_peer_device(op, peer, device, verified_devices_only)) {
1353                 return peer;
1354             }
1355 
1356         } else if ((peer->tried == FALSE)
1357                    && count_peer_devices(op, peer, verified_devices_only)) {
1358 
1359             /* No topology: Use the current best peer */
1360             crm_trace("Simple fencing");
1361             return peer;
1362         }
1363     }
1364 
1365     return NULL;
1366 }
1367 
1368 static peer_device_info_t *
1369 stonith_choose_peer(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
1370 {
1371     const char *device = NULL;
1372     peer_device_info_t *peer = NULL;
1373     uint32_t active = fencing_active_peers();
1374 
1375     do {
1376         if (op->devices) {
1377             device = op->devices->data;
1378             crm_trace("Checking for someone to fence (%s) %s using %s",
1379                       op->action, op->target, device);
1380         } else {
1381             crm_trace("Checking for someone to fence (%s) %s",
1382                       op->action, op->target);
1383         }
1384 
1385         /* Best choice is a peer other than the target with verified access */
1386         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY);
1387         if (peer) {
1388             crm_trace("Found verified peer %s for %s", peer->host, device?device:"<any>");
1389             return peer;
1390         }
1391 
1392         if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) {
1393             crm_trace("Waiting before looking for unverified devices to fence %s", op->target);
1394             return NULL;
1395         }
1396 
1397         /* If no other peer has verified access, next best is unverified access */
1398         peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET);
1399         if (peer) {
1400             crm_trace("Found best unverified peer %s", peer->host);
1401             return peer;
1402         }
1403 
1404         /* If no other peer can do it, last option is self-fencing
1405          * (which is never allowed for the "on" phase of a remapped reboot)
1406          */
1407         if (op->phase != st_phase_on) {
1408             peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY);
1409             if (peer) {
1410                 crm_trace("%s will fence itself", peer->host);
1411                 return peer;
1412             }
1413         }
1414 
1415         /* Try the next fencing level if there is one (unless we're in the "on"
1416          * phase of a remapped "reboot", because we ignore errors in that case)
1417          */
1418     } while ((op->phase != st_phase_on)
1419              && pcmk_is_set(op->call_options, st_opt_topology)
1420              && (advance_topology_level(op, false) == pcmk_rc_ok));
1421 
1422     crm_notice("Couldn't find anyone to fence (%s) %s using %s",
1423                op->action, op->target, (device? device : "any device"));
1424     return NULL;
1425 }
1426 
1427 static int
1428 get_device_timeout(const remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
1429                    const peer_device_info_t *peer, const char *device)
1430 {
1431     device_properties_t *props;
1432 
1433     if (!peer || !device) {
1434         return op->base_timeout;
1435     }
1436 
1437     props = g_hash_table_lookup(peer->devices, device);
1438     if (!props) {
1439         return op->base_timeout;
1440     }
1441 
1442     return (props->custom_action_timeout[op->phase]?
1443            props->custom_action_timeout[op->phase] : op->base_timeout)
1444            + props->delay_max[op->phase];
1445 }
1446 
/*! User data passed to add_device_timeout() while summing a peer's timeouts */
struct timeout_data {
    const remote_fencing_op_t *op;      // Operation whose phase and base timeout apply
    const peer_device_info_t *peer;     // Peer whose devices are being visited
    int total_timeout;                  // Running sum of device timeouts
};
1452 
1453 /*!
1454  * \internal
1455  * \brief Add timeout to a total if device has not been executed yet
1456  *
1457  * \param[in] key        GHashTable key (device ID)
1458  * \param[in] value      GHashTable value (device properties)
1459  * \param[in] user_data  Timeout data
1460  */
1461 static void
1462 add_device_timeout(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1463 {
1464     const char *device_id = key;
1465     device_properties_t *props = value;
1466     struct timeout_data *timeout = user_data;
1467 
1468     if (!props->executed[timeout->op->phase]
1469         && !props->disallowed[timeout->op->phase]) {
1470         timeout->total_timeout += get_device_timeout(timeout->op,
1471                                                      timeout->peer, device_id);
1472     }
1473 }
1474 
1475 static int
1476 get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer)
     /* [previous][next][first][last][top][bottom][index][help] */
1477 {
1478     struct timeout_data timeout;
1479 
1480     timeout.op = op;
1481     timeout.peer = peer;
1482     timeout.total_timeout = 0;
1483 
1484     g_hash_table_foreach(peer->devices, add_device_timeout, &timeout);
1485 
1486     return (timeout.total_timeout? timeout.total_timeout : op->base_timeout);
1487 }
1488 
1489 static int
1490 get_op_total_timeout(const remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
1491                      const peer_device_info_t *chosen_peer)
1492 {
1493     int total_timeout = 0;
1494     stonith_topology_t *tp = find_topology_for_host(op->target);
1495 
1496     if (pcmk_is_set(op->call_options, st_opt_topology) && tp) {
1497         int i;
1498         GList *device_list = NULL;
1499         GList *iter = NULL;
1500 
1501         /* Yep, this looks scary, nested loops all over the place.
1502          * Here is what is going on.
1503          * Loop1: Iterate through fencing levels.
1504          * Loop2: If a fencing level has devices, loop through each device
1505          * Loop3: For each device in a fencing level, see what peer owns it
1506          *        and what that peer has reported the timeout is for the device.
1507          */
1508         for (i = 0; i < ST_LEVEL_MAX; i++) {
1509             if (!tp->levels[i]) {
1510                 continue;
1511             }
1512             for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
1513                 for (iter = op->query_results; iter != NULL; iter = iter->next) {
1514                     const peer_device_info_t *peer = iter->data;
1515 
1516                     if (find_peer_device(op, peer, device_list->data)) {
1517                         total_timeout += get_device_timeout(op, peer,
1518                                                             device_list->data);
1519                         break;
1520                     }
1521                 }               /* End Loop3: match device with peer that owns device, find device's timeout period */
1522             }                   /* End Loop2: iterate through devices at a specific level */
1523         }                       /*End Loop1: iterate through fencing levels */
1524 
1525     } else if (chosen_peer) {
1526         total_timeout = get_peer_timeout(op, chosen_peer);
1527     } else {
1528         total_timeout = op->base_timeout;
1529     }
1530 
1531     return total_timeout ? total_timeout : op->base_timeout;
1532 }
1533 
1534 static void
1535 report_timeout_period(remote_fencing_op_t * op, int op_timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
1536 {
1537     GList *iter = NULL;
1538     xmlNode *update = NULL;
1539     const char *client_node = NULL;
1540     const char *client_id = NULL;
1541     const char *call_id = NULL;
1542 
1543     if (op->call_options & st_opt_sync_call) {
1544         /* There is no reason to report the timeout for a synchronous call. It
1545          * is impossible to use the reported timeout to do anything when the client
1546          * is blocking for the response.  This update is only important for
1547          * async calls that require a callback to report the results in. */
1548         return;
1549     } else if (!op->request) {
1550         return;
1551     }
1552 
1553     crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id);
1554     client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE);
1555     call_id = crm_element_value(op->request, F_STONITH_CALLID);
1556     client_id = crm_element_value(op->request, F_STONITH_CLIENTID);
1557     if (!client_node || !call_id || !client_id) {
1558         return;
1559     }
1560 
1561     if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) {
1562         // Client is connected to this node, so send update directly to them
1563         do_stonith_async_timeout_update(client_id, call_id, op_timeout);
1564         return;
1565     }
1566 
1567     /* The client is connected to another node, relay this update to them */
1568     update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0);
1569     crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id);
1570     crm_xml_add(update, F_STONITH_CLIENTID, client_id);
1571     crm_xml_add(update, F_STONITH_CALLID, call_id);
1572     crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout);
1573 
1574     send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE);
1575 
1576     free_xml(update);
1577 
1578     for (iter = op->duplicates; iter != NULL; iter = iter->next) {
1579         remote_fencing_op_t *dup = iter->data;
1580 
1581         crm_trace("Reporting timeout for duplicate %.8s to client %s",
1582                   dup->id, dup->client_name);
1583         report_timeout_period(iter->data, op_timeout);
1584     }
1585 }
1586 
1587 /*!
1588  * \internal
1589  * \brief Advance an operation to the next device in its topology
1590  *
1591  * \param[in] op      Fencer operation to advance
1592  * \param[in] device  ID of device that just completed
1593  * \param[in] msg     If not NULL, XML reply of last delegated fencing operation
1594  */
1595 static void
1596 advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1597                                  xmlNode *msg)
1598 {
1599     /* Advance to the next device at this topology level, if any */
1600     if (op->devices) {
1601         op->devices = op->devices->next;
1602     }
1603 
1604     /* Handle automatic unfencing if an "on" action was requested */
1605     if ((op->phase == st_phase_requested) && pcmk__str_eq(op->action, "on", pcmk__str_casei)) {
1606         /* If the device we just executed was required, it's not anymore */
1607         remove_required_device(op, device);
1608 
1609         /* If there are no more devices at this topology level, run through any
1610          * remaining devices with automatic unfencing
1611          */
1612         if (op->devices == NULL) {
1613             op->devices = op->automatic_list;
1614         }
1615     }
1616 
1617     if ((op->devices == NULL) && (op->phase == st_phase_off)) {
1618         /* We're done with this level and with required devices, but we had
1619          * remapped "reboot" to "off", so start over with "on". If any devices
1620          * need to be turned back on, op->devices will be non-NULL after this.
1621          */
1622         op_phase_on(op);
1623     }
1624 
1625     // This function is only called if the previous device succeeded
1626     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
1627 
1628     if (op->devices) {
1629         /* Necessary devices remain, so execute the next one */
1630         crm_trace("Next targeting %s on behalf of %s@%s",
1631                   op->target, op->client_name, op->originator);
1632 
1633         // The requested delay has been applied for the first device
1634         if (op->delay > 0) {
1635             op->delay = 0;
1636         }
1637 
1638         request_peer_fencing(op, NULL);
1639     } else {
1640         /* We're done with all devices and phases, so finalize operation */
1641         crm_trace("Marking complex fencing op targeting %s as complete",
1642                   op->target);
1643         op->state = st_done;
1644         finalize_op(op, msg, false);
1645     }
1646 }
1647 
1648 static gboolean
1649 check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
1650 {
1651     if (node_does_watchdog_fencing(op->target)) {
1652 
1653         crm_notice("Waiting %lds for %s to self-fence (%s) for "
1654                    "client %s " CRM_XS " id=%.8s",
1655                    (stonith_watchdog_timeout_ms / 1000),
1656                    op->target, op->action, op->client_name, op->id);
1657         op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
1658                                          remote_op_watchdog_done, op);
1659         return TRUE;
1660     } else {
1661         crm_debug("Skipping fallback to watchdog-fencing as %s is "
1662                  "not in host-list", op->target);
1663     }
1664     return FALSE;
1665 }
1666 
1667 /*!
1668  * \internal
1669  * \brief Ask a peer to execute a fencing operation
1670  *
1671  * \param[in] op      Fencing operation to be executed
1672  * \param[in] peer    If NULL or topology is in use, choose best peer to execute
1673  *                    the fencing, otherwise use this peer
1674  */
1675 static void
1676 request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
     /* [previous][next][first][last][top][bottom][index][help] */
1677 {
1678     const char *device = NULL;
1679     int timeout;
1680 
1681     CRM_CHECK(op != NULL, return);
1682 
1683     crm_trace("Action %.8s targeting %s for %s is %s",
1684               op->id, op->target, op->client_name,
1685               stonith_op_state_str(op->state));
1686     timeout = op->base_timeout;
1687     if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) {
1688         peer = stonith_choose_peer(op);
1689     }
1690 
1691     if (!op->op_timer_total) {
1692         op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer);
1693         op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op);
1694         report_timeout_period(op, op->total_timeout);
1695         crm_info("Total timeout set to %d for peer's fencing targeting %s for %s"
1696                  CRM_XS "id=%.8s",
1697                  op->total_timeout, op->target, op->client_name, op->id);
1698     }
1699 
1700     if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) {
1701         /* Ignore the caller's peer preference if topology is in use, because
1702          * that peer might not have access to the required device. With
1703          * topology, stonith_choose_peer() removes the device from further
1704          * consideration, so the timeout must be calculated beforehand.
1705          *
1706          * @TODO Basing the total timeout on the caller's preferred peer (above)
1707          *       is less than ideal.
1708          */
1709         peer = stonith_choose_peer(op);
1710 
1711         device = op->devices->data;
1712         timeout = get_device_timeout(op, peer, device);
1713     }
1714 
1715     if (peer) {
1716         int timeout_one = 0;
1717         xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0);
1718 
1719         crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id);
1720         crm_xml_add(remote_op, F_STONITH_TARGET, op->target);
1721         crm_xml_add(remote_op, F_STONITH_ACTION, op->action);
1722         crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator);
1723         crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id);
1724         crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
1725         crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
1726         crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
1727         crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
1728 
1729         if (device) {
1730             timeout_one = TIMEOUT_MULTIPLY_FACTOR *
1731                           get_device_timeout(op, peer, device);
1732             crm_notice("Requesting that %s perform '%s' action targeting %s "
1733                        "using %s " CRM_XS " for client %s (%ds)",
1734                        peer->host, op->action, op->target, device,
1735                        op->client_name, timeout_one);
1736             crm_xml_add(remote_op, F_STONITH_DEVICE, device);
1737 
1738         } else {
1739             timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer);
1740             crm_notice("Requesting that %s perform '%s' action targeting %s "
1741                        CRM_XS " for client %s (%ds, %lds)",
1742                        peer->host, op->action, op->target, op->client_name,
1743                        timeout_one, stonith_watchdog_timeout_ms);
1744         }
1745 
1746         op->state = st_exec;
1747         if (op->op_timer_one) {
1748             g_source_remove(op->op_timer_one);
1749         }
1750 
1751         if (!(stonith_watchdog_timeout_ms > 0 && (
1752                 (pcmk__str_eq(device, STONITH_WATCHDOG_ID,
1753                                         pcmk__str_none)) ||
1754                 (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
1755                     && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) &&
1756              check_watchdog_fencing_and_wait(op))) {
1757 
1758             /* Some thoughts about self-fencing cases reaching this point:
1759                - Actually check in check_watchdog_fencing_and_wait
1760                  shouldn't fail if STONITH_WATCHDOG_ID is
1761                  chosen as fencing-device and it being present implies
1762                  watchdog-fencing is enabled anyway
1763                - If watchdog-fencing is disabled either in general or for
1764                  a specific target - detected in check_watchdog_fencing_and_wait -
1765                  for some other kind of self-fencing we can't expect
1766                  a success answer but timeout is fine if the node doesn't
1767                  come back in between
1768                - Delicate might be the case where we have watchdog-fencing
1769                  enabled for a node but the watchdog-fencing-device isn't
1770                  explicitly chosen for suicide. Local pe-execution in sbd
1771                  may detect the node as unclean and lead to timely suicide.
1772                  Otherwise the selection of stonith-watchdog-timeout at
1773                  least is questionable.
1774              */
1775             op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
1776         }
1777 
1778         send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE);
1779         peer->tried = TRUE;
1780         free_xml(remote_op);
1781         return;
1782 
1783     } else if (op->phase == st_phase_on) {
1784         /* A remapped "on" cannot be executed, but the node was already
1785          * turned off successfully, so ignore the error and continue.
1786          */
1787         crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s "
1788                  "after successful 'off'", device, op->target);
1789         advance_topology_device_in_level(op, device, NULL);
1790         return;
1791 
1792     } else if (op->owner == FALSE) {
1793         crm_err("Fencing (%s) targeting %s for client %s is not ours to control",
1794                 op->action, op->target, op->client_name);
1795 
1796     } else if (op->query_timer == 0) {
1797         /* We've exhausted all available peers */
1798         crm_info("No remaining peers capable of fencing (%s) %s for client %s "
1799                  CRM_XS " state=%s", op->action, op->target, op->client_name,
1800                  stonith_op_state_str(op->state));
1801         CRM_CHECK(op->state < st_done, return);
1802         finalize_timed_out_op(op, "All nodes failed, or are unable, to "
1803                                   "fence target");
1804 
1805     } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) {
1806         /* if the operation never left the query state,
1807          * but we have all the expected replies, then no devices
1808          * are available to execute the fencing operation. */
1809 
1810         if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device,
1811            STONITH_WATCHDOG_ID, pcmk__str_null_matches)) {
1812             if (check_watchdog_fencing_and_wait(op)) {
1813                 return;
1814             }
1815         }
1816 
1817         if (op->state == st_query) {
1818             crm_info("No peers (out of %d) have devices capable of fencing "
1819                      "(%s) %s for client %s " CRM_XS " state=%s",
1820                      op->replies, op->action, op->target, op->client_name,
1821                      stonith_op_state_str(op->state));
1822 
1823             pcmk__reset_result(&op->result);
1824             pcmk__set_result(&op->result, CRM_EX_ERROR,
1825                              PCMK_EXEC_NO_FENCE_DEVICE, NULL);
1826         } else {
1827             if (pcmk_is_set(op->call_options, st_opt_topology)) {
1828                 pcmk__reset_result(&op->result);
1829                 pcmk__set_result(&op->result, CRM_EX_ERROR,
1830                                  PCMK_EXEC_NO_FENCE_DEVICE, NULL);
1831             }
1832             /* ... else use existing result from previous failed attempt
1833              * (topology is not in use, and no devices remain to be attempted).
1834              * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
1835              * prevent finalize_op() from setting the correct delegate if
1836              * needed.
1837              */
1838 
1839             crm_info("No peers (out of %d) are capable of fencing (%s) %s "
1840                      "for client %s " CRM_XS " state=%s",
1841                      op->replies, op->action, op->target, op->client_name,
1842                      stonith_op_state_str(op->state));
1843         }
1844 
1845         op->state = st_failed;
1846         finalize_op(op, NULL, false);
1847 
1848     } else {
1849         crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
1850                  "for client %s " CRM_XS " id=%.8s",
1851                  op->action, op->target, (device? " using " : ""),
1852                  (device? device : ""), op->client_name, op->id);
1853     }
1854 }
1855 
1856 /*!
1857  * \internal
1858  * \brief Comparison function for sorting query results
1859  *
1860  * \param[in] a  GList item to compare
1861  * \param[in] b  GList item to compare
1862  *
1863  * \return Per the glib documentation, "a negative integer if the first value
1864  *         comes before the second, 0 if they are equal, or a positive integer
1865  *         if the first value comes after the second."
1866  */
1867 static gint
1868 sort_peers(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
1869 {
1870     const peer_device_info_t *peer_a = a;
1871     const peer_device_info_t *peer_b = b;
1872 
1873     return (peer_b->ndevices - peer_a->ndevices);
1874 }
1875 
1876 /*!
1877  * \internal
1878  * \brief Determine if all the devices in the topology are found or not
1879  */
1880 static gboolean
1881 all_topology_devices_found(remote_fencing_op_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
1882 {
1883     GList *device = NULL;
1884     GList *iter = NULL;
1885     device_properties_t *match = NULL;
1886     stonith_topology_t *tp = NULL;
1887     gboolean skip_target = FALSE;
1888     int i;
1889 
1890     tp = find_topology_for_host(op->target);
1891     if (!tp) {
1892         return FALSE;
1893     }
1894     if (pcmk__is_fencing_action(op->action)) {
1895         /* Don't count the devices on the target node if we are killing
1896          * the target node. */
1897         skip_target = TRUE;
1898     }
1899 
1900     for (i = 0; i < ST_LEVEL_MAX; i++) {
1901         for (device = tp->levels[i]; device; device = device->next) {
1902             match = NULL;
1903             for (iter = op->query_results; iter && !match; iter = iter->next) {
1904                 peer_device_info_t *peer = iter->data;
1905 
1906                 if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) {
1907                     continue;
1908                 }
1909                 match = find_peer_device(op, peer, device->data);
1910             }
1911             if (!match) {
1912                 return FALSE;
1913             }
1914         }
1915     }
1916 
1917     return TRUE;
1918 }
1919 
1920 /*!
1921  * \internal
1922  * \brief Parse action-specific device properties from XML
1923  *
1924  * \param[in]     msg     XML element containing the properties
1925  * \param[in]     peer    Name of peer that sent XML (for logs)
1926  * \param[in]     device  Device ID (for logs)
1927  * \param[in]     action  Action the properties relate to (for logs)
1928  * \param[in]     phase   Phase the properties relate to
1929  * \param[in,out] props   Device properties to update
1930  */
1931 static void
1932 parse_action_specific(xmlNode *xml, const char *peer, const char *device,
     /* [previous][next][first][last][top][bottom][index][help] */
1933                       const char *action, remote_fencing_op_t *op,
1934                       enum st_remap_phase phase, device_properties_t *props)
1935 {
1936     props->custom_action_timeout[phase] = 0;
1937     crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT,
1938                           &props->custom_action_timeout[phase]);
1939     if (props->custom_action_timeout[phase]) {
1940         crm_trace("Peer %s with device %s returned %s action timeout %d",
1941                   peer, device, action, props->custom_action_timeout[phase]);
1942     }
1943 
1944     props->delay_max[phase] = 0;
1945     crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]);
1946     if (props->delay_max[phase]) {
1947         crm_trace("Peer %s with device %s returned maximum of random delay %d for %s",
1948                   peer, device, props->delay_max[phase], action);
1949     }
1950 
1951     props->delay_base[phase] = 0;
1952     crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]);
1953     if (props->delay_base[phase]) {
1954         crm_trace("Peer %s with device %s returned base delay %d for %s",
1955                   peer, device, props->delay_base[phase], action);
1956     }
1957 
1958     /* Handle devices with automatic unfencing */
1959     if (pcmk__str_eq(action, "on", pcmk__str_casei)) {
1960         int required = 0;
1961 
1962         crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required);
1963         if (required) {
1964             crm_trace("Peer %s requires device %s to execute for action %s",
1965                       peer, device, action);
1966             add_required_device(op, device);
1967         }
1968     }
1969 
1970     /* If a reboot is remapped to off+on, it's possible that a node is allowed
1971      * to perform one action but not another.
1972      */
1973     if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) {
1974         props->disallowed[phase] = TRUE;
1975         crm_trace("Peer %s is disallowed from executing %s for device %s",
1976                   peer, action, device);
1977     }
1978 }
1979 
1980 /*!
1981  * \internal
1982  * \brief Parse one device's properties from peer's XML query reply
1983  *
1984  * \param[in]     xml       XML node containing device properties
1985  * \param[in,out] op        Operation that query and reply relate to
1986  * \param[in,out] peer      Peer's device information
1987  * \param[in]     device    ID of device being parsed
1988  */
1989 static void
1990 add_device_properties(xmlNode *xml, remote_fencing_op_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
1991                       peer_device_info_t *peer, const char *device)
1992 {
1993     xmlNode *child;
1994     int verified = 0;
1995     device_properties_t *props = calloc(1, sizeof(device_properties_t));
1996 
1997     /* Add a new entry to this peer's devices list */
1998     CRM_ASSERT(props != NULL);
1999     g_hash_table_insert(peer->devices, strdup(device), props);
2000 
2001     /* Peers with verified (monitored) access will be preferred */
2002     crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified);
2003     if (verified) {
2004         crm_trace("Peer %s has confirmed a verified device %s",
2005                   peer->host, device);
2006         props->verified = TRUE;
2007     }
2008 
2009     /* Parse action-specific device properties */
2010     parse_action_specific(xml, peer->host, device, op_requested_action(op),
2011                           op, st_phase_requested, props);
2012     for (child = pcmk__xml_first_child(xml); child != NULL;
2013          child = pcmk__xml_next(child)) {
2014         /* Replies for "reboot" operations will include the action-specific
2015          * values for "off" and "on" in child elements, just in case the reboot
2016          * winds up getting remapped.
2017          */
2018         if (pcmk__str_eq(ID(child), "off", pcmk__str_casei)) {
2019             parse_action_specific(child, peer->host, device, "off",
2020                                   op, st_phase_off, props);
2021         } else if (pcmk__str_eq(ID(child), "on", pcmk__str_casei)) {
2022             parse_action_specific(child, peer->host, device, "on",
2023                                   op, st_phase_on, props);
2024         }
2025     }
2026 }
2027 
2028 /*!
2029  * \internal
2030  * \brief Parse a peer's XML query reply and add it to operation's results
2031  *
2032  * \param[in,out] op        Operation that query and reply relate to
2033  * \param[in]     host      Name of peer that sent this reply
2034  * \param[in]     ndevices  Number of devices expected in reply
2035  * \param[in]     xml       XML node containing device list
2036  *
2037  * \return Newly allocated result structure with parsed reply
2038  */
2039 static peer_device_info_t *
2040 add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
2041 {
2042     peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t));
2043     xmlNode *child;
2044 
2045     // cppcheck seems not to understand the abort logic in CRM_CHECK
2046     // cppcheck-suppress memleak
2047     CRM_CHECK(peer != NULL, return NULL);
2048     peer->host = strdup(host);
2049     peer->devices = pcmk__strkey_table(free, free);
2050 
2051     /* Each child element describes one capable device available to the peer */
2052     for (child = pcmk__xml_first_child(xml); child != NULL;
2053          child = pcmk__xml_next(child)) {
2054         const char *device = ID(child);
2055 
2056         if (device) {
2057             add_device_properties(child, op, peer, device);
2058         }
2059     }
2060 
2061     peer->ndevices = g_hash_table_size(peer->devices);
2062     CRM_CHECK(ndevices == peer->ndevices,
2063               crm_err("Query claimed to have %d device%s but %d found",
2064                       ndevices, pcmk__plural_s(ndevices), peer->ndevices));
2065 
2066     op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
2067     return peer;
2068 }
2069 
2070 /*!
2071  * \internal
2072  * \brief Handle a peer's reply to our fencing query
2073  *
2074  * Parse a query result from XML and store it in the remote operation
2075  * table, and when enough replies have been received, issue a fencing request.
2076  *
2077  * \param[in] msg  XML reply received
2078  *
2079  * \return pcmk_ok on success, -errno on error
2080  *
2081  * \note See initiate_remote_stonith_op() for how the XML query was initially
2082  *       formed, and stonith_query() for how the peer formed its XML reply.
2083  */
2084 int
2085 process_remote_stonith_query(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
2086 {
2087     int ndevices = 0;
2088     gboolean host_is_target = FALSE;
2089     gboolean have_all_replies = FALSE;
2090     const char *id = NULL;
2091     const char *host = NULL;
2092     remote_fencing_op_t *op = NULL;
2093     peer_device_info_t *peer = NULL;
2094     uint32_t replies_expected;
2095     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
2096 
2097     CRM_CHECK(dev != NULL, return -EPROTO);
2098 
2099     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
2100     CRM_CHECK(id != NULL, return -EPROTO);
2101 
2102     dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR);
2103     CRM_CHECK(dev != NULL, return -EPROTO);
2104     crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices);
2105 
2106     op = g_hash_table_lookup(stonith_remote_op_list, id);
2107     if (op == NULL) {
2108         crm_debug("Received query reply for unknown or expired operation %s",
2109                   id);
2110         return -EOPNOTSUPP;
2111     }
2112 
2113     replies_expected = fencing_active_peers();
2114     if (op->replies_expected < replies_expected) {
2115         replies_expected = op->replies_expected;
2116     }
2117     if ((++op->replies >= replies_expected) && (op->state == st_query)) {
2118         have_all_replies = TRUE;
2119     }
2120     host = crm_element_value(msg, F_ORIG);
2121     host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);
2122 
2123     crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
2124              op->replies, replies_expected, host,
2125              op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
2126     if (ndevices > 0) {
2127         peer = add_result(op, host, ndevices, dev);
2128     }
2129 
2130     pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2131 
2132     if (pcmk_is_set(op->call_options, st_opt_topology)) {
2133         /* If we start the fencing before all the topology results are in,
2134          * it is possible fencing levels will be skipped because of the missing
2135          * query results. */
2136         if (op->state == st_query && all_topology_devices_found(op)) {
2137             /* All the query results are in for the topology, start the fencing ops. */
2138             crm_trace("All topology devices found");
2139             request_peer_fencing(op, peer);
2140 
2141         } else if (have_all_replies) {
2142             crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
2143                      replies_expected, op->replies);
2144             request_peer_fencing(op, NULL);
2145         }
2146 
2147     } else if (op->state == st_query) {
2148         int nverified = count_peer_devices(op, peer, TRUE);
2149 
2150         /* We have a result for a non-topology fencing op that looks promising,
2151          * go ahead and start fencing before query timeout */
2152         if ((peer != NULL) && !host_is_target && nverified) {
2153             /* we have a verified device living on a peer that is not the target */
2154             crm_trace("Found %d verified device%s",
2155                       nverified, pcmk__plural_s(nverified));
2156             request_peer_fencing(op, peer);
2157 
2158         } else if (have_all_replies) {
2159             crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
2160                      replies_expected, op->replies);
2161             request_peer_fencing(op, NULL);
2162 
2163         } else {
2164             crm_trace("Waiting for more peer results before launching fencing operation");
2165         }
2166 
2167     } else if ((peer != NULL) && (op->state == st_done)) {
2168         crm_info("Discarding query result from %s (%d device%s): "
2169                  "Operation is %s", peer->host,
2170                  peer->ndevices, pcmk__plural_s(peer->ndevices),
2171                  stonith_op_state_str(op->state));
2172     }
2173 
2174     return pcmk_ok;
2175 }
2176 
2177 /*!
2178  * \internal
2179  * \brief Handle a peer's reply to a fencing request
2180  *
2181  * Parse a fencing reply from XML, and either finalize the operation
2182  * or attempt another device as appropriate.
2183  *
2184  * \param[in] msg  XML reply received
2185  */
2186 void
2187 fenced_process_fencing_reply(xmlNode *msg)
     /* [previous][next][first][last][top][bottom][index][help] */
2188 {
2189     const char *id = NULL;
2190     const char *device = NULL;
2191     remote_fencing_op_t *op = NULL;
2192     xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
2193     pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
2194 
2195     CRM_CHECK(dev != NULL, return);
2196 
2197     id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID);
2198     CRM_CHECK(id != NULL, return);
2199 
2200     dev = stonith__find_xe_with_result(msg);
2201     CRM_CHECK(dev != NULL, return);
2202 
2203     stonith__xe_get_result(dev, &result);
2204 
2205     device = crm_element_value(dev, F_STONITH_DEVICE);
2206 
2207     if (stonith_remote_op_list) {
2208         op = g_hash_table_lookup(stonith_remote_op_list, id);
2209     }
2210 
2211     if ((op == NULL) && pcmk__result_ok(&result)) {
2212         /* Record successful fencing operations */
2213         const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID);
2214 
2215         op = create_remote_stonith_op(client_id, dev, TRUE);
2216     }
2217 
2218     if (op == NULL) {
2219         /* Could be for an event that began before we started */
2220         /* TODO: Record the op for later querying */
2221         crm_info("Received peer result of unknown or expired operation %s", id);
2222         pcmk__reset_result(&result);
2223         return;
2224     }
2225 
2226     pcmk__reset_result(&op->result);
2227     op->result = result; // The operation takes ownership of the result
2228 
2229     if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
2230         crm_err("Received outdated reply for device %s (instead of %s) to "
2231                 "fence (%s) %s. Operation already timed out at peer level.",
2232                 device, (const char *) op->devices->data, op->action, op->target);
2233         return;
2234     }
2235 
2236     if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
2237         if (pcmk__result_ok(&op->result)) {
2238             op->state = st_done;
2239         } else {
2240             op->state = st_failed;
2241         }
2242         finalize_op(op, msg, false);
2243         return;
2244 
2245     } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
2246         /* If this isn't a remote level broadcast, and we are not the
2247          * originator of the operation, we should not be receiving this msg. */
2248         crm_err("Received non-broadcast fencing result for operation %.8s "
2249                 "we do not own (device %s targeting %s)",
2250                 op->id, device, op->target);
2251         return;
2252     }
2253 
2254     if (pcmk_is_set(op->call_options, st_opt_topology)) {
2255         const char *device = NULL;
2256         const char *reason = op->result.exit_reason;
2257 
2258         /* We own the op, and it is complete. broadcast the result to all nodes
2259          * and notify our local clients. */
2260         if (op->state == st_done) {
2261             finalize_op(op, msg, false);
2262             return;
2263         }
2264 
2265         device = crm_element_value(msg, F_STONITH_DEVICE);
2266 
2267         if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
2268             /* A remapped "on" failed, but the node was already turned off
2269              * successfully, so ignore the error and continue.
2270              */
2271             crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
2272                      "after successful 'off'",
2273                      device, pcmk_exec_status_str(op->result.execution_status),
2274                      (reason == NULL)? "" : ": ",
2275                      (reason == NULL)? "" : reason,
2276                      op->target);
2277             pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
2278         } else {
2279             crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: "
2280                        "%s%s%s%s",
2281                        op->action, op->target, device, op->client_name,
2282                        op->originator,
2283                        pcmk_exec_status_str(op->result.execution_status),
2284                        (reason == NULL)? "" : " (",
2285                        (reason == NULL)? "" : reason,
2286                        (reason == NULL)? "" : ")");
2287         }
2288 
2289         if (pcmk__result_ok(&op->result)) {
2290             /* An operation completed successfully. Try another device if
2291              * necessary, otherwise mark the operation as done. */
2292             advance_topology_device_in_level(op, device, msg);
2293             return;
2294         } else {
2295             /* This device failed, time to try another topology level. If no other
2296              * levels are available, mark this operation as failed and report results. */
2297             if (advance_topology_level(op, false) != pcmk_rc_ok) {
2298                 op->state = st_failed;
2299                 finalize_op(op, msg, false);
2300                 return;
2301             }
2302         }
2303 
2304     } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
2305         op->state = st_done;
2306         finalize_op(op, msg, false);
2307         return;
2308 
2309     } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
2310                && (op->devices == NULL)) {
2311         /* If the operation timed out don't bother retrying other peers. */
2312         op->state = st_failed;
2313         finalize_op(op, msg, false);
2314         return;
2315 
2316     } else {
2317         /* fall-through and attempt other fencing action using another peer */
2318     }
2319 
2320     /* Retry on failure */
2321     crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
2322               op->target, op->originator, op->client_name,
2323               pcmk_exec_status_str(op->result.execution_status));
2324     request_peer_fencing(op, NULL);
2325 }
2326 
2327 gboolean
2328 stonith_check_fence_tolerance(int tolerance, const char *target, const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
2329 {
2330     GHashTableIter iter;
2331     time_t now = time(NULL);
2332     remote_fencing_op_t *rop = NULL;
2333 
2334     if (tolerance <= 0 || !stonith_remote_op_list || target == NULL ||
2335         action == NULL) {
2336         return FALSE;
2337     }
2338 
2339     g_hash_table_iter_init(&iter, stonith_remote_op_list);
2340     while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) {
2341         if (strcmp(rop->target, target) != 0) {
2342             continue;
2343         } else if (rop->state != st_done) {
2344             continue;
2345         /* We don't have to worry about remapped reboots here
2346          * because if state is done, any remapping has been undone
2347          */
2348         } else if (strcmp(rop->action, action) != 0) {
2349             continue;
2350         } else if ((rop->completed + tolerance) < now) {
2351             continue;
2352         }
2353 
2354         crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s",
2355                    target, action, tolerance, rop->delegate, rop->originator);
2356         return TRUE;
2357     }
2358     return FALSE;
2359 }

/* [previous][next][first][last][top][bottom][index][help] */