root/lib/cluster/election.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. election_complete
  2. election_timer_cb
  3. election_state
  4. election_init
  5. election_remove
  6. election_reset
  7. election_fini
  8. election_timeout_start
  9. election_timeout_stop
  10. election_timeout_set_period
  11. get_uptime
  12. compare_age
  13. election_vote
  14. election_check
  15. parse_election_message
  16. record_vote
  17. send_no_vote
  18. election_count_vote
  19. election_clear_dampening

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/time.h>
  13 #include <sys/resource.h>
  14 
  15 #include <crm/crm.h>
  16 #include <crm/common/mainloop.h>
  17 #include <crm/common/xml.h>
  18 
  19 #include <crm/cluster/internal.h>
  20 #include <crm/cluster/election_internal.h>
  21 #include "crmcluster_private.h"
  22 
  23 #define STORM_INTERVAL   2      /* in seconds */
  24 
  25 struct pcmk__election {
  26     enum election_result state;     // Current state of election
  27     guint count;                    // How many times local node has voted
  28     void (*cb)(pcmk_cluster_t *);   // Function to call if election is won
  29     GHashTable *voted;  // Key = node name, value = how node voted
  30     mainloop_timer_t *timeout; // When to abort if all votes not received
  31     int election_wins;         // Track wins, for storm detection
  32     bool wrote_blackbox;       // Write a storm blackbox at most once
  33     time_t expires;            // When storm detection period ends
  34     time_t last_election_loss; // When dampening period ends
  35 };
  36 
  37 static void
  38 election_complete(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
  39 {
  40     pcmk__assert((cluster != NULL) && (cluster->priv->election != NULL));
  41     cluster->priv->election->state = election_won;
  42     if (cluster->priv->election->cb != NULL) {
  43         cluster->priv->election->cb(cluster);
  44     }
  45     election_reset(cluster);
  46 }
  47 
  48 static gboolean
  49 election_timer_cb(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  50 {
  51     pcmk_cluster_t *cluster = user_data;
  52 
  53     crm_info("Declaring local node as winner after election timed out");
  54     election_complete(cluster);
  55     return FALSE;
  56 }
  57 
  58 /*!
  59  * \internal
  60  * \brief Get current state of an election
  61  *
  62  * \param[in] cluster  Cluster with election
  63  *
  64  * \return Current state of \e
  65  */
  66 enum election_result
  67 election_state(const pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
  68 {
  69     if ((cluster == NULL) || (cluster->priv->election == NULL)) {
  70         return election_error;
  71     }
  72     return cluster->priv->election->state;
  73 }
  74 
  75 /* The local node will be declared the winner if missing votes are not received
  76  * within this time. The value is chosen to be the same as the default for the
  77  * election-timeout cluster option.
  78  */
  79 #define ELECTION_TIMEOUT_MS 120000
  80 
  81 /*!
  82  * \internal
  83  * \brief Track election state in a cluster
  84  *
  85  * Every node that wishes to participate in an election must initialize the
  86  * election once, typically at start-up.
  87  *
  88  * \param[in] cluster    Cluster that election is for
  89  * \param[in] cb         Function to call if local node wins election
  90  */
  91 void
  92 election_init(pcmk_cluster_t *cluster, void (*cb)(pcmk_cluster_t *))
     /* [previous][next][first][last][top][bottom][index][help] */
  93 {
  94     const char *name = pcmk__s(crm_system_name, "election");
  95 
  96     CRM_CHECK(cluster->priv->election == NULL, return);
  97 
  98     cluster->priv->election = pcmk__assert_alloc(1, sizeof(pcmk__election_t));
  99     cluster->priv->election->cb = cb;
 100     cluster->priv->election->timeout = mainloop_timer_add(name,
 101                                                           ELECTION_TIMEOUT_MS,
 102                                                           FALSE,
 103                                                           election_timer_cb,
 104                                                           cluster);
 105 }
 106 
 107 /*!
 108  * \internal
 109  * \brief Disregard any previous vote by specified peer
 110  *
 111  * This discards any recorded vote from a specified peer. Election users should
 112  * call this whenever a voting peer becomes inactive.
 113  *
 114  * \param[in,out] cluster  Cluster with election
 115  * \param[in]     uname    Name of peer to disregard
 116  */
 117 void
 118 election_remove(pcmk_cluster_t *cluster, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 119 {
 120     if ((cluster != NULL) && (cluster->priv->election != NULL)
 121         && (uname != NULL) && (cluster->priv->election->voted != NULL)) {
 122         crm_trace("Discarding (no-)vote from lost peer %s", uname);
 123         g_hash_table_remove(cluster->priv->election->voted, uname);
 124     }
 125 }
 126 
 127 /*!
 128  * \internal
 129  * \brief Stop election timer and disregard all votes
 130  *
 131  * \param[in,out] cluster  Cluster with election
 132  */
 133 void
 134 election_reset(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 135 {
 136     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 137         crm_trace("Resetting election");
 138         mainloop_timer_stop(cluster->priv->election->timeout);
 139         if (cluster->priv->election->voted != NULL) {
 140             g_hash_table_destroy(cluster->priv->election->voted);
 141             cluster->priv->election->voted = NULL;
 142         }
 143     }
 144 }
 145 
 146 /*!
 147  * \internal
 148  * \brief Free an election object
 149  *
 150  * Free all memory associated with an election object, stopping its
 151  * election timer (if running).
 152  *
 153  * \param[in,out] cluster  Cluster with election
 154  */
 155 void
 156 election_fini(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 157 {
 158     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 159         election_reset(cluster);
 160         crm_trace("Destroying election");
 161         mainloop_timer_del(cluster->priv->election->timeout);
 162         free(cluster->priv->election);
 163         cluster->priv->election = NULL;
 164     }
 165 }
 166 
 167 static void
 168 election_timeout_start(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170     mainloop_timer_start(cluster->priv->election->timeout);
 171 }
 172 
 173 /*!
 174  * \internal
 175  * \brief Stop an election's timer, if running
 176  *
 177  * \param[in,out] cluster  Cluster with election
 178  */
 179 void
 180 election_timeout_stop(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 181 {
 182     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 183         mainloop_timer_stop(cluster->priv->election->timeout);
 184     }
 185 }
 186 
 187 /*!
 188  * \internal
 189  * \brief Change an election's timeout (restarting timer if running)
 190  *
 191  * \param[in,out] cluster  Cluster with election
 192  * \param[in]     period   New timeout
 193  */
 194 void
 195 election_timeout_set_period(pcmk_cluster_t *cluster, guint period)
     /* [previous][next][first][last][top][bottom][index][help] */
 196 {
 197     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
 198     mainloop_timer_set_period(cluster->priv->election->timeout, period);
 199 }
 200 
 201 static int
 202 get_uptime(struct timeval *output)
     /* [previous][next][first][last][top][bottom][index][help] */
 203 {
 204     static time_t expires = 0;
 205     static struct rusage info;
 206 
 207     time_t tm_now = time(NULL);
 208 
 209     if (expires < tm_now) {
 210         int rc = 0;
 211 
 212         info.ru_utime.tv_sec = 0;
 213         info.ru_utime.tv_usec = 0;
 214         rc = getrusage(RUSAGE_SELF, &info);
 215 
 216         output->tv_sec = 0;
 217         output->tv_usec = 0;
 218 
 219         if (rc < 0) {
 220             crm_perror(LOG_ERR, "Could not calculate the current uptime");
 221             expires = 0;
 222             return -1;
 223         }
 224 
 225         crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
 226                   (long)info.ru_utime.tv_usec);
 227     }
 228 
 229     expires = tm_now + STORM_INTERVAL;  /* N seconds after the last _access_ */
 230     output->tv_sec = info.ru_utime.tv_sec;
 231     output->tv_usec = info.ru_utime.tv_usec;
 232 
 233     return 1;
 234 }
 235 
 236 static int
 237 compare_age(struct timeval your_age)
     /* [previous][next][first][last][top][bottom][index][help] */
 238 {
 239     struct timeval our_age;
 240 
 241     get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */
 242 
 243     if (our_age.tv_sec > your_age.tv_sec) {
 244         crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 245         return 1;
 246     } else if (our_age.tv_sec < your_age.tv_sec) {
 247         crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 248         return -1;
 249     } else if (our_age.tv_usec > your_age.tv_usec) {
 250         crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
 251                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 252         return 1;
 253     } else if (our_age.tv_usec < your_age.tv_usec) {
 254         crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
 255                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 256         return -1;
 257     }
 258 
 259     return 0;
 260 }
 261 
 262 /*!
 263  * \internal
 264  * \brief Start a new election by offering local node's candidacy
 265  *
 266  * Broadcast a "vote" election message containing the local node's ID,
 267  * (incremented) election counter, and uptime, and start the election timer.
 268  *
 269  * \param[in,out] cluster  Cluster with election
 270  *
 271  * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
 272  *       all active peers do so, or if the election times out, the local node
 273  *       wins the election. (If we lose to any peer vote, we will stop the
 274  *       timer, so a timeout means we did not lose -- either some peer did not
 275  *       vote, or we did not call election_check() in time.)
 276  */
 277 void
 278 election_vote(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 279 {
 280     struct timeval age;
 281     xmlNode *vote = NULL;
 282     pcmk__node_status_t *our_node = NULL;
 283     const char *message_type = NULL;
 284 
 285     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
 286 
 287     if (cluster->priv->node_name == NULL) {
 288         crm_err("Cannot start an election: Local node name unknown");
 289         return;
 290     }
 291 
 292     our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
 293                               pcmk__node_search_cluster_member);
 294     if (!pcmk__cluster_is_node_active(our_node)) {
 295         crm_trace("Cannot vote yet: local node not connected to cluster");
 296         return;
 297     }
 298 
 299     election_reset(cluster);
 300     cluster->priv->election->state = election_in_progress;
 301     message_type = pcmk__server_message_type(cluster->priv->server);
 302 
 303     /* @COMPAT We use message_type as the sender and recipient system for
 304      * backward compatibility (see T566).
 305      */
 306     vote = pcmk__new_request(cluster->priv->server, message_type,
 307                              NULL, message_type, CRM_OP_VOTE, NULL);
 308 
 309     cluster->priv->election->count++;
 310     crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->xml_id);
 311     crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, cluster->priv->election->count);
 312 
 313     // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
 314     get_uptime(&age);
 315     crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
 316                         PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
 317 
 318     pcmk__cluster_send_message(NULL, cluster->priv->server, vote);
 319     pcmk__xml_free(vote);
 320 
 321     crm_debug("Started election round %u", cluster->priv->election->count);
 322     election_timeout_start(cluster);
 323     return;
 324 }
 325 
 326 /*!
 327  * \internal
 328  * \brief Check whether local node has won an election
 329  *
 330  * If all known peers have sent no-vote messages, stop the election timer, set
 331  * the election state to won, and call any registered win callback.
 332  *
 333  * \param[in,out] cluster  Cluster with election
 334  *
 335  * \return TRUE if local node has won, FALSE otherwise
 336  * \note If all known peers have sent no-vote messages, but the election owner
 337  *       does not call this function, the election will not be won (and the
 338  *       callback will not be called) until the election times out.
 339  * \note This should be called when election_count_vote() returns
 340  *       \c election_in_progress.
 341  */
 342 bool
 343 election_check(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 344 {
 345     int voted_size = 0;
 346     int num_members = 0;
 347 
 348     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL),
 349               return false);
 350 
 351     if (cluster->priv->election->voted == NULL) {
 352         crm_trace("Election check requested, but no votes received yet");
 353         return FALSE;
 354     }
 355 
 356     voted_size = g_hash_table_size(cluster->priv->election->voted);
 357     num_members = pcmk__cluster_num_active_nodes();
 358 
 359     /* in the case of #voted > #members, it is better to
 360      *   wait for the timeout and give the cluster time to
 361      *   stabilize
 362      */
 363     if (voted_size >= num_members) {
 364         /* we won and everyone has voted */
 365         election_timeout_stop(cluster);
 366         if (voted_size > num_members) {
 367             GHashTableIter gIter;
 368             const pcmk__node_status_t *node = NULL;
 369             char *key = NULL;
 370 
 371             crm_warn("Received too many votes in election");
 372             g_hash_table_iter_init(&gIter, pcmk__peer_cache);
 373             while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
 374                 if (pcmk__cluster_is_node_active(node)) {
 375                     crm_warn("* expected vote: %s", node->name);
 376                 }
 377             }
 378 
 379             g_hash_table_iter_init(&gIter, cluster->priv->election->voted);
 380             while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
 381                 crm_warn("* actual vote: %s", key);
 382             }
 383 
 384         }
 385 
 386         crm_info("Election won by local node");
 387         election_complete(cluster);
 388         return TRUE;
 389 
 390     } else {
 391         crm_debug("Election still waiting on %d of %d vote%s",
 392                   num_members - voted_size, num_members,
 393                   pcmk__plural_s(num_members));
 394     }
 395 
 396     return FALSE;
 397 }
 398 
 399 #define LOSS_DAMPEN 2           /* in seconds */
 400 
 401 struct vote {
 402     const char *op;
 403     const char *from;
 404     const char *version;
 405     const char *election_owner;
 406     int election_id;
 407     struct timeval age;
 408 };
 409 
 410 /*!
 411  * \internal
 412  * \brief Unpack an election message
 413  *
 414  * \param[in] message  Election message XML
 415  * \param[out] vote    Parsed fields from message
 416  *
 417  * \return TRUE if election message and election are valid, FALSE otherwise
 418  * \note The parsed struct's pointer members are valid only for the lifetime of
 419  *       the message argument.
 420  */
 421 static bool
 422 parse_election_message(const xmlNode *message, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 423 {
 424     CRM_CHECK(message && vote, return FALSE);
 425 
 426     vote->election_id = -1;
 427     vote->age.tv_sec = -1;
 428     vote->age.tv_usec = -1;
 429 
 430     vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
 431     vote->from = crm_element_value(message, PCMK__XA_SRC);
 432     vote->version = crm_element_value(message, PCMK_XA_VERSION);
 433     vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
 434 
 435     crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
 436 
 437     if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
 438         || (vote->election_owner == NULL) || (vote->election_id < 0)) {
 439 
 440         crm_warn("Invalid %s message from %s",
 441                  pcmk__s(vote->op, "election"),
 442                  pcmk__s(vote->from, "unspecified node"));
 443         crm_log_xml_trace(message, "bad-vote");
 444         return FALSE;
 445     }
 446 
 447     // Op-specific validation
 448 
 449     if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
 450         /* Only vote ops have uptime.
 451            Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
 452          */
 453         crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
 454                                   PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
 455         if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
 456             crm_warn("Cannot count election %s from %s "
 457                      "because it is missing uptime", vote->op, vote->from);
 458             return FALSE;
 459         }
 460 
 461     } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
 462         crm_info("Cannot process election message from %s "
 463                  "because %s is not a known election op", vote->from, vote->op);
 464         return FALSE;
 465     }
 466 
 467     /* If the membership cache is NULL, we REALLY shouldn't be voting --
 468      * the question is how we managed to get here.
 469      */
 470     if (pcmk__peer_cache == NULL) {
 471         crm_info("Cannot count election %s from %s "
 472                  "because no peer information available", vote->op, vote->from);
 473         return FALSE;
 474     }
 475     return TRUE;
 476 }
 477 
 478 static void
 479 record_vote(pcmk_cluster_t *cluster, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 480 {
 481     pcmk__assert((vote->from != NULL) && (vote->op != NULL));
 482 
 483     if (cluster->priv->election->voted == NULL) {
 484         cluster->priv->election->voted = pcmk__strkey_table(free, free);
 485     }
 486     pcmk__insert_dup(cluster->priv->election->voted, vote->from, vote->op);
 487 }
 488 
 489 static void
 490 send_no_vote(pcmk_cluster_t *cluster, pcmk__node_status_t *peer,
     /* [previous][next][first][last][top][bottom][index][help] */
 491              struct vote *vote)
 492 {
 493     const char *message_type = NULL;
 494     xmlNode *novote = NULL;
 495 
 496     message_type = pcmk__server_message_type(cluster->priv->server);
 497     novote = pcmk__new_request(cluster->priv->server, message_type,
 498                                vote->from, message_type, CRM_OP_NOVOTE, NULL);
 499     crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
 500     crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
 501 
 502     pcmk__cluster_send_message(peer, cluster->priv->server, novote);
 503     pcmk__xml_free(novote);
 504 }
 505 
 506 /*!
 507  * \internal
 508  * \brief Process an election message (vote or no-vote) from a peer
 509  *
 510  * \param[in,out] cluster  Cluster with election
 511  * \param[in]     message  Election message XML from peer
 512  * \param[in]     can_win  Whether local node is eligible to win
 513  *
 514  * \return Election state after new vote is considered
 515  * \note If the peer message is a vote, and we prefer the peer to win, this will
 516  *       send a no-vote reply to the peer.
 517  * \note The situations "we lost to this vote" from "this is a late no-vote
 518  *       after we've already lost" both return election_lost. If a caller needs
 519  *       to distinguish them, it should save the current state before calling
 520  *       this function, and then compare the result.
 521  */
 522 enum election_result
 523 election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message,
     /* [previous][next][first][last][top][bottom][index][help] */
 524                     bool can_win)
 525 {
 526     int log_level = LOG_INFO;
 527     gboolean done = FALSE;
 528     gboolean we_lose = FALSE;
 529     const char *reason = "unknown";
 530     bool we_are_owner = FALSE;
 531     pcmk__node_status_t *our_node = NULL;
 532     pcmk__node_status_t *your_node = NULL;
 533     time_t tm_now = time(NULL);
 534     struct vote vote;
 535 
 536     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL)
 537               && (message != NULL) && (cluster->priv->node_name != NULL),
 538               return election_error);
 539 
 540     if (!parse_election_message(message, &vote)) {
 541         return election_error;
 542     }
 543 
 544     your_node = pcmk__get_node(0, vote.from, NULL,
 545                                pcmk__node_search_cluster_member);
 546     our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
 547                               pcmk__node_search_cluster_member);
 548     we_are_owner = (our_node != NULL)
 549                    && pcmk__str_eq(our_node->xml_id, vote.election_owner,
 550                                    pcmk__str_none);
 551 
 552     if (!can_win) {
 553         reason = "Not eligible";
 554         we_lose = TRUE;
 555 
 556     } else if (!pcmk__cluster_is_node_active(our_node)) {
 557         reason = "We are not part of the cluster";
 558         log_level = LOG_ERR;
 559         we_lose = TRUE;
 560 
 561     } else if (we_are_owner
 562                && (vote.election_id != cluster->priv->election->count)) {
 563         log_level = LOG_TRACE;
 564         reason = "Superseded";
 565         done = TRUE;
 566 
 567     } else if (!pcmk__cluster_is_node_active(your_node)) {
 568         /* Possibly we cached the message in the FSA queue at a point that it wasn't */
 569         reason = "Peer is not part of our cluster";
 570         log_level = LOG_WARNING;
 571         done = TRUE;
 572 
 573     } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
 574                || pcmk__str_eq(vote.from, cluster->priv->node_name,
 575                                pcmk__str_casei)) {
 576         /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
 577          * for us to win
 578          */
 579         if (!we_are_owner) {
 580             crm_warn("Cannot count election round %d %s from %s "
 581                      "because we did not start election (node ID %s did)",
 582                      vote.election_id, vote.op, vote.from,
 583                      vote.election_owner);
 584             return election_error;
 585         }
 586         if (cluster->priv->election->state != election_in_progress) {
 587             // Should only happen if we already lost
 588             crm_debug("Not counting election round %d %s from %s "
 589                       "because no election in progress",
 590                       vote.election_id, vote.op, vote.from);
 591             return cluster->priv->election->state;
 592         }
 593         record_vote(cluster, &vote);
 594         reason = "Recorded";
 595         done = TRUE;
 596 
 597     } else {
 598         // A peer vote requires a comparison to determine which node is better
 599         int age_result = compare_age(vote.age);
 600         int version_result = compare_version(vote.version, CRM_FEATURE_SET);
 601 
 602         if (version_result < 0) {
 603             reason = "Version";
 604             we_lose = TRUE;
 605 
 606         } else if (version_result > 0) {
 607             reason = "Version";
 608 
 609         } else if (age_result < 0) {
 610             reason = "Uptime";
 611             we_lose = TRUE;
 612 
 613         } else if (age_result > 0) {
 614             reason = "Uptime";
 615 
 616         } else if (strcasecmp(cluster->priv->node_name, vote.from) > 0) {
 617             reason = "Host name";
 618             we_lose = TRUE;
 619 
 620         } else {
 621             reason = "Host name";
 622         }
 623     }
 624 
 625     if (cluster->priv->election->expires < tm_now) {
 626         cluster->priv->election->election_wins = 0;
 627         cluster->priv->election->expires = tm_now + STORM_INTERVAL;
 628 
 629     } else if (done == FALSE && we_lose == FALSE) {
 630         int peers = 1 + g_hash_table_size(pcmk__peer_cache);
 631 
 632         /* If every node has to vote down every other node, thats N*(N-1) total elections
 633          * Allow some leeway before _really_ complaining
 634          */
 635         cluster->priv->election->election_wins++;
 636         if (cluster->priv->election->election_wins > (peers * peers)) {
 637             crm_warn("Election storm detected: %d wins in %d seconds",
 638                      cluster->priv->election->election_wins, STORM_INTERVAL);
 639             cluster->priv->election->election_wins = 0;
 640             cluster->priv->election->expires = tm_now + STORM_INTERVAL;
 641             if (!(cluster->priv->election->wrote_blackbox)) {
 642                 /* It's questionable whether a black box (from every node in the
 643                  * cluster) would be truly helpful in diagnosing an election
 644                  * storm. It's also highly doubtful a production environment
 645                  * would get multiple election storms from distinct causes, so
 646                  * saving one blackbox per process lifetime should be
 647                  * sufficient. Alternatives would be to save a timestamp of the
 648                  * last blackbox write instead of a boolean, and write a new one
 649                  * if some amount of time has passed; or to save a storm count,
 650                  * write a blackbox on every Nth occurrence.
 651                  */
 652                 crm_write_blackbox(0, NULL);
 653                 cluster->priv->election->wrote_blackbox = true;
 654             }
 655         }
 656     }
 657 
 658     if (done) {
 659         do_crm_log(log_level + 1,
 660                    "Processed election round %u %s (current round %d) "
 661                    "from %s (%s)",
 662                    vote.election_id, vote.op, cluster->priv->election->count,
 663                    vote.from, reason);
 664         return cluster->priv->election->state;
 665 
 666     } else if (we_lose == FALSE) {
 667         /* We track the time of the last election loss to implement an election
 668          * dampening period, reducing the likelihood of an election storm. If
 669          * this node has lost within the dampening period, don't start a new
 670          * election, even if we win against a peer's vote -- the peer we lost to
 671          * should win again.
 672          *
 673          * @TODO This has a problem case: if an election winner immediately
 674          * leaves the cluster, and a new election is immediately called, all
 675          * nodes could lose, with no new winner elected. The ideal solution
 676          * would be to tie the election structure with the peer caches, which
 677          * would allow us to clear the dampening when the previous winner
 678          * leaves (and would allow other improvements as well).
 679          */
 680         if ((cluster->priv->election->last_election_loss == 0)
 681             || ((tm_now - cluster->priv->election->last_election_loss)
 682                 > (time_t) LOSS_DAMPEN)) {
 683 
 684             do_crm_log(log_level,
 685                        "Election round %d (started by node ID %s) pass: "
 686                        "%s from %s (%s)",
 687                        vote.election_id, vote.election_owner, vote.op,
 688                        vote.from, reason);
 689 
 690             cluster->priv->election->last_election_loss = 0;
 691             election_timeout_stop(cluster);
 692 
 693             /* Start a new election by voting down this, and other, peers */
 694             cluster->priv->election->state = election_start;
 695             return cluster->priv->election->state;
 696         } else {
 697             char *loss_time = NULL;
 698 
 699             loss_time = ctime(&(cluster->priv->election->last_election_loss));
 700             if (loss_time) {
 701                 // Show only HH:MM:SS
 702                 loss_time += 11;
 703                 loss_time[8] = '\0';
 704             }
 705             crm_info("Ignoring election round %d (started by node ID %s) pass "
 706                      "vs %s because we lost less than %ds ago at %s",
 707                      vote.election_id, vote.election_owner, vote.from,
 708                      LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
 709         }
 710     }
 711 
 712     cluster->priv->election->last_election_loss = tm_now;
 713 
 714     do_crm_log(log_level,
 715                "Election round %d (started by node ID %s) lost: "
 716                "%s from %s (%s)",
 717                vote.election_id, vote.election_owner, vote.op,
 718                vote.from, reason);
 719 
 720     election_reset(cluster);
 721     send_no_vote(cluster, your_node, &vote);
 722     cluster->priv->election->state = election_lost;
 723     return cluster->priv->election->state;
 724 }
 725 
 726 /*!
 727  * \internal
 728  * \brief Reset any election dampening currently in effect
 729  *
 730  * \param[in,out] cluster  Cluster with election
 731  */
 732 void
 733 election_clear_dampening(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 734 {
 735     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 736         cluster->priv->election->last_election_loss = 0;
 737     }
 738 }

/* [previous][next][first][last][top][bottom][index][help] */