root/lib/cluster/election.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. election_complete
  2. election_timer_cb
  3. election_state
  4. election_init
  5. election_remove
  6. election_reset
  7. election_fini
  8. election_timeout_start
  9. election_timeout_stop
  10. election_timeout_set_period
  11. get_uptime
  12. compare_age
  13. election_vote
  14. election_check
  15. parse_election_message
  16. record_vote
  17. send_no_vote
  18. election_count_vote
  19. election_clear_dampening

   1 /*
   2  * Copyright 2004-2025 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/time.h>
  13 #include <sys/resource.h>
  14 
  15 #include <crm/crm.h>
  16 #include <crm/common/mainloop.h>
  17 #include <crm/common/xml.h>
  18 
  19 #include <crm/cluster/internal.h>
  20 #include <crm/cluster/election_internal.h>
  21 #include "crmcluster_private.h"
  22 
/* Length (in seconds) of the election storm detection window. The same value
 * is used by get_uptime() as the lifetime of its cached CPU usage sample.
 */
#define STORM_INTERVAL   2      /* in seconds */

/* Election bookkeeping for a cluster object. It is allocated by
 * election_init(), stored in cluster->priv->election, and freed by
 * election_fini().
 */
struct pcmk__election {
    enum election_result state;     // Current state of election
    guint count;                    // Round counter, incremented each time local node calls a vote
    void (*cb)(pcmk_cluster_t *);   // Function to call if election is won (may be NULL)
    GHashTable *voted;  // Key = node name, value = how node voted (NULL until first vote recorded)
    mainloop_timer_t *timeout; // Fires to declare local node winner if votes are missing
    int election_wins;         // Wins against peer votes in current storm window
    bool wrote_blackbox;       // Write a storm blackbox at most once
    time_t expires;            // When storm detection period ends
    time_t last_election_loss; // Time of most recent loss (0 if none); dampening is measured from it
};
  36 
  37 static void
  38 election_complete(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
  39 {
  40     pcmk__assert((cluster != NULL) && (cluster->priv->election != NULL));
  41     cluster->priv->election->state = election_won;
  42     if (cluster->priv->election->cb != NULL) {
  43         cluster->priv->election->cb(cluster);
  44     }
  45     election_reset(cluster);
  46 }
  47 
  48 static gboolean
  49 election_timer_cb(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  50 {
  51     pcmk_cluster_t *cluster = user_data;
  52 
  53     crm_info("Declaring local node as winner after election timed out");
  54     election_complete(cluster);
  55     return FALSE;
  56 }
  57 
  58 /*!
  59  * \internal
  60  * \brief Get current state of an election
  61  *
  62  * \param[in] cluster  Cluster with election
  63  *
  64  * \return Current state of \e
  65  */
  66 enum election_result
  67 election_state(const pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
  68 {
  69     if ((cluster == NULL) || (cluster->priv->election == NULL)) {
  70         return election_error;
  71     }
  72     return cluster->priv->election->state;
  73 }
  74 
  75 /* The local node will be declared the winner if missing votes are not received
  76  * within this time. The value is chosen to be the same as the default for the
  77  * election-timeout cluster option.
  78  */
  79 #define ELECTION_TIMEOUT_MS 120000
  80 
  81 /*!
  82  * \internal
  83  * \brief Track election state in a cluster
  84  *
  85  * Every node that wishes to participate in an election must initialize the
  86  * election once, typically at start-up.
  87  *
  88  * \param[in] cluster    Cluster that election is for
  89  * \param[in] cb         Function to call if local node wins election
  90  */
  91 void
  92 election_init(pcmk_cluster_t *cluster, void (*cb)(pcmk_cluster_t *))
     /* [previous][next][first][last][top][bottom][index][help] */
  93 {
  94     const char *name = pcmk__s(crm_system_name, "election");
  95 
  96     CRM_CHECK(cluster->priv->election == NULL, return);
  97 
  98     cluster->priv->election = pcmk__assert_alloc(1, sizeof(pcmk__election_t));
  99     cluster->priv->election->cb = cb;
 100     cluster->priv->election->timeout = mainloop_timer_add(name,
 101                                                           ELECTION_TIMEOUT_MS,
 102                                                           FALSE,
 103                                                           election_timer_cb,
 104                                                           cluster);
 105 }
 106 
 107 /*!
 108  * \internal
 109  * \brief Disregard any previous vote by specified peer
 110  *
 111  * This discards any recorded vote from a specified peer. Election users should
 112  * call this whenever a voting peer becomes inactive.
 113  *
 114  * \param[in,out] cluster  Cluster with election
 115  * \param[in]     uname    Name of peer to disregard
 116  */
 117 void
 118 election_remove(pcmk_cluster_t *cluster, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 119 {
 120     if ((cluster != NULL) && (cluster->priv->election != NULL)
 121         && (uname != NULL) && (cluster->priv->election->voted != NULL)) {
 122         crm_trace("Discarding (no-)vote from lost peer %s", uname);
 123         g_hash_table_remove(cluster->priv->election->voted, uname);
 124     }
 125 }
 126 
 127 /*!
 128  * \internal
 129  * \brief Stop election timer and disregard all votes
 130  *
 131  * \param[in,out] cluster  Cluster with election
 132  */
 133 void
 134 election_reset(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 135 {
 136     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 137         crm_trace("Resetting election");
 138         mainloop_timer_stop(cluster->priv->election->timeout);
 139         if (cluster->priv->election->voted != NULL) {
 140             g_hash_table_destroy(cluster->priv->election->voted);
 141             cluster->priv->election->voted = NULL;
 142         }
 143     }
 144 }
 145 
 146 /*!
 147  * \internal
 148  * \brief Free an election object
 149  *
 150  * Free all memory associated with an election object, stopping its
 151  * election timer (if running).
 152  *
 153  * \param[in,out] cluster  Cluster with election
 154  */
 155 void
 156 election_fini(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 157 {
 158     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 159         election_reset(cluster);
 160         crm_trace("Destroying election");
 161         mainloop_timer_del(cluster->priv->election->timeout);
 162         free(cluster->priv->election);
 163         cluster->priv->election = NULL;
 164     }
 165 }
 166 
 167 static void
 168 election_timeout_start(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170     mainloop_timer_start(cluster->priv->election->timeout);
 171 }
 172 
 173 /*!
 174  * \internal
 175  * \brief Stop an election's timer, if running
 176  *
 177  * \param[in,out] cluster  Cluster with election
 178  */
 179 void
 180 election_timeout_stop(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 181 {
 182     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 183         mainloop_timer_stop(cluster->priv->election->timeout);
 184     }
 185 }
 186 
 187 /*!
 188  * \internal
 189  * \brief Change an election's timeout (restarting timer if running)
 190  *
 191  * \param[in,out] cluster  Cluster with election
 192  * \param[in]     period   New timeout
 193  */
 194 void
 195 election_timeout_set_period(pcmk_cluster_t *cluster, guint period)
     /* [previous][next][first][last][top][bottom][index][help] */
 196 {
 197     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
 198     mainloop_timer_set_period(cluster->priv->election->timeout, period);
 199 }
 200 
 201 static int
 202 get_uptime(struct timeval *output)
     /* [previous][next][first][last][top][bottom][index][help] */
 203 {
 204     static time_t expires = 0;
 205     static struct rusage info;
 206 
 207     time_t tm_now = time(NULL);
 208 
 209     if (expires < tm_now) {
 210         int rc = 0;
 211 
 212         info.ru_utime.tv_sec = 0;
 213         info.ru_utime.tv_usec = 0;
 214         rc = getrusage(RUSAGE_SELF, &info);
 215 
 216         output->tv_sec = 0;
 217         output->tv_usec = 0;
 218 
 219         if (rc < 0) {
 220             crm_perror(LOG_ERR, "Could not calculate the current uptime");
 221             expires = 0;
 222             return -1;
 223         }
 224 
 225         crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
 226                   (long)info.ru_utime.tv_usec);
 227     }
 228 
 229     expires = tm_now + STORM_INTERVAL;  /* N seconds after the last _access_ */
 230     output->tv_sec = info.ru_utime.tv_sec;
 231     output->tv_usec = info.ru_utime.tv_usec;
 232 
 233     return 1;
 234 }
 235 
 236 static int
 237 compare_age(struct timeval your_age)
     /* [previous][next][first][last][top][bottom][index][help] */
 238 {
 239     struct timeval our_age;
 240 
 241     get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */
 242 
 243     if (our_age.tv_sec > your_age.tv_sec) {
 244         crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 245         return 1;
 246     } else if (our_age.tv_sec < your_age.tv_sec) {
 247         crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 248         return -1;
 249     } else if (our_age.tv_usec > your_age.tv_usec) {
 250         crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
 251                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 252         return 1;
 253     } else if (our_age.tv_usec < your_age.tv_usec) {
 254         crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
 255                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 256         return -1;
 257     }
 258 
 259     return 0;
 260 }
 261 
 262 /*!
 263  * \internal
 264  * \brief Start a new election by offering local node's candidacy
 265  *
 266  * Broadcast a "vote" election message containing the local node's ID,
 267  * (incremented) election counter, and uptime, and start the election timer.
 268  *
 269  * \param[in,out] cluster  Cluster with election
 270  *
 271  * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
 272  *       all active peers do so, or if the election times out, the local node
 273  *       wins the election. (If we lose to any peer vote, we will stop the
 274  *       timer, so a timeout means we did not lose -- either some peer did not
 275  *       vote, or we did not call election_check() in time.)
 276  */
 277 void
 278 election_vote(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 279 {
 280     struct timeval age;
 281     xmlNode *vote = NULL;
 282     pcmk__node_status_t *our_node = NULL;
 283     const char *message_type = NULL;
 284 
 285     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL), return);
 286 
 287     if (cluster->priv->node_name == NULL) {
 288         crm_err("Cannot start an election: Local node name unknown");
 289         return;
 290     }
 291 
 292     our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
 293                               pcmk__node_search_cluster_member);
 294     if (!pcmk__cluster_is_node_active(our_node)) {
 295         crm_trace("Cannot vote yet: local node not connected to cluster");
 296         return;
 297     }
 298 
 299     election_reset(cluster);
 300     cluster->priv->election->state = election_in_progress;
 301     message_type = pcmk__server_message_type(cluster->priv->server);
 302 
 303     /* @COMPAT We use message_type as the sender and recipient system for
 304      * backward compatibility (see T566).
 305      */
 306     vote = pcmk__new_request(cluster->priv->server, message_type,
 307                              NULL, message_type, CRM_OP_VOTE, NULL);
 308 
 309     cluster->priv->election->count++;
 310     crm_xml_add(vote, PCMK__XA_ELECTION_OWNER,
 311                 pcmk__cluster_get_xml_id(our_node));
 312     crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, cluster->priv->election->count);
 313 
 314     // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
 315     get_uptime(&age);
 316     crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
 317                         PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
 318 
 319     pcmk__cluster_send_message(NULL, cluster->priv->server, vote);
 320     pcmk__xml_free(vote);
 321 
 322     crm_debug("Started election round %u", cluster->priv->election->count);
 323     election_timeout_start(cluster);
 324     return;
 325 }
 326 
 327 /*!
 328  * \internal
 329  * \brief Check whether local node has won an election
 330  *
 331  * If all known peers have sent no-vote messages, stop the election timer, set
 332  * the election state to won, and call any registered win callback.
 333  *
 334  * \param[in,out] cluster  Cluster with election
 335  *
 336  * \return TRUE if local node has won, FALSE otherwise
 337  * \note If all known peers have sent no-vote messages, but the election owner
 338  *       does not call this function, the election will not be won (and the
 339  *       callback will not be called) until the election times out.
 340  * \note This should be called when election_count_vote() returns
 341  *       \c election_in_progress.
 342  */
 343 bool
 344 election_check(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 345 {
 346     int voted_size = 0;
 347     int num_members = 0;
 348 
 349     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL),
 350               return false);
 351 
 352     if (cluster->priv->election->voted == NULL) {
 353         crm_trace("Election check requested, but no votes received yet");
 354         return FALSE;
 355     }
 356 
 357     voted_size = g_hash_table_size(cluster->priv->election->voted);
 358     num_members = pcmk__cluster_num_active_nodes();
 359 
 360     /* in the case of #voted > #members, it is better to
 361      *   wait for the timeout and give the cluster time to
 362      *   stabilize
 363      */
 364     if (voted_size >= num_members) {
 365         /* we won and everyone has voted */
 366         election_timeout_stop(cluster);
 367         if (voted_size > num_members) {
 368             GHashTableIter gIter;
 369             const pcmk__node_status_t *node = NULL;
 370             char *key = NULL;
 371 
 372             crm_warn("Received too many votes in election");
 373             g_hash_table_iter_init(&gIter, pcmk__peer_cache);
 374             while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
 375                 if (pcmk__cluster_is_node_active(node)) {
 376                     crm_warn("* expected vote: %s", node->name);
 377                 }
 378             }
 379 
 380             g_hash_table_iter_init(&gIter, cluster->priv->election->voted);
 381             while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
 382                 crm_warn("* actual vote: %s", key);
 383             }
 384 
 385         }
 386 
 387         crm_info("Election won by local node");
 388         election_complete(cluster);
 389         return TRUE;
 390 
 391     } else {
 392         crm_debug("Election still waiting on %d of %d vote%s",
 393                   num_members - voted_size, num_members,
 394                   pcmk__plural_s(num_members));
 395     }
 396 
 397     return FALSE;
 398 }
 399 
/* Dampening period: after losing an election, the local node will not start a
 * new election by out-voting a peer until more than this many seconds have
 * passed since the loss.
 */
#define LOSS_DAMPEN 2           /* in seconds */

/* Fields unpacked from an election message by parse_election_message(). The
 * string members point into the message XML and are not owned by this struct.
 */
struct vote {
    // Value of PCMK__XA_CRM_TASK (CRM_OP_VOTE or CRM_OP_NOVOTE once validated)
    const char *op;
    // Value of PCMK__XA_SRC (name of the sending node)
    const char *from;
    // Value of PCMK_XA_VERSION (compared against CRM_FEATURE_SET)
    const char *version;
    // Value of PCMK__XA_ELECTION_OWNER (XML ID of the node that called the vote)
    const char *election_owner;
    // Value of PCMK__XA_ELECTION_ID (-1 if absent)
    int election_id;
    // Sender's age; only parsed for vote ops, otherwise left as {-1,-1}
    struct timeval age;
};
 410 
 411 /*!
 412  * \internal
 413  * \brief Unpack an election message
 414  *
 415  * \param[in] message  Election message XML
 416  * \param[out] vote    Parsed fields from message
 417  *
 418  * \return TRUE if election message and election are valid, FALSE otherwise
 419  * \note The parsed struct's pointer members are valid only for the lifetime of
 420  *       the message argument.
 421  */
 422 static bool
 423 parse_election_message(const xmlNode *message, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 424 {
 425     CRM_CHECK(message && vote, return FALSE);
 426 
 427     vote->election_id = -1;
 428     vote->age.tv_sec = -1;
 429     vote->age.tv_usec = -1;
 430 
 431     vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
 432     vote->from = crm_element_value(message, PCMK__XA_SRC);
 433     vote->version = crm_element_value(message, PCMK_XA_VERSION);
 434     vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
 435 
 436     crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
 437 
 438     if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
 439         || (vote->election_owner == NULL) || (vote->election_id < 0)) {
 440 
 441         crm_warn("Invalid %s message from %s",
 442                  pcmk__s(vote->op, "election"),
 443                  pcmk__s(vote->from, "unspecified node"));
 444         crm_log_xml_trace(message, "bad-vote");
 445         return FALSE;
 446     }
 447 
 448     // Op-specific validation
 449 
 450     if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
 451         /* Only vote ops have uptime.
 452            Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
 453          */
 454         crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
 455                                   PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
 456         if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
 457             crm_warn("Cannot count election %s from %s "
 458                      "because it is missing uptime", vote->op, vote->from);
 459             return FALSE;
 460         }
 461 
 462     } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
 463         crm_info("Cannot process election message from %s "
 464                  "because %s is not a known election op", vote->from, vote->op);
 465         return FALSE;
 466     }
 467 
 468     /* If the membership cache is NULL, we REALLY shouldn't be voting --
 469      * the question is how we managed to get here.
 470      */
 471     if (pcmk__peer_cache == NULL) {
 472         crm_info("Cannot count election %s from %s "
 473                  "because no peer information available", vote->op, vote->from);
 474         return FALSE;
 475     }
 476     return TRUE;
 477 }
 478 
 479 static void
 480 record_vote(pcmk_cluster_t *cluster, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 481 {
 482     pcmk__assert((vote->from != NULL) && (vote->op != NULL));
 483 
 484     if (cluster->priv->election->voted == NULL) {
 485         cluster->priv->election->voted = pcmk__strkey_table(free, free);
 486     }
 487     pcmk__insert_dup(cluster->priv->election->voted, vote->from, vote->op);
 488 }
 489 
 490 static void
 491 send_no_vote(pcmk_cluster_t *cluster, pcmk__node_status_t *peer,
     /* [previous][next][first][last][top][bottom][index][help] */
 492              struct vote *vote)
 493 {
 494     const char *message_type = NULL;
 495     xmlNode *novote = NULL;
 496 
 497     message_type = pcmk__server_message_type(cluster->priv->server);
 498     novote = pcmk__new_request(cluster->priv->server, message_type,
 499                                vote->from, message_type, CRM_OP_NOVOTE, NULL);
 500     crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
 501     crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
 502 
 503     pcmk__cluster_send_message(peer, cluster->priv->server, novote);
 504     pcmk__xml_free(novote);
 505 }
 506 
 507 /*!
 508  * \internal
 509  * \brief Process an election message (vote or no-vote) from a peer
 510  *
 511  * \param[in,out] cluster  Cluster with election
 512  * \param[in]     message  Election message XML from peer
 513  * \param[in]     can_win  Whether local node is eligible to win
 514  *
 515  * \return Election state after new vote is considered
 516  * \note If the peer message is a vote, and we prefer the peer to win, this will
 517  *       send a no-vote reply to the peer.
 518  * \note The situations "we lost to this vote" from "this is a late no-vote
 519  *       after we've already lost" both return election_lost. If a caller needs
 520  *       to distinguish them, it should save the current state before calling
 521  *       this function, and then compare the result.
 522  */
 523 enum election_result
 524 election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message,
     /* [previous][next][first][last][top][bottom][index][help] */
 525                     bool can_win)
 526 {
 527     int log_level = LOG_INFO;
 528     gboolean done = FALSE;
 529     gboolean we_lose = FALSE;
 530     const char *reason = NULL;
 531     bool we_are_owner = FALSE;
 532     pcmk__node_status_t *our_node = NULL;
 533     pcmk__node_status_t *your_node = NULL;
 534     time_t tm_now = time(NULL);
 535     struct vote vote;
 536 
 537     CRM_CHECK((cluster != NULL) && (cluster->priv->election != NULL)
 538               && (message != NULL) && (cluster->priv->node_name != NULL),
 539               return election_error);
 540 
 541     if (!parse_election_message(message, &vote)) {
 542         return election_error;
 543     }
 544 
 545     your_node = pcmk__get_node(0, vote.from, NULL,
 546                                pcmk__node_search_cluster_member);
 547     our_node = pcmk__get_node(0, cluster->priv->node_name, NULL,
 548                               pcmk__node_search_cluster_member);
 549     we_are_owner = (our_node != NULL)
 550                    && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node),
 551                                    vote.election_owner, pcmk__str_none);
 552 
 553     if (!can_win) {
 554         reason = "Not eligible";
 555         we_lose = TRUE;
 556 
 557     } else if (!pcmk__cluster_is_node_active(our_node)) {
 558         reason = "We are not part of the cluster";
 559         log_level = LOG_ERR;
 560         we_lose = TRUE;
 561 
 562     } else if (we_are_owner
 563                && (vote.election_id != cluster->priv->election->count)) {
 564         log_level = LOG_TRACE;
 565         reason = "Superseded";
 566         done = TRUE;
 567 
 568     } else if (!pcmk__cluster_is_node_active(your_node)) {
 569         /* Possibly we cached the message in the FSA queue at a point that it wasn't */
 570         reason = "Peer is not part of our cluster";
 571         log_level = LOG_WARNING;
 572         done = TRUE;
 573 
 574     } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
 575                || pcmk__str_eq(vote.from, cluster->priv->node_name,
 576                                pcmk__str_casei)) {
 577         /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
 578          * for us to win
 579          */
 580         if (!we_are_owner) {
 581             crm_warn("Cannot count election round %d %s from %s "
 582                      "because we did not start election (node ID %s did)",
 583                      vote.election_id, vote.op, vote.from,
 584                      vote.election_owner);
 585             return election_error;
 586         }
 587         if (cluster->priv->election->state != election_in_progress) {
 588             // Should only happen if we already lost
 589             crm_debug("Not counting election round %d %s from %s "
 590                       "because no election in progress",
 591                       vote.election_id, vote.op, vote.from);
 592             return cluster->priv->election->state;
 593         }
 594         record_vote(cluster, &vote);
 595         reason = "Recorded";
 596         done = TRUE;
 597 
 598     } else {
 599         // A peer vote requires a comparison to determine which node is better
 600         int age_result = compare_age(vote.age);
 601         int version_result = compare_version(vote.version, CRM_FEATURE_SET);
 602 
 603         if (version_result < 0) {
 604             reason = "Version";
 605             we_lose = TRUE;
 606 
 607         } else if (version_result > 0) {
 608             reason = "Version";
 609 
 610         } else if (age_result < 0) {
 611             reason = "Uptime";
 612             we_lose = TRUE;
 613 
 614         } else if (age_result > 0) {
 615             reason = "Uptime";
 616 
 617         } else if (strcasecmp(cluster->priv->node_name, vote.from) > 0) {
 618             reason = "Host name";
 619             we_lose = TRUE;
 620 
 621         } else {
 622             reason = "Host name";
 623         }
 624     }
 625 
 626     if (cluster->priv->election->expires < tm_now) {
 627         cluster->priv->election->election_wins = 0;
 628         cluster->priv->election->expires = tm_now + STORM_INTERVAL;
 629 
 630     } else if (done == FALSE && we_lose == FALSE) {
 631         int peers = 1 + g_hash_table_size(pcmk__peer_cache);
 632 
 633         /* If every node has to vote down every other node, thats N*(N-1) total elections
 634          * Allow some leeway before _really_ complaining
 635          */
 636         cluster->priv->election->election_wins++;
 637         if (cluster->priv->election->election_wins > (peers * peers)) {
 638             crm_warn("Election storm detected: %d wins in %d seconds",
 639                      cluster->priv->election->election_wins, STORM_INTERVAL);
 640             cluster->priv->election->election_wins = 0;
 641             cluster->priv->election->expires = tm_now + STORM_INTERVAL;
 642             if (!(cluster->priv->election->wrote_blackbox)) {
 643                 /* It's questionable whether a black box (from every node in the
 644                  * cluster) would be truly helpful in diagnosing an election
 645                  * storm. It's also highly doubtful a production environment
 646                  * would get multiple election storms from distinct causes, so
 647                  * saving one blackbox per process lifetime should be
 648                  * sufficient. Alternatives would be to save a timestamp of the
 649                  * last blackbox write instead of a boolean, and write a new one
 650                  * if some amount of time has passed; or to save a storm count,
 651                  * write a blackbox on every Nth occurrence.
 652                  */
 653                 crm_write_blackbox(0, NULL);
 654                 cluster->priv->election->wrote_blackbox = true;
 655             }
 656         }
 657     }
 658 
 659     if (done) {
 660         do_crm_log(log_level + 1,
 661                    "Processed election round %u %s (current round %d) "
 662                    "from %s (%s)",
 663                    vote.election_id, vote.op, cluster->priv->election->count,
 664                    vote.from, reason);
 665         return cluster->priv->election->state;
 666 
 667     } else if (we_lose == FALSE) {
 668         /* We track the time of the last election loss to implement an election
 669          * dampening period, reducing the likelihood of an election storm. If
 670          * this node has lost within the dampening period, don't start a new
 671          * election, even if we win against a peer's vote -- the peer we lost to
 672          * should win again.
 673          *
 674          * @TODO This has a problem case: if an election winner immediately
 675          * leaves the cluster, and a new election is immediately called, all
 676          * nodes could lose, with no new winner elected. The ideal solution
 677          * would be to tie the election structure with the peer caches, which
 678          * would allow us to clear the dampening when the previous winner
 679          * leaves (and would allow other improvements as well).
 680          */
 681         if ((cluster->priv->election->last_election_loss == 0)
 682             || ((tm_now - cluster->priv->election->last_election_loss)
 683                 > (time_t) LOSS_DAMPEN)) {
 684 
 685             do_crm_log(log_level,
 686                        "Election round %d (started by node ID %s) pass: "
 687                        "%s from %s (%s)",
 688                        vote.election_id, vote.election_owner, vote.op,
 689                        vote.from, reason);
 690 
 691             cluster->priv->election->last_election_loss = 0;
 692             election_timeout_stop(cluster);
 693 
 694             /* Start a new election by voting down this, and other, peers */
 695             cluster->priv->election->state = election_start;
 696             return cluster->priv->election->state;
 697         } else {
 698             char *loss_time = NULL;
 699 
 700             loss_time = ctime(&(cluster->priv->election->last_election_loss));
 701             if (loss_time) {
 702                 // Show only HH:MM:SS
 703                 loss_time += 11;
 704                 loss_time[8] = '\0';
 705             }
 706             crm_info("Ignoring election round %d (started by node ID %s) pass "
 707                      "vs %s because we lost less than %ds ago at %s",
 708                      vote.election_id, vote.election_owner, vote.from,
 709                      LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
 710         }
 711     }
 712 
 713     cluster->priv->election->last_election_loss = tm_now;
 714 
 715     do_crm_log(log_level,
 716                "Election round %d (started by node ID %s) lost: "
 717                "%s from %s (%s)",
 718                vote.election_id, vote.election_owner, vote.op,
 719                vote.from, reason);
 720 
 721     election_reset(cluster);
 722     send_no_vote(cluster, your_node, &vote);
 723     cluster->priv->election->state = election_lost;
 724     return cluster->priv->election->state;
 725 }
 726 
 727 /*!
 728  * \internal
 729  * \brief Reset any election dampening currently in effect
 730  *
 731  * \param[in,out] cluster  Cluster with election
 732  */
 733 void
 734 election_clear_dampening(pcmk_cluster_t *cluster)
     /* [previous][next][first][last][top][bottom][index][help] */
 735 {
 736     if ((cluster != NULL) && (cluster->priv->election != NULL)) {
 737         cluster->priv->election->last_election_loss = 0;
 738     }
 739 }

/* [previous][next][first][last][top][bottom][index][help] */