root/lib/cluster/election.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. election_complete
  2. election_timer_cb
  3. election_state
  4. election_init
  5. election_remove
  6. election_reset
  7. election_fini
  8. election_timeout_start
  9. election_timeout_stop
  10. election_timeout_set_period
  11. get_uptime
  12. compare_age
  13. election_vote
  14. election_check
  15. parse_election_message
  16. record_vote
  17. send_no_vote
  18. election_count_vote
  19. election_clear_dampening

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/time.h>
  13 #include <sys/resource.h>
  14 
  15 #include <crm/common/xml.h>
  16 
  17 #include <crm/common/mainloop.h>
  18 #include <crm/cluster/internal.h>
  19 #include <crm/cluster/election_internal.h>
  20 #include <crm/crm.h>
  21 
  22 #define STORM_INTERVAL   2      /* in seconds */
  23 
  24 struct election_s {
  25     enum election_result state;
  26     guint count;        // How many times local node has voted
  27     char *name;         // Descriptive name for this election
  28     char *uname;        // Local node's name
  29     GSourceFunc cb;     // Function to call if election is won
  30     GHashTable *voted;  // Key = node name, value = how node voted
  31     mainloop_timer_t *timeout; // When to abort if all votes not received
  32     int election_wins;         // Track wins, for storm detection
  33     bool wrote_blackbox;       // Write a storm blackbox at most once
  34     time_t expires;            // When storm detection period ends
  35     time_t last_election_loss; // When dampening period ends
  36 };
  37 
  38 static void
  39 election_complete(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
  40 {
  41     e->state = election_won;
  42     if (e->cb != NULL) {
  43         e->cb(e);
  44     }
  45     election_reset(e);
  46 }
  47 
  48 static gboolean
  49 election_timer_cb(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  50 {
  51     election_t *e = user_data;
  52 
  53     crm_info("%s timed out, declaring local node as winner", e->name);
  54     election_complete(e);
  55     return FALSE;
  56 }
  57 
  58 /*!
  59  * \brief Get current state of an election
  60  *
  61  * \param[in] e  Election object
  62  *
  63  * \return Current state of \e
  64  */
  65 enum election_result
  66 election_state(const election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
  67 {
  68     return (e == NULL)? election_error : e->state;
  69 }
  70 
  71 /*!
  72  * \brief Create a new election object
  73  *
  74  * Every node that wishes to participate in an election must create an election
  75  * object. Typically, this should be done once, at start-up. A caller should
  76  * only create a single election object.
  77  *
  78  * \param[in] name       Label for election (for logging)
  79  * \param[in] uname      Local node's name
  80  * \param[in] period_ms  How long to wait for all peers to vote
  81  * \param[in] cb         Function to call if local node wins election
  82  *
  83  * \return Newly allocated election object on success, NULL on error
  84  * \note The caller is responsible for freeing the returned value using
  85  *       election_fini().
  86  */
  87 election_t *
  88 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
     /* [previous][next][first][last][top][bottom][index][help] */
  89 {
  90     election_t *e = NULL;
  91 
  92     static guint count = 0;
  93 
  94     CRM_CHECK(uname != NULL, return NULL);
  95 
  96     e = calloc(1, sizeof(election_t));
  97     if (e == NULL) {
  98         crm_perror(LOG_CRIT, "Cannot create election");
  99         return NULL;
 100     }
 101 
 102     e->uname = strdup(uname);
 103     if (e->uname == NULL) {
 104         crm_perror(LOG_CRIT, "Cannot create election");
 105         free(e);
 106         return NULL;
 107     }
 108 
 109     e->name = name? crm_strdup_printf("election-%s", name)
 110                   : crm_strdup_printf("election-%u", count++);
 111     e->cb = cb;
 112     e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
 113                                     election_timer_cb, e);
 114     crm_trace("Created %s", e->name);
 115     return e;
 116 }
 117 
 118 /*!
 119  * \brief Disregard any previous vote by specified peer
 120  *
 121  * This discards any recorded vote from a specified peer. Election users should
 122  * call this whenever a voting peer becomes inactive.
 123  *
 124  * \param[in,out] e      Election object
 125  * \param[in]     uname  Name of peer to disregard
 126  */
 127 void
 128 election_remove(election_t *e, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 129 {
 130     if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
 131         crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
 132         g_hash_table_remove(e->voted, uname);
 133     }
 134 }
 135 
 136 /*!
 137  * \brief Stop election timer and disregard all votes
 138  *
 139  * \param[in,out] e  Election object
 140  */
 141 void
 142 election_reset(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 143 {
 144     if (e != NULL) {
 145         crm_trace("Resetting election %s", e->name);
 146         mainloop_timer_stop(e->timeout);
 147         if (e->voted) {
 148             crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
 149             g_hash_table_destroy(e->voted);
 150             e->voted = NULL;
 151         }
 152     }
 153 }
 154 
 155 /*!
 156  * \brief Free an election object
 157  *
 158  * Free all memory associated with an election object, stopping its
 159  * election timer (if running).
 160  *
 161  * \param[in,out] e  Election object
 162  */
 163 void
 164 election_fini(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 165 {
 166     if (e != NULL) {
 167         election_reset(e);
 168         crm_trace("Destroying %s", e->name);
 169         mainloop_timer_del(e->timeout);
 170         free(e->uname);
 171         free(e->name);
 172         free(e);
 173     }
 174 }
 175 
 176 static void
 177 election_timeout_start(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 178 {
 179     if (e != NULL) {
 180         mainloop_timer_start(e->timeout);
 181     }
 182 }
 183 
 184 /*!
 185  * \brief Stop an election's timer, if running
 186  *
 187  * \param[in,out] e  Election object
 188  */
 189 void
 190 election_timeout_stop(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 191 {
 192     if (e != NULL) {
 193         mainloop_timer_stop(e->timeout);
 194     }
 195 }
 196 
 197 /*!
 198  * \brief Change an election's timeout (restarting timer if running)
 199  *
 200  * \param[in,out] e       Election object
 201  * \param[in]     period  New timeout
 202  */
 203 void
 204 election_timeout_set_period(election_t *e, guint period)
     /* [previous][next][first][last][top][bottom][index][help] */
 205 {
 206     if (e != NULL) {
 207         mainloop_timer_set_period(e->timeout, period);
 208     } else {
 209         crm_err("No election defined");
 210     }
 211 }
 212 
 213 static int
 214 get_uptime(struct timeval *output)
     /* [previous][next][first][last][top][bottom][index][help] */
 215 {
 216     static time_t expires = 0;
 217     static struct rusage info;
 218 
 219     time_t tm_now = time(NULL);
 220 
 221     if (expires < tm_now) {
 222         int rc = 0;
 223 
 224         info.ru_utime.tv_sec = 0;
 225         info.ru_utime.tv_usec = 0;
 226         rc = getrusage(RUSAGE_SELF, &info);
 227 
 228         output->tv_sec = 0;
 229         output->tv_usec = 0;
 230 
 231         if (rc < 0) {
 232             crm_perror(LOG_ERR, "Could not calculate the current uptime");
 233             expires = 0;
 234             return -1;
 235         }
 236 
 237         crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
 238                   (long)info.ru_utime.tv_usec);
 239     }
 240 
 241     expires = tm_now + STORM_INTERVAL;  /* N seconds after the last _access_ */
 242     output->tv_sec = info.ru_utime.tv_sec;
 243     output->tv_usec = info.ru_utime.tv_usec;
 244 
 245     return 1;
 246 }
 247 
 248 static int
 249 compare_age(struct timeval your_age)
     /* [previous][next][first][last][top][bottom][index][help] */
 250 {
 251     struct timeval our_age;
 252 
 253     get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */
 254 
 255     if (our_age.tv_sec > your_age.tv_sec) {
 256         crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 257         return 1;
 258     } else if (our_age.tv_sec < your_age.tv_sec) {
 259         crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 260         return -1;
 261     } else if (our_age.tv_usec > your_age.tv_usec) {
 262         crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
 263                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 264         return 1;
 265     } else if (our_age.tv_usec < your_age.tv_usec) {
 266         crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
 267                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 268         return -1;
 269     }
 270 
 271     return 0;
 272 }
 273 
 274 /*!
 275  * \brief Start a new election by offering local node's candidacy
 276  *
 277  * Broadcast a "vote" election message containing the local node's ID,
 278  * (incremented) election counter, and uptime, and start the election timer.
 279  *
 280  * \param[in,out] e  Election object
 281  *
 282  * \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
 283  *       all active peers do so, or if the election times out, the local node
 284  *       wins the election. (If we lose to any peer vote, we will stop the
 285  *       timer, so a timeout means we did not lose -- either some peer did not
 286  *       vote, or we did not call election_check() in time.)
 287  */
 288 void
 289 election_vote(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 290 {
 291     struct timeval age;
 292     xmlNode *vote = NULL;
 293     crm_node_t *our_node;
 294 
 295     if (e == NULL) {
 296         crm_trace("Election vote requested, but no election available");
 297         return;
 298     }
 299 
 300     our_node = pcmk__get_node(0, e->uname, NULL,
 301                               pcmk__node_search_cluster_member);
 302     if (!pcmk__cluster_is_node_active(our_node)) {
 303         crm_trace("Cannot vote in %s yet: local node not connected to cluster",
 304                   e->name);
 305         return;
 306     }
 307 
 308     election_reset(e);
 309     e->state = election_in_progress;
 310     vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 311 
 312     e->count++;
 313     crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid);
 314     crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);
 315 
 316     // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
 317     get_uptime(&age);
 318     crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
 319                         PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
 320 
 321     pcmk__cluster_send_message(NULL, crm_msg_crmd, vote);
 322     free_xml(vote);
 323 
 324     crm_debug("Started %s round %d", e->name, e->count);
 325     election_timeout_start(e);
 326     return;
 327 }
 328 
 329 /*!
 330  * \brief Check whether local node has won an election
 331  *
 332  * If all known peers have sent no-vote messages, stop the election timer, set
 333  * the election state to won, and call any registered win callback.
 334  *
 335  * \param[in,out] e  Election object
 336  *
 337  * \return TRUE if local node has won, FALSE otherwise
 338  * \note If all known peers have sent no-vote messages, but the election owner
 339  *       does not call this function, the election will not be won (and the
 340  *       callback will not be called) until the election times out.
 341  * \note This should be called when election_count_vote() returns
 342  *       \c election_in_progress.
 343  */
 344 bool
 345 election_check(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 346 {
 347     int voted_size = 0;
 348     int num_members = 0;
 349 
 350     if (e == NULL) {
 351         crm_trace("Election check requested, but no election available");
 352         return FALSE;
 353     }
 354     if (e->voted == NULL) {
 355         crm_trace("%s check requested, but no votes received yet", e->name);
 356         return FALSE;
 357     }
 358 
 359     voted_size = g_hash_table_size(e->voted);
 360     num_members = pcmk__cluster_num_active_nodes();
 361 
 362     /* in the case of #voted > #members, it is better to
 363      *   wait for the timeout and give the cluster time to
 364      *   stabilize
 365      */
 366     if (voted_size >= num_members) {
 367         /* we won and everyone has voted */
 368         election_timeout_stop(e);
 369         if (voted_size > num_members) {
 370             GHashTableIter gIter;
 371             const crm_node_t *node;
 372             char *key = NULL;
 373 
 374             crm_warn("Received too many votes in %s", e->name);
 375             g_hash_table_iter_init(&gIter, crm_peer_cache);
 376             while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
 377                 if (pcmk__cluster_is_node_active(node)) {
 378                     crm_warn("* expected vote: %s", node->uname);
 379                 }
 380             }
 381 
 382             g_hash_table_iter_init(&gIter, e->voted);
 383             while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
 384                 crm_warn("* actual vote: %s", key);
 385             }
 386 
 387         }
 388 
 389         crm_info("%s won by local node", e->name);
 390         election_complete(e);
 391         return TRUE;
 392 
 393     } else {
 394         crm_debug("%s still waiting on %d of %d votes",
 395                   e->name, num_members - voted_size, num_members);
 396     }
 397 
 398     return FALSE;
 399 }
 400 
 401 #define LOSS_DAMPEN 2           /* in seconds */
 402 
 403 struct vote {
 404     const char *op;
 405     const char *from;
 406     const char *version;
 407     const char *election_owner;
 408     int election_id;
 409     struct timeval age;
 410 };
 411 
 412 /*!
 413  * \brief Unpack an election message
 414  *
 415  * \param[in] e        Election object (for logging only)
 416  * \param[in] message  Election message XML
 417  * \param[out] vote    Parsed fields from message
 418  *
 419  * \return TRUE if election message and election are valid, FALSE otherwise
 420  * \note The parsed struct's pointer members are valid only for the lifetime of
 421  *       the message argument.
 422  */
 423 static bool
 424 parse_election_message(const election_t *e, const xmlNode *message,
     /* [previous][next][first][last][top][bottom][index][help] */
 425                        struct vote *vote)
 426 {
 427     CRM_CHECK(message && vote, return FALSE);
 428 
 429     vote->election_id = -1;
 430     vote->age.tv_sec = -1;
 431     vote->age.tv_usec = -1;
 432 
 433     vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
 434     vote->from = crm_element_value(message, PCMK__XA_SRC);
 435     vote->version = crm_element_value(message, PCMK_XA_VERSION);
 436     vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
 437 
 438     crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
 439 
 440     if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
 441         || (vote->election_owner == NULL) || (vote->election_id < 0)) {
 442 
 443         crm_warn("Invalid %s message from %s in %s ",
 444                  (vote->op? vote->op : "election"),
 445                  (vote->from? vote->from : "unspecified node"),
 446                  (e? e->name : "election"));
 447         return FALSE;
 448     }
 449 
 450     // Op-specific validation
 451 
 452     if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
 453         /* Only vote ops have uptime.
 454            Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
 455          */
 456         crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
 457                                   PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
 458         if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
 459             crm_warn("Cannot count %s %s from %s because it is missing uptime",
 460                      (e? e->name : "election"), vote->op, vote->from);
 461             return FALSE;
 462         }
 463 
 464     } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
 465         crm_info("Cannot process %s message from %s because %s is not a known election op",
 466                  (e? e->name : "election"), vote->from, vote->op);
 467         return FALSE;
 468     }
 469 
 470     // Election validation
 471 
 472     if (e == NULL) {
 473         crm_info("Cannot count %s from %s because no election available",
 474                  vote->op, vote->from);
 475         return FALSE;
 476     }
 477 
 478     /* If the membership cache is NULL, we REALLY shouldn't be voting --
 479      * the question is how we managed to get here.
 480      */
 481     if (crm_peer_cache == NULL) {
 482         crm_info("Cannot count %s %s from %s because no peer information available",
 483                  e->name, vote->op, vote->from);
 484         return FALSE;
 485     }
 486     return TRUE;
 487 }
 488 
 489 static void
 490 record_vote(election_t *e, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 491 {
 492     CRM_ASSERT(e && vote && vote->from && vote->op);
 493 
 494     if (e->voted == NULL) {
 495         e->voted = pcmk__strkey_table(free, free);
 496     }
 497     pcmk__insert_dup(e->voted, vote->from, vote->op);
 498 }
 499 
 500 static void
 501 send_no_vote(crm_node_t *peer, struct vote *vote)
     /* [previous][next][first][last][top][bottom][index][help] */
 502 {
 503     // @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and crm_msg_crmd
 504 
 505     xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
 506                                      CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 507 
 508     crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
 509     crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
 510 
 511     pcmk__cluster_send_message(peer, crm_msg_crmd, novote);
 512     free_xml(novote);
 513 }
 514 
 515 /*!
 516  * \brief Process an election message (vote or no-vote) from a peer
 517  *
 518  * \param[in,out] e        Election object
 519  * \param[in]     message  Election message XML from peer
 520  * \param[in]     can_win  Whether local node is eligible to win
 521  *
 522  * \return Election state after new vote is considered
 523  * \note If the peer message is a vote, and we prefer the peer to win, this will
 524  *       send a no-vote reply to the peer.
 525  * \note The situations "we lost to this vote" from "this is a late no-vote
 526  *       after we've already lost" both return election_lost. If a caller needs
 527  *       to distinguish them, it should save the current state before calling
 528  *       this function, and then compare the result.
 529  */
 530 enum election_result
 531 election_count_vote(election_t *e, const xmlNode *message, bool can_win)
     /* [previous][next][first][last][top][bottom][index][help] */
 532 {
 533     int log_level = LOG_INFO;
 534     gboolean done = FALSE;
 535     gboolean we_lose = FALSE;
 536     const char *reason = "unknown";
 537     bool we_are_owner = FALSE;
 538     crm_node_t *our_node = NULL, *your_node = NULL;
 539     time_t tm_now = time(NULL);
 540     struct vote vote;
 541 
 542     CRM_CHECK(message != NULL, return election_error);
 543     if (parse_election_message(e, message, &vote) == FALSE) {
 544         return election_error;
 545     }
 546 
 547     your_node = pcmk__get_node(0, vote.from, NULL,
 548                                pcmk__node_search_cluster_member);
 549     our_node = pcmk__get_node(0, e->uname, NULL,
 550                               pcmk__node_search_cluster_member);
 551     we_are_owner = (our_node != NULL)
 552                    && pcmk__str_eq(our_node->uuid, vote.election_owner,
 553                                    pcmk__str_none);
 554 
 555     if (!can_win) {
 556         reason = "Not eligible";
 557         we_lose = TRUE;
 558 
 559     } else if (!pcmk__cluster_is_node_active(our_node)) {
 560         reason = "We are not part of the cluster";
 561         log_level = LOG_ERR;
 562         we_lose = TRUE;
 563 
 564     } else if (we_are_owner && (vote.election_id != e->count)) {
 565         log_level = LOG_TRACE;
 566         reason = "Superseded";
 567         done = TRUE;
 568 
 569     } else if (!pcmk__cluster_is_node_active(your_node)) {
 570         /* Possibly we cached the message in the FSA queue at a point that it wasn't */
 571         reason = "Peer is not part of our cluster";
 572         log_level = LOG_WARNING;
 573         done = TRUE;
 574 
 575     } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
 576                || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
 577         /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
 578          * for us to win
 579          */
 580         if (!we_are_owner) {
 581             crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
 582                      e->name, vote.election_id, vote.op, vote.from,
 583                      vote.election_owner);
 584             return election_error;
 585         }
 586         if (e->state != election_in_progress) {
 587             // Should only happen if we already lost
 588             crm_debug("Not counting %s round %d %s from %s because no election in progress",
 589                       e->name, vote.election_id, vote.op, vote.from);
 590             return e->state;
 591         }
 592         record_vote(e, &vote);
 593         reason = "Recorded";
 594         done = TRUE;
 595 
 596     } else {
 597         // A peer vote requires a comparison to determine which node is better
 598         int age_result = compare_age(vote.age);
 599         int version_result = compare_version(vote.version, CRM_FEATURE_SET);
 600 
 601         if (version_result < 0) {
 602             reason = "Version";
 603             we_lose = TRUE;
 604 
 605         } else if (version_result > 0) {
 606             reason = "Version";
 607 
 608         } else if (age_result < 0) {
 609             reason = "Uptime";
 610             we_lose = TRUE;
 611 
 612         } else if (age_result > 0) {
 613             reason = "Uptime";
 614 
 615         } else if (strcasecmp(e->uname, vote.from) > 0) {
 616             reason = "Host name";
 617             we_lose = TRUE;
 618 
 619         } else {
 620             reason = "Host name";
 621         }
 622     }
 623 
 624     if (e->expires < tm_now) {
 625         e->election_wins = 0;
 626         e->expires = tm_now + STORM_INTERVAL;
 627 
 628     } else if (done == FALSE && we_lose == FALSE) {
 629         int peers = 1 + g_hash_table_size(crm_peer_cache);
 630 
 631         /* If every node has to vote down every other node, thats N*(N-1) total elections
 632          * Allow some leeway before _really_ complaining
 633          */
 634         e->election_wins++;
 635         if (e->election_wins > (peers * peers)) {
 636             crm_warn("%s election storm detected: %d wins in %d seconds",
 637                      e->name, e->election_wins, STORM_INTERVAL);
 638             e->election_wins = 0;
 639             e->expires = tm_now + STORM_INTERVAL;
 640             if (e->wrote_blackbox == FALSE) {
 641                 /* It's questionable whether a black box (from every node in the
 642                  * cluster) would be truly helpful in diagnosing an election
 643                  * storm. It's also highly doubtful a production environment
 644                  * would get multiple election storms from distinct causes, so
 645                  * saving one blackbox per process lifetime should be
 646                  * sufficient. Alternatives would be to save a timestamp of the
 647                  * last blackbox write instead of a boolean, and write a new one
 648                  * if some amount of time has passed; or to save a storm count,
 649                  * write a blackbox on every Nth occurrence.
 650                  */
 651                 crm_write_blackbox(0, NULL);
 652                 e->wrote_blackbox = TRUE;
 653             }
 654         }
 655     }
 656 
 657     if (done) {
 658         do_crm_log(log_level + 1,
 659                    "Processed %s round %d %s (current round %d) from %s (%s)",
 660                    e->name, vote.election_id, vote.op, e->count, vote.from,
 661                    reason);
 662         return e->state;
 663 
 664     } else if (we_lose == FALSE) {
 665         /* We track the time of the last election loss to implement an election
 666          * dampening period, reducing the likelihood of an election storm. If
 667          * this node has lost within the dampening period, don't start a new
 668          * election, even if we win against a peer's vote -- the peer we lost to
 669          * should win again.
 670          *
 671          * @TODO This has a problem case: if an election winner immediately
 672          * leaves the cluster, and a new election is immediately called, all
 673          * nodes could lose, with no new winner elected. The ideal solution
 674          * would be to tie the election structure with the peer caches, which
 675          * would allow us to clear the dampening when the previous winner
 676          * leaves (and would allow other improvements as well).
 677          */
 678         if ((e->last_election_loss == 0)
 679             || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
 680 
 681             do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
 682                        e->name, vote.election_id, vote.election_owner, vote.op,
 683                        vote.from, reason);
 684 
 685             e->last_election_loss = 0;
 686             election_timeout_stop(e);
 687 
 688             /* Start a new election by voting down this, and other, peers */
 689             e->state = election_start;
 690             return e->state;
 691         } else {
 692             char *loss_time = ctime(&e->last_election_loss);
 693 
 694             if (loss_time) {
 695                 // Show only HH:MM:SS
 696                 loss_time += 11;
 697                 loss_time[8] = '\0';
 698             }
 699             crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
 700                      e->name, vote.election_id, vote.election_owner, vote.from,
 701                      LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
 702         }
 703     }
 704 
 705     e->last_election_loss = tm_now;
 706 
 707     do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
 708                e->name, vote.election_id, vote.election_owner, vote.op,
 709                vote.from, reason);
 710 
 711     election_reset(e);
 712     send_no_vote(your_node, &vote);
 713     e->state = election_lost;
 714     return e->state;
 715 }
 716 
 717 /*!
 718  * \brief Reset any election dampening currently in effect
 719  *
 720  * \param[in,out] e        Election object to clear
 721  */
 722 void
 723 election_clear_dampening(election_t *e)
     /* [previous][next][first][last][top][bottom][index][help] */
 724 {
 725     e->last_election_loss = 0;
 726 }

/* [previous][next][first][last][top][bottom][index][help] */