This source file includes the following definitions.
- election_complete
 
- election_timer_cb
 
- election_state
 
- election_init
 
- election_remove
 
- election_reset
 
- election_fini
 
- election_timeout_start
 
- election_timeout_stop
 
- election_timeout_set_period
 
- get_uptime
 
- compare_age
 
- election_vote
 
- election_check
 
- parse_election_message
 
- record_vote
 
- send_no_vote
 
- election_count_vote
 
- election_clear_dampening
 
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/time.h>
  13 #include <sys/resource.h>
  14 
  15 #include <crm/msg_xml.h>
  16 #include <crm/common/xml.h>
  17 
  18 #include <crm/common/mainloop.h>
  19 #include <crm/cluster/internal.h>
  20 #include <crm/cluster/election_internal.h>
  21 #include <crm/crm.h>
  22 
/* Interval in seconds: length of the election-storm detection window, and
 * also how long get_uptime() keeps reusing its cached getrusage() result
 */
#define STORM_INTERVAL   2
  24 
/* State tracked for one election object (created by election_init()) */
struct election_s {
    enum election_result state; // Current result/state of the election
    guint count;        // Round number; incremented each time the local node votes
    char *name;         // "election-<name>" label used in logs and for the timer
    char *uname;        // Copy of the local node's name given to election_init()
    GSourceFunc cb;     // Called (with this object) when the local node wins
    GHashTable *voted;  // Key = voter's node name, value = op that node sent
    mainloop_timer_t *timeout; // Timer whose expiry declares the local node winner
    int election_wins;         // Comparisons won in the current storm-detection window
    bool wrote_blackbox;       // Set once a blackbox has been written for a storm
    time_t expires;            // When the current storm-detection window ends
    time_t last_election_loss; // Time of the most recent loss (0 = none or cleared)
};
  38 
  39 static void
  40 election_complete(election_t *e)
     
  41 {
  42     e->state = election_won;
  43     if (e->cb != NULL) {
  44         e->cb(e);
  45     }
  46     election_reset(e);
  47 }
  48 
  49 static gboolean
  50 election_timer_cb(gpointer user_data)
     
  51 {
  52     election_t *e = user_data;
  53 
  54     crm_info("%s timed out, declaring local node as winner", e->name);
  55     election_complete(e);
  56     return FALSE;
  57 }
  58 
  59 
  60 
  61 
  62 
  63 
  64 
  65 
  66 enum election_result
  67 election_state(election_t *e)
     
  68 {
  69     return (e == NULL)? election_error : e->state;
  70 }
  71 
  72 
  73 
  74 
  75 
  76 
  77 
  78 
  79 
  80 
  81 
  82 
  83 
  84 
  85 
  86 
  87 
  88 election_t *
  89 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
     
  90 {
  91     election_t *e = NULL;
  92 
  93     static guint count = 0;
  94 
  95     CRM_CHECK(uname != NULL, return NULL);
  96 
  97     e = calloc(1, sizeof(election_t));
  98     if (e == NULL) {
  99         crm_perror(LOG_CRIT, "Cannot create election");
 100         return NULL;
 101     }
 102 
 103     e->uname = strdup(uname);
 104     if (e->uname == NULL) {
 105         crm_perror(LOG_CRIT, "Cannot create election");
 106         free(e);
 107         return NULL;
 108     }
 109 
 110     e->name = name? crm_strdup_printf("election-%s", name)
 111                   : crm_strdup_printf("election-%u", count++);
 112     e->cb = cb;
 113     e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
 114                                     election_timer_cb, e);
 115     crm_trace("Created %s", e->name);
 116     return e;
 117 }
 118 
 119 
 120 
 121 
 122 
 123 
 124 
 125 
 126 
 127 
 128 void
 129 election_remove(election_t *e, const char *uname)
     
 130 {
 131     if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
 132         crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
 133         g_hash_table_remove(e->voted, uname);
 134     }
 135 }
 136 
 137 
 138 
 139 
 140 
 141 
 142 void
 143 election_reset(election_t *e)
     
 144 {
 145     if (e != NULL) {
 146         crm_trace("Resetting election %s", e->name);
 147         mainloop_timer_stop(e->timeout);
 148         if (e->voted) {
 149             crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
 150             g_hash_table_destroy(e->voted);
 151             e->voted = NULL;
 152         }
 153     }
 154 }
 155 
 156 
 157 
 158 
 159 
 160 
 161 
 162 
 163 
 164 void
 165 election_fini(election_t *e)
     
 166 {
 167     if (e != NULL) {
 168         election_reset(e);
 169         crm_trace("Destroying %s", e->name);
 170         mainloop_timer_del(e->timeout);
 171         free(e->uname);
 172         free(e->name);
 173         free(e);
 174     }
 175 }
 176 
 177 static void
 178 election_timeout_start(election_t *e)
     
 179 {
 180     if (e != NULL) {
 181         mainloop_timer_start(e->timeout);
 182     }
 183 }
 184 
 185 
 186 
 187 
 188 
 189 
 190 void
 191 election_timeout_stop(election_t *e)
     
 192 {
 193     if (e != NULL) {
 194         mainloop_timer_stop(e->timeout);
 195     }
 196 }
 197 
 198 
 199 
 200 
 201 
 202 
 203 
 204 void
 205 election_timeout_set_period(election_t *e, guint period)
     
 206 {
 207     if (e != NULL) {
 208         mainloop_timer_set_period(e->timeout, period);
 209     } else {
 210         crm_err("No election defined");
 211     }
 212 }
 213 
 214 static int
 215 get_uptime(struct timeval *output)
     
 216 {
 217     static time_t expires = 0;
 218     static struct rusage info;
 219 
 220     time_t tm_now = time(NULL);
 221 
 222     if (expires < tm_now) {
 223         int rc = 0;
 224 
 225         info.ru_utime.tv_sec = 0;
 226         info.ru_utime.tv_usec = 0;
 227         rc = getrusage(RUSAGE_SELF, &info);
 228 
 229         output->tv_sec = 0;
 230         output->tv_usec = 0;
 231 
 232         if (rc < 0) {
 233             crm_perror(LOG_ERR, "Could not calculate the current uptime");
 234             expires = 0;
 235             return -1;
 236         }
 237 
 238         crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
 239                   (long)info.ru_utime.tv_usec);
 240     }
 241 
 242     expires = tm_now + STORM_INTERVAL;  
 243     output->tv_sec = info.ru_utime.tv_sec;
 244     output->tv_usec = info.ru_utime.tv_usec;
 245 
 246     return 1;
 247 }
 248 
 249 static int
 250 compare_age(struct timeval your_age)
     
 251 {
 252     struct timeval our_age;
 253 
 254     get_uptime(&our_age); 
 255 
 256     if (our_age.tv_sec > your_age.tv_sec) {
 257         crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 258         return 1;
 259     } else if (our_age.tv_sec < your_age.tv_sec) {
 260         crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
 261         return -1;
 262     } else if (our_age.tv_usec > your_age.tv_usec) {
 263         crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
 264                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 265         return 1;
 266     } else if (our_age.tv_usec < your_age.tv_usec) {
 267         crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
 268                   (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
 269         return -1;
 270     }
 271 
 272     return 0;
 273 }
 274 
 275 
 276 
 277 
 278 
 279 
 280 
 281 
 282 
 283 
 284 
 285 
 286 
 287 
 288 void
 289 election_vote(election_t *e)
     
 290 {
 291     struct timeval age;
 292     xmlNode *vote = NULL;
 293     crm_node_t *our_node;
 294 
 295     if (e == NULL) {
 296         crm_trace("Election vote requested, but no election available");
 297         return;
 298     }
 299 
 300     our_node = crm_get_peer(0, e->uname);
 301     if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) {
 302         crm_trace("Cannot vote in %s yet: local node not connected to cluster",
 303                   e->name);
 304         return;
 305     }
 306 
 307     election_reset(e);
 308     e->state = election_in_progress;
 309     vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 310 
 311     e->count++;
 312     crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid);
 313     crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count);
 314 
 315     get_uptime(&age);
 316     crm_xml_add_timeval(vote, F_CRM_ELECTION_AGE_S, F_CRM_ELECTION_AGE_US, &age);
 317 
 318     send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
 319     free_xml(vote);
 320 
 321     crm_debug("Started %s round %d", e->name, e->count);
 322     election_timeout_start(e);
 323     return;
 324 }
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332 
 333 
 334 
 335 
 336 
 337 
 338 
 339 
 340 
 341 bool
 342 election_check(election_t *e)
     
 343 {
 344     int voted_size = 0;
 345     int num_members = 0;
 346 
 347     if (e == NULL) {
 348         crm_trace("Election check requested, but no election available");
 349         return FALSE;
 350     }
 351     if (e->voted == NULL) {
 352         crm_trace("%s check requested, but no votes received yet", e->name);
 353         return FALSE;
 354     }
 355 
 356     voted_size = g_hash_table_size(e->voted);
 357     num_members = crm_active_peers();
 358 
 359     
 360 
 361 
 362 
 363     if (voted_size >= num_members) {
 364         
 365         election_timeout_stop(e);
 366         if (voted_size > num_members) {
 367             GHashTableIter gIter;
 368             const crm_node_t *node;
 369             char *key = NULL;
 370 
 371             crm_warn("Received too many votes in %s", e->name);
 372             g_hash_table_iter_init(&gIter, crm_peer_cache);
 373             while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
 374                 if (crm_is_peer_active(node)) {
 375                     crm_warn("* expected vote: %s", node->uname);
 376                 }
 377             }
 378 
 379             g_hash_table_iter_init(&gIter, e->voted);
 380             while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
 381                 crm_warn("* actual vote: %s", key);
 382             }
 383 
 384         }
 385 
 386         crm_info("%s won by local node", e->name);
 387         election_complete(e);
 388         return TRUE;
 389 
 390     } else {
 391         crm_debug("%s still waiting on %d of %d votes",
 392                   e->name, num_members - voted_size, num_members);
 393     }
 394 
 395     return FALSE;
 396 }
 397 
/* Dampening period in seconds: a vote comparison won within this long after
 * the last loss is ignored rather than starting a new election
 */
#define LOSS_DAMPEN 2
 399 
/* Fields extracted from an election message by parse_election_message().
 * The string members point at values returned by crm_element_value().
 */
struct vote {
    const char *op;             // F_CRM_TASK value (CRM_OP_VOTE or CRM_OP_NOVOTE)
    const char *from;           // F_CRM_HOST_FROM value (sender's node name)
    const char *version;        // F_CRM_VERSION value
    const char *election_owner; // F_CRM_ELECTION_OWNER value
    int election_id;            // F_CRM_ELECTION_ID value, -1 if not present
    struct timeval age;         // Sender's age (votes only); {-1,-1} if not set
};
 408 
 409 
 410 
 411 
 412 
 413 
 414 
 415 
 416 
 417 
 418 
 419 
 420 static bool
 421 parse_election_message(election_t *e, xmlNode *message, struct vote *vote)
     
 422 {
 423     CRM_CHECK(message && vote, return FALSE);
 424 
 425     vote->election_id = -1;
 426     vote->age.tv_sec = -1;
 427     vote->age.tv_usec = -1;
 428 
 429     vote->op = crm_element_value(message, F_CRM_TASK);
 430     vote->from = crm_element_value(message, F_CRM_HOST_FROM);
 431     vote->version = crm_element_value(message, F_CRM_VERSION);
 432     vote->election_owner = crm_element_value(message, F_CRM_ELECTION_OWNER);
 433 
 434     crm_element_value_int(message, F_CRM_ELECTION_ID, &(vote->election_id));
 435 
 436     if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
 437         || (vote->election_owner == NULL) || (vote->election_id < 0)) {
 438 
 439         crm_warn("Invalid %s message from %s in %s ",
 440                  (vote->op? vote->op : "election"),
 441                  (vote->from? vote->from : "unspecified node"),
 442                  (e? e->name : "election"));
 443         return FALSE;
 444     }
 445 
 446     
 447 
 448     if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
 449         
 450         crm_element_value_timeval(message, F_CRM_ELECTION_AGE_S,
 451                                   F_CRM_ELECTION_AGE_US, &(vote->age));
 452         if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
 453             crm_warn("Cannot count %s %s from %s because it is missing uptime",
 454                      (e? e->name : "election"), vote->op, vote->from);
 455             return FALSE;
 456         }
 457 
 458     } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
 459         crm_info("Cannot process %s message from %s because %s is not a known election op",
 460                  (e? e->name : "election"), vote->from, vote->op);
 461         return FALSE;
 462     }
 463 
 464     
 465 
 466     if (e == NULL) {
 467         crm_info("Cannot count %s from %s because no election available",
 468                  vote->op, vote->from);
 469         return FALSE;
 470     }
 471 
 472     
 473 
 474 
 475     if (crm_peer_cache == NULL) {
 476         crm_info("Cannot count %s %s from %s because no peer information available",
 477                  e->name, vote->op, vote->from);
 478         return FALSE;
 479     }
 480     return TRUE;
 481 }
 482 
 483 static void
 484 record_vote(election_t *e, struct vote *vote)
     
 485 {
 486     char *voter_copy = NULL;
 487     char *vote_copy = NULL;
 488 
 489     CRM_ASSERT(e && vote && vote->from && vote->op);
 490     if (e->voted == NULL) {
 491         e->voted = pcmk__strkey_table(free, free);
 492     }
 493 
 494     voter_copy = strdup(vote->from);
 495     vote_copy = strdup(vote->op);
 496     CRM_ASSERT(voter_copy && vote_copy);
 497 
 498     g_hash_table_replace(e->voted, voter_copy, vote_copy);
 499 }
 500 
 501 static void
 502 send_no_vote(crm_node_t *peer, struct vote *vote)
     
 503 {
 504     
 505 
 506     xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
 507                                      CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 508 
 509     crm_xml_add(novote, F_CRM_ELECTION_OWNER, vote->election_owner);
 510     crm_xml_add_int(novote, F_CRM_ELECTION_ID, vote->election_id);
 511 
 512     send_cluster_message(peer, crm_msg_crmd, novote, TRUE);
 513     free_xml(novote);
 514 }
 515 
 516 
 517 
 518 
 519 
 520 
 521 
 522 
 523 
 524 
 525 
 526 
 527 
 528 
 529 
 530 
/*!
 * \brief Process an election message (vote or no-vote) from a peer
 *
 * \param[in,out] e        Election object (a NULL election makes parsing fail)
 * \param[in]     message  Election message XML
 * \param[in]     can_win  Whether the local node is eligible to win
 *
 * \return election_error if the message cannot be parsed or counted,
 *         election_start if the local node beat the sender's vote and should
 *         start its own election, election_lost if the local node lost (a
 *         no-vote is sent in reply), otherwise the election's current state
 */
enum election_result
election_count_vote(election_t *e, xmlNode *message, bool can_win)
{
    int log_level = LOG_INFO;
    gboolean done = FALSE;      /* Message fully handled; no win/lose outcome */
    gboolean we_lose = FALSE;   /* Local node loses against this vote */
    const char *reason = "unknown";
    bool we_are_owner = FALSE;
    crm_node_t *our_node = NULL, *your_node = NULL;
    time_t tm_now = time(NULL);
    struct vote vote;

    CRM_CHECK(message != NULL, return election_error);
    /* A FALSE result also covers e == NULL and a missing peer cache, so both
     * can be used freely below
     */
    if (parse_election_message(e, message, &vote) == FALSE) {
        return election_error;
    }

    your_node = crm_get_peer(0, vote.from);
    our_node = crm_get_peer(0, e->uname);
    we_are_owner = (our_node != NULL)
                   && pcmk__str_eq(our_node->uuid, vote.election_owner,
                                   pcmk__str_none);

    if (!can_win) {
        reason = "Not eligible";
        we_lose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_lose = TRUE;

    } else if (we_are_owner && (vote.election_id != e->count)) {
        /* Message belongs to a round of ours other than the current one */
        log_level = LOG_TRACE;
        reason = "Superseded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Sender is unknown to us or not an active peer */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
               || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
        /* A no-vote from a peer, or a message sent by the local node itself,
         * is recorded in the voted table rather than compared
         */
        if (!we_are_owner) {
            crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
                     e->name, vote.election_id, vote.op, vote.from,
                     vote.election_owner);
            return election_error;
        }
        if (e->state != election_in_progress) {
            /* NOTE(review): presumably reached when we already won or lost
             * this round -- confirm
             */
            crm_debug("Not counting %s round %d %s from %s because no election in progress",
                      e->name, vote.election_id, vote.op, vote.from);
            return e->state;
        }
        record_vote(e, &vote);
        reason = "Recorded";
        done = TRUE;

    } else {
        /* A peer's vote: compare feature set version, then age, then host
         * name to decide which node is preferred
         */
        int age_result = compare_age(vote.age);
        int version_result = compare_version(vote.version, CRM_FEATURE_SET);

        if (version_result < 0) {
            reason = "Version";
            we_lose = TRUE;

        } else if (version_result > 0) {
            reason = "Version";

        } else if (age_result < 0) {
            reason = "Uptime";
            we_lose = TRUE;

        } else if (age_result > 0) {
            reason = "Uptime";

        } else if (strcasecmp(e->uname, vote.from) > 0) {
            reason = "Host name";
            we_lose = TRUE;

        } else {
            reason = "Host name";
        }
    }

    /* Election storm detection */
    if (e->expires < tm_now) {
        /* Previous detection window is over: start a new one */
        e->election_wins = 0;
        e->expires = tm_now + STORM_INTERVAL;

    } else if (done == FALSE && we_lose == FALSE) {
        int peers = 1 + g_hash_table_size(crm_peer_cache);

        /* Count wins inside the window; more than peers squared of them is
         * treated as a storm
         */
        e->election_wins++;
        if (e->election_wins > (peers * peers)) {
            crm_warn("%s election storm detected: %d wins in %d seconds",
                     e->name, e->election_wins, STORM_INTERVAL);
            e->election_wins = 0;
            e->expires = tm_now + STORM_INTERVAL;
            if (e->wrote_blackbox == FALSE) {
                /* The flag is never cleared in this file, so at most one
                 * blackbox is written per election object
                 */
                crm_write_blackbox(0, NULL);
                e->wrote_blackbox = TRUE;
            }
        }
    }

    if (done) {
        /* NOTE(review): e->count is a guint but is formatted with %d here */
        do_crm_log(log_level + 1,
                   "Processed %s round %d %s (current round %d) from %s (%s)",
                   e->name, vote.election_id, vote.op, e->count, vote.from,
                   reason);
        return e->state;

    } else if (we_lose == FALSE) {
        /* We beat the sender's vote. Act on it only if we have not lost
         * within the last LOSS_DAMPEN seconds (or have no loss on record);
         * otherwise log that the win is ignored and fall through to the loss
         * handling below.
         */
        if ((e->last_election_loss == 0)
            || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {

            do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
                       e->name, vote.election_id, vote.election_owner, vote.op,
                       vote.from, reason);

            e->last_election_loss = 0;
            election_timeout_stop(e);

            /* Tell the caller to start a new election */
            e->state = election_start;
            return e->state;
        } else {
            char *loss_time = ctime(&e->last_election_loss);

            if (loss_time) {
                /* Keep only the HH:MM:SS part of ctime()'s output */
                loss_time += 11;
                loss_time[8] = '\0';
            }
            crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
                     e->name, vote.election_id, vote.election_owner, vote.from,
                     LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
        }
    }

    /* Loss handling: remember when we lost, drop our election state, and
     * answer the sender with a no-vote
     */
    e->last_election_loss = tm_now;

    do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
               e->name, vote.election_id, vote.election_owner, vote.op,
               vote.from, reason);

    election_reset(e);
    send_no_vote(your_node, &vote);
    e->state = election_lost;
    return e->state;
}
 715 
 716 
 717 
 718 
 719 
 720 
 721 void
 722 election_clear_dampening(election_t *e)
     
 723 {
 724     e->last_election_loss = 0;
 725 }