This source file includes the following definitions.
- election_complete
- election_timer_cb
- election_state
- election_init
- election_remove
- election_reset
- election_fini
- election_timeout_start
- election_timeout_stop
- election_timeout_set_period
- get_uptime
- compare_age
- election_vote
- election_check
- parse_election_message
- record_vote
- send_no_vote
- election_count_vote
- election_clear_dampening
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/time.h>
13 #include <sys/resource.h>
14
15 #include <crm/msg_xml.h>
16 #include <crm/common/xml.h>
17
18 #include <crm/common/mainloop.h>
19 #include <crm/cluster/internal.h>
20 #include <crm/cluster/election_internal.h>
21 #include <crm/crm.h>
22
23 #define STORM_INTERVAL 2
24
25 struct election_s {
26 enum election_result state;
27 guint count;
28 char *name;
29 char *uname;
30 GSourceFunc cb;
31 GHashTable *voted;
32 mainloop_timer_t *timeout;
33 int election_wins;
34 bool wrote_blackbox;
35 time_t expires;
36 time_t last_election_loss;
37 };
38
39 static void
40 election_complete(election_t *e)
41 {
42 e->state = election_won;
43 if (e->cb != NULL) {
44 e->cb(e);
45 }
46 election_reset(e);
47 }
48
49 static gboolean
50 election_timer_cb(gpointer user_data)
51 {
52 election_t *e = user_data;
53
54 crm_info("%s timed out, declaring local node as winner", e->name);
55 election_complete(e);
56 return FALSE;
57 }
58
59
60
61
62
63
64
65
66 enum election_result
67 election_state(election_t *e)
68 {
69 return (e == NULL)? election_error : e->state;
70 }
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88 election_t *
89 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
90 {
91 election_t *e = NULL;
92
93 static guint count = 0;
94
95 CRM_CHECK(uname != NULL, return NULL);
96
97 e = calloc(1, sizeof(election_t));
98 if (e == NULL) {
99 crm_perror(LOG_CRIT, "Cannot create election");
100 return NULL;
101 }
102
103 e->uname = strdup(uname);
104 if (e->uname == NULL) {
105 crm_perror(LOG_CRIT, "Cannot create election");
106 free(e);
107 return NULL;
108 }
109
110 e->name = name? crm_strdup_printf("election-%s", name)
111 : crm_strdup_printf("election-%u", count++);
112 e->cb = cb;
113 e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
114 election_timer_cb, e);
115 crm_trace("Created %s", e->name);
116 return e;
117 }
118
119
120
121
122
123
124
125
126
127
128 void
129 election_remove(election_t *e, const char *uname)
130 {
131 if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
132 crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
133 g_hash_table_remove(e->voted, uname);
134 }
135 }
136
137
138
139
140
141
142 void
143 election_reset(election_t *e)
144 {
145 if (e != NULL) {
146 crm_trace("Resetting election %s", e->name);
147 mainloop_timer_stop(e->timeout);
148 if (e->voted) {
149 crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
150 g_hash_table_destroy(e->voted);
151 e->voted = NULL;
152 }
153 }
154 }
155
156
157
158
159
160
161
162
163
164 void
165 election_fini(election_t *e)
166 {
167 if (e != NULL) {
168 election_reset(e);
169 crm_trace("Destroying %s", e->name);
170 mainloop_timer_del(e->timeout);
171 free(e->uname);
172 free(e->name);
173 free(e);
174 }
175 }
176
177 static void
178 election_timeout_start(election_t *e)
179 {
180 if (e != NULL) {
181 mainloop_timer_start(e->timeout);
182 }
183 }
184
185
186
187
188
189
190 void
191 election_timeout_stop(election_t *e)
192 {
193 if (e != NULL) {
194 mainloop_timer_stop(e->timeout);
195 }
196 }
197
198
199
200
201
202
203
204 void
205 election_timeout_set_period(election_t *e, guint period)
206 {
207 if (e != NULL) {
208 mainloop_timer_set_period(e->timeout, period);
209 } else {
210 crm_err("No election defined");
211 }
212 }
213
214 static int
215 get_uptime(struct timeval *output)
216 {
217 static time_t expires = 0;
218 static struct rusage info;
219
220 time_t tm_now = time(NULL);
221
222 if (expires < tm_now) {
223 int rc = 0;
224
225 info.ru_utime.tv_sec = 0;
226 info.ru_utime.tv_usec = 0;
227 rc = getrusage(RUSAGE_SELF, &info);
228
229 output->tv_sec = 0;
230 output->tv_usec = 0;
231
232 if (rc < 0) {
233 crm_perror(LOG_ERR, "Could not calculate the current uptime");
234 expires = 0;
235 return -1;
236 }
237
238 crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
239 (long)info.ru_utime.tv_usec);
240 }
241
242 expires = tm_now + STORM_INTERVAL;
243 output->tv_sec = info.ru_utime.tv_sec;
244 output->tv_usec = info.ru_utime.tv_usec;
245
246 return 1;
247 }
248
/*!
 * \brief Compare the local node's age against a peer's
 *
 * Seconds are compared first; microseconds only break a tie in seconds.
 * The result of get_uptime() is not checked: on failure it zeroes its
 * output, so the local age is then treated as zero.
 *
 * \param[in] your_age  Age reported by the peer
 *
 * \return 1 if the local age is greater, -1 if it is smaller, 0 if equal
 */
static int
compare_age(struct timeval your_age)
{
    struct timeval our_age;

    get_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec == your_age.tv_usec) {
        return 0;
    }

    if (our_age.tv_usec > your_age.tv_usec) {
        crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
        return 1;
    }

    crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
              (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
    return -1;
}
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288 void
289 election_vote(election_t *e)
290 {
291 struct timeval age;
292 xmlNode *vote = NULL;
293 crm_node_t *our_node;
294
295 if (e == NULL) {
296 crm_trace("Election vote requested, but no election available");
297 return;
298 }
299
300 our_node = crm_get_peer(0, e->uname);
301 if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) {
302 crm_trace("Cannot vote in %s yet: local node not connected to cluster",
303 e->name);
304 return;
305 }
306
307 election_reset(e);
308 e->state = election_in_progress;
309 vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
310
311 e->count++;
312 crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid);
313 crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count);
314
315 get_uptime(&age);
316 crm_xml_add_timeval(vote, F_CRM_ELECTION_AGE_S, F_CRM_ELECTION_AGE_US, &age);
317
318 send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
319 free_xml(vote);
320
321 crm_debug("Started %s round %d", e->name, e->count);
322 election_timeout_start(e);
323 return;
324 }
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341 bool
342 election_check(election_t *e)
343 {
344 int voted_size = 0;
345 int num_members = 0;
346
347 if (e == NULL) {
348 crm_trace("Election check requested, but no election available");
349 return FALSE;
350 }
351 if (e->voted == NULL) {
352 crm_trace("%s check requested, but no votes received yet", e->name);
353 return FALSE;
354 }
355
356 voted_size = g_hash_table_size(e->voted);
357 num_members = crm_active_peers();
358
359
360
361
362
363 if (voted_size >= num_members) {
364
365 election_timeout_stop(e);
366 if (voted_size > num_members) {
367 GHashTableIter gIter;
368 const crm_node_t *node;
369 char *key = NULL;
370
371 crm_warn("Received too many votes in %s", e->name);
372 g_hash_table_iter_init(&gIter, crm_peer_cache);
373 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
374 if (crm_is_peer_active(node)) {
375 crm_warn("* expected vote: %s", node->uname);
376 }
377 }
378
379 g_hash_table_iter_init(&gIter, e->voted);
380 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
381 crm_warn("* actual vote: %s", key);
382 }
383
384 }
385
386 crm_info("%s won by local node", e->name);
387 election_complete(e);
388 return TRUE;
389
390 } else {
391 crm_debug("%s still waiting on %d of %d votes",
392 e->name, num_members - voted_size, num_members);
393 }
394
395 return FALSE;
396 }
397
/* Seconds after losing an election during which the local node will not
 * act on beating a peer's vote
 */
#define LOSS_DAMPEN 2

/* Fields extracted from one election message. The string members are
 * whatever crm_element_value() returned for the message (presumably pointers
 * into the message XML rather than copies; confirm before outliving it).
 */
struct vote {
    const char *op;              /* Message task (vote or no-vote op) */
    const char *from;            /* Name of the sending node */
    const char *version;         /* Sender's version string */
    const char *election_owner;  /* ID of the node that owns the election */
    int election_id;             /* Election round; -1 until parsed */
    struct timeval age;          /* Sender's age; fields -1 until parsed */
};
408
409
410
411
412
413
414
415
416
417
418
419
420 static bool
421 parse_election_message(election_t *e, xmlNode *message, struct vote *vote)
422 {
423 CRM_CHECK(message && vote, return FALSE);
424
425 vote->election_id = -1;
426 vote->age.tv_sec = -1;
427 vote->age.tv_usec = -1;
428
429 vote->op = crm_element_value(message, F_CRM_TASK);
430 vote->from = crm_element_value(message, F_CRM_HOST_FROM);
431 vote->version = crm_element_value(message, F_CRM_VERSION);
432 vote->election_owner = crm_element_value(message, F_CRM_ELECTION_OWNER);
433
434 crm_element_value_int(message, F_CRM_ELECTION_ID, &(vote->election_id));
435
436 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
437 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
438
439 crm_warn("Invalid %s message from %s in %s ",
440 (vote->op? vote->op : "election"),
441 (vote->from? vote->from : "unspecified node"),
442 (e? e->name : "election"));
443 return FALSE;
444 }
445
446
447
448 if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
449
450 crm_element_value_timeval(message, F_CRM_ELECTION_AGE_S,
451 F_CRM_ELECTION_AGE_US, &(vote->age));
452 if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
453 crm_warn("Cannot count %s %s from %s because it is missing uptime",
454 (e? e->name : "election"), vote->op, vote->from);
455 return FALSE;
456 }
457
458 } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
459 crm_info("Cannot process %s message from %s because %s is not a known election op",
460 (e? e->name : "election"), vote->from, vote->op);
461 return FALSE;
462 }
463
464
465
466 if (e == NULL) {
467 crm_info("Cannot count %s from %s because no election available",
468 vote->op, vote->from);
469 return FALSE;
470 }
471
472
473
474
475 if (crm_peer_cache == NULL) {
476 crm_info("Cannot count %s %s from %s because no peer information available",
477 e->name, vote->op, vote->from);
478 return FALSE;
479 }
480 return TRUE;
481 }
482
483 static void
484 record_vote(election_t *e, struct vote *vote)
485 {
486 char *voter_copy = NULL;
487 char *vote_copy = NULL;
488
489 CRM_ASSERT(e && vote && vote->from && vote->op);
490 if (e->voted == NULL) {
491 e->voted = pcmk__strkey_table(free, free);
492 }
493
494 voter_copy = strdup(vote->from);
495 vote_copy = strdup(vote->op);
496 CRM_ASSERT(voter_copy && vote_copy);
497
498 g_hash_table_replace(e->voted, voter_copy, vote_copy);
499 }
500
501 static void
502 send_no_vote(crm_node_t *peer, struct vote *vote)
503 {
504
505
506 xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
507 CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
508
509 crm_xml_add(novote, F_CRM_ELECTION_OWNER, vote->election_owner);
510 crm_xml_add_int(novote, F_CRM_ELECTION_ID, vote->election_id);
511
512 send_cluster_message(peer, crm_msg_crmd, novote, TRUE);
513 free_xml(novote);
514 }
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531 enum election_result
532 election_count_vote(election_t *e, xmlNode *message, bool can_win)
533 {
534 int log_level = LOG_INFO;
535 gboolean done = FALSE;
536 gboolean we_lose = FALSE;
537 const char *reason = "unknown";
538 bool we_are_owner = FALSE;
539 crm_node_t *our_node = NULL, *your_node = NULL;
540 time_t tm_now = time(NULL);
541 struct vote vote;
542
543 CRM_CHECK(message != NULL, return election_error);
544 if (parse_election_message(e, message, &vote) == FALSE) {
545 return election_error;
546 }
547
548 your_node = crm_get_peer(0, vote.from);
549 our_node = crm_get_peer(0, e->uname);
550 we_are_owner = (our_node != NULL)
551 && pcmk__str_eq(our_node->uuid, vote.election_owner,
552 pcmk__str_none);
553
554 if (!can_win) {
555 reason = "Not eligible";
556 we_lose = TRUE;
557
558 } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
559 reason = "We are not part of the cluster";
560 log_level = LOG_ERR;
561 we_lose = TRUE;
562
563 } else if (we_are_owner && (vote.election_id != e->count)) {
564 log_level = LOG_TRACE;
565 reason = "Superseded";
566 done = TRUE;
567
568 } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
569
570 reason = "Peer is not part of our cluster";
571 log_level = LOG_WARNING;
572 done = TRUE;
573
574 } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
575 || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
576
577
578
579 if (!we_are_owner) {
580 crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
581 e->name, vote.election_id, vote.op, vote.from,
582 vote.election_owner);
583 return election_error;
584 }
585 if (e->state != election_in_progress) {
586
587 crm_debug("Not counting %s round %d %s from %s because no election in progress",
588 e->name, vote.election_id, vote.op, vote.from);
589 return e->state;
590 }
591 record_vote(e, &vote);
592 reason = "Recorded";
593 done = TRUE;
594
595 } else {
596
597 int age_result = compare_age(vote.age);
598 int version_result = compare_version(vote.version, CRM_FEATURE_SET);
599
600 if (version_result < 0) {
601 reason = "Version";
602 we_lose = TRUE;
603
604 } else if (version_result > 0) {
605 reason = "Version";
606
607 } else if (age_result < 0) {
608 reason = "Uptime";
609 we_lose = TRUE;
610
611 } else if (age_result > 0) {
612 reason = "Uptime";
613
614 } else if (strcasecmp(e->uname, vote.from) > 0) {
615 reason = "Host name";
616 we_lose = TRUE;
617
618 } else {
619 reason = "Host name";
620 }
621 }
622
623 if (e->expires < tm_now) {
624 e->election_wins = 0;
625 e->expires = tm_now + STORM_INTERVAL;
626
627 } else if (done == FALSE && we_lose == FALSE) {
628 int peers = 1 + g_hash_table_size(crm_peer_cache);
629
630
631
632
633 e->election_wins++;
634 if (e->election_wins > (peers * peers)) {
635 crm_warn("%s election storm detected: %d wins in %d seconds",
636 e->name, e->election_wins, STORM_INTERVAL);
637 e->election_wins = 0;
638 e->expires = tm_now + STORM_INTERVAL;
639 if (e->wrote_blackbox == FALSE) {
640
641
642
643
644
645
646
647
648
649
650 crm_write_blackbox(0, NULL);
651 e->wrote_blackbox = TRUE;
652 }
653 }
654 }
655
656 if (done) {
657 do_crm_log(log_level + 1,
658 "Processed %s round %d %s (current round %d) from %s (%s)",
659 e->name, vote.election_id, vote.op, e->count, vote.from,
660 reason);
661 return e->state;
662
663 } else if (we_lose == FALSE) {
664
665
666
667
668
669
670
671
672
673
674
675
676
677 if ((e->last_election_loss == 0)
678 || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
679
680 do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
681 e->name, vote.election_id, vote.election_owner, vote.op,
682 vote.from, reason);
683
684 e->last_election_loss = 0;
685 election_timeout_stop(e);
686
687
688 e->state = election_start;
689 return e->state;
690 } else {
691 char *loss_time = ctime(&e->last_election_loss);
692
693 if (loss_time) {
694
695 loss_time += 11;
696 loss_time[8] = '\0';
697 }
698 crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
699 e->name, vote.election_id, vote.election_owner, vote.from,
700 LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
701 }
702 }
703
704 e->last_election_loss = tm_now;
705
706 do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
707 e->name, vote.election_id, vote.election_owner, vote.op,
708 vote.from, reason);
709
710 election_reset(e);
711 send_no_vote(your_node, &vote);
712 e->state = election_lost;
713 return e->state;
714 }
715
716
717
718
719
720
721 void
722 election_clear_dampening(election_t *e)
723 {
724 e->last_election_loss = 0;
725 }