This source file includes the following definitions.
- election_complete
- election_timer_cb
- election_state
- election_init
- election_remove
- election_reset
- election_fini
- election_timeout_start
- election_timeout_stop
- election_timeout_set_period
- crm_uptime
- crm_compare_age
- election_vote
- election_check
- parse_election_message
- record_vote
- send_no_vote
- election_count_vote
- election_clear_dampening
1
2
3
4
5
6
7
8 #include <crm_internal.h>
9
10 #include <sys/time.h>
11 #include <sys/resource.h>
12
13 #include <crm/msg_xml.h>
14 #include <crm/common/xml.h>
15
16 #include <crm/common/mainloop.h>
17 #include <crm/cluster/internal.h>
18 #include <crm/cluster/election.h>
19 #include <crm/crm.h>
20
/* Interval in seconds, used in two places in this file: crm_uptime() keeps
 * its cached sample valid until this long after the most recent call, and
 * election_count_vote() uses it as the length of the election storm
 * detection window.
 */
#define STORM_INTERVAL 2
22
/* State of one election as tracked by the local node */
struct election_s {
    enum election_result state; /* latest state/result; updated by election_vote(),
                                 * election_count_vote() and election_complete() */
    guint count;                /* round counter, incremented by each election_vote()
                                 * and sent as F_CRM_ELECTION_ID */
    char *name;                 /* "election-<name>" label used in logs (owned) */
    char *uname;                /* copy of the node name given to election_init();
                                 * used to look up the local peer (owned) */
    GSourceFunc cb;             /* if non-NULL, called by election_complete() */
    GHashTable *voted;          /* voter name -> op string; created on first
                                 * recorded vote, destroyed by election_reset() */
    mainloop_timer_t *timeout;  /* timer whose expiry runs election_timer_cb() */
    int election_wins;          /* votes we did not lose within the current
                                 * storm-detection window */
    bool wrote_blackbox;        /* set once a storm has triggered a blackbox write */
    time_t expires;             /* end of the current storm-detection window */
    time_t last_election_loss;  /* time of the most recent loss, or 0 if none
                                 * (or cleared); drives loss dampening */
};
36
37 static void election_complete(election_t *e)
38 {
39 e->state = election_won;
40
41 if(e->cb) {
42 e->cb(e);
43 }
44
45 election_reset(e);
46 }
47
48 static gboolean election_timer_cb(gpointer user_data)
49 {
50 election_t *e = user_data;
51
52 crm_info("%s timed out, declaring local node as winner", e->name);
53 election_complete(e);
54 return FALSE;
55 }
56
57 enum election_result
58 election_state(election_t *e)
59 {
60 if(e) {
61 return e->state;
62 }
63 return election_error;
64 }
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 election_t *
83 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
84 {
85 election_t *e = NULL;
86
87 static guint count = 0;
88
89 CRM_CHECK(uname != NULL, return NULL);
90
91 e = calloc(1, sizeof(election_t));
92 if (e == NULL) {
93 crm_perror(LOG_CRIT, "Cannot create election");
94 return NULL;
95 }
96
97 e->uname = strdup(uname);
98 if (e->uname == NULL) {
99 crm_perror(LOG_CRIT, "Cannot create election");
100 free(e);
101 return NULL;
102 }
103
104 e->name = name? crm_strdup_printf("election-%s", name)
105 : crm_strdup_printf("election-%u", count++);
106 e->cb = cb;
107 e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
108 election_timer_cb, e);
109 crm_trace("Created %s", e->name);
110 return e;
111 }
112
113
114
115
116
117
118
119
120
121
122 void
123 election_remove(election_t *e, const char *uname)
124 {
125 if(e && uname && e->voted) {
126 crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
127 g_hash_table_remove(e->voted, uname);
128 }
129 }
130
131
132
133
134
135
136 void
137 election_reset(election_t *e)
138 {
139 if (e != NULL) {
140 crm_trace("Resetting election %s", e->name);
141 mainloop_timer_stop(e->timeout);
142 if (e->voted) {
143 crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
144 g_hash_table_destroy(e->voted);
145 e->voted = NULL;
146 }
147 }
148 }
149
150
151
152
153
154
155
156
157
158 void
159 election_fini(election_t *e)
160 {
161 if(e) {
162 election_reset(e);
163 crm_trace("Destroying %s", e->name);
164 mainloop_timer_del(e->timeout);
165 free(e->uname);
166 free(e->name);
167 free(e);
168 }
169 }
170
171 static void
172 election_timeout_start(election_t *e)
173 {
174 if(e) {
175 mainloop_timer_start(e->timeout);
176 }
177 }
178
179
180
181
182
183
184 void
185 election_timeout_stop(election_t *e)
186 {
187 if(e) {
188 mainloop_timer_stop(e->timeout);
189 }
190 }
191
192
193
194
195
196
197
198 void
199 election_timeout_set_period(election_t *e, guint period)
200 {
201 if(e) {
202 mainloop_timer_set_period(e->timeout, period);
203 } else {
204 crm_err("No election defined");
205 }
206 }
207
208 static int
209 crm_uptime(struct timeval *output)
210 {
211 static time_t expires = 0;
212 static struct rusage info;
213
214 time_t tm_now = time(NULL);
215
216 if (expires < tm_now) {
217 int rc = 0;
218
219 info.ru_utime.tv_sec = 0;
220 info.ru_utime.tv_usec = 0;
221 rc = getrusage(RUSAGE_SELF, &info);
222
223 output->tv_sec = 0;
224 output->tv_usec = 0;
225
226 if (rc < 0) {
227 crm_perror(LOG_ERR, "Could not calculate the current uptime");
228 expires = 0;
229 return -1;
230 }
231
232 crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
233 (long)info.ru_utime.tv_usec);
234 }
235
236 expires = tm_now + STORM_INTERVAL;
237 output->tv_sec = info.ru_utime.tv_sec;
238 output->tv_usec = info.ru_utime.tv_usec;
239
240 return 1;
241 }
242
/*!
 * \internal
 * \brief Compare the local process's age against a peer's
 *
 * The local age comes from crm_uptime(); its return code is not examined
 * (on failure it leaves a zeroed value, which is what gets compared).
 * Seconds are compared first, and microseconds only break a tie.
 *
 * \param[in] your_age  Age reported by the peer
 *
 * \return 1 if the local age is greater ("win"), -1 if it is smaller
 *         ("lose"), 0 if both are identical
 */
static int
crm_compare_age(struct timeval your_age)
{
    struct timeval our_age;

    crm_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
                      (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
            return 1;
        }
        crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
        return -1;
    }

    return 0;
}
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282 void
283 election_vote(election_t *e)
284 {
285 struct timeval age;
286 xmlNode *vote = NULL;
287 crm_node_t *our_node;
288
289 if (e == NULL) {
290 crm_trace("Election vote requested, but no election available");
291 return;
292 }
293
294 our_node = crm_get_peer(0, e->uname);
295 if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) {
296 crm_trace("Cannot vote in %s yet: local node not connected to cluster",
297 e->name);
298 return;
299 }
300
301 election_reset(e);
302 e->state = election_in_progress;
303 vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
304
305 e->count++;
306 crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid);
307 crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count);
308
309 crm_uptime(&age);
310 crm_xml_add_timeval(vote, F_CRM_ELECTION_AGE_S, F_CRM_ELECTION_AGE_US, &age);
311
312 send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
313 free_xml(vote);
314
315 crm_debug("Started %s round %d", e->name, e->count);
316 election_timeout_start(e);
317 return;
318 }
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335 bool
336 election_check(election_t *e)
337 {
338 int voted_size = 0;
339 int num_members = 0;
340
341 if(e == NULL) {
342 crm_trace("Election check requested, but no election available");
343 return FALSE;
344 }
345 if (e->voted == NULL) {
346 crm_trace("%s check requested, but no votes received yet", e->name);
347 return FALSE;
348 }
349
350 voted_size = g_hash_table_size(e->voted);
351 num_members = crm_active_peers();
352
353
354
355
356
357 if (voted_size >= num_members) {
358
359 election_timeout_stop(e);
360 if (voted_size > num_members) {
361 GHashTableIter gIter;
362 const crm_node_t *node;
363 char *key = NULL;
364
365 crm_warn("Received too many votes in %s", e->name);
366 g_hash_table_iter_init(&gIter, crm_peer_cache);
367 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
368 if (crm_is_peer_active(node)) {
369 crm_warn("* expected vote: %s", node->uname);
370 }
371 }
372
373 g_hash_table_iter_init(&gIter, e->voted);
374 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
375 crm_warn("* actual vote: %s", key);
376 }
377
378 }
379
380 crm_info("%s won by local node", e->name);
381 election_complete(e);
382 return TRUE;
383
384 } else {
385 crm_debug("%s still waiting on %d of %d votes",
386 e->name, num_members - voted_size, num_members);
387 }
388
389 return FALSE;
390 }
391
/* Number of seconds after an election loss during which election_count_vote()
 * will not act on a vote the local node would otherwise win ("pass").
 */
#define LOSS_DAMPEN 2
393
/* Fields extracted from an election message by parse_election_message().
 * The string members are whatever crm_element_value() returned for the
 * message; they are not copied here.
 */
struct vote {
    const char *op;             /* F_CRM_TASK: CRM_OP_VOTE or CRM_OP_NOVOTE */
    const char *from;           /* F_CRM_HOST_FROM: name of the sending node */
    const char *version;        /* F_CRM_VERSION of the sender */
    const char *election_owner; /* F_CRM_ELECTION_OWNER: ID of the node that
                                 * started the election */
    int election_id;            /* F_CRM_ELECTION_ID: round number; -1 if absent */
    struct timeval age;         /* sender's age; only parsed for CRM_OP_VOTE,
                                 * otherwise left at -1/-1 */
};
402
403
404
405
406
407
408
409
410
411
412
413
414 static bool
415 parse_election_message(election_t *e, xmlNode *message, struct vote *vote)
416 {
417 CRM_CHECK(message && vote, return FALSE);
418
419 vote->election_id = -1;
420 vote->age.tv_sec = -1;
421 vote->age.tv_usec = -1;
422
423 vote->op = crm_element_value(message, F_CRM_TASK);
424 vote->from = crm_element_value(message, F_CRM_HOST_FROM);
425 vote->version = crm_element_value(message, F_CRM_VERSION);
426 vote->election_owner = crm_element_value(message, F_CRM_ELECTION_OWNER);
427
428 crm_element_value_int(message, F_CRM_ELECTION_ID, &(vote->election_id));
429
430 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
431 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
432
433 crm_warn("Invalid %s message from %s in %s ",
434 (vote->op? vote->op : "election"),
435 (vote->from? vote->from : "unspecified node"),
436 (e? e->name : "election"));
437 return FALSE;
438 }
439
440
441
442 if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
443
444 crm_element_value_timeval(message, F_CRM_ELECTION_AGE_S,
445 F_CRM_ELECTION_AGE_US, &(vote->age));
446 if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
447 crm_warn("Cannot count %s %s from %s because it is missing uptime",
448 (e? e->name : "election"), vote->op, vote->from);
449 return FALSE;
450 }
451
452 } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
453 crm_info("Cannot process %s message from %s because %s is not a known election op",
454 (e? e->name : "election"), vote->from, vote->op);
455 return FALSE;
456 }
457
458
459
460 if (e == NULL) {
461 crm_info("Cannot count %s from %s because no election available",
462 vote->op, vote->from);
463 return FALSE;
464 }
465
466
467
468
469 if (crm_peer_cache == NULL) {
470 crm_info("Cannot count %s %s from %s because no peer information available",
471 e->name, vote->op, vote->from);
472 return FALSE;
473 }
474 return TRUE;
475 }
476
477 static void
478 record_vote(election_t *e, struct vote *vote)
479 {
480 char *voter_copy = NULL;
481 char *vote_copy = NULL;
482
483 CRM_ASSERT(e && vote && vote->from && vote->op);
484 if (e->voted == NULL) {
485 e->voted = crm_str_table_new();
486 }
487
488 voter_copy = strdup(vote->from);
489 vote_copy = strdup(vote->op);
490 CRM_ASSERT(voter_copy && vote_copy);
491
492 g_hash_table_replace(e->voted, voter_copy, vote_copy);
493 }
494
495 static void
496 send_no_vote(crm_node_t *peer, struct vote *vote)
497 {
498
499
500 xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
501 CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
502
503 crm_xml_add(novote, F_CRM_ELECTION_OWNER, vote->election_owner);
504 crm_xml_add_int(novote, F_CRM_ELECTION_ID, vote->election_id);
505
506 send_cluster_message(peer, crm_msg_crmd, novote, TRUE);
507 free_xml(novote);
508 }
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
/*!
 * \brief Process an election message (vote or no-vote) from a peer
 *
 * Depending on the message and local state, the message is either merely
 * processed/recorded ("done"), treated as a vote the local node beats
 * ("pass"), or treated as a vote the local node loses to ("lost"). On a
 * loss, recorded votes are discarded and a no-vote is sent back.
 *
 * \param[in,out] e        Election object
 * \param[in]     message  Election message received
 * \param[in]     can_win  Whether the local node is eligible to win
 *
 * \return election_error if the message is NULL or invalid, or if a
 *         no-vote/own vote arrives for an election the local node does not
 *         own; election_start if the local node beats the sender (outside
 *         the dampening period); election_lost if the local node loses;
 *         otherwise the election's unchanged current state
 */
enum election_result
election_count_vote(election_t *e, xmlNode *message, bool can_win)
{
    int log_level = LOG_INFO;
    gboolean done = FALSE;      /* message fully handled, no win/loss decision */
    gboolean we_lose = FALSE;   /* local node loses to the sender */
    const char *reason = "unknown";
    bool we_are_owner = FALSE;
    crm_node_t *our_node = NULL, *your_node = NULL;
    time_t tm_now = time(NULL);
    struct vote vote;

    CRM_CHECK(message != NULL, return election_error);

    /* This also fails when e is NULL, so e may be dereferenced below */
    if (parse_election_message(e, message, &vote) == FALSE) {
        return election_error;
    }

    your_node = crm_get_peer(0, vote.from);
    our_node = crm_get_peer(0, e->uname);

    /* We own the election if the message names our node ID as its owner */
    we_are_owner = (our_node != NULL)
                   && pcmk__str_eq(our_node->uuid, vote.election_owner,
                                   pcmk__str_none);

    if(can_win == FALSE) {
        reason = "Not eligible";
        we_lose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_lose = TRUE;

    } else if (we_are_owner && (vote.election_id != e->count)) {
        /* The message refers to a round of our election other than the
         * current one
         */
        log_level = LOG_TRACE;
        reason = "Superseded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Sender is unknown to the peer cache or not active */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
               || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
        /* Either a no-vote, or a message whose sender is the local node:
         * these are recorded rather than compared, and only count for an
         * election that we own and that is still in progress
         */
        if (!we_are_owner) {
            crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
                     e->name, vote.election_id, vote.op, vote.from,
                     vote.election_owner);
            return election_error;
        }
        if (e->state != election_in_progress) {
            /* Not an error, just nothing to count the message towards */
            crm_debug("Not counting %s round %d %s from %s because no election in progress",
                      e->name, vote.election_id, vote.op, vote.from);
            return e->state;
        }
        record_vote(e, &vote);
        reason = "Recorded";
        done = TRUE;

    } else {
        /* A vote from another active node: compare, in order of precedence,
         * feature set version, age, and node name. The sign convention of
         * compare_version() is not visible here (presumably negative means
         * the sender's version is lower -- confirm against its definition).
         */
        int age_result = crm_compare_age(vote.age);
        int version_result = compare_version(vote.version, CRM_FEATURE_SET);

        if (version_result < 0) {
            reason = "Version";
            we_lose = TRUE;

        } else if (version_result > 0) {
            reason = "Version";

        } else if (age_result < 0) {
            /* crm_compare_age() returns -1 when our age is the smaller one */
            reason = "Uptime";
            we_lose = TRUE;

        } else if (age_result > 0) {
            reason = "Uptime";

        } else if (strcasecmp(e->uname, vote.from) > 0) {
            /* Final tie-breaker: we lose if our name sorts after the
             * sender's (case-insensitively)
             */
            reason = "Host name";
            we_lose = TRUE;

        } else {
            reason = "Host name";
        }
    }

    /* Election storm detection */
    if (e->expires < tm_now) {
        /* Previous window is over: start counting afresh */
        e->election_wins = 0;
        e->expires = tm_now + STORM_INTERVAL;

    } else if (done == FALSE && we_lose == FALSE) {
        int peers = 1 + g_hash_table_size(crm_peer_cache);

        /* Count every vote we would win within the window; more than
         * peers * peers of them is considered a storm
         */
        e->election_wins++;
        if (e->election_wins > (peers * peers)) {
            crm_warn("%s election storm detected: %d wins in %d seconds",
                     e->name, e->election_wins, STORM_INTERVAL);
            e->election_wins = 0;
            e->expires = tm_now + STORM_INTERVAL;
            if (e->wrote_blackbox == FALSE) {
                /* Write the blackbox only for the first storm seen on this
                 * election object; the flag is never cleared in this file
                 */
                crm_write_blackbox(0, NULL);
                e->wrote_blackbox = TRUE;
            }
        }
    }

    if (done) {
        /* Note the log level is offset by one from the level chosen above */
        do_crm_log(log_level + 1,
                   "Processed %s round %d %s (current round %d) from %s (%s)",
                   e->name, vote.election_id, vote.op, e->count, vote.from,
                   reason);
        return e->state;

    } else if (we_lose == FALSE) {
        /* We beat the sender. Act on that only if we have not lost an
         * election within the last LOSS_DAMPEN seconds (or the loss time has
         * been cleared); otherwise log it and fall through to the loss
         * handling below.
         */
        if ((e->last_election_loss == 0)
            || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {

            do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
                       e->name, vote.election_id, vote.election_owner, vote.op,
                       vote.from, reason);

            e->last_election_loss = 0;
            election_timeout_stop(e);

            /* Report election_start to the caller (presumably so that it
             * starts a new election round -- confirm against callers)
             */
            e->state = election_start;
            return e->state;
        } else {
            char *loss_time = ctime(&e->last_election_loss);

            if (loss_time) {
                /* Keep only the "hh:mm:ss" part of ctime()'s fixed-format
                 * output ("Www Mmm dd hh:mm:ss yyyy\n")
                 */
                loss_time += 11;
                loss_time[8] = '\0';
            }
            crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
                     e->name, vote.election_id, vote.election_owner, vote.from,
                     LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
        }
    }

    /* Loss handling (also reached for a dampened "pass") */
    e->last_election_loss = tm_now;

    do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
               e->name, vote.election_id, vote.election_owner, vote.op,
               vote.from, reason);

    election_reset(e);

    /* your_node may be NULL here (e.g. when we lose because we are not
     * eligible and the sender is not in the peer cache)
     */
    send_no_vote(your_node, &vote);
    e->state = election_lost;
    return e->state;
}
709
710
711
712
713
714
715 void
716 election_clear_dampening(election_t *e)
717 {
718 e->last_election_loss = 0;
719 }