This source file includes the following definitions.
- election_complete
- election_timer_cb
- election_state
- election_init
- election_remove
- election_reset
- election_fini
- election_timeout_start
- election_timeout_stop
- election_timeout_set_period
- get_uptime
- compare_age
- election_vote
- election_check
- parse_election_message
- record_vote
- send_no_vote
- election_count_vote
- election_clear_dampening
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/time.h>
13 #include <sys/resource.h>
14
15 #include <crm/common/xml.h>
16
17 #include <crm/common/mainloop.h>
18 #include <crm/cluster/internal.h>
19 #include <crm/cluster/election_internal.h>
20 #include <crm/crm.h>
21
22 #define STORM_INTERVAL 2
23
24 struct election_s {
25 enum election_result state;
26 guint count;
27 char *name;
28 char *uname;
29 GSourceFunc cb;
30 GHashTable *voted;
31 mainloop_timer_t *timeout;
32 int election_wins;
33 bool wrote_blackbox;
34 time_t expires;
35 time_t last_election_loss;
36 };
37
38 static void
39 election_complete(election_t *e)
40 {
41 e->state = election_won;
42 if (e->cb != NULL) {
43 e->cb(e);
44 }
45 election_reset(e);
46 }
47
48 static gboolean
49 election_timer_cb(gpointer user_data)
50 {
51 election_t *e = user_data;
52
53 crm_info("%s timed out, declaring local node as winner", e->name);
54 election_complete(e);
55 return FALSE;
56 }
57
58
59
60
61
62
63
64
65 enum election_result
66 election_state(const election_t *e)
67 {
68 return (e == NULL)? election_error : e->state;
69 }
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87 election_t *
88 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
89 {
90 election_t *e = NULL;
91
92 static guint count = 0;
93
94 CRM_CHECK(uname != NULL, return NULL);
95
96 e = calloc(1, sizeof(election_t));
97 if (e == NULL) {
98 crm_perror(LOG_CRIT, "Cannot create election");
99 return NULL;
100 }
101
102 e->uname = strdup(uname);
103 if (e->uname == NULL) {
104 crm_perror(LOG_CRIT, "Cannot create election");
105 free(e);
106 return NULL;
107 }
108
109 e->name = name? crm_strdup_printf("election-%s", name)
110 : crm_strdup_printf("election-%u", count++);
111 e->cb = cb;
112 e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
113 election_timer_cb, e);
114 crm_trace("Created %s", e->name);
115 return e;
116 }
117
118
119
120
121
122
123
124
125
126
127 void
128 election_remove(election_t *e, const char *uname)
129 {
130 if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
131 crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
132 g_hash_table_remove(e->voted, uname);
133 }
134 }
135
136
137
138
139
140
141 void
142 election_reset(election_t *e)
143 {
144 if (e != NULL) {
145 crm_trace("Resetting election %s", e->name);
146 mainloop_timer_stop(e->timeout);
147 if (e->voted) {
148 crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
149 g_hash_table_destroy(e->voted);
150 e->voted = NULL;
151 }
152 }
153 }
154
155
156
157
158
159
160
161
162
163 void
164 election_fini(election_t *e)
165 {
166 if (e != NULL) {
167 election_reset(e);
168 crm_trace("Destroying %s", e->name);
169 mainloop_timer_del(e->timeout);
170 free(e->uname);
171 free(e->name);
172 free(e);
173 }
174 }
175
176 static void
177 election_timeout_start(election_t *e)
178 {
179 if (e != NULL) {
180 mainloop_timer_start(e->timeout);
181 }
182 }
183
184
185
186
187
188
189 void
190 election_timeout_stop(election_t *e)
191 {
192 if (e != NULL) {
193 mainloop_timer_stop(e->timeout);
194 }
195 }
196
197
198
199
200
201
202
203 void
204 election_timeout_set_period(election_t *e, guint period)
205 {
206 if (e != NULL) {
207 mainloop_timer_set_period(e->timeout, period);
208 } else {
209 crm_err("No election defined");
210 }
211 }
212
213 static int
214 get_uptime(struct timeval *output)
215 {
216 static time_t expires = 0;
217 static struct rusage info;
218
219 time_t tm_now = time(NULL);
220
221 if (expires < tm_now) {
222 int rc = 0;
223
224 info.ru_utime.tv_sec = 0;
225 info.ru_utime.tv_usec = 0;
226 rc = getrusage(RUSAGE_SELF, &info);
227
228 output->tv_sec = 0;
229 output->tv_usec = 0;
230
231 if (rc < 0) {
232 crm_perror(LOG_ERR, "Could not calculate the current uptime");
233 expires = 0;
234 return -1;
235 }
236
237 crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
238 (long)info.ru_utime.tv_usec);
239 }
240
241 expires = tm_now + STORM_INTERVAL;
242 output->tv_sec = info.ru_utime.tv_sec;
243 output->tv_usec = info.ru_utime.tv_usec;
244
245 return 1;
246 }
247
/*!
 * \internal
 * \brief Compare the local node's age (per get_uptime()) against a peer's
 *
 * Seconds are compared first; microseconds only break a tie in seconds.
 *
 * \param[in] your_age  Age reported by the peer
 *
 * \return 1 if the local age is greater (we win), -1 if it is smaller
 *         (we lose), or 0 if both are identical
 */
static int
compare_age(struct timeval your_age)
{
    struct timeval our_age;

    get_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
                      (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
            return 1;
        }
        crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
        return -1;
    }

    return 0;
}
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288 void
289 election_vote(election_t *e)
290 {
291 struct timeval age;
292 xmlNode *vote = NULL;
293 crm_node_t *our_node;
294
295 if (e == NULL) {
296 crm_trace("Election vote requested, but no election available");
297 return;
298 }
299
300 our_node = pcmk__get_node(0, e->uname, NULL,
301 pcmk__node_search_cluster_member);
302 if (!pcmk__cluster_is_node_active(our_node)) {
303 crm_trace("Cannot vote in %s yet: local node not connected to cluster",
304 e->name);
305 return;
306 }
307
308 election_reset(e);
309 e->state = election_in_progress;
310 vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
311
312 e->count++;
313 crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->uuid);
314 crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);
315
316
317 get_uptime(&age);
318 crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
319 PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
320
321 pcmk__cluster_send_message(NULL, crm_msg_crmd, vote);
322 free_xml(vote);
323
324 crm_debug("Started %s round %d", e->name, e->count);
325 election_timeout_start(e);
326 return;
327 }
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344 bool
345 election_check(election_t *e)
346 {
347 int voted_size = 0;
348 int num_members = 0;
349
350 if (e == NULL) {
351 crm_trace("Election check requested, but no election available");
352 return FALSE;
353 }
354 if (e->voted == NULL) {
355 crm_trace("%s check requested, but no votes received yet", e->name);
356 return FALSE;
357 }
358
359 voted_size = g_hash_table_size(e->voted);
360 num_members = pcmk__cluster_num_active_nodes();
361
362
363
364
365
366 if (voted_size >= num_members) {
367
368 election_timeout_stop(e);
369 if (voted_size > num_members) {
370 GHashTableIter gIter;
371 const crm_node_t *node;
372 char *key = NULL;
373
374 crm_warn("Received too many votes in %s", e->name);
375 g_hash_table_iter_init(&gIter, crm_peer_cache);
376 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
377 if (pcmk__cluster_is_node_active(node)) {
378 crm_warn("* expected vote: %s", node->uname);
379 }
380 }
381
382 g_hash_table_iter_init(&gIter, e->voted);
383 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
384 crm_warn("* actual vote: %s", key);
385 }
386
387 }
388
389 crm_info("%s won by local node", e->name);
390 election_complete(e);
391 return TRUE;
392
393 } else {
394 crm_debug("%s still waiting on %d of %d votes",
395 e->name, num_members - voted_size, num_members);
396 }
397
398 return FALSE;
399 }
400
/* Number of seconds after an election loss during which a would-be win
 * ("pass") is ignored
 */
#define LOSS_DAMPEN      2

/* Fields extracted from an election message (vote or no-vote).
 * The string members point into the parsed message and are not copied.
 */
struct vote {
    const char *op;                 /* Message task (vote or no-vote operation) */
    const char *from;               /* Name of the sending node */
    const char *version;            /* Version advertised by the sender */
    const char *election_owner;     /* Node ID of the election's owner */
    int election_id;                /* Election round number (-1 if missing) */
    struct timeval age;             /* Sender's age (-1 fields if missing) */
};
411
412
413
414
415
416
417
418
419
420
421
422
423 static bool
424 parse_election_message(const election_t *e, const xmlNode *message,
425 struct vote *vote)
426 {
427 CRM_CHECK(message && vote, return FALSE);
428
429 vote->election_id = -1;
430 vote->age.tv_sec = -1;
431 vote->age.tv_usec = -1;
432
433 vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
434 vote->from = crm_element_value(message, PCMK__XA_SRC);
435 vote->version = crm_element_value(message, PCMK_XA_VERSION);
436 vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
437
438 crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
439
440 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
441 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
442
443 crm_warn("Invalid %s message from %s in %s ",
444 (vote->op? vote->op : "election"),
445 (vote->from? vote->from : "unspecified node"),
446 (e? e->name : "election"));
447 return FALSE;
448 }
449
450
451
452 if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
453
454
455
456 crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
457 PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
458 if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
459 crm_warn("Cannot count %s %s from %s because it is missing uptime",
460 (e? e->name : "election"), vote->op, vote->from);
461 return FALSE;
462 }
463
464 } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
465 crm_info("Cannot process %s message from %s because %s is not a known election op",
466 (e? e->name : "election"), vote->from, vote->op);
467 return FALSE;
468 }
469
470
471
472 if (e == NULL) {
473 crm_info("Cannot count %s from %s because no election available",
474 vote->op, vote->from);
475 return FALSE;
476 }
477
478
479
480
481 if (crm_peer_cache == NULL) {
482 crm_info("Cannot count %s %s from %s because no peer information available",
483 e->name, vote->op, vote->from);
484 return FALSE;
485 }
486 return TRUE;
487 }
488
489 static void
490 record_vote(election_t *e, struct vote *vote)
491 {
492 pcmk__assert(e && vote && vote->from && vote->op);
493
494 if (e->voted == NULL) {
495 e->voted = pcmk__strkey_table(free, free);
496 }
497 pcmk__insert_dup(e->voted, vote->from, vote->op);
498 }
499
500 static void
501 send_no_vote(crm_node_t *peer, struct vote *vote)
502 {
503
504
505 xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
506 CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
507
508 crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
509 crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
510
511 pcmk__cluster_send_message(peer, crm_msg_crmd, novote);
512 free_xml(novote);
513 }
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530 enum election_result
531 election_count_vote(election_t *e, const xmlNode *message, bool can_win)
532 {
533 int log_level = LOG_INFO;
534 gboolean done = FALSE;
535 gboolean we_lose = FALSE;
536 const char *reason = "unknown";
537 bool we_are_owner = FALSE;
538 crm_node_t *our_node = NULL, *your_node = NULL;
539 time_t tm_now = time(NULL);
540 struct vote vote;
541
542 CRM_CHECK(message != NULL, return election_error);
543 if (parse_election_message(e, message, &vote) == FALSE) {
544 return election_error;
545 }
546
547 your_node = pcmk__get_node(0, vote.from, NULL,
548 pcmk__node_search_cluster_member);
549 our_node = pcmk__get_node(0, e->uname, NULL,
550 pcmk__node_search_cluster_member);
551 we_are_owner = (our_node != NULL)
552 && pcmk__str_eq(our_node->uuid, vote.election_owner,
553 pcmk__str_none);
554
555 if (!can_win) {
556 reason = "Not eligible";
557 we_lose = TRUE;
558
559 } else if (!pcmk__cluster_is_node_active(our_node)) {
560 reason = "We are not part of the cluster";
561 log_level = LOG_ERR;
562 we_lose = TRUE;
563
564 } else if (we_are_owner && (vote.election_id != e->count)) {
565 log_level = LOG_TRACE;
566 reason = "Superseded";
567 done = TRUE;
568
569 } else if (!pcmk__cluster_is_node_active(your_node)) {
570
571 reason = "Peer is not part of our cluster";
572 log_level = LOG_WARNING;
573 done = TRUE;
574
575 } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
576 || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
577
578
579
580 if (!we_are_owner) {
581 crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
582 e->name, vote.election_id, vote.op, vote.from,
583 vote.election_owner);
584 return election_error;
585 }
586 if (e->state != election_in_progress) {
587
588 crm_debug("Not counting %s round %d %s from %s because no election in progress",
589 e->name, vote.election_id, vote.op, vote.from);
590 return e->state;
591 }
592 record_vote(e, &vote);
593 reason = "Recorded";
594 done = TRUE;
595
596 } else {
597
598 int age_result = compare_age(vote.age);
599 int version_result = compare_version(vote.version, CRM_FEATURE_SET);
600
601 if (version_result < 0) {
602 reason = "Version";
603 we_lose = TRUE;
604
605 } else if (version_result > 0) {
606 reason = "Version";
607
608 } else if (age_result < 0) {
609 reason = "Uptime";
610 we_lose = TRUE;
611
612 } else if (age_result > 0) {
613 reason = "Uptime";
614
615 } else if (strcasecmp(e->uname, vote.from) > 0) {
616 reason = "Host name";
617 we_lose = TRUE;
618
619 } else {
620 reason = "Host name";
621 }
622 }
623
624 if (e->expires < tm_now) {
625 e->election_wins = 0;
626 e->expires = tm_now + STORM_INTERVAL;
627
628 } else if (done == FALSE && we_lose == FALSE) {
629 int peers = 1 + g_hash_table_size(crm_peer_cache);
630
631
632
633
634 e->election_wins++;
635 if (e->election_wins > (peers * peers)) {
636 crm_warn("%s election storm detected: %d wins in %d seconds",
637 e->name, e->election_wins, STORM_INTERVAL);
638 e->election_wins = 0;
639 e->expires = tm_now + STORM_INTERVAL;
640 if (e->wrote_blackbox == FALSE) {
641
642
643
644
645
646
647
648
649
650
651 crm_write_blackbox(0, NULL);
652 e->wrote_blackbox = TRUE;
653 }
654 }
655 }
656
657 if (done) {
658 do_crm_log(log_level + 1,
659 "Processed %s round %d %s (current round %d) from %s (%s)",
660 e->name, vote.election_id, vote.op, e->count, vote.from,
661 reason);
662 return e->state;
663
664 } else if (we_lose == FALSE) {
665
666
667
668
669
670
671
672
673
674
675
676
677
678 if ((e->last_election_loss == 0)
679 || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
680
681 do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
682 e->name, vote.election_id, vote.election_owner, vote.op,
683 vote.from, reason);
684
685 e->last_election_loss = 0;
686 election_timeout_stop(e);
687
688
689 e->state = election_start;
690 return e->state;
691 } else {
692 char *loss_time = ctime(&e->last_election_loss);
693
694 if (loss_time) {
695
696 loss_time += 11;
697 loss_time[8] = '\0';
698 }
699 crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
700 e->name, vote.election_id, vote.election_owner, vote.from,
701 LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
702 }
703 }
704
705 e->last_election_loss = tm_now;
706
707 do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
708 e->name, vote.election_id, vote.election_owner, vote.op,
709 vote.from, reason);
710
711 election_reset(e);
712 send_no_vote(your_node, &vote);
713 e->state = election_lost;
714 return e->state;
715 }
716
717
718
719
720
721
722 void
723 election_clear_dampening(election_t *e)
724 {
725 e->last_election_loss = 0;
726 }