This source file includes the following definitions.
- election_complete
- election_timer_cb
- election_state
- election_init
- election_remove
- election_reset
- election_fini
- election_timeout_start
- election_timeout_stop
- election_timeout_set_period
- get_uptime
- compare_age
- election_vote
- election_check
- parse_election_message
- record_vote
- send_no_vote
- election_count_vote
- election_clear_dampening
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/time.h>
13 #include <sys/resource.h>
14
15 #include <crm/msg_xml.h>
16 #include <crm/common/xml.h>
17
18 #include <crm/common/mainloop.h>
19 #include <crm/cluster/internal.h>
20 #include <crm/cluster/election_internal.h>
21 #include <crm/crm.h>
22
23 #define STORM_INTERVAL 2
24
25 struct election_s {
26 enum election_result state;
27 guint count;
28 char *name;
29 char *uname;
30 GSourceFunc cb;
31 GHashTable *voted;
32 mainloop_timer_t *timeout;
33 int election_wins;
34 bool wrote_blackbox;
35 time_t expires;
36 time_t last_election_loss;
37 };
38
39 static void
40 election_complete(election_t *e)
41 {
42 e->state = election_won;
43 if (e->cb != NULL) {
44 e->cb(e);
45 }
46 election_reset(e);
47 }
48
49 static gboolean
50 election_timer_cb(gpointer user_data)
51 {
52 election_t *e = user_data;
53
54 crm_info("%s timed out, declaring local node as winner", e->name);
55 election_complete(e);
56 return FALSE;
57 }
58
59
60
61
62
63
64
65
66 enum election_result
67 election_state(const election_t *e)
68 {
69 return (e == NULL)? election_error : e->state;
70 }
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88 election_t *
89 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
90 {
91 election_t *e = NULL;
92
93 static guint count = 0;
94
95 CRM_CHECK(uname != NULL, return NULL);
96
97 e = calloc(1, sizeof(election_t));
98 if (e == NULL) {
99 crm_perror(LOG_CRIT, "Cannot create election");
100 return NULL;
101 }
102
103 e->uname = strdup(uname);
104 if (e->uname == NULL) {
105 crm_perror(LOG_CRIT, "Cannot create election");
106 free(e);
107 return NULL;
108 }
109
110 e->name = name? crm_strdup_printf("election-%s", name)
111 : crm_strdup_printf("election-%u", count++);
112 e->cb = cb;
113 e->timeout = mainloop_timer_add(e->name, period_ms, FALSE,
114 election_timer_cb, e);
115 crm_trace("Created %s", e->name);
116 return e;
117 }
118
119
120
121
122
123
124
125
126
127
128 void
129 election_remove(election_t *e, const char *uname)
130 {
131 if ((e != NULL) && (uname != NULL) && (e->voted != NULL)) {
132 crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
133 g_hash_table_remove(e->voted, uname);
134 }
135 }
136
137
138
139
140
141
142 void
143 election_reset(election_t *e)
144 {
145 if (e != NULL) {
146 crm_trace("Resetting election %s", e->name);
147 mainloop_timer_stop(e->timeout);
148 if (e->voted) {
149 crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
150 g_hash_table_destroy(e->voted);
151 e->voted = NULL;
152 }
153 }
154 }
155
156
157
158
159
160
161
162
163
164 void
165 election_fini(election_t *e)
166 {
167 if (e != NULL) {
168 election_reset(e);
169 crm_trace("Destroying %s", e->name);
170 mainloop_timer_del(e->timeout);
171 free(e->uname);
172 free(e->name);
173 free(e);
174 }
175 }
176
177 static void
178 election_timeout_start(election_t *e)
179 {
180 if (e != NULL) {
181 mainloop_timer_start(e->timeout);
182 }
183 }
184
185
186
187
188
189
190 void
191 election_timeout_stop(election_t *e)
192 {
193 if (e != NULL) {
194 mainloop_timer_stop(e->timeout);
195 }
196 }
197
198
199
200
201
202
203
204 void
205 election_timeout_set_period(election_t *e, guint period)
206 {
207 if (e != NULL) {
208 mainloop_timer_set_period(e->timeout, period);
209 } else {
210 crm_err("No election defined");
211 }
212 }
213
214 static int
215 get_uptime(struct timeval *output)
216 {
217 static time_t expires = 0;
218 static struct rusage info;
219
220 time_t tm_now = time(NULL);
221
222 if (expires < tm_now) {
223 int rc = 0;
224
225 info.ru_utime.tv_sec = 0;
226 info.ru_utime.tv_usec = 0;
227 rc = getrusage(RUSAGE_SELF, &info);
228
229 output->tv_sec = 0;
230 output->tv_usec = 0;
231
232 if (rc < 0) {
233 crm_perror(LOG_ERR, "Could not calculate the current uptime");
234 expires = 0;
235 return -1;
236 }
237
238 crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
239 (long)info.ru_utime.tv_usec);
240 }
241
242 expires = tm_now + STORM_INTERVAL;
243 output->tv_sec = info.ru_utime.tv_sec;
244 output->tv_usec = info.ru_utime.tv_usec;
245
246 return 1;
247 }
248
/*!
 * \internal
 * \brief Compare the local node's age against a peer's
 *
 * Seconds are compared first; microseconds only break a tie in seconds.
 *
 * \param[in] your_age  Age reported by the peer
 *
 * \return 1 if the local age is greater, -1 if it is smaller, 0 if equal
 * \note The return value of get_uptime() is not checked here; on failure it
 *       leaves the local age zeroed.
 */
static int
compare_age(struct timeval your_age)
{
    struct timeval our_age;

    get_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
                      (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
            return 1;
        }
        crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
        return -1;
    }

    return 0;
}
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289 void
290 election_vote(election_t *e)
291 {
292 struct timeval age;
293 xmlNode *vote = NULL;
294 crm_node_t *our_node;
295
296 if (e == NULL) {
297 crm_trace("Election vote requested, but no election available");
298 return;
299 }
300
301 our_node = crm_get_peer(0, e->uname);
302 if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) {
303 crm_trace("Cannot vote in %s yet: local node not connected to cluster",
304 e->name);
305 return;
306 }
307
308 election_reset(e);
309 e->state = election_in_progress;
310 vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
311
312 e->count++;
313 crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid);
314 crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count);
315
316 get_uptime(&age);
317 crm_xml_add_timeval(vote, F_CRM_ELECTION_AGE_S, F_CRM_ELECTION_AGE_US, &age);
318
319 send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
320 free_xml(vote);
321
322 crm_debug("Started %s round %d", e->name, e->count);
323 election_timeout_start(e);
324 return;
325 }
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342 bool
343 election_check(election_t *e)
344 {
345 int voted_size = 0;
346 int num_members = 0;
347
348 if (e == NULL) {
349 crm_trace("Election check requested, but no election available");
350 return FALSE;
351 }
352 if (e->voted == NULL) {
353 crm_trace("%s check requested, but no votes received yet", e->name);
354 return FALSE;
355 }
356
357 voted_size = g_hash_table_size(e->voted);
358 num_members = crm_active_peers();
359
360
361
362
363
364 if (voted_size >= num_members) {
365
366 election_timeout_stop(e);
367 if (voted_size > num_members) {
368 GHashTableIter gIter;
369 const crm_node_t *node;
370 char *key = NULL;
371
372 crm_warn("Received too many votes in %s", e->name);
373 g_hash_table_iter_init(&gIter, crm_peer_cache);
374 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
375 if (crm_is_peer_active(node)) {
376 crm_warn("* expected vote: %s", node->uname);
377 }
378 }
379
380 g_hash_table_iter_init(&gIter, e->voted);
381 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
382 crm_warn("* actual vote: %s", key);
383 }
384
385 }
386
387 crm_info("%s won by local node", e->name);
388 election_complete(e);
389 return TRUE;
390
391 } else {
392 crm_debug("%s still waiting on %d of %d votes",
393 e->name, num_members - voted_size, num_members);
394 }
395
396 return FALSE;
397 }
398
/* Number of seconds after a loss during which a would-be "pass" result in
 * election_count_vote() is ignored and treated as another loss
 */
#define LOSS_DAMPEN 2

/* Fields extracted from an election message by parse_election_message().
 * The string members point into the parsed message rather than being copies.
 */
struct vote {
    const char *op;             /* value of F_CRM_TASK */
    const char *from;           /* value of F_CRM_HOST_FROM */
    const char *version;        /* value of F_CRM_VERSION */
    const char *election_owner; /* value of F_CRM_ELECTION_OWNER */
    int election_id;            /* value of F_CRM_ELECTION_ID, or -1 if unset */
    struct timeval age;         /* sender's age; only parsed for CRM_OP_VOTE */
};
409
410
411
412
413
414
415
416
417
418
419
420
421 static bool
422 parse_election_message(const election_t *e, const xmlNode *message,
423 struct vote *vote)
424 {
425 CRM_CHECK(message && vote, return FALSE);
426
427 vote->election_id = -1;
428 vote->age.tv_sec = -1;
429 vote->age.tv_usec = -1;
430
431 vote->op = crm_element_value(message, F_CRM_TASK);
432 vote->from = crm_element_value(message, F_CRM_HOST_FROM);
433 vote->version = crm_element_value(message, F_CRM_VERSION);
434 vote->election_owner = crm_element_value(message, F_CRM_ELECTION_OWNER);
435
436 crm_element_value_int(message, F_CRM_ELECTION_ID, &(vote->election_id));
437
438 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
439 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
440
441 crm_warn("Invalid %s message from %s in %s ",
442 (vote->op? vote->op : "election"),
443 (vote->from? vote->from : "unspecified node"),
444 (e? e->name : "election"));
445 return FALSE;
446 }
447
448
449
450 if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
451
452 crm_element_value_timeval(message, F_CRM_ELECTION_AGE_S,
453 F_CRM_ELECTION_AGE_US, &(vote->age));
454 if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
455 crm_warn("Cannot count %s %s from %s because it is missing uptime",
456 (e? e->name : "election"), vote->op, vote->from);
457 return FALSE;
458 }
459
460 } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
461 crm_info("Cannot process %s message from %s because %s is not a known election op",
462 (e? e->name : "election"), vote->from, vote->op);
463 return FALSE;
464 }
465
466
467
468 if (e == NULL) {
469 crm_info("Cannot count %s from %s because no election available",
470 vote->op, vote->from);
471 return FALSE;
472 }
473
474
475
476
477 if (crm_peer_cache == NULL) {
478 crm_info("Cannot count %s %s from %s because no peer information available",
479 e->name, vote->op, vote->from);
480 return FALSE;
481 }
482 return TRUE;
483 }
484
485 static void
486 record_vote(election_t *e, struct vote *vote)
487 {
488 char *voter_copy = NULL;
489 char *vote_copy = NULL;
490
491 CRM_ASSERT(e && vote && vote->from && vote->op);
492 if (e->voted == NULL) {
493 e->voted = pcmk__strkey_table(free, free);
494 }
495
496 voter_copy = strdup(vote->from);
497 vote_copy = strdup(vote->op);
498 CRM_ASSERT(voter_copy && vote_copy);
499
500 g_hash_table_replace(e->voted, voter_copy, vote_copy);
501 }
502
503 static void
504 send_no_vote(crm_node_t *peer, struct vote *vote)
505 {
506
507
508 xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
509 CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
510
511 crm_xml_add(novote, F_CRM_ELECTION_OWNER, vote->election_owner);
512 crm_xml_add_int(novote, F_CRM_ELECTION_ID, vote->election_id);
513
514 send_cluster_message(peer, crm_msg_crmd, novote, TRUE);
515 free_xml(novote);
516 }
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533 enum election_result
534 election_count_vote(election_t *e, const xmlNode *message, bool can_win)
535 {
536 int log_level = LOG_INFO;
537 gboolean done = FALSE;
538 gboolean we_lose = FALSE;
539 const char *reason = "unknown";
540 bool we_are_owner = FALSE;
541 crm_node_t *our_node = NULL, *your_node = NULL;
542 time_t tm_now = time(NULL);
543 struct vote vote;
544
545 CRM_CHECK(message != NULL, return election_error);
546 if (parse_election_message(e, message, &vote) == FALSE) {
547 return election_error;
548 }
549
550 your_node = crm_get_peer(0, vote.from);
551 our_node = crm_get_peer(0, e->uname);
552 we_are_owner = (our_node != NULL)
553 && pcmk__str_eq(our_node->uuid, vote.election_owner,
554 pcmk__str_none);
555
556 if (!can_win) {
557 reason = "Not eligible";
558 we_lose = TRUE;
559
560 } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
561 reason = "We are not part of the cluster";
562 log_level = LOG_ERR;
563 we_lose = TRUE;
564
565 } else if (we_are_owner && (vote.election_id != e->count)) {
566 log_level = LOG_TRACE;
567 reason = "Superseded";
568 done = TRUE;
569
570 } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
571
572 reason = "Peer is not part of our cluster";
573 log_level = LOG_WARNING;
574 done = TRUE;
575
576 } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
577 || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
578
579
580
581 if (!we_are_owner) {
582 crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
583 e->name, vote.election_id, vote.op, vote.from,
584 vote.election_owner);
585 return election_error;
586 }
587 if (e->state != election_in_progress) {
588
589 crm_debug("Not counting %s round %d %s from %s because no election in progress",
590 e->name, vote.election_id, vote.op, vote.from);
591 return e->state;
592 }
593 record_vote(e, &vote);
594 reason = "Recorded";
595 done = TRUE;
596
597 } else {
598
599 int age_result = compare_age(vote.age);
600 int version_result = compare_version(vote.version, CRM_FEATURE_SET);
601
602 if (version_result < 0) {
603 reason = "Version";
604 we_lose = TRUE;
605
606 } else if (version_result > 0) {
607 reason = "Version";
608
609 } else if (age_result < 0) {
610 reason = "Uptime";
611 we_lose = TRUE;
612
613 } else if (age_result > 0) {
614 reason = "Uptime";
615
616 } else if (strcasecmp(e->uname, vote.from) > 0) {
617 reason = "Host name";
618 we_lose = TRUE;
619
620 } else {
621 reason = "Host name";
622 }
623 }
624
625 if (e->expires < tm_now) {
626 e->election_wins = 0;
627 e->expires = tm_now + STORM_INTERVAL;
628
629 } else if (done == FALSE && we_lose == FALSE) {
630 int peers = 1 + g_hash_table_size(crm_peer_cache);
631
632
633
634
635 e->election_wins++;
636 if (e->election_wins > (peers * peers)) {
637 crm_warn("%s election storm detected: %d wins in %d seconds",
638 e->name, e->election_wins, STORM_INTERVAL);
639 e->election_wins = 0;
640 e->expires = tm_now + STORM_INTERVAL;
641 if (e->wrote_blackbox == FALSE) {
642
643
644
645
646
647
648
649
650
651
652 crm_write_blackbox(0, NULL);
653 e->wrote_blackbox = TRUE;
654 }
655 }
656 }
657
658 if (done) {
659 do_crm_log(log_level + 1,
660 "Processed %s round %d %s (current round %d) from %s (%s)",
661 e->name, vote.election_id, vote.op, e->count, vote.from,
662 reason);
663 return e->state;
664
665 } else if (we_lose == FALSE) {
666
667
668
669
670
671
672
673
674
675
676
677
678
679 if ((e->last_election_loss == 0)
680 || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
681
682 do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
683 e->name, vote.election_id, vote.election_owner, vote.op,
684 vote.from, reason);
685
686 e->last_election_loss = 0;
687 election_timeout_stop(e);
688
689
690 e->state = election_start;
691 return e->state;
692 } else {
693 char *loss_time = ctime(&e->last_election_loss);
694
695 if (loss_time) {
696
697 loss_time += 11;
698 loss_time[8] = '\0';
699 }
700 crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
701 e->name, vote.election_id, vote.election_owner, vote.from,
702 LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
703 }
704 }
705
706 e->last_election_loss = tm_now;
707
708 do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
709 e->name, vote.election_id, vote.election_owner, vote.op,
710 vote.from, reason);
711
712 election_reset(e);
713 send_no_vote(your_node, &vote);
714 e->state = election_lost;
715 return e->state;
716 }
717
718
719
720
721
722
723 void
724 election_clear_dampening(election_t *e)
725 {
726 e->last_election_loss = 0;
727 }