This source file includes the following definitions.
- election_complete
- election_timer_cb
- election_state
- election_init
- election_remove
- election_reset
- election_fini
- election_timeout_start
- election_timeout_stop
- election_timeout_set_period
- crm_uptime
- crm_compare_age
- election_vote
- election_check
- election_count_vote
1
2
3
4
5
6
7
8 #include <crm_internal.h>
9
10 #include <sys/time.h>
11 #include <sys/resource.h>
12
13 #include <crm/msg_xml.h>
14 #include <crm/common/xml.h>
15
16 #include <crm/common/mainloop.h>
17 #include <crm/cluster/internal.h>
18 #include <crm/cluster/election.h>
19 #include <crm/crm.h>
20
21 #define STORM_INTERVAL 2
22
/* Per-election bookkeeping shared by all functions in this file */
struct election_s
{
    enum election_result state; /* Latest outcome; reported by election_state() */
    guint count;                /* Bumped for every vote we cast and sent as the election ID */
    char *name;                 /* Heap-allocated "election-<name>" label used for logging and the timer */
    char *uname;                /* Heap-allocated copy of the node name given to election_init() */
    GSourceFunc cb;             /* Optional callback run by election_complete() with this object */
    GHashTable *voted;          /* Node name -> operation for recorded votes; created on demand by
                                 * election_count_vote() and destroyed again on reset */
    mainloop_timer_t *timeout;  /* Timer whose expiry completes the election (election_timer_cb) */
};
33
34 static void election_complete(election_t *e)
35 {
36 crm_info("Election %s complete", e->name);
37 e->state = election_won;
38
39 if(e->cb) {
40 e->cb(e);
41 }
42
43 election_reset(e);
44 }
45
46 static gboolean election_timer_cb(gpointer user_data)
47 {
48 election_t *e = user_data;
49
50 crm_info("Election %s %p timed out", e->name, e);
51 election_complete(e);
52 return FALSE;
53 }
54
55 enum election_result
56 election_state(election_t *e)
57 {
58 if(e) {
59 return e->state;
60 }
61 return election_error;
62 }
63
64 election_t *
65 election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
66 {
67 static guint count = 0;
68 election_t *e = calloc(1, sizeof(election_t));
69
70 if(e != NULL) {
71 if(name) {
72 e->name = crm_strdup_printf("election-%s", name);
73 } else {
74 e->name = crm_strdup_printf("election-%u", count++);
75 }
76
77 e->cb = cb;
78 e->uname = strdup(uname);
79 e->timeout = mainloop_timer_add(e->name, period_ms, FALSE, election_timer_cb, e);
80 crm_trace("Created %s %p", e->name, e);
81 }
82 return e;
83 }
84
85 void
86 election_remove(election_t *e, const char *uname)
87 {
88 if(e && uname && e->voted) {
89 g_hash_table_remove(e->voted, uname);
90 }
91 }
92
93 void
94 election_reset(election_t *e)
95 {
96 crm_trace("Resetting election %s", e->name);
97 if(e) {
98 mainloop_timer_stop(e->timeout);
99 }
100 if (e && e->voted) {
101 crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
102 g_hash_table_destroy(e->voted);
103 e->voted = NULL;
104 }
105 }
106
107 void
108 election_fini(election_t *e)
109 {
110 if(e) {
111 election_reset(e);
112 crm_trace("Destroying %s", e->name);
113 mainloop_timer_del(e->timeout);
114 free(e->uname);
115 free(e->name);
116 free(e);
117 }
118 }
119
120 static void
121 election_timeout_start(election_t *e)
122 {
123 if(e) {
124 mainloop_timer_start(e->timeout);
125 }
126 }
127
128 void
129 election_timeout_stop(election_t *e)
130 {
131 if(e) {
132 mainloop_timer_stop(e->timeout);
133 }
134 }
135
136 void
137 election_timeout_set_period(election_t *e, guint period)
138 {
139 if(e) {
140 mainloop_timer_set_period(e->timeout, period);
141 } else {
142 crm_err("No election defined");
143 }
144 }
145
146 static int
147 crm_uptime(struct timeval *output)
148 {
149 static time_t expires = 0;
150 static struct rusage info;
151
152 time_t tm_now = time(NULL);
153
154 if (expires < tm_now) {
155 int rc = 0;
156
157 info.ru_utime.tv_sec = 0;
158 info.ru_utime.tv_usec = 0;
159 rc = getrusage(RUSAGE_SELF, &info);
160
161 output->tv_sec = 0;
162 output->tv_usec = 0;
163
164 if (rc < 0) {
165 crm_perror(LOG_ERR, "Could not calculate the current uptime");
166 expires = 0;
167 return -1;
168 }
169
170 crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
171 (long)info.ru_utime.tv_usec);
172 }
173
174 expires = tm_now + STORM_INTERVAL;
175 output->tv_sec = info.ru_utime.tv_sec;
176 output->tv_usec = info.ru_utime.tv_usec;
177
178 return 1;
179 }
180
/*!
 * \internal
 * \brief Compare our uptime (as reported by crm_uptime()) with a peer's
 *
 * Seconds are compared first; microseconds only break a tie.
 *
 * \param[in] your_age  Uptime claimed by the peer
 *
 * \return 1 if our value is larger, -1 if it is smaller, 0 if equal
 */
static int
crm_compare_age(struct timeval your_age)
{
    struct timeval our_age;

    /* Return value intentionally unused: on failure our_age is zeroed */
    crm_uptime(&our_age);

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            crm_debug("Win: %ld.%ld vs %ld.%ld (usec)",
                      (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
            return 1;
        }
        crm_debug("Lose: %ld.%ld vs %ld.%ld (usec)",
                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
        return -1;
    }

    return 0;
}
206
207 void
208 election_vote(election_t *e)
209 {
210 struct timeval age;
211 xmlNode *vote = NULL;
212 crm_node_t *our_node;
213
214 if(e == NULL) {
215 crm_trace("Not voting in election: not initialized");
216 return;
217 }
218
219 our_node = crm_get_peer(0, e->uname);
220 if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
221 crm_trace("Cannot vote yet: %p", our_node);
222 return;
223 }
224
225 e->state = election_in_progress;
226 vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
227
228 e->count++;
229 crm_xml_add(vote, F_CRM_ELECTION_OWNER, our_node->uuid);
230 crm_xml_add_int(vote, F_CRM_ELECTION_ID, e->count);
231
232 crm_uptime(&age);
233 crm_xml_add_int(vote, F_CRM_ELECTION_AGE_S, age.tv_sec);
234 crm_xml_add_int(vote, F_CRM_ELECTION_AGE_US, age.tv_usec);
235
236 send_cluster_message(NULL, crm_msg_crmd, vote, TRUE);
237 free_xml(vote);
238
239 crm_debug("Started election %d", e->count);
240 if (e->voted) {
241 g_hash_table_destroy(e->voted);
242 e->voted = NULL;
243 }
244
245 election_timeout_start(e);
246 return;
247 }
248
249 bool
250 election_check(election_t *e)
251 {
252 int voted_size = 0;
253 int num_members = crm_active_peers();
254
255 if(e == NULL) {
256 crm_trace("not initialized");
257 return FALSE;
258 }
259
260 if (e->voted) {
261 voted_size = g_hash_table_size(e->voted);
262 }
263
264
265
266
267 if (voted_size >= num_members) {
268
269 election_timeout_stop(e);
270 if (voted_size > num_members) {
271 GHashTableIter gIter;
272 const crm_node_t *node;
273 char *key = NULL;
274
275 g_hash_table_iter_init(&gIter, crm_peer_cache);
276 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
277 if (crm_is_peer_active(node)) {
278 crm_err("member: %s proc=%.32x", node->uname, node->processes);
279 }
280 }
281
282 g_hash_table_iter_init(&gIter, e->voted);
283 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
284 crm_err("voted: %s", key);
285 }
286
287 }
288
289 election_complete(e);
290 return TRUE;
291
292 } else {
293 crm_debug("Still waiting on %d non-votes (%d total)",
294 num_members - voted_size, num_members);
295 }
296
297 return FALSE;
298 }
299
300 #define loss_dampen 2
301
302
/*!
 * \brief Process an election message (vote or no-vote) received from a peer
 *
 * Depending on the message and on how we compare with the sender, the message
 * is either just recorded/ignored, causes us to ask for a new election of our
 * own, or causes us to concede by sending a CRM_OP_NOVOTE reply.
 *
 * \param[in] e        Election object
 * \param[in] vote     XML of the received election message
 * \param[in] can_win  If false, we concede regardless of the message content
 *
 * \return election_error if \p vote is NULL; election_lost if \p e is NULL,
 *         the peer cache is missing, or we conceded; the unchanged e->state
 *         if the message was only recorded or ignored; election_start if the
 *         caller should start a new election
 */
enum election_result
election_count_vote(election_t *e, xmlNode *vote, bool can_win)
{
    int age = 0;
    int election_id = -1;
    int log_level = LOG_INFO;
    gboolean use_born_on = FALSE;
    gboolean done = FALSE;      /* message fully handled; no reply needed */
    gboolean we_lose = FALSE;   /* we must concede to the sender */
    const char *op = NULL;
    const char *from = NULL;
    const char *reason = "unknown";
    const char *election_owner = NULL;
    crm_node_t *our_node = NULL, *your_node = NULL;

    /* Number of votes we out-ranked within the current storm-detection window */
    static int election_wins = 0;

    xmlNode *novote = NULL;
    time_t tm_now = time(NULL);
    static time_t expires = 0;              /* end of the storm-detection window */
    static time_t last_election_loss = 0;   /* when we last conceded; 0 = not dampening */

    /* NOTE(review): CRM_CHECK presumably logs and runs the given statement
     * when the condition is false -- confirm against its definition
     */
    CRM_CHECK(vote != NULL, return election_error);

    if(e == NULL) {
        crm_info("Not voting in election: not initialized");
        return election_lost;

    } else if(crm_peer_cache == NULL) {
        crm_info("Not voting in election: no peer cache");
        return election_lost;
    }

    /* Pull the fields we need out of the message */
    op = crm_element_value(vote, F_CRM_TASK);
    from = crm_element_value(vote, F_CRM_HOST_FROM);
    election_owner = crm_element_value(vote, F_CRM_ELECTION_OWNER);
    crm_element_value_int(vote, F_CRM_ELECTION_ID, &election_id);

    your_node = crm_get_peer(0, from);
    our_node = crm_get_peer(0, e->uname);

    /* The table of recorded votes is created lazily */
    if (e->voted == NULL) {
        crm_debug("Created voted hash");
        e->voted = crm_str_table_new();
    }

    /* The "born" tie-breaker is only consulted on these two cluster types */
    if (is_heartbeat_cluster()) {
        use_born_on = TRUE;
    } else if (is_classic_ais_cluster()) {
        use_born_on = TRUE;
    }

    /* Decide what to do with the message; the first matching rule wins */
    if(can_win == FALSE) {
        reason = "Not eligible";
        we_lose = TRUE;

    } else if (our_node == NULL || crm_is_peer_active(our_node) == FALSE) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_lose = TRUE;

    } else if (election_id != e->count && crm_str_eq(our_node->uuid, election_owner, TRUE)) {
        /* A reply to one of our own earlier rounds: nothing to do */
        log_level = LOG_TRACE;
        reason = "Superseded";
        done = TRUE;

    } else if (your_node == NULL || crm_is_peer_active(your_node) == FALSE) {
        /* Ignore messages from nodes we do not consider active members */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;

    } else if (crm_str_eq(op, CRM_OP_NOVOTE, TRUE)) {
        /* The sender conceded to us: record it */
        char *op_copy = strdup(op);
        char *uname_copy = strdup(from);

        CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

        /* Table gets its own copies (presumably it frees them -- see
         * crm_str_table_new())
         */
        g_hash_table_replace(e->voted, uname_copy, op_copy);
        reason = "Recorded";
        done = TRUE;

    } else {
        /* A real vote: compare ourselves with the sender */
        struct timeval your_age;
        const char *your_version = crm_element_value(vote, F_CRM_VERSION);
        int tv_sec = 0;
        int tv_usec = 0;

        crm_element_value_int(vote, F_CRM_ELECTION_AGE_S, &tv_sec);
        crm_element_value_int(vote, F_CRM_ELECTION_AGE_US, &tv_usec);

        your_age.tv_sec = tv_sec;
        your_age.tv_usec = tv_usec;

        /* > 0: our uptime is larger, < 0: the sender's is larger */
        age = crm_compare_age(your_age);
        if (crm_str_eq(from, e->uname, TRUE)) {
            /* Our own vote came back to us: record it like any other */
            char *op_copy = strdup(op);
            char *uname_copy = strdup(from);

            CRM_ASSERT(crm_str_eq(our_node->uuid, election_owner, TRUE));

            /* Table gets its own copies, as above */
            g_hash_table_replace(e->voted, uname_copy, op_copy);
            reason = "Recorded";
            done = TRUE;

        } else if (compare_version(your_version, CRM_FEATURE_SET) < 0) {
            /* NOTE(review): presumably a negative result means the sender's
             * feature set is older than ours -- confirm against compare_version()
             */
            reason = "Version";
            we_lose = TRUE;

        } else if (compare_version(your_version, CRM_FEATURE_SET) > 0) {
            reason = "Version";

        } else if (age < 0) {
            reason = "Uptime";
            we_lose = TRUE;

        } else if (age > 0) {
            reason = "Uptime";

            /* Versions and uptimes are equal from here on */
        } else if (use_born_on && your_node->born < our_node->born) {
            reason = "Born";
            we_lose = TRUE;

        } else if (use_born_on && your_node->born > our_node->born) {
            reason = "Born";

        } else if (e->uname == NULL) {
            reason = "Unknown host name";
            we_lose = TRUE;

        } else if (strcasecmp(e->uname, from) > 0) {
            /* Final tie-breaker: the name that sorts first (ignoring case) wins */
            reason = "Host name";
            we_lose = TRUE;

        } else {
            reason = "Host name";
            /* Equal names were already handled by the "Recorded" case above */
            CRM_ASSERT(strcasecmp(e->uname, from) < 0);
        }
    }

    /* Election storm detection: count the votes we out-ranked per window */
    if (expires < tm_now) {
        /* Window is over: start a new one (this message is not counted) */
        election_wins = 0;
        expires = tm_now + STORM_INTERVAL;

    } else if (done == FALSE && we_lose == FALSE) {
        int peers = 1 + g_hash_table_size(crm_peer_cache);

        /* More than peers^2 within one window is treated as a storm */
        election_wins++;
        if (election_wins > (peers * peers)) {
            crm_warn("Election storm detected: %d elections in %d seconds", election_wins,
                     STORM_INTERVAL);
            election_wins = 0;
            expires = tm_now + STORM_INTERVAL;
            crm_write_blackbox(0, NULL);
        }
    }

    if (done) {
        /* Recorded or ignored: leave the election state untouched */
        do_crm_log(log_level + 1, "Election %d (current: %d, owner: %s): Processed %s from %s (%s)",
                   election_id, e->count, election_owner, op, from, reason);
        return e->state;

    } else if (we_lose == FALSE) {
        do_crm_log(log_level, "Election %d (owner: %s) pass: %s from %s (%s)",
                   election_id, election_owner, op, from, reason);

        /* Only ask for a new election if we have not conceded one within
         * the last loss_dampen seconds
         */
        if (last_election_loss == 0
            || tm_now - last_election_loss > (time_t) loss_dampen) {

            last_election_loss = 0;
            election_timeout_stop(e);

            /* Tell the caller to start a new election */
            e->state = election_start;
            return e->state;
        }

        /* Dampened: fall through and concede this one as well */
        crm_info("Election %d ignore: We already lost an election less than %ds ago (%s)",
                 election_id, loss_dampen, ctime(&last_election_loss));
    }

    /* Concede: answer the sender with a no-vote for its election */
    novote = create_request(CRM_OP_NOVOTE, NULL, from,
                            CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);

    do_crm_log(log_level, "Election %d (owner: %s) lost: %s from %s (%s)",
               election_id, election_owner, op, from, reason);

    election_timeout_stop(e);

    crm_xml_add(novote, F_CRM_ELECTION_OWNER, election_owner);
    crm_xml_add_int(novote, F_CRM_ELECTION_ID, election_id);

    /* NOTE(review): your_node can be NULL here (e.g. when can_win is false
     * and the sender is unknown); confirm how send_cluster_message() treats
     * a NULL destination
     */
    send_cluster_message(your_node, crm_msg_crmd, novote, TRUE);
    free_xml(novote);

    /* Remember when we conceded so the dampening check above can apply */
    last_election_loss = tm_now;
    e->state = election_lost;
    return e->state;
}