13#include <sys/resource.h>
23#define STORM_INTERVAL 2
25struct pcmk__election {
34 time_t last_election_loss;
49election_timer_cb(gpointer user_data)
53 crm_info(
"Declaring local node as winner after election timed out");
54 election_complete(cluster);
69 if ((cluster == NULL) || (cluster->
priv->
election == NULL)) {
79#define ELECTION_TIMEOUT_MS 120000
120 if ((cluster != NULL) && (cluster->
priv->
election != NULL)
136 if ((cluster != NULL) && (cluster->
priv->
election != NULL)) {
158 if ((cluster != NULL) && (cluster->
priv->
election != NULL)) {
182 if ((cluster != NULL) && (cluster->
priv->
election != NULL)) {
202get_uptime(
struct timeval *output)
204 static time_t expires = 0;
205 static struct rusage info;
207 time_t tm_now = time(NULL);
209 if (expires < tm_now) {
212 info.ru_utime.tv_sec = 0;
213 info.ru_utime.tv_usec = 0;
214 rc = getrusage(RUSAGE_SELF, &info);
220 crm_perror(LOG_ERR,
"Could not calculate the current uptime");
225 crm_debug(
"Current CPU usage is: %lds, %ldus", (
long)info.ru_utime.tv_sec,
226 (
long)info.ru_utime.tv_usec);
230 output->tv_sec = info.ru_utime.tv_sec;
231 output->tv_usec = info.ru_utime.tv_usec;
237compare_age(
struct timeval your_age)
239 struct timeval our_age;
241 get_uptime(&our_age);
243 if (our_age.tv_sec > your_age.tv_sec) {
244 crm_debug(
"Win: %ld vs %ld (seconds)", (
long)our_age.tv_sec, (
long)your_age.tv_sec);
246 }
else if (our_age.tv_sec < your_age.tv_sec) {
247 crm_debug(
"Lose: %ld vs %ld (seconds)", (
long)our_age.tv_sec, (
long)your_age.tv_sec);
249 }
else if (our_age.tv_usec > your_age.tv_usec) {
250 crm_debug(
"Win: %ld.%06ld vs %ld.%06ld (usec)",
251 (
long)our_age.tv_sec, (
long)our_age.tv_usec, (
long)your_age.tv_sec, (
long)your_age.tv_usec);
253 }
else if (our_age.tv_usec < your_age.tv_usec) {
254 crm_debug(
"Lose: %ld.%06ld vs %ld.%06ld (usec)",
255 (
long)our_age.tv_sec, (
long)our_age.tv_usec, (
long)your_age.tv_sec, (
long)your_age.tv_usec);
281 xmlNode *vote = NULL;
283 const char *message_type = NULL;
288 crm_err(
"Cannot start an election: Local node name unknown");
295 crm_trace(
"Cannot vote yet: local node not connected to cluster");
323 election_timeout_start(cluster);
353 crm_trace(
"Election check requested, but no votes received yet");
357 voted_size = g_hash_table_size(cluster->
priv->
election->voted);
364 if (voted_size >= num_members) {
367 if (voted_size > num_members) {
368 GHashTableIter gIter;
372 crm_warn(
"Received too many votes in election");
374 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
380 g_hash_table_iter_init(&gIter, cluster->
priv->
election->voted);
381 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
387 crm_info(
"Election won by local node");
388 election_complete(cluster);
392 crm_debug(
"Election still waiting on %d of %d vote%s",
393 num_members - voted_size, num_members,
406 const char *election_owner;
423parse_election_message(
const xmlNode *message,
struct vote *vote)
425 CRM_CHECK(message && vote,
return FALSE);
427 vote->election_id = -1;
428 vote->age.tv_sec = -1;
429 vote->age.tv_usec = -1;
438 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
439 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
441 crm_warn(
"Invalid %s message from %s",
442 pcmk__s(vote->op,
"election"),
443 pcmk__s(vote->from,
"unspecified node"));
456 if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
457 crm_warn(
"Cannot count election %s from %s "
458 "because it is missing uptime", vote->op, vote->from);
463 crm_info(
"Cannot process election message from %s "
464 "because %s is not a known election op", vote->from, vote->op);
472 crm_info(
"Cannot count election %s from %s "
473 "because no peer information available", vote->op, vote->from);
482 pcmk__assert((vote->from != NULL) && (vote->op != NULL));
494 const char *message_type = NULL;
495 xmlNode *novote = NULL;
527 int log_level = LOG_INFO;
528 gboolean done = FALSE;
529 gboolean we_lose = FALSE;
530 const char *reason = NULL;
531 bool we_are_owner = FALSE;
534 time_t tm_now = time(NULL);
541 if (!parse_election_message(message, &vote)) {
549 we_are_owner = (our_node != NULL)
554 reason =
"Not eligible";
558 reason =
"We are not part of the cluster";
562 }
else if (we_are_owner
563 && (vote.election_id != cluster->
priv->
election->count)) {
565 reason =
"Superseded";
570 reason =
"Peer is not part of our cluster";
571 log_level = LOG_WARNING;
581 crm_warn(
"Cannot count election round %d %s from %s "
582 "because we did not start election (node ID %s did)",
583 vote.election_id, vote.op, vote.from,
584 vote.election_owner);
589 crm_debug(
"Not counting election round %d %s from %s "
590 "because no election in progress",
591 vote.election_id, vote.op, vote.from);
594 record_vote(cluster, &vote);
600 int age_result = compare_age(vote.age);
603 if (version_result < 0) {
607 }
else if (version_result > 0) {
610 }
else if (age_result < 0) {
614 }
else if (age_result > 0) {
617 }
else if (strcasecmp(cluster->
priv->
node_name, vote.from) > 0) {
618 reason =
"Host name";
622 reason =
"Host name";
630 }
else if (done == FALSE && we_lose == FALSE) {
637 if (cluster->
priv->
election->election_wins > (peers * peers)) {
638 crm_warn(
"Election storm detected: %d wins in %d seconds",
661 "Processed election round %u %s (current round %d) "
663 vote.election_id, vote.op, cluster->
priv->
election->count,
667 }
else if (we_lose == FALSE) {
682 || ((tm_now - cluster->
priv->
election->last_election_loss)
686 "Election round %d (started by node ID %s) pass: "
688 vote.election_id, vote.election_owner, vote.op,
698 char *loss_time = NULL;
700 loss_time = ctime(&(cluster->
priv->
election->last_election_loss));
706 crm_info(
"Ignoring election round %d (started by node ID %s) pass "
707 "vs %s because we lost less than %ds ago at %s",
708 vote.election_id, vote.election_owner, vote.from,
716 "Election round %d (started by node ID %s) lost: "
718 vote.election_id, vote.election_owner, vote.op,
722 send_no_vote(cluster, your_node, &vote);
736 if ((cluster != NULL) && (cluster->
priv->
election != NULL)) {
const char * pcmk__cluster_get_xml_id(pcmk__node_status_t *node)
GHashTable * pcmk__peer_cache
@ pcmk__node_search_cluster_member
Search for cluster nodes from membership cache.
struct pcmk__election pcmk__election_t
bool pcmk__cluster_send_message(const pcmk__node_status_t *node, enum pcmk_ipc_server service, const xmlNode *data)
pcmk__node_status_t * pcmk__get_node(unsigned int id, const char *uname, const char *xml_id, uint32_t flags)
bool pcmk__cluster_is_node_active(const pcmk__node_status_t *node)
unsigned int pcmk__cluster_num_active_nodes(void)
#define pcmk__assert_alloc(nmemb, size)
int compare_version(const char *version1, const char *version2)
void election_vote(pcmk_cluster_t *cluster)
void election_init(pcmk_cluster_t *cluster, void(*cb)(pcmk_cluster_t *))
#define ELECTION_TIMEOUT_MS
void election_fini(pcmk_cluster_t *cluster)
bool election_check(pcmk_cluster_t *cluster)
void election_reset(pcmk_cluster_t *cluster)
enum election_result election_state(const pcmk_cluster_t *cluster)
void election_clear_dampening(pcmk_cluster_t *cluster)
enum election_result election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message, bool can_win)
void election_timeout_set_period(pcmk_cluster_t *cluster, guint period)
void election_timeout_stop(pcmk_cluster_t *cluster)
void election_remove(pcmk_cluster_t *cluster, const char *uname)
Functions for conducting elections.
#define crm_info(fmt, args...)
void crm_write_blackbox(int nsig, const struct qb_log_callsite *callsite)
#define do_crm_log(level, fmt, args...)
Log a message.
#define crm_warn(fmt, args...)
#define crm_perror(level, fmt, args...)
Send a system error message to both the log and stderr.
#define CRM_CHECK(expr, failure_action)
#define crm_debug(fmt, args...)
#define crm_err(fmt, args...)
#define crm_log_xml_trace(xml, text)
#define crm_trace(fmt, args...)
Wrappers for and extensions to glib mainloop.
guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms)
struct mainloop_timer_s mainloop_timer_t
mainloop_timer_t * mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata)
void mainloop_timer_del(mainloop_timer_t *t)
void mainloop_timer_start(mainloop_timer_t *t)
void mainloop_timer_stop(mainloop_timer_t *t)
#define pcmk__new_request(server, sender_system, recipient_node, recipient_system, task, data)
#define pcmk__assert(expr)
const char * pcmk__server_message_type(enum pcmk_ipc_server server)
void pcmk__insert_dup(GHashTable *table, const char *name, const char *value)
GHashTable * pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func)
#define pcmk__plural_s(i)
pcmk__election_t * election
Election state (if election is needed)
char * node_name
Local node name at cluster layer.
enum pcmk_ipc_server server
Server this connection is for (if any)
pcmk__cluster_private_t * priv
Node status data (may be a cluster node or a Pacemaker Remote node)
char * name
Node name as known to cluster layer, or Pacemaker Remote node name.
Wrappers for and extensions to libxml2.
const char * crm_element_value(const xmlNode *data, const char *name)
Retrieve the value of an XML attribute.
const char * crm_xml_add_timeval(xmlNode *xml, const char *name_sec, const char *name_usec, const struct timeval *value)
Create XML attributes for seconds and microseconds.
int crm_element_value_int(const xmlNode *data, const char *name, int *dest)
Retrieve the integer value of an XML attribute.
const char * crm_xml_add_int(xmlNode *node, const char *name, int value)
Create an XML attribute with specified name and integer value.
int crm_element_value_timeval(const xmlNode *data, const char *name_sec, const char *name_usec, struct timeval *dest)
Retrieve the value of XML second/microsecond attributes as time.
const char * crm_xml_add(xmlNode *node, const char *name, const char *value)
Create an XML attribute with specified name and value.
void pcmk__xml_free(xmlNode *xml)
#define PCMK__XA_ELECTION_ID
#define PCMK__XA_ELECTION_OWNER
#define PCMK__XA_CRM_TASK
#define PCMK__XA_ELECTION_AGE_NANO_SEC
#define PCMK__XA_ELECTION_AGE_SEC