1 /*
2 * Copyright 2009-2024 the Pacemaker project contributors
3 *
4 * The version control history for this file may have further details.
5 *
6 * This source code is licensed under the GNU Lesser General Public License
7 * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8 */
9
10 #ifndef PCMK__CRM_CLUSTER_ELECTION_INTERNAL__H
11 #define PCMK__CRM_CLUSTER_ELECTION_INTERNAL__H
12
13 #include <stdbool.h> // bool
14
15 #include <glib.h> // guint, GSourceFunc
16 #include <libxml/tree.h> // xmlNode
17
18 #include <crm/common/ipc.h> // enum pcmk_ipc_server
19 #include <crm/cluster.h> // pcmk_cluster_t
20
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24
25 /**
26 * \file
27 * \brief Functions for conducting elections
28 *
29 * An election is useful for a daemon that runs on all nodes but needs any one
30 * instance to perform a special role.
31 *
32 * Elections are closely tied to the cluster peer cache. Peers in the cache that
33 * are active members are eligible to vote. Elections are named for logging
34 * purposes, but only one election may exist at any time, so typically an
35 * election would be created at daemon start-up and freed at shutdown.
36 *
37 * Pacemaker's election procedure has been heavily adapted from the
38 * Invitation Algorithm variant of the Garcia-Molina Bully Algorithm:
39 *
40 * https://en.wikipedia.org/wiki/Bully_algorithm
41 *
42 * Elections are conducted via cluster messages. There are two types of
43 * messages: a "vote" is a declaration of the voting node's candidacy, and is
44 * always broadcast; a "no-vote" is a concession by the responding node, and is
45 * always a reply to the preferred node's vote. (These correspond to "invite"
46 * and "accept" in the traditional algorithm.)
47 *
48 * A vote together with any no-vote replies to it is considered an election
49 * round. Rounds are numbered with a simple counter unique to each node
50 * (this would be the group number in the traditional algorithm). Concurrent
51 * election rounds are possible.
52 *
53 * An election round is started when any node broadcasts a vote. When a node
54 * receives another node's vote, it compares itself against the sending node
55 * according to certain metrics, and either starts a new round (if it prefers
56 * itself) or replies to the other node with a no-vote (if it prefers that
57 * node).
58 *
59 * If a node receives no-votes from all other active nodes, it declares itself
60 * the winner. The library API does not notify other nodes of this; callers
61 * must implement that if desired.
62 */
63
64 // Possible election results
/**
 * \brief Possible election results
 *
 * Returned by election_state() and election_count_vote(). Enumerators are
 * numbered consecutively starting from 0 (\c election_start).
 */
enum election_result {
    election_start = 0,     //!< New election needed
    election_in_progress,   //!< Election started but not all peers have voted
    election_lost,          //!< Local node lost most recent election
    election_won,           //!< Local node won most recent election
    election_error,         //!< Election message or object invalid
};
72
73 void election_reset(pcmk_cluster_t *cluster);
74 void election_init(pcmk_cluster_t *cluster, void (*cb)(pcmk_cluster_t *));
75
76 void election_timeout_set_period(pcmk_cluster_t *cluster, guint period_ms);
77 void election_timeout_stop(pcmk_cluster_t *cluster);
78
79 void election_vote(pcmk_cluster_t *cluster);
80 bool election_check(pcmk_cluster_t *cluster);
81 void election_remove(pcmk_cluster_t *cluster, const char *uname);
82 enum election_result election_state(const pcmk_cluster_t *cluster);
83 enum election_result election_count_vote(pcmk_cluster_t *cluster,
84 const xmlNode *message, bool can_win);
85 void election_clear_dampening(pcmk_cluster_t *cluster);
86
87 #ifdef __cplusplus
88 }
89 #endif
90
91 #endif // PCMK__CRM_CLUSTER_ELECTION_INTERNAL__H