This source file includes the following definitions.
- reap_dead_nodes
- post_cache_update
- crmd_node_update_complete
- create_node_state_update
- remove_conflicting_node_callback
- search_conflicting_node_callback
- node_list_update_callback
- populate_cib_nodes
- cib_quorum_update_complete
- crm_update_quorum
1
2
3
4
5
6
7
8
9
10
11 #include <crm_internal.h>
12
13 #include <string.h>
14
15 #include <crm/crm.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/common/xml_internal.h>
19 #include <crm/cluster/internal.h>
20
21 #include <pacemaker-controld.h>
22
23 gboolean membership_flux_hack = FALSE;
24 void post_cache_update(int instance);
25
26 int last_peer_update = 0;
27 guint highest_born_on = -1;
28
29 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
30
31 static void
32 reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
33 {
34 crm_node_t *node = value;
35
36 if (crm_is_peer_active(node) == FALSE) {
37 crm_update_peer_join(__func__, node, crm_join_none);
38
39 if(node && node->uname) {
40 if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
41 crm_err("We're not part of the cluster anymore");
42 register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
43
44 } else if (AM_I_DC == FALSE && pcmk__str_eq(node->uname, fsa_our_dc, pcmk__str_casei)) {
45 crm_warn("Our DC node (%s) left the cluster", node->uname);
46 register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
47 }
48 }
49
50 if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) {
51 check_join_state(fsa_state, __func__);
52 }
53 if(node && node->uuid) {
54 fail_incompletable_actions(transition_graph, node->uuid);
55 }
56 }
57 }
58
59 gboolean ever_had_quorum = FALSE;
60
61 void
62 post_cache_update(int instance)
63 {
64 xmlNode *no_op = NULL;
65
66 crm_peer_seq = instance;
67 crm_debug("Updated cache after membership event %d.", instance);
68
69 g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
70 controld_set_fsa_input_flags(R_MEMBERSHIP);
71
72 if (AM_I_DC) {
73 populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
74 node_update_expected, __func__);
75 }
76
77
78
79
80
81 controld_set_fsa_action_flags(A_ELECTION_CHECK);
82 trigger_fsa();
83
84
85
86
87 no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
88 AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
89 send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE);
90 free_xml(no_op);
91 }
92
93 static void
94 crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
95 {
96 fsa_data_t *msg_data = NULL;
97
98 last_peer_update = 0;
99
100 if (rc == pcmk_ok) {
101 crm_trace("Node update %d complete", call_id);
102
103 } else if(call_id < pcmk_ok) {
104 crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
105 crm_log_xml_debug(msg, "failed");
106 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
107
108 } else {
109 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
110 crm_log_xml_debug(msg, "failed");
111 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
112 }
113 }
114
115
116
117
118
119
120
121
122
123
124
125
126 xmlNode *
127 create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
128 const char *source)
129 {
130 const char *value = NULL;
131 xmlNode *node_state;
132
133 if (!node->state) {
134 crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
135 return NULL;
136 }
137
138 node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
139
140 if (pcmk_is_set(node->flags, crm_remote_node)) {
141 crm_xml_add(node_state, XML_NODE_IS_REMOTE, XML_BOOLEAN_TRUE);
142 }
143
144 set_uuid(node_state, XML_ATTR_UUID, node);
145
146 if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) {
147 crm_info("Node update for %s cancelled: no id", node->uname);
148 free_xml(node_state);
149 return NULL;
150 }
151
152 crm_xml_add(node_state, XML_ATTR_UNAME, node->uname);
153
154 if ((flags & node_update_cluster) && node->state) {
155 crm_xml_add_boolean(node_state, XML_NODE_IN_CLUSTER,
156 pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei));
157 }
158
159 if (!pcmk_is_set(node->flags, crm_remote_node)) {
160 if (flags & node_update_peer) {
161 value = OFFLINESTATUS;
162 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
163 value = ONLINESTATUS;
164 }
165 crm_xml_add(node_state, XML_NODE_IS_PEER, value);
166 }
167
168 if (flags & node_update_join) {
169 if (node->join <= crm_join_none) {
170 value = CRMD_JOINSTATE_DOWN;
171 } else {
172 value = CRMD_JOINSTATE_MEMBER;
173 }
174 crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
175 }
176
177 if (flags & node_update_expected) {
178 crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
179 }
180 }
181
182 crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
183
184 return node_state;
185 }
186
187 static void
188 remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
189 xmlNode * output, void *user_data)
190 {
191 char *node_uuid = user_data;
192
193 do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
194 "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
195 node_uuid, pcmk_strerror(rc), rc);
196 }
197
198 static void
199 search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
200 xmlNode * output, void *user_data)
201 {
202 char *new_node_uuid = user_data;
203 xmlNode *node_xml = NULL;
204
205 if (rc != pcmk_ok) {
206 if (rc != -ENXIO) {
207 crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
208 new_node_uuid, pcmk_strerror(rc), rc);
209 }
210 return;
211
212 } else if (output == NULL) {
213 return;
214 }
215
216 if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) {
217 node_xml = output;
218
219 } else {
220 node_xml = pcmk__xml_first_child(output);
221 }
222
223 for (; node_xml != NULL; node_xml = pcmk__xml_next(node_xml)) {
224 const char *node_uuid = NULL;
225 const char *node_uname = NULL;
226 GHashTableIter iter;
227 crm_node_t *node = NULL;
228 gboolean known = FALSE;
229
230 if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) {
231 continue;
232 }
233
234 node_uuid = crm_element_value(node_xml, XML_ATTR_ID);
235 node_uname = crm_element_value(node_xml, XML_ATTR_UNAME);
236
237 if (node_uuid == NULL || node_uname == NULL) {
238 continue;
239 }
240
241 g_hash_table_iter_init(&iter, crm_peer_cache);
242 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
243 if (node->uuid
244 && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei)
245 && node->uname
246 && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) {
247
248 known = TRUE;
249 break;
250 }
251 }
252
253 if (known == FALSE) {
254 int delete_call_id = 0;
255 xmlNode *node_state_xml = NULL;
256
257 crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
258 node_uuid, node_uname, new_node_uuid);
259
260 delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_NODES, node_xml,
261 cib_scope_local | cib_quorum_override);
262 fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
263 remove_conflicting_node_callback);
264
265 node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE);
266 crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid);
267 crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname);
268
269 delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state_xml,
270 cib_scope_local | cib_quorum_override);
271 fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
272 remove_conflicting_node_callback);
273 free_xml(node_state_xml);
274 }
275 }
276 }
277
278 static void
279 node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
280 {
281 fsa_data_t *msg_data = NULL;
282
283 if(call_id < pcmk_ok) {
284 crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
285 crm_log_xml_debug(msg, "update:failed");
286 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
287
288 } else if(rc < pcmk_ok) {
289 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
290 crm_log_xml_debug(msg, "update:failed");
291 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
292 }
293 }
294
295 #define NODE_PATH_MAX 512
296
297 void
298 populate_cib_nodes(enum node_update_flags flags, const char *source)
299 {
300 int call_id = 0;
301 gboolean from_hashtable = TRUE;
302 int call_options = cib_scope_local | cib_quorum_override;
303 xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
304
305 #if SUPPORT_COROSYNC
306 if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) {
307 from_hashtable = pcmk__corosync_add_nodes(node_list);
308 }
309 #endif
310
311 if (from_hashtable) {
312 GHashTableIter iter;
313 crm_node_t *node = NULL;
314
315 g_hash_table_iter_init(&iter, crm_peer_cache);
316 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
317 xmlNode *new_node = NULL;
318
319 crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
320 if(node->uuid && node->uname) {
321 char xpath[NODE_PATH_MAX];
322
323
324 new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
325 crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
326 crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
327
328
329 snprintf(xpath, NODE_PATH_MAX,
330 "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES
331 "/" XML_CIB_TAG_NODE "[@uname='%s'][@id!='%s']",
332 node->uname, node->uuid);
333
334 call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, xpath, NULL,
335 cib_scope_local | cib_xpath);
336 fsa_register_cib_callback(call_id, FALSE, strdup(node->uuid),
337 search_conflicting_node_callback);
338 }
339 }
340 }
341
342 crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
343
344 fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL);
345 fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback);
346
347 free_xml(node_list);
348
349 if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) {
350
351
352
353
354 GHashTableIter iter;
355 crm_node_t *node = NULL;
356
357 node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
358
359 g_hash_table_iter_init(&iter, crm_peer_cache);
360 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
361 create_node_state_update(node, flags, node_list, source);
362 }
363
364 if (crm_remote_peer_cache) {
365 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
366 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
367 create_node_state_update(node, flags, node_list, source);
368 }
369 }
370
371 fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
372 fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
373 last_peer_update = call_id;
374
375 free_xml(node_list);
376 }
377 }
378
379 static void
380 cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
381 {
382 fsa_data_t *msg_data = NULL;
383
384 if (rc == pcmk_ok) {
385 crm_trace("Quorum update %d complete", call_id);
386
387 } else {
388 crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
389 crm_log_xml_debug(msg, "failed");
390 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
391 }
392 }
393
394 void
395 crm_update_quorum(gboolean quorum, gboolean force_update)
396 {
397 ever_had_quorum |= quorum;
398
399 if(ever_had_quorum && quorum == FALSE && no_quorum_suicide_escalation) {
400 pcmk__panic(__func__);
401 }
402
403 if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) {
404 int call_id = 0;
405 xmlNode *update = NULL;
406 int call_options = cib_scope_local | cib_quorum_override;
407
408 update = create_xml_node(NULL, XML_TAG_CIB);
409 crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum);
410 crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid);
411
412 fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL);
413 crm_debug("Updating quorum status to %s (call=%d)",
414 pcmk__btoa(quorum), call_id);
415 fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
416 free_xml(update);
417
418
419
420
421
422
423
424
425
426
427
428 if (quorum) {
429
430
431
432
433 abort_after_delay(INFINITY, tg_restart, "Quorum gained", 5000);
434 } else {
435 abort_transition(INFINITY, tg_restart, "Quorum lost", NULL);
436 }
437 }
438 fsa_has_quorum = quorum;
439 }