This source file includes the following definitions.
- reap_dead_nodes
- post_cache_update
- crmd_node_update_complete
- create_node_state_update
- remove_conflicting_node_callback
- search_conflicting_node_callback
- node_list_update_callback
- populate_cib_nodes
- cib_quorum_update_complete
- crm_update_quorum
1
2
3
4
5
6
7
8
9
10
11 #include <crm_internal.h>
12
13 #include <string.h>
14
15 #include <crm/crm.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/common/xml_internal.h>
19 #include <crm/cluster/internal.h>
20
21 #include <pacemaker-controld.h>
22
23 gboolean membership_flux_hack = FALSE;
24 void post_cache_update(int instance);
25
26 int last_peer_update = 0;
27 guint highest_born_on = -1;
28
29 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
30
31 static void
32 reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
33 {
34 crm_node_t *node = value;
35
36 if (crm_is_peer_active(node) == FALSE) {
37 crm_update_peer_join(__func__, node, crm_join_none);
38
39 if(node && node->uname) {
40 if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
41 crm_err("We're not part of the cluster anymore");
42 register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
43
44 } else if (AM_I_DC == FALSE && pcmk__str_eq(node->uname, fsa_our_dc, pcmk__str_casei)) {
45 crm_warn("Our DC node (%s) left the cluster", node->uname);
46 register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
47 }
48 }
49
50 if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) {
51 check_join_state(fsa_state, __func__);
52 }
53 if(node && node->uuid) {
54 fail_incompletable_actions(transition_graph, node->uuid);
55 }
56 }
57 }
58
59 gboolean ever_had_quorum = FALSE;
60
61 void
62 post_cache_update(int instance)
63 {
64 xmlNode *no_op = NULL;
65
66 crm_peer_seq = instance;
67 crm_debug("Updated cache after membership event %d.", instance);
68
69 g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
70 controld_set_fsa_input_flags(R_MEMBERSHIP);
71
72 if (AM_I_DC) {
73 populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
74 node_update_expected, __func__);
75 }
76
77
78
79
80
81 controld_set_fsa_action_flags(A_ELECTION_CHECK);
82 trigger_fsa();
83
84
85
86
87 no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
88 AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
89 send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE);
90 free_xml(no_op);
91 }
92
93 static void
94 crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
95 {
96 fsa_data_t *msg_data = NULL;
97
98 last_peer_update = 0;
99
100 if (rc == pcmk_ok) {
101 crm_trace("Node update %d complete", call_id);
102
103 } else if(call_id < pcmk_ok) {
104 crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
105 crm_log_xml_debug(msg, "failed");
106 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
107
108 } else {
109 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
110 crm_log_xml_debug(msg, "failed");
111 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
112 }
113 }
114
115
116
117
118
119
120
121
122
123
124
125
126 xmlNode *
127 create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
128 const char *source)
129 {
130 const char *value = NULL;
131 xmlNode *node_state;
132
133 if (!node->state) {
134 crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
135 return NULL;
136 }
137
138 node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
139
140 if (pcmk_is_set(node->flags, crm_remote_node)) {
141 pcmk__xe_set_bool_attr(node_state, XML_NODE_IS_REMOTE, true);
142 }
143
144 set_uuid(node_state, XML_ATTR_UUID, node);
145
146 if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) {
147 crm_info("Node update for %s cancelled: no id", node->uname);
148 free_xml(node_state);
149 return NULL;
150 }
151
152 crm_xml_add(node_state, XML_ATTR_UNAME, node->uname);
153
154 if ((flags & node_update_cluster) && node->state) {
155 pcmk__xe_set_bool_attr(node_state, XML_NODE_IN_CLUSTER,
156 pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei));
157 }
158
159 if (!pcmk_is_set(node->flags, crm_remote_node)) {
160 if (flags & node_update_peer) {
161 value = OFFLINESTATUS;
162 if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
163 value = ONLINESTATUS;
164 }
165 crm_xml_add(node_state, XML_NODE_IS_PEER, value);
166 }
167
168 if (flags & node_update_join) {
169 if (node->join <= crm_join_none) {
170 value = CRMD_JOINSTATE_DOWN;
171 } else {
172 value = CRMD_JOINSTATE_MEMBER;
173 }
174 crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
175 }
176
177 if (flags & node_update_expected) {
178 crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
179 }
180 }
181
182 crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
183
184 return node_state;
185 }
186
187 static void
188 remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
189 xmlNode * output, void *user_data)
190 {
191 char *node_uuid = user_data;
192
193 do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
194 "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
195 node_uuid, pcmk_strerror(rc), rc);
196 }
197
198 static void
199 search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
200 xmlNode * output, void *user_data)
201 {
202 char *new_node_uuid = user_data;
203 xmlNode *node_xml = NULL;
204
205 if (rc != pcmk_ok) {
206 if (rc != -ENXIO) {
207 crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
208 new_node_uuid, pcmk_strerror(rc), rc);
209 }
210 return;
211
212 } else if (output == NULL) {
213 return;
214 }
215
216 if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) {
217 node_xml = output;
218
219 } else {
220 node_xml = pcmk__xml_first_child(output);
221 }
222
223 for (; node_xml != NULL; node_xml = pcmk__xml_next(node_xml)) {
224 const char *node_uuid = NULL;
225 const char *node_uname = NULL;
226 GHashTableIter iter;
227 crm_node_t *node = NULL;
228 gboolean known = FALSE;
229
230 if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) {
231 continue;
232 }
233
234 node_uuid = crm_element_value(node_xml, XML_ATTR_ID);
235 node_uname = crm_element_value(node_xml, XML_ATTR_UNAME);
236
237 if (node_uuid == NULL || node_uname == NULL) {
238 continue;
239 }
240
241 g_hash_table_iter_init(&iter, crm_peer_cache);
242 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
243 if (node->uuid
244 && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei)
245 && node->uname
246 && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) {
247
248 known = TRUE;
249 break;
250 }
251 }
252
253 if (known == FALSE) {
254 int delete_call_id = 0;
255 xmlNode *node_state_xml = NULL;
256
257 crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
258 node_uuid, node_uname, new_node_uuid);
259
260 delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_NODES, node_xml,
261 cib_scope_local | cib_quorum_override);
262 fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
263 remove_conflicting_node_callback);
264
265 node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE);
266 crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid);
267 crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname);
268
269 delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state_xml,
270 cib_scope_local | cib_quorum_override);
271 fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
272 remove_conflicting_node_callback);
273 free_xml(node_state_xml);
274 }
275 }
276 }
277
278 static void
279 node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
280 {
281 fsa_data_t *msg_data = NULL;
282
283 if(call_id < pcmk_ok) {
284 crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
285 crm_log_xml_debug(msg, "update:failed");
286 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
287
288 } else if(rc < pcmk_ok) {
289 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
290 crm_log_xml_debug(msg, "update:failed");
291 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
292 }
293 }
294
295 void
296 populate_cib_nodes(enum node_update_flags flags, const char *source)
297 {
298 int call_id = 0;
299 gboolean from_hashtable = TRUE;
300 int call_options = cib_scope_local | cib_quorum_override;
301 xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
302
303 #if SUPPORT_COROSYNC
304 if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) {
305 from_hashtable = pcmk__corosync_add_nodes(node_list);
306 }
307 #endif
308
309 if (from_hashtable) {
310 GHashTableIter iter;
311 crm_node_t *node = NULL;
312 GString *xpath = NULL;
313
314 g_hash_table_iter_init(&iter, crm_peer_cache);
315 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
316 xmlNode *new_node = NULL;
317
318 if ((node->uuid != NULL) && (node->uname != NULL)) {
319 crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
320 if (xpath == NULL) {
321 xpath = g_string_sized_new(512);
322 } else {
323 g_string_truncate(xpath, 0);
324 }
325
326
327 new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
328 crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
329 crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
330
331
332 pcmk__g_strcat(xpath,
333 "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION
334 "/" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE
335 "[@" XML_ATTR_UNAME "='", node->uname, "']"
336 "[@" XML_ATTR_ID "!='", node->uuid, "']", NULL);
337
338 call_id = fsa_cib_conn->cmds->query(fsa_cib_conn,
339 (const char *) xpath->str,
340 NULL,
341 cib_scope_local | cib_xpath);
342 fsa_register_cib_callback(call_id, FALSE, strdup(node->uuid),
343 search_conflicting_node_callback);
344 }
345 }
346
347 if (xpath != NULL) {
348 g_string_free(xpath, TRUE);
349 }
350 }
351
352 crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
353
354 fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL);
355 fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback);
356
357 free_xml(node_list);
358
359 if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) {
360
361
362
363
364 GHashTableIter iter;
365 crm_node_t *node = NULL;
366
367 node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
368
369 g_hash_table_iter_init(&iter, crm_peer_cache);
370 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
371 create_node_state_update(node, flags, node_list, source);
372 }
373
374 if (crm_remote_peer_cache) {
375 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
376 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
377 create_node_state_update(node, flags, node_list, source);
378 }
379 }
380
381 fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
382 fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
383 last_peer_update = call_id;
384
385 free_xml(node_list);
386 }
387 }
388
389 static void
390 cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
391 {
392 fsa_data_t *msg_data = NULL;
393
394 if (rc == pcmk_ok) {
395 crm_trace("Quorum update %d complete", call_id);
396
397 } else {
398 crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
399 crm_log_xml_debug(msg, "failed");
400 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
401 }
402 }
403
404 void
405 crm_update_quorum(gboolean quorum, gboolean force_update)
406 {
407 ever_had_quorum |= quorum;
408
409 if(ever_had_quorum && quorum == FALSE && no_quorum_suicide_escalation) {
410 pcmk__panic(__func__);
411 }
412
413 if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) {
414 int call_id = 0;
415 xmlNode *update = NULL;
416 int call_options = cib_scope_local | cib_quorum_override;
417
418 update = create_xml_node(NULL, XML_TAG_CIB);
419 crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum);
420 crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid);
421
422 fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL);
423 crm_debug("Updating quorum status to %s (call=%d)",
424 pcmk__btoa(quorum), call_id);
425 fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
426 free_xml(update);
427
428
429
430
431
432
433
434
435
436
437
438 if (quorum) {
439
440
441
442
443 abort_after_delay(INFINITY, pcmk__graph_restart, "Quorum gained",
444 5000);
445 } else {
446 abort_transition(INFINITY, pcmk__graph_restart, "Quorum lost",
447 NULL);
448 }
449 }
450 fsa_has_quorum = quorum;
451 }