This source file includes the following definitions.
- reap_dead_nodes
- post_cache_update
- crmd_node_update_complete
- create_node_state_update
- remove_conflicting_node_callback
- search_conflicting_node_callback
- node_list_update_callback
- populate_cib_nodes
- cib_quorum_update_complete
- crm_update_quorum
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 #include <crm_internal.h>
21
22 #include <string.h>
23
24 #include <crm/crm.h>
25
26 #include <crm/msg_xml.h>
27 #include <crm/common/xml.h>
28 #include <crm/cluster/internal.h>
29 #include <crmd_messages.h>
30 #include <crmd_fsa.h>
31 #include <crmd_lrm.h>
32 #include <fsa_proto.h>
33 #include <crmd_callbacks.h>
34 #include <tengine.h>
35 #include <membership.h>
36 #include <crmd.h>
37
/* NOTE(review): not referenced by any function visible in this part of the
 * file; purpose cannot be determined from here.
 */
38 gboolean membership_flux_hack = FALSE;
/* Forward declaration; the definition follows further down in this file. */
39 void post_cache_update(int instance);
40
/* Call ID of the node status update most recently submitted by
 * populate_cib_nodes(); crmd_node_update_complete() resets it to 0.
 */
41 int last_peer_update = 0;
/* Assigning -1 to an unsigned guint yields the largest representable value.
 * NOTE(review): not referenced by any function visible in this part of the file.
 */
42 guint highest_born_on = -1;
43
/* Defined outside this file; called from reap_dead_nodes(). */
44 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
45
46 static void
47 reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
48 {
49 crm_node_t *node = value;
50
51 if (crm_is_peer_active(node) == FALSE) {
52 crm_update_peer_join(__FUNCTION__, node, crm_join_none);
53
54 if(node && node->uname) {
55 election_remove(fsa_election, node->uname);
56
57 if (safe_str_eq(fsa_our_uname, node->uname)) {
58 crm_err("We're not part of the cluster anymore");
59 register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
60
61 } else if (AM_I_DC == FALSE && safe_str_eq(node->uname, fsa_our_dc)) {
62 crm_warn("Our DC node (%s) left the cluster", node->uname);
63 register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
64 }
65 }
66
67 if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) {
68 check_join_state(fsa_state, __FUNCTION__);
69 }
70 if(node && node->uuid) {
71 fail_incompletable_actions(transition_graph, node->uuid);
72 }
73 }
74 }
75
/* Accumulated by crm_update_quorum() with "|=", so once a call reports quorum
 * this stays set; crm_update_quorum() reads it when deciding whether to panic.
 */
76 gboolean ever_had_quorum = FALSE;
77
78 void
79 post_cache_update(int instance)
80 {
81 xmlNode *no_op = NULL;
82
83 crm_peer_seq = instance;
84 crm_debug("Updated cache after membership event %d.", instance);
85
86 g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL);
87 set_bit(fsa_input_register, R_MEMBERSHIP);
88
89 if (AM_I_DC) {
90 populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
91 node_update_expected, __FUNCTION__);
92 }
93
94
95
96
97
98 register_fsa_action(A_ELECTION_CHECK);
99
100
101
102
103 no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
104 AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
105 send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE);
106 free_xml(no_op);
107 }
108
109 static void
110 crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
111 {
112 fsa_data_t *msg_data = NULL;
113
114 last_peer_update = 0;
115
116 if (rc == pcmk_ok) {
117 crm_trace("Node update %d complete", call_id);
118
119 } else if(call_id < pcmk_ok) {
120 crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
121 crm_log_xml_debug(msg, "failed");
122 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
123
124 } else {
125 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
126 crm_log_xml_debug(msg, "failed");
127 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
128 }
129 }
130
131
132
133
134
135
136
137
138
139
140
141
142 xmlNode *
143 create_node_state_update(crm_node_t *node, int flags, xmlNode *parent,
144 const char *source)
145 {
146 const char *value = NULL;
147 xmlNode *node_state;
148
149 if (!node->state) {
150 crm_info("Node update for %s cancelled: no state, not seen yet", node->uname);
151 return NULL;
152 }
153
154 node_state = create_xml_node(parent, XML_CIB_TAG_STATE);
155
156 if (is_set(node->flags, crm_remote_node)) {
157 crm_xml_add(node_state, XML_NODE_IS_REMOTE, XML_BOOLEAN_TRUE);
158 }
159
160 set_uuid(node_state, XML_ATTR_UUID, node);
161
162 if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) {
163 crm_info("Node update for %s cancelled: no id", node->uname);
164 free_xml(node_state);
165 return NULL;
166 }
167
168 crm_xml_add(node_state, XML_ATTR_UNAME, node->uname);
169
170 if ((flags & node_update_cluster) && node->state) {
171 crm_xml_add_boolean(node_state, XML_NODE_IN_CLUSTER,
172 safe_str_eq(node->state, CRM_NODE_MEMBER));
173 }
174
175 if (!is_set(node->flags, crm_remote_node)) {
176 if (flags & node_update_peer) {
177 value = OFFLINESTATUS;
178 if (node->processes & proc_flags) {
179 value = ONLINESTATUS;
180 }
181 crm_xml_add(node_state, XML_NODE_IS_PEER, value);
182 }
183
184 if (flags & node_update_join) {
185 if (node->join <= crm_join_none) {
186 value = CRMD_JOINSTATE_DOWN;
187 } else {
188 value = CRMD_JOINSTATE_MEMBER;
189 }
190 crm_xml_add(node_state, XML_NODE_JOIN_STATE, value);
191 }
192
193 if (flags & node_update_expected) {
194 crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected);
195 }
196 }
197
198 crm_xml_add(node_state, XML_ATTR_ORIGIN, source);
199
200 return node_state;
201 }
202
203 static void
204 remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
205 xmlNode * output, void *user_data)
206 {
207 char *node_uuid = user_data;
208
209 do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE,
210 "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
211 node_uuid, pcmk_strerror(rc), rc);
212 }
213
214 static void
215 search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
216 xmlNode * output, void *user_data)
217 {
218 char *new_node_uuid = user_data;
219 xmlNode *node_xml = NULL;
220
221 if (rc != pcmk_ok) {
222 if (rc != -ENXIO) {
223 crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
224 new_node_uuid, pcmk_strerror(rc), rc);
225 }
226 return;
227
228 } else if (output == NULL) {
229 return;
230 }
231
232 if (safe_str_eq(crm_element_name(output), XML_CIB_TAG_NODE)) {
233 node_xml = output;
234
235 } else {
236 node_xml = __xml_first_child(output);
237 }
238
239 for (; node_xml != NULL; node_xml = __xml_next(node_xml)) {
240 const char *node_uuid = NULL;
241 const char *node_uname = NULL;
242 GHashTableIter iter;
243 crm_node_t *node = NULL;
244 gboolean known = FALSE;
245
246 if (safe_str_neq(crm_element_name(node_xml), XML_CIB_TAG_NODE)) {
247 continue;
248 }
249
250 node_uuid = crm_element_value(node_xml, XML_ATTR_ID);
251 node_uname = crm_element_value(node_xml, XML_ATTR_UNAME);
252
253 if (node_uuid == NULL || node_uname == NULL) {
254 continue;
255 }
256
257 g_hash_table_iter_init(&iter, crm_peer_cache);
258 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
259 if (node->uuid
260 && safe_str_eq(node->uuid, node_uuid)
261 && node->uname
262 && safe_str_eq(node->uname, node_uname)) {
263
264 known = TRUE;
265 break;
266 }
267 }
268
269 if (known == FALSE) {
270 int delete_call_id = 0;
271 xmlNode *node_state_xml = NULL;
272
273 crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
274 node_uuid, node_uname, new_node_uuid);
275
276 delete_call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_NODES, node_xml,
277 cib_scope_local | cib_quorum_override);
278 fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
279 remove_conflicting_node_callback);
280
281 node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE);
282 crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid);
283 crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname);
284
285 delete_call_id = fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state_xml,
286 cib_scope_local | cib_quorum_override);
287 fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid),
288 remove_conflicting_node_callback);
289 free_xml(node_state_xml);
290 }
291 }
292 }
293
294 static void
295 node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
296 {
297 fsa_data_t *msg_data = NULL;
298
299 if(call_id < pcmk_ok) {
300 crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id);
301 crm_log_xml_debug(msg, "update:failed");
302 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
303
304 } else if(rc < pcmk_ok) {
305 crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
306 crm_log_xml_debug(msg, "update:failed");
307 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
308 }
309 }
310
311 #define NODE_PATH_MAX 512
312
313 void
314 populate_cib_nodes(enum node_update_flags flags, const char *source)
315 {
316 int call_id = 0;
317 gboolean from_hashtable = TRUE;
318 int call_options = cib_scope_local | cib_quorum_override;
319 xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES);
320
321 #if SUPPORT_HEARTBEAT
322 if (is_not_set(flags, node_update_quick) && is_heartbeat_cluster()) {
323 from_hashtable = heartbeat_initialize_nodelist(fsa_cluster_conn, FALSE, node_list);
324 }
325 #endif
326
327 #if SUPPORT_COROSYNC
328 # if !SUPPORT_PLUGIN
329 if (is_not_set(flags, node_update_quick) && is_corosync_cluster()) {
330 from_hashtable = corosync_initialize_nodelist(NULL, FALSE, node_list);
331 }
332 # endif
333 #endif
334
335 if (from_hashtable) {
336 GHashTableIter iter;
337 crm_node_t *node = NULL;
338
339 g_hash_table_iter_init(&iter, crm_peer_cache);
340 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
341 xmlNode *new_node = NULL;
342
343 crm_trace("Creating node entry for %s/%s", node->uname, node->uuid);
344 if(node->uuid && node->uname) {
345 char xpath[NODE_PATH_MAX];
346
347
348 new_node = create_xml_node(node_list, XML_CIB_TAG_NODE);
349 crm_xml_add(new_node, XML_ATTR_ID, node->uuid);
350 crm_xml_add(new_node, XML_ATTR_UNAME, node->uname);
351
352
353 snprintf(xpath, NODE_PATH_MAX,
354 "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES
355 "/" XML_CIB_TAG_NODE "[@uname='%s'][@id!='%s']",
356 node->uname, node->uuid);
357
358 call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, xpath, NULL,
359 cib_scope_local | cib_xpath);
360 fsa_register_cib_callback(call_id, FALSE, strdup(node->uuid),
361 search_conflicting_node_callback);
362 }
363 }
364 }
365
366 crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");
367
368 fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL);
369 fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback);
370
371 free_xml(node_list);
372
373 if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) {
374
375
376
377
378 GHashTableIter iter;
379 crm_node_t *node = NULL;
380
381 node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS);
382
383 g_hash_table_iter_init(&iter, crm_peer_cache);
384 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
385 create_node_state_update(node, flags, node_list, source);
386 }
387
388 if (crm_remote_peer_cache) {
389 g_hash_table_iter_init(&iter, crm_remote_peer_cache);
390 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
391 create_node_state_update(node, flags, node_list, source);
392 }
393 }
394
395 fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL);
396 fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete);
397 last_peer_update = call_id;
398
399 free_xml(node_list);
400 }
401 }
402
403 static void
404 cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
405 {
406 fsa_data_t *msg_data = NULL;
407
408 if (rc == pcmk_ok) {
409 crm_trace("Quorum update %d complete", call_id);
410
411 } else {
412 crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
413 crm_log_xml_debug(msg, "failed");
414 register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
415 }
416 }
417
418 void
419 crm_update_quorum(gboolean quorum, gboolean force_update)
420 {
421 ever_had_quorum |= quorum;
422
423 if(ever_had_quorum && quorum == FALSE && no_quorum_suicide_escalation) {
424 pcmk_panic(__FUNCTION__);
425 }
426
427 if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) {
428 int call_id = 0;
429 xmlNode *update = NULL;
430 int call_options = cib_scope_local | cib_quorum_override;
431
432 update = create_xml_node(NULL, XML_TAG_CIB);
433 crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum);
434 crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid);
435
436 fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL);
437 crm_debug("Updating quorum status to %s (call=%d)", quorum ? "true" : "false", call_id);
438 fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete);
439 free_xml(update);
440
441
442
443
444
445 if (quorum == FALSE) {
446 abort_transition(INFINITY, tg_restart, "Quorum loss", NULL);
447 }
448 }
449 fsa_has_quorum = quorum;
450 }