This source file includes the following definitions.
- pe_subsystem_free
- save_cib_contents
- pe_ipc_destroy
- pe_ipc_dispatch
- pe_subsystem_new
- pe_subsystem_send
- do_pe_control
- controld_sched_timeout
- controld_stop_sched_timer
- controld_expect_sched_reply
- controld_free_sched_timer
- do_pe_invoke
- force_local_option
- do_pe_invoke_callback
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <unistd.h>
13
14 #include <crm/cib.h>
15 #include <crm/cluster.h>
16 #include <crm/common/xml.h>
17 #include <crm/crm.h>
18 #include <crm/msg_xml.h>
19 #include <crm/common/xml_internal.h>
20
21 #include <pacemaker-controld.h>
22
/* Mainloop IPC client for the scheduler connection (NULL when not connected) */
static mainloop_io_t *pe_subsystem = NULL;
24
25
26
27
28
29 void
30 pe_subsystem_free(void)
31 {
32 controld_clear_fsa_input_flags(R_PE_REQUIRED);
33 if (pe_subsystem) {
34 controld_expect_sched_reply(NULL);
35 mainloop_del_ipc_client(pe_subsystem);
36 pe_subsystem = NULL;
37 controld_clear_fsa_input_flags(R_PE_CONNECTED);
38 }
39 }
40
41
42
43
44
45
46
47
48
49
50
51
52
53 static void
54 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
55 void *user_data)
56 {
57 char *id = user_data;
58
59 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
60 CRM_CHECK(id != NULL, return);
61
62 if (rc == pcmk_ok) {
63 char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
64
65 if (write_xml_file(output, filename, TRUE) < 0) {
66 crm_err("Could not save Cluster Information Base to %s after scheduler crash",
67 filename);
68 } else {
69 crm_notice("Saved Cluster Information Base to %s after scheduler crash",
70 filename);
71 }
72 free(filename);
73 }
74 }
75
76
77
78
79
80
81
82 static void
83 pe_ipc_destroy(gpointer user_data)
84 {
85
86 controld_expect_sched_reply(NULL);
87
88 if (pcmk_is_set(fsa_input_register, R_PE_REQUIRED)) {
89 int rc = pcmk_ok;
90 char *uuid_str = crm_generate_uuid();
91
92 crm_crit("Connection to the scheduler failed "
93 CRM_XS " uuid=%s", uuid_str);
94
95
96
97
98
99
100
101
102
103
104
105 rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
106 fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);
107
108 } else {
109 crm_info("Connection to the scheduler released");
110 }
111
112 controld_clear_fsa_input_flags(R_PE_CONNECTED);
113 pe_subsystem = NULL;
114 mainloop_set_trigger(fsa_source);
115 return;
116 }
117
118
119
120
121
122
123
124
125
126
127
128 static int
129 pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata)
130 {
131 xmlNode *msg = string2xml(buffer);
132
133 if (msg) {
134 route_message(C_IPC_MESSAGE, msg);
135 }
136 free_xml(msg);
137 return 0;
138 }
139
140
141
142
143
144
145
146 static bool
147 pe_subsystem_new(void)
148 {
149 struct ipc_client_callbacks pe_callbacks = {
150 .dispatch = pe_ipc_dispatch,
151 .destroy = pe_ipc_destroy
152 };
153 static bool retry_one = TRUE;
154
155 controld_set_fsa_input_flags(R_PE_REQUIRED);
156 retry:
157 pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE,
158 G_PRIORITY_DEFAULT,
159 5 * 1024 * 1024 ,
160 NULL, &pe_callbacks);
161 if (pe_subsystem == NULL) {
162 crm_debug("Could not connect to scheduler : %s(%d)", pcmk_rc_str(errno), errno);
163 if (errno == EAGAIN && retry_one) {
164
165
166 crm_debug("Scheduler connection attempt.");
167 retry_one = FALSE;
168 goto retry;
169 }
170 return FALSE;
171 }
172 controld_set_fsa_input_flags(R_PE_CONNECTED);
173 return TRUE;
174 }
175
176
177
178
179
180
181
182
183
184 static int
185 pe_subsystem_send(xmlNode *cmd)
186 {
187 if (pe_subsystem) {
188 int sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd,
189 0, 0, NULL);
190
191 if (sent == 0) {
192 sent = -ENODATA;
193 } else if (sent > 0) {
194 sent = pcmk_ok;
195 }
196 return sent;
197 }
198 return -ENOTCONN;
199 }
200
/* Forward declaration: do_pe_invoke() registers this as its CIB query callback
 * before the definition appears later in this file.
 */
static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
                                  xmlNode *output, void *user_data);
203
204
205 void
206 do_pe_control(long long action,
207 enum crmd_fsa_cause cause,
208 enum crmd_fsa_state cur_state,
209 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
210 {
211 if (action & A_PE_STOP) {
212 pe_subsystem_free();
213 }
214 if ((action & A_PE_START)
215 && !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
216
217 if (cur_state == S_STOPPING) {
218 crm_info("Ignoring request to connect to scheduler while shutting down");
219
220 } else if (!pe_subsystem_new()) {
221 crm_warn("Could not connect to scheduler");
222 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
223 }
224 }
225 }
226
/* Call ID of the most recent CIB query issued by do_pe_invoke(); the query
 * callback ignores results whose call ID differs from this
 */
int fsa_pe_query = 0;

/* Reference of the scheduler request whose reply is currently expected
 * (NULL when none); owned and replaced by controld_expect_sched_reply()
 */
char *fsa_pe_ref = NULL;

/* Timer that runs while a scheduler reply is expected (created on first use) */
static mainloop_timer_t *controld_sched_timer = NULL;


/* Interval passed to the scheduler reply timer when it is created */
#define SCHED_TIMEOUT_MS (120000)
233
234
235
236
237
238
239
240
241
242 static gboolean
243 controld_sched_timeout(gpointer user_data)
244 {
245 if (AM_I_DC) {
246
247
248
249
250
251
252
253
254
255 crmd_exit(CRM_EX_FATAL);
256 }
257 return FALSE;
258 }
259
260 void
261 controld_stop_sched_timer(void)
262 {
263 if (controld_sched_timer && fsa_pe_ref) {
264 crm_trace("Stopping timer for scheduler reply %s", fsa_pe_ref);
265 }
266 mainloop_timer_stop(controld_sched_timer);
267 }
268
269
270
271
272
273
274
275 void
276 controld_expect_sched_reply(xmlNode *msg)
277 {
278 char *ref = NULL;
279
280 if (msg) {
281 ref = crm_element_value_copy(msg, XML_ATTR_REFERENCE);
282 CRM_ASSERT(ref != NULL);
283
284 if (controld_sched_timer == NULL) {
285 controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
286 SCHED_TIMEOUT_MS, FALSE,
287 controld_sched_timeout,
288 NULL);
289 }
290 mainloop_timer_start(controld_sched_timer);
291 } else {
292 controld_stop_sched_timer();
293 }
294 free(fsa_pe_ref);
295 fsa_pe_ref = ref;
296 }
297
298
299
300
301
302 void
303 controld_free_sched_timer(void)
304 {
305 if (controld_sched_timer != NULL) {
306 mainloop_timer_del(controld_sched_timer);
307 controld_sched_timer = NULL;
308 }
309 }
310
311
312 void
313 do_pe_invoke(long long action,
314 enum crmd_fsa_cause cause,
315 enum crmd_fsa_state cur_state,
316 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
317 {
318 if (AM_I_DC == FALSE) {
319 crm_err("Not invoking scheduler because not DC: %s",
320 fsa_action2string(action));
321 return;
322 }
323
324 if (!pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
325 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
326 crm_err("Cannot shut down gracefully without the scheduler");
327 register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
328
329 } else {
330 crm_info("Waiting for the scheduler to connect");
331 crmd_fsa_stall(FALSE);
332 controld_set_fsa_action_flags(A_PE_START);
333 trigger_fsa();
334 }
335 return;
336 }
337
338 if (cur_state != S_POLICY_ENGINE) {
339 crm_notice("Not invoking scheduler because in state %s",
340 fsa_state2string(cur_state));
341 return;
342 }
343 if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
344 crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
345
346
347 register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
348 return;
349 }
350
351 fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
352
353 crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
354 fsa_state2string(fsa_state));
355
356 controld_expect_sched_reply(NULL);
357 fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
358 }
359
360 static void
361 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
362 {
363 int max = 0;
364 int lpc = 0;
365 char *xpath_string = NULL;
366 xmlXPathObjectPtr xpathObj = NULL;
367
368 xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']",
369 get_object_path(XML_CIB_TAG_CRMCONFIG),
370 XML_CIB_TAG_PROPSET, attr_name);
371 xpathObj = xpath_search(xml, xpath_string);
372 max = numXpathResults(xpathObj);
373 free(xpath_string);
374
375 for (lpc = 0; lpc < max; lpc++) {
376 xmlNode *match = getXpathResult(xpathObj, lpc);
377 crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
378 crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
379 }
380
381 if(max == 0) {
382 xmlNode *configuration = NULL;
383 xmlNode *crm_config = NULL;
384 xmlNode *cluster_property_set = NULL;
385
386 crm_trace("Creating %s-%s for %s=%s",
387 CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
388
389 configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL,
390 NULL);
391 if (configuration == NULL) {
392 configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
393 }
394
395 crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL,
396 NULL);
397 if (crm_config == NULL) {
398 crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
399 }
400
401 cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET,
402 NULL, NULL);
403 if (cluster_property_set == NULL) {
404 cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
405 crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
406 }
407
408 xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
409
410 crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
411 crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
412 crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
413 }
414 freeXpathObject(xpathObj);
415 }
416
417 static void
418 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
419 {
420 xmlNode *cmd = NULL;
421 pid_t watchdog = pcmk__locate_sbd();
422
423 if (rc != pcmk_ok) {
424 crm_err("Could not retrieve the Cluster Information Base: %s "
425 CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
426 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
427 return;
428
429 } else if (call_id != fsa_pe_query) {
430 crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
431 return;
432
433 } else if (!AM_I_DC || !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
434 crm_debug("No need to invoke the scheduler anymore");
435 return;
436
437 } else if (fsa_state != S_POLICY_ENGINE) {
438 crm_debug("Discarding scheduler request in state: %s",
439 fsa_state2string(fsa_state));
440 return;
441
442
443 } else if (num_cib_op_callbacks() > 1) {
444 crm_debug("Re-asking for the CIB: %d other peer updates still pending",
445 (num_cib_op_callbacks() - 1));
446 sleep(1);
447 controld_set_fsa_action_flags(A_PE_INVOKE);
448 trigger_fsa();
449 return;
450 }
451
452 CRM_LOG_ASSERT(output != NULL);
453
454
455
456 pcmk__refresh_node_caches_from_cib(output);
457
458 crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
459 crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
460
461 force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
462
463 if (ever_had_quorum && crm_have_quorum == FALSE) {
464 crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
465 }
466
467 cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL);
468
469 rc = pe_subsystem_send(cmd);
470 if (rc < 0) {
471 crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
472 pcmk_strerror(rc), rc);
473 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
474 } else {
475 controld_expect_sched_reply(cmd);
476 crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d",
477 fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
478 }
479 free_xml(cmd);
480 }