This source file includes the following definitions.
- controld_shutdown_schedulerd_ipc
- save_cib_contents
- handle_disconnect
- handle_reply
- scheduler_event_callback
- new_schedulerd_ipc_connection
- do_pe_control
- controld_sched_timeout
- controld_stop_sched_timer
- controld_expect_sched_reply
- controld_free_sched_timer
- do_pe_invoke
- force_local_option
- do_pe_invoke_callback
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <unistd.h>
13
14 #include <crm/cib.h>
15 #include <crm/cluster.h>
16 #include <crm/common/xml.h>
17 #include <crm/crm.h>
18 #include <crm/common/xml_internal.h>
19 #include <crm/common/ipc.h>
20 #include <crm/common/ipc_schedulerd.h>
21
22 #include <libxml/xpath.h>
23
24 #include <pacemaker-controld.h>
25
// Forward declaration: called by controld_shutdown_schedulerd_ipc() before its definition
static void handle_disconnect(void);

// IPC API object for the scheduler connection (NULL until created, and again after shutdown)
static pcmk_ipc_api_t *schedulerd_api = NULL;
29
30
31
32
33
34 void
35 controld_shutdown_schedulerd_ipc(void)
36 {
37 controld_clear_fsa_input_flags(R_PE_REQUIRED);
38 pcmk_disconnect_ipc(schedulerd_api);
39 handle_disconnect();
40
41 pcmk_free_ipc_api(schedulerd_api);
42 schedulerd_api = NULL;
43 }
44
45
46
47
48
49
50
51
52
53
54
55
56
57 static void
58 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
59 void *user_data)
60 {
61 const char *id = user_data;
62
63 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
64 CRM_CHECK(id != NULL, return);
65
66 if (rc == pcmk_ok) {
67 char *filename = crm_strdup_printf(PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", id);
68
69 if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) {
70 crm_err("Could not save Cluster Information Base to %s after scheduler crash",
71 filename);
72 } else {
73 crm_notice("Saved Cluster Information Base to %s after scheduler crash",
74 filename);
75 }
76 free(filename);
77 }
78 }
79
80
81
82
83
84 static void
85 handle_disconnect(void)
86 {
87
88 controld_expect_sched_reply(NULL);
89
90 if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
91 int rc = pcmk_ok;
92 char *uuid_str = crm_generate_uuid();
93
94 crm_crit("Lost connection to the scheduler "
95 QB_XS " CIB will be saved to " PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2",
96 uuid_str);
97
98
99
100
101
102
103
104
105
106
107
108 rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
109 NULL, NULL, cib_none);
110 fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
111 }
112
113 controld_clear_fsa_input_flags(R_PE_CONNECTED);
114 controld_trigger_fsa();
115 return;
116 }
117
118 static void
119 handle_reply(pcmk_schedulerd_api_reply_t *reply)
120 {
121 const char *msg_ref = NULL;
122
123 if (!AM_I_DC) {
124 return;
125 }
126
127 msg_ref = reply->data.graph.reference;
128
129 if (msg_ref == NULL) {
130 crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
131
132 } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
133 pcmk__str_none)) {
134 ha_msg_input_t fsa_input;
135 xmlNode *crm_data_node;
136
137 controld_stop_sched_timer();
138
139
140
141
142
143
144
145
146
147 fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
148 crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
149 crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
150 reply->data.graph.input);
151
152 crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
153 pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
154 register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
155
156 pcmk__xml_free(fsa_input.msg);
157
158 } else {
159 crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
160 }
161 }
162
163 static void
164 scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
165 crm_exit_t status, void *event_data, void *user_data)
166 {
167 pcmk_schedulerd_api_reply_t *reply = event_data;
168
169 switch (event_type) {
170 case pcmk_ipc_event_disconnect:
171 handle_disconnect();
172 break;
173
174 case pcmk_ipc_event_reply:
175 handle_reply(reply);
176 break;
177
178 default:
179 break;
180 }
181 }
182
183 static bool
184 new_schedulerd_ipc_connection(void)
185 {
186 int rc;
187
188 controld_set_fsa_input_flags(R_PE_REQUIRED);
189
190 if (schedulerd_api == NULL) {
191 rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
192
193 if (rc != pcmk_rc_ok) {
194 crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
195 return false;
196 }
197 }
198
199 pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
200
201 rc = pcmk__connect_ipc_retry_conrefused(schedulerd_api, pcmk_ipc_dispatch_main, 3);
202 if (rc != pcmk_rc_ok) {
203 crm_err("Error connecting to %s: %s",
204 pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
205 return false;
206 }
207
208 controld_set_fsa_input_flags(R_PE_CONNECTED);
209 return true;
210 }
211
// Forward declaration: do_pe_invoke() registers this CIB query callback before its definition
static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
                                  xmlNode *output, void *user_data);
214
215
216 void
217 do_pe_control(long long action,
218 enum crmd_fsa_cause cause,
219 enum crmd_fsa_state cur_state,
220 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
221 {
222 if (pcmk_is_set(action, A_PE_STOP)) {
223 controld_clear_fsa_input_flags(R_PE_REQUIRED);
224 pcmk_disconnect_ipc(schedulerd_api);
225 handle_disconnect();
226 }
227 if (pcmk_is_set(action, A_PE_START)
228 && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
229
230 if (cur_state == S_STOPPING) {
231 crm_info("Ignoring request to connect to scheduler while shutting down");
232
233 } else if (!new_schedulerd_ipc_connection()) {
234 crm_warn("Could not connect to scheduler");
235 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
236 }
237 }
238 }
239
240 static int fsa_pe_query = 0;
241 static mainloop_timer_t *controld_sched_timer = NULL;
242
243
244 #define SCHED_TIMEOUT_MS (120000)
245
246
247
248
249
250
251
252
253
254 static gboolean
255 controld_sched_timeout(gpointer user_data)
256 {
257 if (AM_I_DC) {
258
259
260
261
262
263
264
265
266
267 crmd_exit(CRM_EX_FATAL);
268 }
269 return FALSE;
270 }
271
272 void
273 controld_stop_sched_timer(void)
274 {
275 if ((controld_sched_timer != NULL)
276 && (controld_globals.fsa_pe_ref != NULL)) {
277 crm_trace("Stopping timer for scheduler reply %s",
278 controld_globals.fsa_pe_ref);
279 }
280 mainloop_timer_stop(controld_sched_timer);
281 }
282
283
284
285
286
287
288
289
290
291 void
292 controld_expect_sched_reply(char *ref)
293 {
294 if (ref) {
295 if (controld_sched_timer == NULL) {
296 controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
297 SCHED_TIMEOUT_MS, FALSE,
298 controld_sched_timeout,
299 NULL);
300 }
301 mainloop_timer_start(controld_sched_timer);
302 } else {
303 controld_stop_sched_timer();
304 }
305 free(controld_globals.fsa_pe_ref);
306 controld_globals.fsa_pe_ref = ref;
307 }
308
309
310
311
312
313 void
314 controld_free_sched_timer(void)
315 {
316 if (controld_sched_timer != NULL) {
317 mainloop_timer_del(controld_sched_timer);
318 controld_sched_timer = NULL;
319 }
320 }
321
322
323 void
324 do_pe_invoke(long long action,
325 enum crmd_fsa_cause cause,
326 enum crmd_fsa_state cur_state,
327 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
328 {
329 cib_t *cib_conn = controld_globals.cib_conn;
330
331 if (AM_I_DC == FALSE) {
332 crm_err("Not invoking scheduler because not DC: %s",
333 fsa_action2string(action));
334 return;
335 }
336
337 if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
338 if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
339 crm_err("Cannot shut down gracefully without the scheduler");
340 register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
341
342 } else {
343 crm_info("Waiting for the scheduler to connect");
344 crmd_fsa_stall(FALSE);
345 controld_set_fsa_action_flags(A_PE_START);
346 controld_trigger_fsa();
347 }
348 return;
349 }
350
351 if (cur_state != S_POLICY_ENGINE) {
352 crm_notice("Not invoking scheduler because in state %s",
353 fsa_state2string(cur_state));
354 return;
355 }
356 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
357 crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
358
359
360 register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
361 return;
362 }
363
364 fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none);
365
366 crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
367 fsa_state2string(controld_globals.fsa_state));
368
369 controld_expect_sched_reply(NULL);
370 fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
371 }
372
373 static void
374 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
375 {
376 int max = 0;
377 int lpc = 0;
378 const char *xpath_base = NULL;
379 char *xpath_string = NULL;
380 xmlXPathObject *xpathObj = NULL;
381
382 xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
383 if (xpath_base == NULL) {
384 crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
385 return;
386 }
387
388 xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
389 xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET,
390 attr_name);
391 xpathObj = pcmk__xpath_search(xml->doc, xpath_string);
392 max = pcmk__xpath_num_results(xpathObj);
393 free(xpath_string);
394
395 for (lpc = 0; lpc < max; lpc++) {
396 xmlNode *match = pcmk__xpath_result(xpathObj, lpc);
397
398 if (match == NULL) {
399 continue;
400 }
401 crm_trace("Forcing %s/%s = %s",
402 pcmk__xe_id(match), attr_name, attr_value);
403 crm_xml_add(match, PCMK_XA_VALUE, attr_value);
404 }
405
406 if(max == 0) {
407 xmlNode *configuration = NULL;
408 xmlNode *crm_config = NULL;
409 xmlNode *cluster_property_set = NULL;
410
411 crm_trace("Creating %s-%s for %s=%s",
412 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name,
413 attr_value);
414
415 configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
416 NULL);
417 if (configuration == NULL) {
418 configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
419 }
420
421 crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
422 NULL, NULL);
423 if (crm_config == NULL) {
424 crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
425 }
426
427 cluster_property_set =
428 pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
429 NULL);
430 if (cluster_property_set == NULL) {
431 cluster_property_set =
432 pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
433 crm_xml_add(cluster_property_set, PCMK_XA_ID,
434 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
435 }
436
437 xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);
438
439 pcmk__xe_set_id(xml, "%s-%s",
440 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name);
441 crm_xml_add(xml, PCMK_XA_NAME, attr_name);
442 crm_xml_add(xml, PCMK_XA_VALUE, attr_value);
443 }
444 xmlXPathFreeObject(xpathObj);
445 }
446
447 static void
448 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
449 {
450 char *ref = NULL;
451 pid_t watchdog = pcmk__locate_sbd();
452
453 if (rc != pcmk_ok) {
454 crm_err("Could not retrieve the Cluster Information Base: %s "
455 QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
456 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
457 return;
458
459 } else if (call_id != fsa_pe_query) {
460 crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
461 return;
462
463 } else if (!AM_I_DC
464 || !pcmk_is_set(controld_globals.fsa_input_register,
465 R_PE_CONNECTED)) {
466 crm_debug("No need to invoke the scheduler anymore");
467 return;
468
469 } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
470 crm_debug("Discarding scheduler request in state: %s",
471 fsa_state2string(controld_globals.fsa_state));
472 return;
473
474
475 } else if (num_cib_op_callbacks() > 1) {
476 crm_debug("Re-asking for the CIB: %d other peer updates still pending",
477 (num_cib_op_callbacks() - 1));
478 sleep(1);
479 controld_set_fsa_action_flags(A_PE_INVOKE);
480 controld_trigger_fsa();
481 return;
482 }
483
484 CRM_LOG_ASSERT(output != NULL);
485
486
487
488 pcmk__refresh_node_caches_from_cib(output);
489
490 crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
491 pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM,
492 pcmk_is_set(controld_globals.flags,
493 controld_has_quorum));
494
495 force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
496
497 if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
498 && !pcmk__cluster_has_quorum()) {
499
500 crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
501 }
502
503 rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref);
504 if (rc != pcmk_rc_ok) {
505 free(ref);
506 crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d",
507 pcmk_rc_str(rc), rc);
508 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
509 } else {
510 pcmk__assert(ref != NULL);
511 controld_expect_sched_reply(ref);
512 crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
513 "quorate=%s",
514 fsa_pe_query, controld_globals.fsa_pe_ref,
515 controld_globals.peer_seq,
516 pcmk__flag_text(controld_globals.flags, controld_has_quorum));
517 }
518 }