This source file includes the following definitions.
- controld_shutdown_schedulerd_ipc
- save_cib_contents
- handle_disconnect
- handle_reply
- scheduler_event_callback
- new_schedulerd_ipc_connection
- do_pe_control
- controld_sched_timeout
- controld_stop_sched_timer
- controld_expect_sched_reply
- controld_free_sched_timer
- do_pe_invoke
- force_local_option
- do_pe_invoke_callback
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <unistd.h>
13
14 #include <crm/cib.h>
15 #include <crm/cluster.h>
16 #include <crm/common/xml.h>
17 #include <crm/crm.h>
18 #include <crm/common/xml_internal.h>
19 #include <crm/common/ipc.h>
20 #include <crm/common/ipc_schedulerd.h>
21
22 #include <pacemaker-controld.h>
23
/* Forward declaration: handle_disconnect() is defined further down in this
 * file but is already called by controld_shutdown_schedulerd_ipc().
 */
24 static void handle_disconnect(void);
25
/* IPC API object for the scheduler connection. It is allocated on demand by
 * new_schedulerd_ipc_connection() and freed and reset to NULL by
 * controld_shutdown_schedulerd_ipc().
 */
26 static pcmk_ipc_api_t *schedulerd_api = NULL;
27
28
29
30
31
32 void
33 controld_shutdown_schedulerd_ipc(void)
34 {
35 controld_clear_fsa_input_flags(R_PE_REQUIRED);
36 pcmk_disconnect_ipc(schedulerd_api);
37 handle_disconnect();
38
39 pcmk_free_ipc_api(schedulerd_api);
40 schedulerd_api = NULL;
41 }
42
43
44
45
46
47
48
49
50
51
52
53
54
55 static void
56 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
57 void *user_data)
58 {
59 const char *id = user_data;
60
61 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
62 CRM_CHECK(id != NULL, return);
63
64 if (rc == pcmk_ok) {
65 char *filename = crm_strdup_printf(PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", id);
66
67 if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) {
68 crm_err("Could not save Cluster Information Base to %s after scheduler crash",
69 filename);
70 } else {
71 crm_notice("Saved Cluster Information Base to %s after scheduler crash",
72 filename);
73 }
74 free(filename);
75 }
76 }
77
78
79
80
81
82 static void
83 handle_disconnect(void)
84 {
85
86 controld_expect_sched_reply(NULL);
87
88 if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
89 int rc = pcmk_ok;
90 char *uuid_str = crm_generate_uuid();
91
92 crm_crit("Lost connection to the scheduler "
93 QB_XS " CIB will be saved to " PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2",
94 uuid_str);
95
96
97
98
99
100
101
102
103
104
105
106 rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
107 NULL, NULL, cib_none);
108 fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
109 }
110
111 controld_clear_fsa_input_flags(R_PE_CONNECTED);
112 controld_trigger_fsa();
113 return;
114 }
115
116 static void
117 handle_reply(pcmk_schedulerd_api_reply_t *reply)
118 {
119 const char *msg_ref = NULL;
120
121 if (!AM_I_DC) {
122 return;
123 }
124
125 msg_ref = reply->data.graph.reference;
126
127 if (msg_ref == NULL) {
128 crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
129
130 } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
131 pcmk__str_none)) {
132 ha_msg_input_t fsa_input;
133 xmlNode *crm_data_node;
134
135 controld_stop_sched_timer();
136
137
138
139
140
141
142
143
144
145 fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
146 crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
147 crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
148 reply->data.graph.input);
149
150 crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
151 pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
152 register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
153
154 pcmk__xml_free(fsa_input.msg);
155
156 } else {
157 crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
158 }
159 }
160
161 static void
162 scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
163 crm_exit_t status, void *event_data, void *user_data)
164 {
165 pcmk_schedulerd_api_reply_t *reply = event_data;
166
167 switch (event_type) {
168 case pcmk_ipc_event_disconnect:
169 handle_disconnect();
170 break;
171
172 case pcmk_ipc_event_reply:
173 handle_reply(reply);
174 break;
175
176 default:
177 break;
178 }
179 }
180
181 static bool
182 new_schedulerd_ipc_connection(void)
183 {
184 int rc;
185
186 controld_set_fsa_input_flags(R_PE_REQUIRED);
187
188 if (schedulerd_api == NULL) {
189 rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
190
191 if (rc != pcmk_rc_ok) {
192 crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
193 return false;
194 }
195 }
196
197 pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
198
199 rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3);
200 if (rc != pcmk_rc_ok) {
201 crm_err("Error connecting to %s: %s",
202 pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
203 return false;
204 }
205
206 controld_set_fsa_input_flags(R_PE_CONNECTED);
207 return true;
208 }
209
/* Forward declaration: do_pe_invoke_callback() is defined at the end of this
 * file and is registered as a CIB query callback by do_pe_invoke().
 */
210 static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
211 xmlNode *output, void *user_data);
212
213
214 void
215 do_pe_control(long long action,
216 enum crmd_fsa_cause cause,
217 enum crmd_fsa_state cur_state,
218 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
219 {
220 if (pcmk_is_set(action, A_PE_STOP)) {
221 controld_clear_fsa_input_flags(R_PE_REQUIRED);
222 pcmk_disconnect_ipc(schedulerd_api);
223 handle_disconnect();
224 }
225 if (pcmk_is_set(action, A_PE_START)
226 && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
227
228 if (cur_state == S_STOPPING) {
229 crm_info("Ignoring request to connect to scheduler while shutting down");
230
231 } else if (!new_schedulerd_ipc_connection()) {
232 crm_warn("Could not connect to scheduler");
233 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
234 }
235 }
236 }
237
/* Call ID of the most recent CIB query issued by do_pe_invoke();
 * do_pe_invoke_callback() skips results carrying any other call ID.
 */
238 static int fsa_pe_query = 0;
/* Timer that runs while a scheduler reply is expected; created on first use
 * by controld_expect_sched_reply() and destroyed by
 * controld_free_sched_timer().
 */
239 static mainloop_timer_t *controld_sched_timer = NULL;
240
241
/* Period passed to mainloop_timer_add() for the scheduler reply timer (the
 * name indicates milliseconds).
 */
242 #define SCHED_TIMEOUT_MS (120000)
243
244
245
246
247
248
249
250
251
252 static gboolean
253 controld_sched_timeout(gpointer user_data)
254 {
255 if (AM_I_DC) {
256
257
258
259
260
261
262
263
264
265 crmd_exit(CRM_EX_FATAL);
266 }
267 return FALSE;
268 }
269
270 void
271 controld_stop_sched_timer(void)
272 {
273 if ((controld_sched_timer != NULL)
274 && (controld_globals.fsa_pe_ref != NULL)) {
275 crm_trace("Stopping timer for scheduler reply %s",
276 controld_globals.fsa_pe_ref);
277 }
278 mainloop_timer_stop(controld_sched_timer);
279 }
280
281
282
283
284
285
286
287
288
289 void
290 controld_expect_sched_reply(char *ref)
291 {
292 if (ref) {
293 if (controld_sched_timer == NULL) {
294 controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
295 SCHED_TIMEOUT_MS, FALSE,
296 controld_sched_timeout,
297 NULL);
298 }
299 mainloop_timer_start(controld_sched_timer);
300 } else {
301 controld_stop_sched_timer();
302 }
303 free(controld_globals.fsa_pe_ref);
304 controld_globals.fsa_pe_ref = ref;
305 }
306
307
308
309
310
311 void
312 controld_free_sched_timer(void)
313 {
314 if (controld_sched_timer != NULL) {
315 mainloop_timer_del(controld_sched_timer);
316 controld_sched_timer = NULL;
317 }
318 }
319
320
321 void
322 do_pe_invoke(long long action,
323 enum crmd_fsa_cause cause,
324 enum crmd_fsa_state cur_state,
325 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
326 {
327 cib_t *cib_conn = controld_globals.cib_conn;
328
329 if (AM_I_DC == FALSE) {
330 crm_err("Not invoking scheduler because not DC: %s",
331 fsa_action2string(action));
332 return;
333 }
334
335 if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
336 if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
337 crm_err("Cannot shut down gracefully without the scheduler");
338 register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
339
340 } else {
341 crm_info("Waiting for the scheduler to connect");
342 crmd_fsa_stall(FALSE);
343 controld_set_fsa_action_flags(A_PE_START);
344 controld_trigger_fsa();
345 }
346 return;
347 }
348
349 if (cur_state != S_POLICY_ENGINE) {
350 crm_notice("Not invoking scheduler because in state %s",
351 fsa_state2string(cur_state));
352 return;
353 }
354 if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
355 crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
356
357
358 register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
359 return;
360 }
361
362 fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none);
363
364 crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
365 fsa_state2string(controld_globals.fsa_state));
366
367 controld_expect_sched_reply(NULL);
368 fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
369 }
370
371 static void
372 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
373 {
374 int max = 0;
375 int lpc = 0;
376 const char *xpath_base = NULL;
377 char *xpath_string = NULL;
378 xmlXPathObjectPtr xpathObj = NULL;
379
380 xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
381 if (xpath_base == NULL) {
382 crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
383 return;
384 }
385
386 xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
387 xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET,
388 attr_name);
389 xpathObj = xpath_search(xml, xpath_string);
390 max = numXpathResults(xpathObj);
391 free(xpath_string);
392
393 for (lpc = 0; lpc < max; lpc++) {
394 xmlNode *match = getXpathResult(xpathObj, lpc);
395 crm_trace("Forcing %s/%s = %s",
396 pcmk__xe_id(match), attr_name, attr_value);
397 crm_xml_add(match, PCMK_XA_VALUE, attr_value);
398 }
399
400 if(max == 0) {
401 xmlNode *configuration = NULL;
402 xmlNode *crm_config = NULL;
403 xmlNode *cluster_property_set = NULL;
404
405 crm_trace("Creating %s-%s for %s=%s",
406 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name,
407 attr_value);
408
409 configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
410 NULL);
411 if (configuration == NULL) {
412 configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
413 }
414
415 crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
416 NULL, NULL);
417 if (crm_config == NULL) {
418 crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
419 }
420
421 cluster_property_set =
422 pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
423 NULL);
424 if (cluster_property_set == NULL) {
425 cluster_property_set =
426 pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
427 crm_xml_add(cluster_property_set, PCMK_XA_ID,
428 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
429 }
430
431 xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);
432
433 pcmk__xe_set_id(xml, "%s-%s",
434 PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name);
435 crm_xml_add(xml, PCMK_XA_NAME, attr_name);
436 crm_xml_add(xml, PCMK_XA_VALUE, attr_value);
437 }
438 freeXpathObject(xpathObj);
439 }
440
441 static void
442 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
443 {
444 char *ref = NULL;
445 pid_t watchdog = pcmk__locate_sbd();
446
447 if (rc != pcmk_ok) {
448 crm_err("Could not retrieve the Cluster Information Base: %s "
449 QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
450 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
451 return;
452
453 } else if (call_id != fsa_pe_query) {
454 crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
455 return;
456
457 } else if (!AM_I_DC
458 || !pcmk_is_set(controld_globals.fsa_input_register,
459 R_PE_CONNECTED)) {
460 crm_debug("No need to invoke the scheduler anymore");
461 return;
462
463 } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
464 crm_debug("Discarding scheduler request in state: %s",
465 fsa_state2string(controld_globals.fsa_state));
466 return;
467
468
469 } else if (num_cib_op_callbacks() > 1) {
470 crm_debug("Re-asking for the CIB: %d other peer updates still pending",
471 (num_cib_op_callbacks() - 1));
472 sleep(1);
473 controld_set_fsa_action_flags(A_PE_INVOKE);
474 controld_trigger_fsa();
475 return;
476 }
477
478 CRM_LOG_ASSERT(output != NULL);
479
480
481
482 pcmk__refresh_node_caches_from_cib(output);
483
484 crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
485 pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM,
486 pcmk_is_set(controld_globals.flags,
487 controld_has_quorum));
488
489 force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
490
491 if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
492 && !pcmk__cluster_has_quorum()) {
493
494 crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
495 }
496
497 rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref);
498 if (rc != pcmk_rc_ok) {
499 free(ref);
500 crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d",
501 pcmk_rc_str(rc), rc);
502 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
503 } else {
504 pcmk__assert(ref != NULL);
505 controld_expect_sched_reply(ref);
506 crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
507 "quorate=%s",
508 fsa_pe_query, controld_globals.fsa_pe_ref,
509 controld_globals.peer_seq,
510 pcmk__flag_text(controld_globals.flags, controld_has_quorum));
511 }
512 }