This source file includes the following definitions.
- controld_shutdown_schedulerd_ipc
- save_cib_contents
- handle_disconnect
- handle_reply
- scheduler_event_callback
- new_schedulerd_ipc_connection
- do_pe_control
- controld_sched_timeout
- controld_stop_sched_timer
- controld_expect_sched_reply
- controld_free_sched_timer
- do_pe_invoke
- force_local_option
- do_pe_invoke_callback
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <unistd.h>
13
14 #include <crm/cib.h>
15 #include <crm/cluster.h>
16 #include <crm/common/xml.h>
17 #include <crm/crm.h>
18 #include <crm/msg_xml.h>
19 #include <crm/common/xml_internal.h>
20 #include <crm/common/ipc.h>
21 #include <crm/common/ipc_schedulerd.h>
22
23 #include <pacemaker-controld.h>
24
25 static void handle_disconnect(void);
26
27 static pcmk_ipc_api_t *schedulerd_api = NULL;
28
29
30
31
32
33 void
34 controld_shutdown_schedulerd_ipc(void)
35 {
36 controld_clear_fsa_input_flags(R_PE_REQUIRED);
37 pcmk_disconnect_ipc(schedulerd_api);
38 handle_disconnect();
39
40 pcmk_free_ipc_api(schedulerd_api);
41 schedulerd_api = NULL;
42 }
43
44
45
46
47
48
49
50
51
52
53
54
55
56 static void
57 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
58 void *user_data)
59 {
60 const char *id = user_data;
61
62 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
63 CRM_CHECK(id != NULL, return);
64
65 if (rc == pcmk_ok) {
66 char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
67
68 if (write_xml_file(output, filename, TRUE) < 0) {
69 crm_err("Could not save Cluster Information Base to %s after scheduler crash",
70 filename);
71 } else {
72 crm_notice("Saved Cluster Information Base to %s after scheduler crash",
73 filename);
74 }
75 free(filename);
76 }
77 }
78
79
80
81
82
83 static void
84 handle_disconnect(void)
85 {
86
87 controld_expect_sched_reply(NULL);
88
89 if (pcmk_is_set(fsa_input_register, R_PE_REQUIRED)) {
90 int rc = pcmk_ok;
91 char *uuid_str = crm_generate_uuid();
92
93 crm_crit("Connection to the scheduler failed "
94 CRM_XS " uuid=%s", uuid_str);
95
96
97
98
99
100
101
102
103
104
105
106 rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
107 fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents);
108
109 } else {
110 crm_info("Connection to the scheduler released");
111 }
112
113 controld_clear_fsa_input_flags(R_PE_CONNECTED);
114 mainloop_set_trigger(fsa_source);
115 return;
116 }
117
118 static void
119 handle_reply(pcmk_schedulerd_api_reply_t *reply)
120 {
121 const char *msg_ref = NULL;
122
123 if (!AM_I_DC) {
124 return;
125 }
126
127 msg_ref = reply->data.graph.reference;
128
129 if (msg_ref == NULL) {
130 crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
131
132 } else if (pcmk__str_eq(msg_ref, fsa_pe_ref, pcmk__str_none)) {
133 ha_msg_input_t fsa_input;
134 xmlNode *crm_data_node;
135
136 controld_stop_sched_timer();
137
138
139
140
141
142
143
144
145
146 fsa_input.msg = create_xml_node(NULL, "dummy-reply");
147 crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref);
148 crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input);
149
150 crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA);
151 add_node_copy(crm_data_node, reply->data.graph.tgraph);
152 register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
153
154 free_xml(fsa_input.msg);
155
156 } else {
157 crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
158 }
159 }
160
161 static void
162 scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
163 crm_exit_t status, void *event_data, void *user_data)
164 {
165 pcmk_schedulerd_api_reply_t *reply = event_data;
166
167 switch (event_type) {
168 case pcmk_ipc_event_disconnect:
169 handle_disconnect();
170 break;
171
172 case pcmk_ipc_event_reply:
173 handle_reply(reply);
174 break;
175
176 default:
177 break;
178 }
179 }
180
181 static bool
182 new_schedulerd_ipc_connection(void)
183 {
184 int rc;
185
186 controld_set_fsa_input_flags(R_PE_REQUIRED);
187
188 if (schedulerd_api == NULL) {
189 rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
190
191 if (rc != pcmk_rc_ok) {
192 crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
193 return false;
194 }
195 }
196
197 pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
198
199 rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main);
200 if (rc != pcmk_rc_ok) {
201 crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
202 return false;
203 }
204
205 controld_set_fsa_input_flags(R_PE_CONNECTED);
206 return true;
207 }
208
209 static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
210 xmlNode *output, void *user_data);
211
212
213 void
214 do_pe_control(long long action,
215 enum crmd_fsa_cause cause,
216 enum crmd_fsa_state cur_state,
217 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
218 {
219 if (action & A_PE_STOP) {
220 controld_clear_fsa_input_flags(R_PE_REQUIRED);
221 pcmk_disconnect_ipc(schedulerd_api);
222 handle_disconnect();
223 }
224 if ((action & A_PE_START)
225 && !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
226
227 if (cur_state == S_STOPPING) {
228 crm_info("Ignoring request to connect to scheduler while shutting down");
229
230 } else if (!new_schedulerd_ipc_connection()) {
231 crm_warn("Could not connect to scheduler");
232 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
233 }
234 }
235 }
236
237 int fsa_pe_query = 0;
238 char *fsa_pe_ref = NULL;
239 static mainloop_timer_t *controld_sched_timer = NULL;
240
241
242 #define SCHED_TIMEOUT_MS (120000)
243
244
245
246
247
248
249
250
251
252 static gboolean
253 controld_sched_timeout(gpointer user_data)
254 {
255 if (AM_I_DC) {
256
257
258
259
260
261
262
263
264
265 crmd_exit(CRM_EX_FATAL);
266 }
267 return FALSE;
268 }
269
270 void
271 controld_stop_sched_timer(void)
272 {
273 if (controld_sched_timer && fsa_pe_ref) {
274 crm_trace("Stopping timer for scheduler reply %s", fsa_pe_ref);
275 }
276 mainloop_timer_stop(controld_sched_timer);
277 }
278
279
280
281
282
283
284
285
286
287 void
288 controld_expect_sched_reply(char *ref)
289 {
290 if (ref) {
291 if (controld_sched_timer == NULL) {
292 controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
293 SCHED_TIMEOUT_MS, FALSE,
294 controld_sched_timeout,
295 NULL);
296 }
297 mainloop_timer_start(controld_sched_timer);
298 } else {
299 controld_stop_sched_timer();
300 }
301 free(fsa_pe_ref);
302 fsa_pe_ref = ref;
303 }
304
305
306
307
308
309 void
310 controld_free_sched_timer(void)
311 {
312 if (controld_sched_timer != NULL) {
313 mainloop_timer_del(controld_sched_timer);
314 controld_sched_timer = NULL;
315 }
316 }
317
318
319 void
320 do_pe_invoke(long long action,
321 enum crmd_fsa_cause cause,
322 enum crmd_fsa_state cur_state,
323 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
324 {
325 if (AM_I_DC == FALSE) {
326 crm_err("Not invoking scheduler because not DC: %s",
327 fsa_action2string(action));
328 return;
329 }
330
331 if (!pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
332 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
333 crm_err("Cannot shut down gracefully without the scheduler");
334 register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
335
336 } else {
337 crm_info("Waiting for the scheduler to connect");
338 crmd_fsa_stall(FALSE);
339 controld_set_fsa_action_flags(A_PE_START);
340 trigger_fsa();
341 }
342 return;
343 }
344
345 if (cur_state != S_POLICY_ENGINE) {
346 crm_notice("Not invoking scheduler because in state %s",
347 fsa_state2string(cur_state));
348 return;
349 }
350 if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) {
351 crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
352
353
354 register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
355 return;
356 }
357
358 fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local);
359
360 crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
361 fsa_state2string(fsa_state));
362
363 controld_expect_sched_reply(NULL);
364 fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback);
365 }
366
367 static void
368 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
369 {
370 int max = 0;
371 int lpc = 0;
372 const char *xpath_base = NULL;
373 char *xpath_string = NULL;
374 xmlXPathObjectPtr xpathObj = NULL;
375
376 xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG);
377 if (xpath_base == NULL) {
378 crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)");
379 return;
380 }
381
382 xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
383 xpath_base, XML_CIB_TAG_PROPSET,
384 attr_name);
385 xpathObj = xpath_search(xml, xpath_string);
386 max = numXpathResults(xpathObj);
387 free(xpath_string);
388
389 for (lpc = 0; lpc < max; lpc++) {
390 xmlNode *match = getXpathResult(xpathObj, lpc);
391 crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value);
392 crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value);
393 }
394
395 if(max == 0) {
396 xmlNode *configuration = NULL;
397 xmlNode *crm_config = NULL;
398 xmlNode *cluster_property_set = NULL;
399
400 crm_trace("Creating %s-%s for %s=%s",
401 CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value);
402
403 configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL,
404 NULL);
405 if (configuration == NULL) {
406 configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION);
407 }
408
409 crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL,
410 NULL);
411 if (crm_config == NULL) {
412 crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG);
413 }
414
415 cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET,
416 NULL, NULL);
417 if (cluster_property_set == NULL) {
418 cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET);
419 crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST);
420 }
421
422 xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR);
423
424 crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name);
425 crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name);
426 crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value);
427 }
428 freeXpathObject(xpathObj);
429 }
430
431 static void
432 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
433 {
434 char *ref = NULL;
435 pid_t watchdog = pcmk__locate_sbd();
436
437 if (rc != pcmk_ok) {
438 crm_err("Could not retrieve the Cluster Information Base: %s "
439 CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
440 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
441 return;
442
443 } else if (call_id != fsa_pe_query) {
444 crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
445 return;
446
447 } else if (!AM_I_DC || !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) {
448 crm_debug("No need to invoke the scheduler anymore");
449 return;
450
451 } else if (fsa_state != S_POLICY_ENGINE) {
452 crm_debug("Discarding scheduler request in state: %s",
453 fsa_state2string(fsa_state));
454 return;
455
456
457 } else if (num_cib_op_callbacks() > 1) {
458 crm_debug("Re-asking for the CIB: %d other peer updates still pending",
459 (num_cib_op_callbacks() - 1));
460 sleep(1);
461 controld_set_fsa_action_flags(A_PE_INVOKE);
462 trigger_fsa();
463 return;
464 }
465
466 CRM_LOG_ASSERT(output != NULL);
467
468
469
470 pcmk__refresh_node_caches_from_cib(output);
471
472 crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid);
473 crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum);
474
475 force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog));
476
477 if (ever_had_quorum && crm_have_quorum == FALSE) {
478 crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1);
479 }
480
481 rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref));
482
483 if (rc < 0) {
484 crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
485 pcmk_strerror(rc), rc);
486 register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
487 } else {
488 CRM_ASSERT(ref != NULL);
489 controld_expect_sched_reply(ref);
490 crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d",
491 fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum);
492 }
493 }