This source file includes the following definitions.
- controld_remove_all_outside_events
- controld_destroy_outside_events_table
- record_outside_event
- fail_incompletable_actions
- update_failcount
- controld_get_action
- get_cancel_action
- confirm_cancel_action
- match_down_event
- process_graph_event
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13
14 #include <libxml/xpath.h>
15
16 #include <crm/crm.h>
17 #include <crm/cib.h>
18 #include <crm/common/xml.h>
19
20 #include <pacemaker-controld.h>
21
22 #include <crm/common/attrs_internal.h>
23 #include <crm/common/ipc_attrd_internal.h>
24
25
26
27
28
29
30
31
32
33
34
/* Set of action numbers taken from events whose transition number was -1
 * (events initiated outside of the cluster). It is created lazily by
 * record_outside_event() and used to avoid processing the same such event
 * more than once. The integers are stored directly as pointer keys.
 */
static GHashTable *outside_events = NULL;
36
37
38
39
40
41 void
42 controld_remove_all_outside_events(void)
43 {
44 if (outside_events != NULL) {
45 g_hash_table_remove_all(outside_events);
46 }
47 }
48
49
50
51
52
53 void
54 controld_destroy_outside_events_table(void)
55 {
56 if (outside_events != NULL) {
57 g_hash_table_destroy(outside_events);
58 outside_events = NULL;
59 }
60 }
61
62
63
64
65
66
67
68
69 static int
70 record_outside_event(gint action_num)
71 {
72 if (outside_events == NULL) {
73 outside_events = g_hash_table_new(NULL, NULL);
74 }
75
76 if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
77 return pcmk_rc_ok;
78 }
79 return pcmk_rc_already;
80 }
81
82 gboolean
83 fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
84 {
85 const char *target_uuid = NULL;
86 const char *router = NULL;
87 const char *router_uuid = NULL;
88 xmlNode *last_action = NULL;
89
90 GList *gIter = NULL;
91 GList *gIter2 = NULL;
92
93 if (graph == NULL || graph->complete) {
94 return FALSE;
95 }
96
97 gIter = graph->synapses;
98 for (; gIter != NULL; gIter = gIter->next) {
99 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
100
101 if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
102
103 continue;
104 }
105
106 gIter2 = synapse->actions;
107 for (; gIter2 != NULL; gIter2 = gIter2->next) {
108 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
109
110 if ((action->type == pcmk__pseudo_graph_action)
111 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
112 continue;
113 } else if (action->type == pcmk__cluster_graph_action) {
114 const char *task = crm_element_value(action->xml,
115 PCMK_XA_OPERATION);
116
117 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
118 continue;
119 }
120 }
121
122 target_uuid = crm_element_value(action->xml,
123 PCMK__META_ON_NODE_UUID);
124 router = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
125 if (router) {
126 const pcmk__node_status_t *node =
127 pcmk__get_node(0, router, NULL,
128 pcmk__node_search_cluster_member);
129
130 if (node != NULL) {
131 router_uuid = node->xml_id;
132 }
133 }
134
135 if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
136 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
137 pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
138 last_action = action->xml;
139 stop_te_timer(action);
140 pcmk__update_graph(graph, action);
141
142 if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
143 crm_notice("Action %d (%s) was pending on %s (offline)",
144 action->id,
145 crm_element_value(action->xml,
146 PCMK__XA_OPERATION_KEY),
147 down_node);
148 } else {
149 crm_info("Action %d (%s) is scheduled for %s (offline)",
150 action->id,
151 crm_element_value(action->xml, PCMK__XA_OPERATION_KEY),
152 down_node);
153 }
154 }
155 }
156 }
157
158 if (last_action != NULL) {
159 crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
160 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
161 "Node failure", last_action);
162 return TRUE;
163 }
164
165 return FALSE;
166 }
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181 static gboolean
182 update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
183 int target_rc, gboolean do_update, gboolean ignore_failures)
184 {
185 guint interval_ms = 0;
186
187 char *task = NULL;
188 char *rsc_id = NULL;
189
190 const char *value = NULL;
191 const char *id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
192 const char *on_uname = pcmk__node_name_from_uuid(event_node_uuid);
193 const char *origin = crm_element_value(event, PCMK_XA_CRM_DEBUG_ORIGIN);
194
195
196 if (rc == target_rc) {
197 return FALSE;
198 } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
199 crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
200 id, rc, on_uname);
201 return FALSE;
202 }
203
204
205 CRM_CHECK(on_uname != NULL, return TRUE);
206 CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
207 crm_err("Couldn't parse: %s", pcmk__xe_id(event)); goto bail);
208
209
210 if ((interval_ms > 0)
211 || pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_none)
212 || pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_none)) {
213 do_update = TRUE;
214
215 } else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_none)) {
216 do_update = TRUE;
217 value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
218 PCMK_VALUE_INFINITY);
219
220 } else if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_none)) {
221 do_update = TRUE;
222 value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
223 PCMK_VALUE_INFINITY);
224 }
225
226 if (do_update) {
227 pcmk__attrd_query_pair_t *fail_pair = NULL;
228 pcmk__attrd_query_pair_t *last_pair = NULL;
229 char *fail_name = NULL;
230 char *last_name = NULL;
231 GList *attrs = NULL;
232
233 uint32_t opts = pcmk__node_attr_none;
234
235 char *now = pcmk__ttoa(time(NULL));
236
237
238 if (!pcmk_str_is_infinity(value)) {
239 value = PCMK_XA_VALUE "++";
240 }
241
242 if (g_hash_table_lookup(pcmk__remote_peer_cache, event_node_uuid)) {
243 opts |= pcmk__node_attr_remote;
244 }
245
246 crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
247 (ignore_failures? "last failure" : "failcount"),
248 rsc_id, on_uname, task, rc, value, now);
249
250
251 if (!ignore_failures) {
252 fail_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
253
254 fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
255 fail_pair->name = fail_name;
256 fail_pair->value = value;
257 fail_pair->node = on_uname;
258
259 attrs = g_list_prepend(attrs, fail_pair);
260 }
261
262
263
264
265 last_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
266
267 last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
268 last_pair->name = last_name;
269 last_pair->value = now;
270 last_pair->node = on_uname;
271
272 attrs = g_list_prepend(attrs, last_pair);
273
274 update_attrd_list(attrs, opts);
275
276 free(fail_name);
277 free(fail_pair);
278
279 free(last_name);
280 free(last_pair);
281 g_list_free(attrs);
282
283 free(now);
284 }
285
286 bail:
287 free(rsc_id);
288 free(task);
289 return TRUE;
290 }
291
292 pcmk__graph_action_t *
293 controld_get_action(int id)
294 {
295 for (GList *item = controld_globals.transition_graph->synapses;
296 item != NULL; item = item->next) {
297 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
298
299 for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
300 pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
301
302 if (action->id == id) {
303 return action;
304 }
305 }
306 }
307 return NULL;
308 }
309
310 pcmk__graph_action_t *
311 get_cancel_action(const char *id, const char *node)
312 {
313 GList *gIter = NULL;
314 GList *gIter2 = NULL;
315
316 gIter = controld_globals.transition_graph->synapses;
317 for (; gIter != NULL; gIter = gIter->next) {
318 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
319
320 gIter2 = synapse->actions;
321 for (; gIter2 != NULL; gIter2 = gIter2->next) {
322 const char *task = NULL;
323 const char *target = NULL;
324 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
325
326 task = crm_element_value(action->xml, PCMK_XA_OPERATION);
327 if (!pcmk__str_eq(PCMK_ACTION_CANCEL, task, pcmk__str_casei)) {
328 continue;
329 }
330
331 task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
332 if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
333 continue;
334 }
335
336 target = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
337 if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
338 crm_trace("Wrong node %s for %s on %s", target, id, node);
339 continue;
340 }
341
342 crm_trace("Found %s on %s", id, node);
343 return action;
344 }
345 }
346
347 return NULL;
348 }
349
350 bool
351 confirm_cancel_action(const char *id, const char *node_id)
352 {
353 const char *op_key = NULL;
354 const char *node_name = NULL;
355 pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
356
357 if (cancel == NULL) {
358 return FALSE;
359 }
360 op_key = crm_element_value(cancel->xml, PCMK__XA_OPERATION_KEY);
361 node_name = crm_element_value(cancel->xml, PCMK__META_ON_NODE);
362
363 stop_te_timer(cancel);
364 te_action_confirmed(cancel, controld_globals.transition_graph);
365
366 crm_info("Cancellation of %s on %s confirmed (action %d)",
367 op_key, node_name, cancel->id);
368 return TRUE;
369 }
370
371
/* XPath format string taking one string argument (a node ID). It selects,
 * anywhere in a document, a PCMK_XE_NODE element that is a child of a
 * PCMK__XE_DOWNED element and whose PCMK_XA_ID attribute equals the argument.
 */
#define XPATH_DOWNED "//" PCMK__XE_DOWNED \
                     "/" PCMK_XE_NODE "[@" PCMK_XA_ID "='%s']"
374
375
376
377
378
379
380
381
382 pcmk__graph_action_t *
383 match_down_event(const char *target)
384 {
385 pcmk__graph_action_t *match = NULL;
386 xmlXPathObject *xpath_ret = NULL;
387 GList *gIter, *gIter2;
388
389 char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
390
391 for (gIter = controld_globals.transition_graph->synapses;
392 gIter != NULL && match == NULL;
393 gIter = gIter->next) {
394
395 for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
396 gIter2 != NULL && match == NULL;
397 gIter2 = gIter2->next) {
398
399 match = (pcmk__graph_action_t *) gIter2->data;
400 if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
401 xpath_ret = pcmk__xpath_search(match->xml->doc, xpath);
402 if (pcmk__xpath_num_results(xpath_ret) == 0) {
403 match = NULL;
404 }
405 xmlXPathFreeObject(xpath_ret);
406 } else {
407
408 match = NULL;
409 }
410 }
411 }
412
413 free(xpath);
414
415 if (match != NULL) {
416 crm_debug("Shutdown action %d (%s) found for node %s", match->id,
417 crm_element_value(match->xml, PCMK__XA_OPERATION_KEY),
418 target);
419 } else {
420 crm_debug("No reason to expect node %s to be down", target);
421 }
422 return match;
423 }
424
/*!
 * \internal
 * \brief Process an action result event against the transition graph
 *
 * Decode the event's transition key, then classify the event: initiated
 * outside of the cluster, initiated by a different DC, not part of the active
 * transition, or a result for an action in the active transition. Depending on
 * the classification, the matching graph action is confirmed and/or the
 * transition is aborted. Finally the outcome is logged, updating fail counts
 * where the result differs from the expected one.
 *
 * Events without a transition key, and events whose status is pending, are
 * ignored.
 *
 * \param event       Action result XML (must not be NULL)
 * \param event_node  Passed to update_failcount() as the node UUID
 */
void
process_graph_event(xmlNode *event, const char *event_node)
{
    int rc = -1;                // Actual result (read from event)
    int target_rc = -1;         // Expected result (from transition key)
    int status = -1;            // Execution status (read from event)
    int callid = -1;            // Call ID (read from event)
    int transition_num = -1;    // Transition number (from transition key)
    int action_num = -1;        // Action number (from transition key)
    char *update_te_uuid = NULL;    // Freed at bail
    bool ignore_failures = FALSE;
    const char *id = NULL;
    const char *desc = NULL;    // Stays NULL only when an action gets confirmed
    const char *magic = NULL;
    const char *uname = NULL;

    pcmk__assert(event != NULL);

    // Without a transition key there is nothing to match the event against
    magic = crm_element_value(event, PCMK__XA_TRANSITION_KEY);
    if (magic == NULL) {

        return;
    }

    // A pending action has no result to process yet
    crm_element_value_int(event, PCMK__XA_OP_STATUS, &status);
    if (status == PCMK_EXEC_PENDING) {
        return;
    }

    id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
    crm_element_value_int(event, PCMK__XA_RC_CODE, &rc);
    crm_element_value_int(event, PCMK__XA_CALL_ID, &callid);

    rc = pcmk__effective_rc(rc);

    if (decode_transition_key(magic, &update_te_uuid, &transition_num,
                              &action_num, &target_rc) == FALSE) {

        // NOTE(review): assumes update_te_uuid is not allocated on failure,
        // since this path returns without freeing it -- confirm
        crm_err("Can't process action %s result: Incompatible versions? "
                QB_XS " call-id=%d", id, callid);
        abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                         "Bad event", event);
        return;
    }

    if (transition_num == -1) {

        // Event does not belong to any transition; handle each one only once
        if (record_outside_event(action_num) != pcmk_rc_ok) {
            crm_debug("Outside event with transition key '%s' has already been "
                      "processed", magic);
            goto bail;
        }
        desc = "initiated outside of the cluster";
        abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                         "Unexpected event", event);

    } else if ((action_num < 0)
               || !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
                                pcmk__str_none)) {
        // The key's UUID is not ours (or the action number is invalid)
        desc = "initiated by a different DC";
        abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                         "Foreign event", event);

    } else if ((controld_globals.transition_graph->id != transition_num)
               || controld_globals.transition_graph->complete) {

        // Event is not for an action in the currently active transition

        guint interval_ms = 0;

        if (parse_op_key(id, NULL, NULL, &interval_ms)
            && (interval_ms != 0)) {

            // Recurring action: a cancellation only needs its cancel action
            // confirmed; any other result aborts the transition

            if (status == PCMK_EXEC_CANCELLED) {
                confirm_cancel_action(id, get_node_id(event));
                goto bail;
            }

            desc = "arrived after initial scheduling";
            abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                             "Change in recurring result", event);

        } else if (controld_globals.transition_graph->id != transition_num) {
            // Result is from a different transition than the current graph
            desc = "arrived really late";
            abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                             "Old event", event);
        } else {
            // Same transition, but the graph is already complete
            desc = "arrived late";
            abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                             "Inactive graph", event);
        }

    } else {

        // Event is for an action in the currently active transition
        pcmk__graph_action_t *action = controld_get_action(action_num);

        if (action == NULL) {

            // The graph has no action with this number
            desc = "unknown";
            abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                             "Unknown event", event);

        } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {

            // Already confirmed: nothing more to do, and nothing is logged
            // beyond this debug message
            crm_log_xml_debug(event, "Event already confirmed:");
            goto bail;

        } else {

            // Confirm the action (this is the only path that leaves desc NULL)

            if (pcmk__str_eq(crm_meta_value(action->params, PCMK_META_ON_FAIL),
                             PCMK_VALUE_IGNORE, pcmk__str_casei)) {
                // Failures are ignored, so the action is not marked failed
                ignore_failures = TRUE;

            } else if (rc != target_rc) {
                pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
            }

            stop_te_timer(action);
            te_action_confirmed(action, controld_globals.transition_graph);

            if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
                abort_transition(action->synapse->priority + 1,
                                 pcmk__graph_restart, "Event failed", event);
            }
        }
    }

    // Substitute placeholders so the log messages below never print NULL
    if (id == NULL) {
        id = "unknown action";
    }
    uname = crm_element_value(event, PCMK__META_ON_NODE);
    if (uname == NULL) {
        uname = "unknown node";
    }

    if (status == PCMK_EXEC_INVALID) {

        // Invalid status: just report it, without touching fail counts
        crm_info("Transition %d action %d (%s on %s): %s",
                 transition_num, action_num, id, uname,
                 pcmk_exec_status_str(status));

    } else if (desc && update_failcount(event, event_node, rc, target_rc,
                                        (transition_num == -1), FALSE)) {
        // Unconfirmed event whose result differed from the expected one
        crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
                   QB_XS " target-rc=%d rc=%d call-id=%d event='%s'",
                   transition_num, action_num, id, uname,
                   crm_exit_str(target_rc), crm_exit_str(rc),
                   target_rc, rc, callid, desc);

    } else if (desc) {
        // Unconfirmed event for which update_failcount() returned FALSE
        crm_info("Transition %d action %d (%s on %s): %s "
                 QB_XS " rc=%d target-rc=%d call-id=%d",
                 transition_num, action_num, id, uname,
                 desc, rc, target_rc, callid);

    } else if (rc == target_rc) {
        // Confirmed action with the expected result
        crm_info("Transition %d action %d (%s on %s) confirmed: %s "
                 QB_XS " rc=%d call-id=%d",
                 transition_num, action_num, id, uname,
                 crm_exit_str(rc), rc, callid);

    } else {
        // Confirmed action with an unexpected result
        update_failcount(event, event_node, rc, target_rc,
                         (transition_num == -1), ignore_failures);
        crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
                   QB_XS " target-rc=%d rc=%d call-id=%d",
                   transition_num, action_num, id, uname,
                   crm_exit_str(target_rc), crm_exit_str(rc),
                   target_rc, rc, callid);
    }

bail:
    free(update_te_uuid);
}