This source file includes the following definitions.
- controld_remove_all_outside_events
- controld_destroy_outside_events_table
- record_outside_event
- fail_incompletable_actions
- update_failcount
- controld_get_action
- get_cancel_action
- confirm_cancel_action
- match_down_event
- process_graph_event
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/common/xml.h>
16
17 #include <pacemaker-controld.h>
18
19 #include <crm/common/attrs_internal.h>
20 #include <crm/common/ipc_attrd_internal.h>
21
22
23
24
25
26
27
28
29
30
31
32 static GHashTable *outside_events = NULL;
33
34
35
36
37
38 void
39 controld_remove_all_outside_events(void)
40 {
41 if (outside_events != NULL) {
42 g_hash_table_remove_all(outside_events);
43 }
44 }
45
46
47
48
49
50 void
51 controld_destroy_outside_events_table(void)
52 {
53 if (outside_events != NULL) {
54 g_hash_table_destroy(outside_events);
55 outside_events = NULL;
56 }
57 }
58
59
60
61
62
63
64
65
66 static int
67 record_outside_event(gint action_num)
68 {
69 if (outside_events == NULL) {
70 outside_events = g_hash_table_new(NULL, NULL);
71 }
72
73 if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
74 return pcmk_rc_ok;
75 }
76 return pcmk_rc_already;
77 }
78
79 gboolean
80 fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
81 {
82 const char *target_uuid = NULL;
83 const char *router = NULL;
84 const char *router_uuid = NULL;
85 xmlNode *last_action = NULL;
86
87 GList *gIter = NULL;
88 GList *gIter2 = NULL;
89
90 if (graph == NULL || graph->complete) {
91 return FALSE;
92 }
93
94 gIter = graph->synapses;
95 for (; gIter != NULL; gIter = gIter->next) {
96 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
97
98 if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
99
100 continue;
101 }
102
103 gIter2 = synapse->actions;
104 for (; gIter2 != NULL; gIter2 = gIter2->next) {
105 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
106
107 if ((action->type == pcmk__pseudo_graph_action)
108 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
109 continue;
110 } else if (action->type == pcmk__cluster_graph_action) {
111 const char *task = crm_element_value(action->xml,
112 PCMK_XA_OPERATION);
113
114 if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
115 continue;
116 }
117 }
118
119 target_uuid = crm_element_value(action->xml,
120 PCMK__META_ON_NODE_UUID);
121 router = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
122 if (router) {
123 const crm_node_t *node =
124 pcmk__get_node(0, router, NULL,
125 pcmk__node_search_cluster_member);
126
127 if (node) {
128 router_uuid = node->uuid;
129 }
130 }
131
132 if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
133 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
134 pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
135 last_action = action->xml;
136 stop_te_timer(action);
137 pcmk__update_graph(graph, action);
138
139 if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
140 crm_notice("Action %d (%s) was pending on %s (offline)",
141 action->id,
142 crm_element_value(action->xml,
143 PCMK__XA_OPERATION_KEY),
144 down_node);
145 } else {
146 crm_info("Action %d (%s) is scheduled for %s (offline)",
147 action->id,
148 crm_element_value(action->xml, PCMK__XA_OPERATION_KEY),
149 down_node);
150 }
151 }
152 }
153 }
154
155 if (last_action != NULL) {
156 crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
157 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
158 "Node failure", last_action);
159 return TRUE;
160 }
161
162 return FALSE;
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178 static gboolean
179 update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
180 int target_rc, gboolean do_update, gboolean ignore_failures)
181 {
182 guint interval_ms = 0;
183
184 char *task = NULL;
185 char *rsc_id = NULL;
186
187 const char *value = NULL;
188 const char *id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
189 const char *on_uname = pcmk__node_name_from_uuid(event_node_uuid);
190 const char *origin = crm_element_value(event, PCMK_XA_CRM_DEBUG_ORIGIN);
191
192
193 if (rc == target_rc) {
194 return FALSE;
195 } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
196 crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
197 id, rc, on_uname);
198 return FALSE;
199 }
200
201
202 CRM_CHECK(on_uname != NULL, return TRUE);
203 CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
204 crm_err("Couldn't parse: %s", pcmk__xe_id(event)); goto bail);
205
206
207 if ((interval_ms > 0)
208 || pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_none)
209 || pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_none)) {
210 do_update = TRUE;
211
212 } else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_none)) {
213 do_update = TRUE;
214 value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
215 PCMK_VALUE_INFINITY);
216
217 } else if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_none)) {
218 do_update = TRUE;
219 value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
220 PCMK_VALUE_INFINITY);
221 }
222
223 if (do_update) {
224 pcmk__attrd_query_pair_t *fail_pair = NULL;
225 pcmk__attrd_query_pair_t *last_pair = NULL;
226 char *fail_name = NULL;
227 char *last_name = NULL;
228 GList *attrs = NULL;
229
230 uint32_t opts = pcmk__node_attr_none;
231
232 char *now = pcmk__ttoa(time(NULL));
233
234
235 if (!pcmk_str_is_infinity(value)) {
236 value = PCMK_XA_VALUE "++";
237 }
238
239 if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
240 opts |= pcmk__node_attr_remote;
241 }
242
243 crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
244 (ignore_failures? "last failure" : "failcount"),
245 rsc_id, on_uname, task, rc, value, now);
246
247
248 if (!ignore_failures) {
249 fail_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
250
251 fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
252 fail_pair->name = fail_name;
253 fail_pair->value = value;
254 fail_pair->node = on_uname;
255
256 attrs = g_list_prepend(attrs, fail_pair);
257 }
258
259
260
261
262 last_pair = pcmk__assert_alloc(1, sizeof(pcmk__attrd_query_pair_t));
263
264 last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
265 last_pair->name = last_name;
266 last_pair->value = now;
267 last_pair->node = on_uname;
268
269 attrs = g_list_prepend(attrs, last_pair);
270
271 update_attrd_list(attrs, opts);
272
273 free(fail_name);
274 free(fail_pair);
275
276 free(last_name);
277 free(last_pair);
278 g_list_free(attrs);
279
280 free(now);
281 }
282
283 bail:
284 free(rsc_id);
285 free(task);
286 return TRUE;
287 }
288
289 pcmk__graph_action_t *
290 controld_get_action(int id)
291 {
292 for (GList *item = controld_globals.transition_graph->synapses;
293 item != NULL; item = item->next) {
294 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
295
296 for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
297 pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
298
299 if (action->id == id) {
300 return action;
301 }
302 }
303 }
304 return NULL;
305 }
306
307 pcmk__graph_action_t *
308 get_cancel_action(const char *id, const char *node)
309 {
310 GList *gIter = NULL;
311 GList *gIter2 = NULL;
312
313 gIter = controld_globals.transition_graph->synapses;
314 for (; gIter != NULL; gIter = gIter->next) {
315 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
316
317 gIter2 = synapse->actions;
318 for (; gIter2 != NULL; gIter2 = gIter2->next) {
319 const char *task = NULL;
320 const char *target = NULL;
321 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
322
323 task = crm_element_value(action->xml, PCMK_XA_OPERATION);
324 if (!pcmk__str_eq(PCMK_ACTION_CANCEL, task, pcmk__str_casei)) {
325 continue;
326 }
327
328 task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
329 if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
330 crm_trace("Wrong key %s for %s on %s", task, id, node);
331 continue;
332 }
333
334 target = crm_element_value(action->xml, PCMK__META_ON_NODE_UUID);
335 if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
336 crm_trace("Wrong node %s for %s on %s", target, id, node);
337 continue;
338 }
339
340 crm_trace("Found %s on %s", id, node);
341 return action;
342 }
343 }
344
345 return NULL;
346 }
347
348 bool
349 confirm_cancel_action(const char *id, const char *node_id)
350 {
351 const char *op_key = NULL;
352 const char *node_name = NULL;
353 pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
354
355 if (cancel == NULL) {
356 return FALSE;
357 }
358 op_key = crm_element_value(cancel->xml, PCMK__XA_OPERATION_KEY);
359 node_name = crm_element_value(cancel->xml, PCMK__META_ON_NODE);
360
361 stop_te_timer(cancel);
362 te_action_confirmed(cancel, controld_globals.transition_graph);
363
364 crm_info("Cancellation of %s on %s confirmed (action %d)",
365 op_key, node_name, cancel->id);
366 return TRUE;
367 }
368
369
370 #define XPATH_DOWNED "//" PCMK__XE_DOWNED \
371 "/" PCMK_XE_NODE "[@" PCMK_XA_ID "='%s']"
372
373
374
375
376
377
378
379
380 pcmk__graph_action_t *
381 match_down_event(const char *target)
382 {
383 pcmk__graph_action_t *match = NULL;
384 xmlXPathObjectPtr xpath_ret = NULL;
385 GList *gIter, *gIter2;
386
387 char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
388
389 for (gIter = controld_globals.transition_graph->synapses;
390 gIter != NULL && match == NULL;
391 gIter = gIter->next) {
392
393 for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
394 gIter2 != NULL && match == NULL;
395 gIter2 = gIter2->next) {
396
397 match = (pcmk__graph_action_t *) gIter2->data;
398 if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
399 xpath_ret = xpath_search(match->xml, xpath);
400 if (numXpathResults(xpath_ret) < 1) {
401 match = NULL;
402 }
403 freeXpathObject(xpath_ret);
404 } else {
405
406 match = NULL;
407 }
408 }
409 }
410
411 free(xpath);
412
413 if (match != NULL) {
414 crm_debug("Shutdown action %d (%s) found for node %s", match->id,
415 crm_element_value(match->xml, PCMK__XA_OPERATION_KEY),
416 target);
417 } else {
418 crm_debug("No reason to expect node %s to be down", target);
419 }
420 return match;
421 }
422
423 void
424 process_graph_event(xmlNode *event, const char *event_node)
425 {
426 int rc = -1;
427 int target_rc = -1;
428 int status = -1;
429 int callid = -1;
430 int transition_num = -1;
431 int action_num = -1;
432 char *update_te_uuid = NULL;
433 bool ignore_failures = FALSE;
434 const char *id = NULL;
435 const char *desc = NULL;
436 const char *magic = NULL;
437 const char *uname = NULL;
438
439 CRM_ASSERT(event != NULL);
440
441
442
443
444
445 magic = crm_element_value(event, PCMK__XA_TRANSITION_KEY);
446 if (magic == NULL) {
447
448 return;
449 }
450
451 crm_element_value_int(event, PCMK__XA_OP_STATUS, &status);
452 if (status == PCMK_EXEC_PENDING) {
453 return;
454 }
455
456 id = crm_element_value(event, PCMK__XA_OPERATION_KEY);
457 crm_element_value_int(event, PCMK__XA_RC_CODE, &rc);
458 crm_element_value_int(event, PCMK__XA_CALL_ID, &callid);
459
460 rc = pcmk__effective_rc(rc);
461
462 if (decode_transition_key(magic, &update_te_uuid, &transition_num,
463 &action_num, &target_rc) == FALSE) {
464
465 crm_err("Can't process action %s result: Incompatible versions? "
466 CRM_XS " call-id=%d", id, callid);
467 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
468 "Bad event", event);
469 return;
470 }
471
472 if (transition_num == -1) {
473
474 if (record_outside_event(action_num) != pcmk_rc_ok) {
475 crm_debug("Outside event with transition key '%s' has already been "
476 "processed", magic);
477 goto bail;
478 }
479 desc = "initiated outside of the cluster";
480 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
481 "Unexpected event", event);
482
483 } else if ((action_num < 0)
484 || !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
485 pcmk__str_none)) {
486 desc = "initiated by a different DC";
487 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
488 "Foreign event", event);
489
490 } else if ((controld_globals.transition_graph->id != transition_num)
491 || controld_globals.transition_graph->complete) {
492
493
494
495 guint interval_ms = 0;
496
497 if (parse_op_key(id, NULL, NULL, &interval_ms)
498 && (interval_ms != 0)) {
499
500
501
502
503 if (status == PCMK_EXEC_CANCELLED) {
504 confirm_cancel_action(id, get_node_id(event));
505 goto bail;
506 }
507
508 desc = "arrived after initial scheduling";
509 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
510 "Change in recurring result", event);
511
512 } else if (controld_globals.transition_graph->id != transition_num) {
513 desc = "arrived really late";
514 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
515 "Old event", event);
516 } else {
517 desc = "arrived late";
518 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
519 "Inactive graph", event);
520 }
521
522 } else {
523
524 pcmk__graph_action_t *action = controld_get_action(action_num);
525
526 if (action == NULL) {
527
528 desc = "unknown";
529 abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
530 "Unknown event", event);
531
532 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
533
534
535
536
537
538
539 crm_log_xml_debug(event, "Event already confirmed:");
540 goto bail;
541
542 } else {
543
544
545
546
547 if (pcmk__str_eq(crm_meta_value(action->params, PCMK_META_ON_FAIL),
548 PCMK_VALUE_IGNORE, pcmk__str_casei)) {
549 ignore_failures = TRUE;
550
551 } else if (rc != target_rc) {
552 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
553 }
554
555 stop_te_timer(action);
556 te_action_confirmed(action, controld_globals.transition_graph);
557
558 if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
559 abort_transition(action->synapse->priority + 1,
560 pcmk__graph_restart, "Event failed", event);
561 }
562 }
563 }
564
565 if (id == NULL) {
566 id = "unknown action";
567 }
568 uname = crm_element_value(event, PCMK__META_ON_NODE);
569 if (uname == NULL) {
570 uname = "unknown node";
571 }
572
573 if (status == PCMK_EXEC_INVALID) {
574
575 crm_info("Transition %d action %d (%s on %s): %s",
576 transition_num, action_num, id, uname,
577 pcmk_exec_status_str(status));
578
579 } else if (desc && update_failcount(event, event_node, rc, target_rc,
580 (transition_num == -1), FALSE)) {
581 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
582 CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
583 transition_num, action_num, id, uname,
584 services_ocf_exitcode_str(target_rc),
585 services_ocf_exitcode_str(rc),
586 target_rc, rc, callid, desc);
587
588 } else if (desc) {
589 crm_info("Transition %d action %d (%s on %s): %s "
590 CRM_XS " rc=%d target-rc=%d call-id=%d",
591 transition_num, action_num, id, uname,
592 desc, rc, target_rc, callid);
593
594 } else if (rc == target_rc) {
595 crm_info("Transition %d action %d (%s on %s) confirmed: %s "
596 CRM_XS " rc=%d call-id=%d",
597 transition_num, action_num, id, uname,
598 services_ocf_exitcode_str(rc), rc, callid);
599
600 } else {
601 update_failcount(event, event_node, rc, target_rc,
602 (transition_num == -1), ignore_failures);
603 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
604 CRM_XS " target-rc=%d rc=%d call-id=%d",
605 transition_num, action_num, id, uname,
606 services_ocf_exitcode_str(target_rc),
607 services_ocf_exitcode_str(rc),
608 target_rc, rc, callid);
609 }
610
611 bail:
612 free(update_te_uuid);
613 }