This source file includes the following definitions:
- controld_remove_all_outside_events
- controld_destroy_outside_events_table
- record_outside_event
- fail_incompletable_actions
- update_failcount
- controld_get_action
- get_cancel_action
- confirm_cancel_action
- match_down_event
- process_graph_event
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/msg_xml.h>
16 #include <crm/common/xml.h>
17
18 #include <pacemaker-controld.h>
19
20 #include <crm/common/attrd_internal.h>
21 #include <crm/common/ipc_attrd_internal.h>
22
23
24
25
26
27
28
29
30
31
32
33 static GHashTable *outside_events = NULL;
34
35
36
37
38
39 void
40 controld_remove_all_outside_events(void)
41 {
42 if (outside_events != NULL) {
43 g_hash_table_remove_all(outside_events);
44 }
45 }
46
47
48
49
50
51 void
52 controld_destroy_outside_events_table(void)
53 {
54 if (outside_events != NULL) {
55 g_hash_table_destroy(outside_events);
56 outside_events = NULL;
57 }
58 }
59
60
61
62
63
64
65
66
67 static int
68 record_outside_event(gint action_num)
69 {
70 if (outside_events == NULL) {
71 outside_events = g_hash_table_new(NULL, NULL);
72 }
73
74 if (g_hash_table_add(outside_events, GINT_TO_POINTER(action_num))) {
75 return pcmk_rc_ok;
76 }
77 return pcmk_rc_already;
78 }
79
80 gboolean
81 fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
82 {
83 const char *target_uuid = NULL;
84 const char *router = NULL;
85 const char *router_uuid = NULL;
86 xmlNode *last_action = NULL;
87
88 GList *gIter = NULL;
89 GList *gIter2 = NULL;
90
91 if (graph == NULL || graph->complete) {
92 return FALSE;
93 }
94
95 gIter = graph->synapses;
96 for (; gIter != NULL; gIter = gIter->next) {
97 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
98
99 if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
100
101 continue;
102 }
103
104 gIter2 = synapse->actions;
105 for (; gIter2 != NULL; gIter2 = gIter2->next) {
106 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
107
108 if ((action->type == pcmk__pseudo_graph_action)
109 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
110 continue;
111 } else if (action->type == pcmk__cluster_graph_action) {
112 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
113
114 if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
115 continue;
116 }
117 }
118
119 target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
120 router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
121 if (router) {
122 crm_node_t *node = crm_get_peer(0, router);
123 if (node) {
124 router_uuid = node->uuid;
125 }
126 }
127
128 if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
129 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
130 pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
131 last_action = action->xml;
132 stop_te_timer(action);
133 pcmk__update_graph(graph, action);
134
135 if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
136 crm_notice("Action %d (%s) was pending on %s (offline)",
137 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
138 } else {
139 crm_info("Action %d (%s) is scheduled for %s (offline)",
140 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
141 }
142 }
143 }
144 }
145
146 if (last_action != NULL) {
147 crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
148 abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
149 last_action);
150 return TRUE;
151 }
152
153 return FALSE;
154 }
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169 static gboolean
170 update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
171 int target_rc, gboolean do_update, gboolean ignore_failures)
172 {
173 guint interval_ms = 0;
174
175 char *task = NULL;
176 char *rsc_id = NULL;
177
178 const char *value = NULL;
179 const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
180 const char *on_uname = crm_peer_uname(event_node_uuid);
181 const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
182
183
184 if (rc == target_rc) {
185 return FALSE;
186 } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
187 crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
188 id, rc, on_uname);
189 return FALSE;
190 }
191
192
193 CRM_CHECK(on_uname != NULL, return TRUE);
194 CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
195 crm_err("Couldn't parse: %s", ID(event)); goto bail);
196
197
198 if ((interval_ms > 0)
199 || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_none)
200 || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_none)) {
201 do_update = TRUE;
202
203 } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_none)) {
204 do_update = TRUE;
205 value = pcmk__s(controld_globals.transition_graph->failed_start_offset,
206 CRM_INFINITY_S);
207
208 } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_none)) {
209 do_update = TRUE;
210 value = pcmk__s(controld_globals.transition_graph->failed_stop_offset,
211 CRM_INFINITY_S);
212 }
213
214 if (do_update) {
215 pcmk__attrd_query_pair_t *fail_pair = NULL;
216 pcmk__attrd_query_pair_t *last_pair = NULL;
217 char *fail_name = NULL;
218 char *last_name = NULL;
219 GList *attrs = NULL;
220
221 uint32_t opts = pcmk__node_attr_none;
222
223 char *now = pcmk__ttoa(time(NULL));
224
225
226 if (!pcmk_str_is_infinity(value)) {
227 value = XML_NVPAIR_ATTR_VALUE "++";
228 }
229
230 if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
231 opts |= pcmk__node_attr_remote;
232 }
233
234 crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
235 (ignore_failures? "last failure" : "failcount"),
236 rsc_id, on_uname, task, rc, value, now);
237
238
239 if (!ignore_failures) {
240 fail_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
241 CRM_ASSERT(fail_pair != NULL);
242
243 fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
244 fail_pair->name = fail_name;
245 fail_pair->value = value;
246 fail_pair->node = on_uname;
247
248 attrs = g_list_prepend(attrs, fail_pair);
249 }
250
251
252
253
254 last_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
255 CRM_ASSERT(last_pair != NULL);
256
257 last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
258 last_pair->name = last_name;
259 last_pair->value = now;
260 last_pair->node = on_uname;
261
262 attrs = g_list_prepend(attrs, last_pair);
263
264 update_attrd_list(attrs, opts);
265
266 free(fail_name);
267 free(fail_pair);
268
269 free(last_name);
270 free(last_pair);
271 g_list_free(attrs);
272
273 free(now);
274 }
275
276 bail:
277 free(rsc_id);
278 free(task);
279 return TRUE;
280 }
281
282 pcmk__graph_action_t *
283 controld_get_action(int id)
284 {
285 for (GList *item = controld_globals.transition_graph->synapses;
286 item != NULL; item = item->next) {
287 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
288
289 for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
290 pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
291
292 if (action->id == id) {
293 return action;
294 }
295 }
296 }
297 return NULL;
298 }
299
300 pcmk__graph_action_t *
301 get_cancel_action(const char *id, const char *node)
302 {
303 GList *gIter = NULL;
304 GList *gIter2 = NULL;
305
306 gIter = controld_globals.transition_graph->synapses;
307 for (; gIter != NULL; gIter = gIter->next) {
308 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
309
310 gIter2 = synapse->actions;
311 for (; gIter2 != NULL; gIter2 = gIter2->next) {
312 const char *task = NULL;
313 const char *target = NULL;
314 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
315
316 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
317 if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) {
318 continue;
319 }
320
321 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
322 if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
323 crm_trace("Wrong key %s for %s on %s", task, id, node);
324 continue;
325 }
326
327 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
328 if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
329 crm_trace("Wrong node %s for %s on %s", target, id, node);
330 continue;
331 }
332
333 crm_trace("Found %s on %s", id, node);
334 return action;
335 }
336 }
337
338 return NULL;
339 }
340
341 bool
342 confirm_cancel_action(const char *id, const char *node_id)
343 {
344 const char *op_key = NULL;
345 const char *node_name = NULL;
346 pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
347
348 if (cancel == NULL) {
349 return FALSE;
350 }
351 op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
352 node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
353
354 stop_te_timer(cancel);
355 te_action_confirmed(cancel, controld_globals.transition_graph);
356
357 crm_info("Cancellation of %s on %s confirmed (action %d)",
358 op_key, node_name, cancel->id);
359 return TRUE;
360 }
361
362
363 #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
364 "/" XML_CIB_TAG_NODE "[@" XML_ATTR_ID "='%s']"
365
366
367
368
369
370
371
372
373 pcmk__graph_action_t *
374 match_down_event(const char *target)
375 {
376 pcmk__graph_action_t *match = NULL;
377 xmlXPathObjectPtr xpath_ret = NULL;
378 GList *gIter, *gIter2;
379
380 char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
381
382 for (gIter = controld_globals.transition_graph->synapses;
383 gIter != NULL && match == NULL;
384 gIter = gIter->next) {
385
386 for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
387 gIter2 != NULL && match == NULL;
388 gIter2 = gIter2->next) {
389
390 match = (pcmk__graph_action_t *) gIter2->data;
391 if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
392 xpath_ret = xpath_search(match->xml, xpath);
393 if (numXpathResults(xpath_ret) < 1) {
394 match = NULL;
395 }
396 freeXpathObject(xpath_ret);
397 } else {
398
399 match = NULL;
400 }
401 }
402 }
403
404 free(xpath);
405
406 if (match != NULL) {
407 crm_debug("Shutdown action %d (%s) found for node %s", match->id,
408 crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
409 } else {
410 crm_debug("No reason to expect node %s to be down", target);
411 }
412 return match;
413 }
414
415 void
416 process_graph_event(xmlNode *event, const char *event_node)
417 {
418 int rc = -1;
419 int target_rc = -1;
420 int status = -1;
421 int callid = -1;
422 int transition_num = -1;
423 int action_num = -1;
424 char *update_te_uuid = NULL;
425 bool ignore_failures = FALSE;
426 const char *id = NULL;
427 const char *desc = NULL;
428 const char *magic = NULL;
429 const char *uname = NULL;
430
431 CRM_ASSERT(event != NULL);
432
433
434
435
436
437 magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
438 if (magic == NULL) {
439
440 return;
441 }
442
443 crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
444 if (status == PCMK_EXEC_PENDING) {
445 return;
446 }
447
448 id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
449 crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
450 crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
451
452 rc = pcmk__effective_rc(rc);
453
454 if (decode_transition_key(magic, &update_te_uuid, &transition_num,
455 &action_num, &target_rc) == FALSE) {
456
457 crm_err("Can't process action %s result: Incompatible versions? "
458 CRM_XS " call-id=%d", id, callid);
459 abort_transition(INFINITY, pcmk__graph_restart, "Bad event", event);
460 return;
461 }
462
463 if (transition_num == -1) {
464
465 if (record_outside_event(action_num) != pcmk_rc_ok) {
466 crm_debug("Outside event with transition key '%s' has already been "
467 "processed", magic);
468 goto bail;
469 }
470 desc = "initiated outside of the cluster";
471 abort_transition(INFINITY, pcmk__graph_restart, "Unexpected event",
472 event);
473
474 } else if ((action_num < 0)
475 || !pcmk__str_eq(update_te_uuid, controld_globals.te_uuid,
476 pcmk__str_none)) {
477 desc = "initiated by a different DC";
478 abort_transition(INFINITY, pcmk__graph_restart, "Foreign event", event);
479
480 } else if ((controld_globals.transition_graph->id != transition_num)
481 || controld_globals.transition_graph->complete) {
482
483
484
485 guint interval_ms = 0;
486
487 if (parse_op_key(id, NULL, NULL, &interval_ms)
488 && (interval_ms != 0)) {
489
490
491
492
493 if (status == PCMK_EXEC_CANCELLED) {
494 confirm_cancel_action(id, get_node_id(event));
495 goto bail;
496 }
497
498 desc = "arrived after initial scheduling";
499 abort_transition(INFINITY, pcmk__graph_restart,
500 "Change in recurring result", event);
501
502 } else if (controld_globals.transition_graph->id != transition_num) {
503 desc = "arrived really late";
504 abort_transition(INFINITY, pcmk__graph_restart, "Old event", event);
505 } else {
506 desc = "arrived late";
507 abort_transition(INFINITY, pcmk__graph_restart, "Inactive graph",
508 event);
509 }
510
511 } else {
512
513 pcmk__graph_action_t *action = controld_get_action(action_num);
514
515 if (action == NULL) {
516
517 desc = "unknown";
518 abort_transition(INFINITY, pcmk__graph_restart, "Unknown event",
519 event);
520
521 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
522
523
524
525
526
527
528 crm_log_xml_debug(event, "Event already confirmed:");
529 goto bail;
530
531 } else {
532
533
534
535
536 if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
537 ignore_failures = TRUE;
538
539 } else if (rc != target_rc) {
540 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
541 }
542
543 stop_te_timer(action);
544 te_action_confirmed(action, controld_globals.transition_graph);
545
546 if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
547 abort_transition(action->synapse->priority + 1,
548 pcmk__graph_restart, "Event failed", event);
549 }
550 }
551 }
552
553 if (id == NULL) {
554 id = "unknown action";
555 }
556 uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
557 if (uname == NULL) {
558 uname = "unknown node";
559 }
560
561 if (status == PCMK_EXEC_INVALID) {
562
563 crm_info("Transition %d action %d (%s on %s): %s",
564 transition_num, action_num, id, uname,
565 pcmk_exec_status_str(status));
566
567 } else if (desc && update_failcount(event, event_node, rc, target_rc,
568 (transition_num == -1), FALSE)) {
569 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
570 CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
571 transition_num, action_num, id, uname,
572 services_ocf_exitcode_str(target_rc),
573 services_ocf_exitcode_str(rc),
574 target_rc, rc, callid, desc);
575
576 } else if (desc) {
577 crm_info("Transition %d action %d (%s on %s): %s "
578 CRM_XS " rc=%d target-rc=%d call-id=%d",
579 transition_num, action_num, id, uname,
580 desc, rc, target_rc, callid);
581
582 } else if (rc == target_rc) {
583 crm_info("Transition %d action %d (%s on %s) confirmed: %s "
584 CRM_XS " rc=%d call-id=%d",
585 transition_num, action_num, id, uname,
586 services_ocf_exitcode_str(rc), rc, callid);
587
588 } else {
589 update_failcount(event, event_node, rc, target_rc,
590 (transition_num == -1), ignore_failures);
591 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
592 CRM_XS " target-rc=%d rc=%d call-id=%d",
593 transition_num, action_num, id, uname,
594 services_ocf_exitcode_str(target_rc),
595 services_ocf_exitcode_str(rc),
596 target_rc, rc, callid);
597 }
598
599 bail:
600 free(update_te_uuid);
601 }