This source file includes the following definitions.
- fail_incompletable_actions
- update_failcount
- controld_get_action
- get_cancel_action
- confirm_cancel_action
- match_down_event
- process_graph_event
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/msg_xml.h>
16 #include <crm/common/xml.h>
17
18 #include <pacemaker-controld.h>
19
20 #include <crm/common/attrd_internal.h>
21 #include <crm/common/ipc_attrd_internal.h>
22
/* Fail-count update values used for failed stop and start operations.
 * Both start out NULL; update_failcount() lazily sets each one to a copy of
 * CRM_INFINITY_S the first time the matching operation fails, and then uses
 * it as the value for the fail-count attribute update.
 * NOTE(review): these are not static, so they are presumably assigned from
 * configuration elsewhere in the controller -- confirm against callers.
 */
23 char *failed_stop_offset = NULL;
24 char *failed_start_offset = NULL;
25
26 gboolean
27 fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node)
28 {
29 const char *target_uuid = NULL;
30 const char *router = NULL;
31 const char *router_uuid = NULL;
32 xmlNode *last_action = NULL;
33
34 GList *gIter = NULL;
35 GList *gIter2 = NULL;
36
37 if (graph == NULL || graph->complete) {
38 return FALSE;
39 }
40
41 gIter = graph->synapses;
42 for (; gIter != NULL; gIter = gIter->next) {
43 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
44
45 if (pcmk_any_flags_set(synapse->flags, pcmk__synapse_confirmed|pcmk__synapse_failed)) {
46
47 continue;
48 }
49
50 gIter2 = synapse->actions;
51 for (; gIter2 != NULL; gIter2 = gIter2->next) {
52 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
53
54 if ((action->type == pcmk__pseudo_graph_action)
55 || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
56 continue;
57 } else if (action->type == pcmk__cluster_graph_action) {
58 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
59
60 if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
61 continue;
62 }
63 }
64
65 target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
66 router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
67 if (router) {
68 crm_node_t *node = crm_get_peer(0, router);
69 if (node) {
70 router_uuid = node->uuid;
71 }
72 }
73
74 if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
75 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
76 pcmk__set_synapse_flags(synapse, pcmk__synapse_failed);
77 last_action = action->xml;
78 stop_te_timer(action);
79 pcmk__update_graph(graph, action);
80
81 if (pcmk_is_set(synapse->flags, pcmk__synapse_executed)) {
82 crm_notice("Action %d (%s) was pending on %s (offline)",
83 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
84 } else {
85 crm_info("Action %d (%s) is scheduled for %s (offline)",
86 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
87 }
88 }
89 }
90 }
91
92 if (last_action != NULL) {
93 crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
94 abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
95 last_action);
96 return TRUE;
97 }
98
99 return FALSE;
100 }
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115 static gboolean
116 update_failcount(const xmlNode *event, const char *event_node_uuid, int rc,
117 int target_rc, gboolean do_update, gboolean ignore_failures)
118 {
119 guint interval_ms = 0;
120
121 char *task = NULL;
122 char *rsc_id = NULL;
123
124 const char *value = NULL;
125 const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
126 const char *on_uname = crm_peer_uname(event_node_uuid);
127 const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
128
129
130 if (rc == target_rc) {
131 return FALSE;
132 } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
133 crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
134 id, rc, on_uname);
135 return FALSE;
136 }
137
138
139 CRM_CHECK(on_uname != NULL, return TRUE);
140 CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
141 crm_err("Couldn't parse: %s", ID(event)); goto bail);
142
143
144 if ((interval_ms > 0) || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)
145 || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
146 do_update = TRUE;
147
148 } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
149 do_update = TRUE;
150 if (failed_start_offset == NULL) {
151 failed_start_offset = strdup(CRM_INFINITY_S);
152 }
153 value = failed_start_offset;
154
155 } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
156 do_update = TRUE;
157 if (failed_stop_offset == NULL) {
158 failed_stop_offset = strdup(CRM_INFINITY_S);
159 }
160 value = failed_stop_offset;
161 }
162
163
164 if (!pcmk_str_is_infinity(value)) {
165 value = XML_NVPAIR_ATTR_VALUE "++";
166 }
167
168 if (do_update) {
169 pcmk__attrd_query_pair_t *fail_pair = NULL;
170 pcmk__attrd_query_pair_t *last_pair = NULL;
171 char *fail_name = NULL;
172 char *last_name = NULL;
173 GList *attrs = NULL;
174
175 uint32_t opts = pcmk__node_attr_none;
176
177 char *now = pcmk__ttoa(time(NULL));
178
179 if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
180 opts |= pcmk__node_attr_remote;
181 }
182
183 crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
184 (ignore_failures? "last failure" : "failcount"),
185 rsc_id, on_uname, task, rc, value, now);
186
187
188 if (!ignore_failures) {
189 fail_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
190 CRM_ASSERT(fail_pair != NULL);
191
192 fail_name = pcmk__failcount_name(rsc_id, task, interval_ms);
193 fail_pair->name = fail_name;
194 fail_pair->value = value;
195 fail_pair->node = on_uname;
196
197 attrs = g_list_prepend(attrs, fail_pair);
198 }
199
200
201
202
203 last_pair = calloc(1, sizeof(pcmk__attrd_query_pair_t));
204 CRM_ASSERT(last_pair != NULL);
205
206 last_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
207 last_pair->name = last_name;
208 last_pair->value = now;
209 last_pair->node = on_uname;
210
211 attrs = g_list_prepend(attrs, last_pair);
212
213 update_attrd_list(attrs, opts);
214
215 if (!ignore_failures) {
216 free(fail_name);
217 free(fail_pair);
218 }
219
220 free(last_name);
221 free(last_pair);
222 g_list_free(attrs);
223
224 free(now);
225 }
226
227 bail:
228 free(rsc_id);
229 free(task);
230 return TRUE;
231 }
232
233 pcmk__graph_action_t *
234 controld_get_action(int id)
235 {
236 for (GList *item = transition_graph->synapses; item; item = item->next) {
237 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) item->data;
238
239 for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
240 pcmk__graph_action_t *action = (pcmk__graph_action_t *) item2->data;
241
242 if (action->id == id) {
243 return action;
244 }
245 }
246 }
247 return NULL;
248 }
249
250 pcmk__graph_action_t *
251 get_cancel_action(const char *id, const char *node)
252 {
253 GList *gIter = NULL;
254 GList *gIter2 = NULL;
255
256 gIter = transition_graph->synapses;
257 for (; gIter != NULL; gIter = gIter->next) {
258 pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) gIter->data;
259
260 gIter2 = synapse->actions;
261 for (; gIter2 != NULL; gIter2 = gIter2->next) {
262 const char *task = NULL;
263 const char *target = NULL;
264 pcmk__graph_action_t *action = (pcmk__graph_action_t *) gIter2->data;
265
266 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
267 if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) {
268 continue;
269 }
270
271 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
272 if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
273 crm_trace("Wrong key %s for %s on %s", task, id, node);
274 continue;
275 }
276
277 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
278 if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
279 crm_trace("Wrong node %s for %s on %s", target, id, node);
280 continue;
281 }
282
283 crm_trace("Found %s on %s", id, node);
284 return action;
285 }
286 }
287
288 return NULL;
289 }
290
291 bool
292 confirm_cancel_action(const char *id, const char *node_id)
293 {
294 const char *op_key = NULL;
295 const char *node_name = NULL;
296 pcmk__graph_action_t *cancel = get_cancel_action(id, node_id);
297
298 if (cancel == NULL) {
299 return FALSE;
300 }
301 op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
302 node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
303
304 stop_te_timer(cancel);
305 te_action_confirmed(cancel, transition_graph);
306
307 crm_info("Cancellation of %s on %s confirmed (action %d)",
308 op_key, node_name, cancel->id);
309 return TRUE;
310 }
311
312
313 #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
314 "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
315
316
317
318
319
320
321
322
323 pcmk__graph_action_t *
324 match_down_event(const char *target)
325 {
326 pcmk__graph_action_t *match = NULL;
327 xmlXPathObjectPtr xpath_ret = NULL;
328 GList *gIter, *gIter2;
329
330 char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
331
332 for (gIter = transition_graph->synapses;
333 gIter != NULL && match == NULL;
334 gIter = gIter->next) {
335
336 for (gIter2 = ((pcmk__graph_synapse_t * ) gIter->data)->actions;
337 gIter2 != NULL && match == NULL;
338 gIter2 = gIter2->next) {
339
340 match = (pcmk__graph_action_t *) gIter2->data;
341 if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
342 xpath_ret = xpath_search(match->xml, xpath);
343 if (numXpathResults(xpath_ret) < 1) {
344 match = NULL;
345 }
346 freeXpathObject(xpath_ret);
347 } else {
348
349 match = NULL;
350 }
351 }
352 }
353
354 free(xpath);
355
356 if (match != NULL) {
357 crm_debug("Shutdown action %d (%s) found for node %s", match->id,
358 crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
359 } else {
360 crm_debug("No reason to expect node %s to be down", target);
361 }
362 return match;
363 }
364
365 void
366 process_graph_event(xmlNode *event, const char *event_node)
367 {
368 int rc = -1;
369 int target_rc = -1;
370 int status = -1;
371 int callid = -1;
372 int transition_num = -1;
373 int action_num = -1;
374 char *update_te_uuid = NULL;
375 bool ignore_failures = FALSE;
376 const char *id = NULL;
377 const char *desc = NULL;
378 const char *magic = NULL;
379 const char *uname = NULL;
380
381 CRM_ASSERT(event != NULL);
382
383
384
385
386
387 magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
388 if (magic == NULL) {
389
390 return;
391 }
392
393 crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
394 if (status == PCMK_EXEC_PENDING) {
395 return;
396 }
397
398 id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
399 crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
400 crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
401
402 rc = pcmk__effective_rc(rc);
403
404 if (decode_transition_key(magic, &update_te_uuid, &transition_num,
405 &action_num, &target_rc) == FALSE) {
406
407 crm_err("Can't process action %s result: Incompatible versions? "
408 CRM_XS " call-id=%d", id, callid);
409 abort_transition(INFINITY, pcmk__graph_restart, "Bad event", event);
410 return;
411 }
412
413 if (transition_num == -1) {
414
415 desc = "initiated outside of the cluster";
416 abort_transition(INFINITY, pcmk__graph_restart, "Unexpected event",
417 event);
418
419 } else if ((action_num < 0) || !pcmk__str_eq(update_te_uuid, te_uuid, pcmk__str_none)) {
420 desc = "initiated by a different DC";
421 abort_transition(INFINITY, pcmk__graph_restart, "Foreign event", event);
422
423 } else if ((transition_graph->id != transition_num)
424 || transition_graph->complete) {
425
426
427
428 guint interval_ms = 0;
429
430 if (parse_op_key(id, NULL, NULL, &interval_ms)
431 && (interval_ms != 0)) {
432
433
434
435
436 if (status == PCMK_EXEC_CANCELLED) {
437 confirm_cancel_action(id, get_node_id(event));
438 goto bail;
439 }
440
441 desc = "arrived after initial scheduling";
442 abort_transition(INFINITY, pcmk__graph_restart,
443 "Change in recurring result", event);
444
445 } else if (transition_graph->id != transition_num) {
446 desc = "arrived really late";
447 abort_transition(INFINITY, pcmk__graph_restart, "Old event", event);
448 } else {
449 desc = "arrived late";
450 abort_transition(INFINITY, pcmk__graph_restart, "Inactive graph",
451 event);
452 }
453
454 } else {
455
456 pcmk__graph_action_t *action = controld_get_action(action_num);
457
458 if (action == NULL) {
459
460 desc = "unknown";
461 abort_transition(INFINITY, pcmk__graph_restart, "Unknown event",
462 event);
463
464 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
465
466
467
468
469
470
471 crm_log_xml_debug(event, "Event already confirmed:");
472 goto bail;
473
474 } else {
475
476
477
478
479 if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
480 ignore_failures = TRUE;
481
482 } else if (rc != target_rc) {
483 pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
484 }
485
486 stop_te_timer(action);
487 te_action_confirmed(action, transition_graph);
488
489 if (pcmk_is_set(action->flags, pcmk__graph_action_failed)) {
490 abort_transition(action->synapse->priority + 1,
491 pcmk__graph_restart, "Event failed", event);
492 }
493 }
494 }
495
496 if (id == NULL) {
497 id = "unknown action";
498 }
499 uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
500 if (uname == NULL) {
501 uname = "unknown node";
502 }
503
504 if (status == PCMK_EXEC_INVALID) {
505
506 crm_info("Transition %d action %d (%s on %s): %s",
507 transition_num, action_num, id, uname,
508 pcmk_exec_status_str(status));
509
510 } else if (desc && update_failcount(event, event_node, rc, target_rc,
511 (transition_num == -1), FALSE)) {
512 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
513 CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
514 transition_num, action_num, id, uname,
515 services_ocf_exitcode_str(target_rc),
516 services_ocf_exitcode_str(rc),
517 target_rc, rc, callid, desc);
518
519 } else if (desc) {
520 crm_info("Transition %d action %d (%s on %s): %s "
521 CRM_XS " rc=%d target-rc=%d call-id=%d",
522 transition_num, action_num, id, uname,
523 desc, rc, target_rc, callid);
524
525 } else if (rc == target_rc) {
526 crm_info("Transition %d action %d (%s on %s) confirmed: %s "
527 CRM_XS " rc=%d call-id=%d",
528 transition_num, action_num, id, uname,
529 services_ocf_exitcode_str(rc), rc, callid);
530
531 } else {
532 update_failcount(event, event_node, rc, target_rc,
533 (transition_num == -1), ignore_failures);
534 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
535 CRM_XS " target-rc=%d rc=%d call-id=%d",
536 transition_num, action_num, id, uname,
537 services_ocf_exitcode_str(target_rc),
538 services_ocf_exitcode_str(rc),
539 target_rc, rc, callid);
540 }
541
542 bail:
543 free(update_te_uuid);
544 }