This source file includes the following definitions.
- fail_incompletable_actions
- update_failcount
- status_from_rc
- match_graph_event
- get_action
- get_cancel_action
- match_down_event
- process_graph_event
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 #include <crm_internal.h>
20
21 #include <sys/param.h>
22 #include <crm/crm.h>
23 #include <crm/cib.h>
24 #include <crm/msg_xml.h>
25
26 #include <crm/common/xml.h>
27 #include <tengine.h>
28
29 #include <crmd_fsa.h>
30
/* Failcount update value applied when a start fails.  update_failcount()
 * fills it in with INFINITY_S on first use if it is still NULL.
 * NOTE(review): presumably other files may assign this (it is not static) --
 * confirm against the configuration-reading code.
 */
char *failed_start_offset = NULL;

/* Failcount update value applied when a stop fails; handled the same way as
 * failed_start_offset above.
 */
char *failed_stop_offset = NULL;
33
34 gboolean
35 fail_incompletable_actions(crm_graph_t * graph, const char *down_node)
36 {
37 const char *target_uuid = NULL;
38 const char *router = NULL;
39 const char *router_uuid = NULL;
40 xmlNode *last_action = NULL;
41
42 GListPtr gIter = NULL;
43 GListPtr gIter2 = NULL;
44
45 if (graph == NULL || graph->complete) {
46 return FALSE;
47 }
48
49 gIter = graph->synapses;
50 for (; gIter != NULL; gIter = gIter->next) {
51 synapse_t *synapse = (synapse_t *) gIter->data;
52
53 if (synapse->confirmed || synapse->failed) {
54
55 continue;
56 }
57
58 gIter2 = synapse->actions;
59 for (; gIter2 != NULL; gIter2 = gIter2->next) {
60 crm_action_t *action = (crm_action_t *) gIter2->data;
61
62 if (action->type == action_type_pseudo || action->confirmed) {
63 continue;
64 } else if (action->type == action_type_crm) {
65 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
66
67 if (safe_str_eq(task, CRM_OP_FENCE)) {
68 continue;
69 }
70 }
71
72 target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
73 router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
74 if (router) {
75 crm_node_t *node = crm_get_peer(0, router);
76 if (node) {
77 router_uuid = node->uuid;
78 }
79 }
80
81 if (safe_str_eq(target_uuid, down_node) || safe_str_eq(router_uuid, down_node)) {
82 action->failed = TRUE;
83 synapse->failed = TRUE;
84 last_action = action->xml;
85 stop_te_timer(action->timer);
86 update_graph(graph, action);
87
88 if (synapse->executed) {
89 crm_notice("Action %d (%s) was pending on %s (offline)",
90 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
91 } else {
92 crm_info("Action %d (%s) is scheduled for %s (offline)",
93 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
94 }
95 }
96 }
97 }
98
99 if (last_action != NULL) {
100 crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
101 abort_transition(INFINITY, tg_restart, "Node failure", last_action);
102 return TRUE;
103 }
104
105 return FALSE;
106 }
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121 static gboolean
122 update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
123 int target_rc, gboolean do_update, gboolean ignore_failures)
124 {
125 int interval = 0;
126
127 char *task = NULL;
128 char *rsc_id = NULL;
129
130 const char *value = NULL;
131 const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
132 const char *on_uname = crm_peer_uname(event_node_uuid);
133 const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
134
135
136
137
138 if ((rc == CRM_DIRECT_NACK_RC) || (rc == target_rc)) {
139 return FALSE;
140 } else if (safe_str_eq(origin, "build_active_RAs")) {
141 crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
142 id, rc, on_uname);
143 return FALSE;
144 }
145
146
147 CRM_CHECK(on_uname != NULL, return TRUE);
148 CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval),
149 crm_err("Couldn't parse: %s", ID(event)); goto bail);
150 CRM_CHECK(task != NULL, goto bail);
151 CRM_CHECK(rsc_id != NULL, goto bail);
152
153
154 if ((interval > 0) || safe_str_eq(task, CRMD_ACTION_PROMOTE)
155 || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
156 do_update = TRUE;
157
158 } else if (safe_str_eq(task, CRMD_ACTION_START)) {
159 do_update = TRUE;
160 if (failed_start_offset == NULL) {
161 failed_start_offset = strdup(INFINITY_S);
162 }
163 value = failed_start_offset;
164
165 } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
166 do_update = TRUE;
167 if (failed_stop_offset == NULL) {
168 failed_stop_offset = strdup(INFINITY_S);
169 }
170 value = failed_stop_offset;
171 }
172
173
174 if (value == NULL || safe_str_neq(value, INFINITY_S)) {
175 value = XML_NVPAIR_ATTR_VALUE "++";
176 }
177
178 if (do_update) {
179 char *now = crm_itoa(time(NULL));
180 char *attr_name = NULL;
181 gboolean is_remote_node = FALSE;
182
183 if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
184 is_remote_node = TRUE;
185 }
186
187 crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
188 (ignore_failures? "last failure" : "failcount"),
189 rsc_id, on_uname, task, rc, value, now);
190
191
192 if (!ignore_failures) {
193 attr_name = crm_failcount_name(rsc_id, task, interval);
194 update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
195 free(attr_name);
196 }
197
198
199
200
201 attr_name = crm_lastfailure_name(rsc_id, task, interval);
202 update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
203 free(attr_name);
204
205 free(now);
206 }
207
208 bail:
209 free(rsc_id);
210 free(task);
211 return TRUE;
212 }
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228 static int
229 status_from_rc(crm_action_t * action, int orig_status, int rc, int target_rc)
230 {
231 if (target_rc == rc) {
232 crm_trace("Target rc: == %d", rc);
233 if (orig_status != PCMK_LRM_OP_DONE) {
234 crm_trace("Re-mapping op status to PCMK_LRM_OP_DONE for rc=%d", rc);
235 }
236 return PCMK_LRM_OP_DONE;
237 }
238
239 if (rc != CRM_DIRECT_NACK_RC) {
240 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
241 const char *uname = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
242
243 crm_warn("Action %d (%s) on %s failed (target: %d vs. rc: %d): %s",
244 action->id, task, uname, target_rc, rc,
245 services_lrm_status_str(PCMK_LRM_OP_ERROR));
246 }
247 return PCMK_LRM_OP_ERROR;
248 }
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264 static void
265 match_graph_event(crm_action_t *action, xmlNode *event, int op_status,
266 int op_rc, int target_rc, gboolean ignore_failures)
267 {
268 const char *target = NULL;
269 const char *this_event = NULL;
270 const char *ignore_s = "";
271
272
273 op_status = status_from_rc(action, op_status, op_rc, target_rc);
274
275
276 switch (op_status) {
277 case PCMK_LRM_OP_DONE:
278 break;
279 case PCMK_LRM_OP_ERROR:
280 case PCMK_LRM_OP_TIMEOUT:
281 case PCMK_LRM_OP_NOTSUPPORTED:
282 if (ignore_failures) {
283 ignore_s = ", ignoring failure";
284 } else {
285 action->failed = TRUE;
286 }
287 break;
288 case PCMK_LRM_OP_CANCELLED:
289
290 crm_err("Don't know what to do for cancelled ops yet");
291 break;
292 default:
293
294
295
296
297
298 action->failed = TRUE;
299 crm_err("Unsupported action result: %d", op_status);
300 }
301
302
303 stop_te_timer(action->timer);
304 te_action_confirmed(action);
305
306 update_graph(transition_graph, action);
307 trigger_graph();
308
309 if (action->failed) {
310 abort_transition(action->synapse->priority + 1, tg_restart, "Event failed", event);
311 }
312
313 this_event = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
314 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
315 crm_info("Action %s (%d) confirmed on %s (rc=%d%s)",
316 crm_str(this_event), action->id, crm_str(target), op_rc, ignore_s);
317 }
318
319 crm_action_t *
320 get_action(int id, gboolean confirmed)
321 {
322 GListPtr gIter = NULL;
323 GListPtr gIter2 = NULL;
324
325 gIter = transition_graph->synapses;
326 for (; gIter != NULL; gIter = gIter->next) {
327 synapse_t *synapse = (synapse_t *) gIter->data;
328
329 gIter2 = synapse->actions;
330 for (; gIter2 != NULL; gIter2 = gIter2->next) {
331 crm_action_t *action = (crm_action_t *) gIter2->data;
332
333 if (action->id == id) {
334 if (confirmed) {
335 stop_te_timer(action->timer);
336 te_action_confirmed(action);
337 }
338 return action;
339 }
340 }
341 }
342
343 return NULL;
344 }
345
346 crm_action_t *
347 get_cancel_action(const char *id, const char *node)
348 {
349 GListPtr gIter = NULL;
350 GListPtr gIter2 = NULL;
351
352 gIter = transition_graph->synapses;
353 for (; gIter != NULL; gIter = gIter->next) {
354 synapse_t *synapse = (synapse_t *) gIter->data;
355
356 gIter2 = synapse->actions;
357 for (; gIter2 != NULL; gIter2 = gIter2->next) {
358 const char *task = NULL;
359 const char *target = NULL;
360 crm_action_t *action = (crm_action_t *) gIter2->data;
361
362 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
363 if (safe_str_neq(CRMD_ACTION_CANCEL, task)) {
364 continue;
365 }
366
367 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
368 if (safe_str_neq(task, id)) {
369 crm_trace("Wrong key %s for %s on %s", task, id, node);
370 continue;
371 }
372
373 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
374 if (node && safe_str_neq(target, node)) {
375 crm_trace("Wrong node %s for %s on %s", target, id, node);
376 continue;
377 }
378
379 crm_trace("Found %s on %s", id, node);
380 return action;
381 }
382 }
383
384 return NULL;
385 }
386
387
388 #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
389 "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
390
391
392
393
394
395
396
397
398
399 crm_action_t *
400 match_down_event(const char *target, bool quiet)
401 {
402 crm_action_t *match = NULL;
403 xmlXPathObjectPtr xpath_ret = NULL;
404 GListPtr gIter, gIter2;
405
406 char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
407
408 for (gIter = transition_graph->synapses;
409 gIter != NULL && match == NULL;
410 gIter = gIter->next) {
411
412 for (gIter2 = ((synapse_t*)gIter->data)->actions;
413 gIter2 != NULL && match == NULL;
414 gIter2 = gIter2->next) {
415
416 match = (crm_action_t*)gIter2->data;
417 xpath_ret = xpath_search(match->xml, xpath);
418 if (numXpathResults(xpath_ret) < 1) {
419 match = NULL;
420 }
421 freeXpathObject(xpath_ret);
422 }
423 }
424
425 free(xpath);
426
427 if (match != NULL) {
428 crm_debug("Shutdown action found for node %s: action %d (%s)",
429 target, match->id,
430 crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY));
431
432 } else if(quiet == FALSE) {
433 crm_warn("No reason to expect node %s to be down", target);
434 }
435
436 return match;
437 }
438
439 gboolean
440 process_graph_event(xmlNode * event, const char *event_node)
441 {
442 int rc = -1;
443 int status = -1;
444 int callid = -1;
445
446 int action_num = -1;
447 crm_action_t *action = NULL;
448
449 int target_rc = -1;
450 int transition_num = -1;
451 char *update_te_uuid = NULL;
452
453 gboolean stop_early = FALSE;
454 gboolean ignore_failures = FALSE;
455 const char *id = NULL;
456 const char *desc = NULL;
457 const char *magic = NULL;
458
459 CRM_ASSERT(event != NULL);
460
461
462
463
464
465 id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
466 crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
467 crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
468 crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
469
470 magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
471 if (magic == NULL) {
472
473 return FALSE;
474 }
475
476 if (decode_transition_key(magic, &update_te_uuid, &transition_num,
477 &action_num, &target_rc) == FALSE) {
478 crm_err("Invalid event %s.%d detected: %s", id, callid, magic);
479 abort_transition(INFINITY, tg_restart, "Bad event", event);
480 return FALSE;
481 }
482
483 if (status == PCMK_LRM_OP_PENDING) {
484 goto bail;
485 }
486
487 if (transition_num == -1) {
488 desc = "initiated outside of the cluster";
489 abort_transition(INFINITY, tg_restart, "Unexpected event", event);
490
491 } else if ((action_num < 0) || (crm_str_eq(update_te_uuid, te_uuid, TRUE) == FALSE)) {
492 desc = "initiated by a different node";
493 abort_transition(INFINITY, tg_restart, "Foreign event", event);
494 stop_early = TRUE;
495
496 } else if (transition_graph->id != transition_num) {
497 desc = "arrived really late";
498 abort_transition(INFINITY, tg_restart, "Old event", event);
499 stop_early = TRUE;
500
501 } else if (transition_graph->complete) {
502 desc = "arrived late";
503 abort_transition(INFINITY, tg_restart, "Inactive graph", event);
504
505 } else {
506 action = get_action(action_num, FALSE);
507
508 if (action == NULL) {
509 desc = "unknown";
510 abort_transition(INFINITY, tg_restart, "Unknown event", event);
511
512 } else {
513 ignore_failures = safe_str_eq(
514 crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore");
515 match_graph_event(action, event, status, rc, target_rc, ignore_failures);
516 }
517 }
518
519 if (action && (rc == target_rc)) {
520 crm_trace("Processed update to %s: %s", id, magic);
521 } else {
522 if (update_failcount(event, event_node, rc, target_rc,
523 (transition_num == -1), ignore_failures)) {
524
525 stop_early = FALSE;
526 desc = "failed";
527 }
528 crm_info("Detected action (%d.%d) %s.%d=%s: %s", transition_num,
529 action_num, id, callid, services_ocf_exitcode_str(rc), desc);
530 }
531
532 bail:
533 free(update_te_uuid);
534 return stop_early;
535 }