This source file includes the following definitions.
- fail_incompletable_actions
- update_failcount
- controld_get_action
- get_cancel_action
- confirm_cancel_action
- match_down_event
- process_graph_event
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/msg_xml.h>
16 #include <crm/common/xml.h>
17
18 #include <pacemaker-controld.h>
19
/* Fail-count update value used by update_failcount() after a failed stop.
 * While still NULL, update_failcount() sets it to a copy of CRM_INFINITY_S.
 * NOTE(review): not static, so presumably assigned elsewhere -- confirm.
 */
char *failed_stop_offset = NULL;
/* Fail-count update value used by update_failcount() after a failed start.
 * While still NULL, update_failcount() sets it to a copy of CRM_INFINITY_S.
 * NOTE(review): not static, so presumably assigned elsewhere -- confirm.
 */
char *failed_start_offset = NULL;
22
23 gboolean
24 fail_incompletable_actions(crm_graph_t * graph, const char *down_node)
25 {
26 const char *target_uuid = NULL;
27 const char *router = NULL;
28 const char *router_uuid = NULL;
29 xmlNode *last_action = NULL;
30
31 GList *gIter = NULL;
32 GList *gIter2 = NULL;
33
34 if (graph == NULL || graph->complete) {
35 return FALSE;
36 }
37
38 gIter = graph->synapses;
39 for (; gIter != NULL; gIter = gIter->next) {
40 synapse_t *synapse = (synapse_t *) gIter->data;
41
42 if (synapse->confirmed || synapse->failed) {
43
44 continue;
45 }
46
47 gIter2 = synapse->actions;
48 for (; gIter2 != NULL; gIter2 = gIter2->next) {
49 crm_action_t *action = (crm_action_t *) gIter2->data;
50
51 if (action->type == action_type_pseudo || action->confirmed) {
52 continue;
53 } else if (action->type == action_type_crm) {
54 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
55
56 if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
57 continue;
58 }
59 }
60
61 target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
62 router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
63 if (router) {
64 crm_node_t *node = crm_get_peer(0, router);
65 if (node) {
66 router_uuid = node->uuid;
67 }
68 }
69
70 if (pcmk__str_eq(target_uuid, down_node, pcmk__str_casei) || pcmk__str_eq(router_uuid, down_node, pcmk__str_casei)) {
71 action->failed = TRUE;
72 synapse->failed = TRUE;
73 last_action = action->xml;
74 stop_te_timer(action->timer);
75 update_graph(graph, action);
76
77 if (synapse->executed) {
78 crm_notice("Action %d (%s) was pending on %s (offline)",
79 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
80 } else {
81 crm_info("Action %d (%s) is scheduled for %s (offline)",
82 action->id, crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY), down_node);
83 }
84 }
85 }
86 }
87
88 if (last_action != NULL) {
89 crm_info("Node %s shutdown resulted in un-runnable actions", down_node);
90 abort_transition(INFINITY, tg_restart, "Node failure", last_action);
91 return TRUE;
92 }
93
94 return FALSE;
95 }
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110 static gboolean
111 update_failcount(xmlNode * event, const char *event_node_uuid, int rc,
112 int target_rc, gboolean do_update, gboolean ignore_failures)
113 {
114 guint interval_ms = 0;
115
116 char *task = NULL;
117 char *rsc_id = NULL;
118
119 const char *value = NULL;
120 const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
121 const char *on_uname = crm_peer_uname(event_node_uuid);
122 const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);
123
124
125 if (rc == target_rc) {
126 return FALSE;
127 } else if (pcmk__str_eq(origin, "build_active_RAs", pcmk__str_casei)) {
128 crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
129 id, rc, on_uname);
130 return FALSE;
131 }
132
133
134 CRM_CHECK(on_uname != NULL, return TRUE);
135 CRM_CHECK(parse_op_key(id, &rsc_id, &task, &interval_ms),
136 crm_err("Couldn't parse: %s", ID(event)); goto bail);
137
138
139 if ((interval_ms > 0) || pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)
140 || pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
141 do_update = TRUE;
142
143 } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
144 do_update = TRUE;
145 if (failed_start_offset == NULL) {
146 failed_start_offset = strdup(CRM_INFINITY_S);
147 }
148 value = failed_start_offset;
149
150 } else if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
151 do_update = TRUE;
152 if (failed_stop_offset == NULL) {
153 failed_stop_offset = strdup(CRM_INFINITY_S);
154 }
155 value = failed_stop_offset;
156 }
157
158
159 if (!pcmk_str_is_infinity(value)) {
160 value = XML_NVPAIR_ATTR_VALUE "++";
161 }
162
163 if (do_update) {
164 char *now = pcmk__ttoa(time(NULL));
165 char *attr_name = NULL;
166 gboolean is_remote_node = FALSE;
167
168 if (g_hash_table_lookup(crm_remote_peer_cache, event_node_uuid)) {
169 is_remote_node = TRUE;
170 }
171
172 crm_info("Updating %s for %s on %s after failed %s: rc=%d (update=%s, time=%s)",
173 (ignore_failures? "last failure" : "failcount"),
174 rsc_id, on_uname, task, rc, value, now);
175
176
177 if (!ignore_failures) {
178 attr_name = pcmk__failcount_name(rsc_id, task, interval_ms);
179 update_attrd(on_uname, attr_name, value, NULL, is_remote_node);
180 free(attr_name);
181 }
182
183
184
185
186 attr_name = pcmk__lastfailure_name(rsc_id, task, interval_ms);
187 update_attrd(on_uname, attr_name, now, NULL, is_remote_node);
188 free(attr_name);
189
190 free(now);
191 }
192
193 bail:
194 free(rsc_id);
195 free(task);
196 return TRUE;
197 }
198
199 crm_action_t *
200 controld_get_action(int id)
201 {
202 for (GList *item = transition_graph->synapses; item; item = item->next) {
203 synapse_t *synapse = (synapse_t *) item->data;
204
205 for (GList *item2 = synapse->actions; item2; item2 = item2->next) {
206 crm_action_t *action = (crm_action_t *) item2->data;
207
208 if (action->id == id) {
209 return action;
210 }
211 }
212 }
213 return NULL;
214 }
215
216 crm_action_t *
217 get_cancel_action(const char *id, const char *node)
218 {
219 GList *gIter = NULL;
220 GList *gIter2 = NULL;
221
222 gIter = transition_graph->synapses;
223 for (; gIter != NULL; gIter = gIter->next) {
224 synapse_t *synapse = (synapse_t *) gIter->data;
225
226 gIter2 = synapse->actions;
227 for (; gIter2 != NULL; gIter2 = gIter2->next) {
228 const char *task = NULL;
229 const char *target = NULL;
230 crm_action_t *action = (crm_action_t *) gIter2->data;
231
232 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
233 if (!pcmk__str_eq(CRMD_ACTION_CANCEL, task, pcmk__str_casei)) {
234 continue;
235 }
236
237 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
238 if (!pcmk__str_eq(task, id, pcmk__str_casei)) {
239 crm_trace("Wrong key %s for %s on %s", task, id, node);
240 continue;
241 }
242
243 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
244 if (node && !pcmk__str_eq(target, node, pcmk__str_casei)) {
245 crm_trace("Wrong node %s for %s on %s", target, id, node);
246 continue;
247 }
248
249 crm_trace("Found %s on %s", id, node);
250 return action;
251 }
252 }
253
254 return NULL;
255 }
256
257 bool
258 confirm_cancel_action(const char *id, const char *node_id)
259 {
260 const char *op_key = NULL;
261 const char *node_name = NULL;
262 crm_action_t *cancel = get_cancel_action(id, node_id);
263
264 if (cancel == NULL) {
265 return FALSE;
266 }
267 op_key = crm_element_value(cancel->xml, XML_LRM_ATTR_TASK_KEY);
268 node_name = crm_element_value(cancel->xml, XML_LRM_ATTR_TARGET);
269
270 stop_te_timer(cancel->timer);
271 te_action_confirmed(cancel, transition_graph);
272
273 crm_info("Cancellation of %s on %s confirmed (action %d)",
274 op_key, node_name, cancel->id);
275 return TRUE;
276 }
277
278
279 #define XPATH_DOWNED "//" XML_GRAPH_TAG_DOWNED \
280 "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UUID "='%s']"
281
282
283
284
285
286
287
288
289 crm_action_t *
290 match_down_event(const char *target)
291 {
292 crm_action_t *match = NULL;
293 xmlXPathObjectPtr xpath_ret = NULL;
294 GList *gIter, *gIter2;
295
296 char *xpath = crm_strdup_printf(XPATH_DOWNED, target);
297
298 for (gIter = transition_graph->synapses;
299 gIter != NULL && match == NULL;
300 gIter = gIter->next) {
301
302 for (gIter2 = ((synapse_t*)gIter->data)->actions;
303 gIter2 != NULL && match == NULL;
304 gIter2 = gIter2->next) {
305
306 match = (crm_action_t*)gIter2->data;
307 if (match->executed) {
308 xpath_ret = xpath_search(match->xml, xpath);
309 if (numXpathResults(xpath_ret) < 1) {
310 match = NULL;
311 }
312 freeXpathObject(xpath_ret);
313 } else {
314
315 match = NULL;
316 }
317 }
318 }
319
320 free(xpath);
321
322 if (match != NULL) {
323 crm_debug("Shutdown action %d (%s) found for node %s", match->id,
324 crm_element_value(match->xml, XML_LRM_ATTR_TASK_KEY), target);
325 } else {
326 crm_debug("No reason to expect node %s to be down", target);
327 }
328 return match;
329 }
330
331 void
332 process_graph_event(xmlNode *event, const char *event_node)
333 {
334 int rc = -1;
335 int target_rc = -1;
336 int status = -1;
337 int callid = -1;
338 int transition_num = -1;
339 int action_num = -1;
340 char *update_te_uuid = NULL;
341 bool ignore_failures = FALSE;
342 const char *id = NULL;
343 const char *desc = NULL;
344 const char *magic = NULL;
345 const char *uname = NULL;
346
347 CRM_ASSERT(event != NULL);
348
349
350
351
352
353 magic = crm_element_value(event, XML_ATTR_TRANSITION_KEY);
354 if (magic == NULL) {
355
356 return;
357 }
358
359 crm_element_value_int(event, XML_LRM_ATTR_OPSTATUS, &status);
360 if (status == PCMK_LRM_OP_PENDING) {
361 return;
362 }
363
364 id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
365 crm_element_value_int(event, XML_LRM_ATTR_RC, &rc);
366 crm_element_value_int(event, XML_LRM_ATTR_CALLID, &callid);
367
368 rc = pcmk__effective_rc(rc);
369
370 if (decode_transition_key(magic, &update_te_uuid, &transition_num,
371 &action_num, &target_rc) == FALSE) {
372
373 crm_err("Can't process action %s result: Incompatible versions? "
374 CRM_XS " call-id=%d", id, callid);
375 abort_transition(INFINITY, tg_restart, "Bad event", event);
376 return;
377 }
378
379 if (transition_num == -1) {
380
381 desc = "initiated outside of the cluster";
382 abort_transition(INFINITY, tg_restart, "Unexpected event", event);
383
384 } else if ((action_num < 0) || !pcmk__str_eq(update_te_uuid, te_uuid, pcmk__str_none)) {
385 desc = "initiated by a different DC";
386 abort_transition(INFINITY, tg_restart, "Foreign event", event);
387
388 } else if ((transition_graph->id != transition_num)
389 || (transition_graph->complete)) {
390
391
392
393 guint interval_ms = 0;
394
395 if (parse_op_key(id, NULL, NULL, &interval_ms)
396 && (interval_ms != 0)) {
397
398
399
400
401 if (status == PCMK_LRM_OP_CANCELLED) {
402 confirm_cancel_action(id, get_node_id(event));
403 goto bail;
404 }
405
406 desc = "arrived after initial scheduling";
407 abort_transition(INFINITY, tg_restart, "Change in recurring result",
408 event);
409
410 } else if (transition_graph->id != transition_num) {
411 desc = "arrived really late";
412 abort_transition(INFINITY, tg_restart, "Old event", event);
413 } else {
414 desc = "arrived late";
415 abort_transition(INFINITY, tg_restart, "Inactive graph", event);
416 }
417
418 } else {
419
420 crm_action_t *action = controld_get_action(action_num);
421
422 if (action == NULL) {
423
424 desc = "unknown";
425 abort_transition(INFINITY, tg_restart, "Unknown event", event);
426
427 } else if (action->confirmed == TRUE) {
428
429
430
431
432
433
434 crm_log_xml_debug(event, "Event already confirmed:");
435 goto bail;
436
437 } else {
438
439
440
441
442 if (pcmk__str_eq(crm_meta_value(action->params, XML_OP_ATTR_ON_FAIL), "ignore", pcmk__str_casei)) {
443 ignore_failures = TRUE;
444
445 } else if (rc != target_rc) {
446 action->failed = TRUE;
447 }
448
449 stop_te_timer(action->timer);
450 te_action_confirmed(action, transition_graph);
451
452 if (action->failed) {
453 abort_transition(action->synapse->priority + 1, tg_restart,
454 "Event failed", event);
455 }
456 }
457 }
458
459 if (id == NULL) {
460 id = "unknown action";
461 }
462 uname = crm_element_value(event, XML_LRM_ATTR_TARGET);
463 if (uname == NULL) {
464 uname = "unknown node";
465 }
466
467 if (status == PCMK_LRM_OP_INVALID) {
468
469 crm_info("Transition %d action %d (%s on %s): %s",
470 transition_num, action_num, id, uname,
471 services_lrm_status_str(status));
472
473 } else if (desc && update_failcount(event, event_node, rc, target_rc,
474 (transition_num == -1), FALSE)) {
475 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
476 CRM_XS " target-rc=%d rc=%d call-id=%d event='%s'",
477 transition_num, action_num, id, uname,
478 services_ocf_exitcode_str(target_rc),
479 services_ocf_exitcode_str(rc),
480 target_rc, rc, callid, desc);
481
482 } else if (desc) {
483 crm_info("Transition %d action %d (%s on %s): %s "
484 CRM_XS " rc=%d target-rc=%d call-id=%d",
485 transition_num, action_num, id, uname,
486 desc, rc, target_rc, callid);
487
488 } else if (rc == target_rc) {
489 crm_info("Transition %d action %d (%s on %s) confirmed: %s "
490 CRM_XS " rc=%d call-id=%d",
491 transition_num, action_num, id, uname,
492 services_ocf_exitcode_str(rc), rc, callid);
493
494 } else {
495 update_failcount(event, event_node, rc, target_rc,
496 (transition_num == -1), ignore_failures);
497 crm_notice("Transition %d action %d (%s on %s): expected '%s' but got '%s' "
498 CRM_XS " target-rc=%d rc=%d call-id=%d",
499 transition_num, action_num, id, uname,
500 services_ocf_exitcode_str(target_rc),
501 services_ocf_exitcode_str(rc),
502 target_rc, rc, callid);
503 }
504
505 bail:
506 free(update_te_uuid);
507 }