This source file includes the following definitions.
- is_matched_failure
- block_failure
- rsc_fail_name
- generate_fail_regex
- generate_fail_regexes
- pe_get_failcount
- pe__clear_failcount
1
2
3
4
5
6
7
#include <crm_internal.h>

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#include <glib.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/pengine/internal.h>
19
20 static gboolean
21 is_matched_failure(const char *rsc_id, const xmlNode *conf_op_xml,
22 const xmlNode *lrm_op_xml)
23 {
24 gboolean matched = FALSE;
25 const char *conf_op_name = NULL;
26 const char *lrm_op_task = NULL;
27 const char *conf_op_interval_spec = NULL;
28 guint conf_op_interval_ms = 0;
29 guint lrm_op_interval_ms = 0;
30 const char *lrm_op_id = NULL;
31 char *last_failure_key = NULL;
32
33 if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) {
34 return FALSE;
35 }
36
37
38 conf_op_name = crm_element_value(conf_op_xml, "name");
39 conf_op_interval_spec = crm_element_value(conf_op_xml,
40 XML_LRM_ATTR_INTERVAL);
41 conf_op_interval_ms = crm_parse_interval_spec(conf_op_interval_spec);
42
43
44 lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK);
45 crm_element_value_ms(lrm_op_xml, XML_LRM_ATTR_INTERVAL_MS,
46 &lrm_op_interval_ms);
47
48 if ((conf_op_interval_ms != lrm_op_interval_ms)
49 || !pcmk__str_eq(conf_op_name, lrm_op_task, pcmk__str_casei)) {
50 return FALSE;
51 }
52
53 lrm_op_id = ID(lrm_op_xml);
54 last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0);
55
56 if (pcmk__str_eq(last_failure_key, lrm_op_id, pcmk__str_casei)) {
57 matched = TRUE;
58
59 } else {
60 char *expected_op_key = pcmk__op_key(rsc_id, conf_op_name,
61 conf_op_interval_ms);
62
63 if (pcmk__str_eq(expected_op_key, lrm_op_id, pcmk__str_casei)) {
64 int rc = 0;
65 int target_rc = pe__target_rc_from_xml(lrm_op_xml);
66
67 crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc);
68 if (rc != target_rc) {
69 matched = TRUE;
70 }
71 }
72 free(expected_op_key);
73 }
74
75 free(last_failure_key);
76 return matched;
77 }
78
79 static gboolean
80 block_failure(const pe_node_t *node, pe_resource_t *rsc, const xmlNode *xml_op)
81 {
82 char *xml_name = clone_strip(rsc->id);
83
84
85
86
87
88
89
90
91
92
93
94 char *xpath = crm_strdup_printf("//" XML_CIB_TAG_RESOURCE
95 "[@" XML_ATTR_ID "='%s']"
96 "//" XML_ATTR_OP
97 "[@" XML_OP_ATTR_ON_FAIL "='block']",
98 xml_name);
99
100 xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath);
101 gboolean should_block = FALSE;
102
103 free(xpath);
104
105 if (xpathObj) {
106 int max = numXpathResults(xpathObj);
107 int lpc = 0;
108
109 for (lpc = 0; lpc < max; lpc++) {
110 xmlNode *pref = getXpathResult(xpathObj, lpc);
111
112 if (xml_op) {
113 should_block = is_matched_failure(xml_name, pref, xml_op);
114 if (should_block) {
115 break;
116 }
117
118 } else {
119 const char *conf_op_name = NULL;
120 const char *conf_op_interval_spec = NULL;
121 guint conf_op_interval_ms = 0;
122 char *lrm_op_xpath = NULL;
123 xmlXPathObject *lrm_op_xpathObj = NULL;
124
125
126 conf_op_name = crm_element_value(pref, "name");
127 conf_op_interval_spec = crm_element_value(pref, XML_LRM_ATTR_INTERVAL);
128 conf_op_interval_ms = crm_parse_interval_spec(conf_op_interval_spec);
129
130 #define XPATH_FMT "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
131 "//" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \
132 "/" XML_LRM_TAG_RSC_OP "[@" XML_LRM_ATTR_TASK "='%s']" \
133 "[@" XML_LRM_ATTR_INTERVAL "='%u']"
134
135 lrm_op_xpath = crm_strdup_printf(XPATH_FMT,
136 node->details->uname, xml_name,
137 conf_op_name,
138 conf_op_interval_ms);
139 lrm_op_xpathObj = xpath_search(rsc->cluster->input, lrm_op_xpath);
140
141 free(lrm_op_xpath);
142
143 if (lrm_op_xpathObj) {
144 int max2 = numXpathResults(lrm_op_xpathObj);
145 int lpc2 = 0;
146
147 for (lpc2 = 0; lpc2 < max2; lpc2++) {
148 xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj,
149 lpc2);
150
151 should_block = is_matched_failure(xml_name, pref,
152 lrm_op_xml);
153 if (should_block) {
154 break;
155 }
156 }
157 }
158 freeXpathObject(lrm_op_xpathObj);
159
160 if (should_block) {
161 break;
162 }
163 }
164 }
165 }
166
167 free(xml_name);
168 freeXpathObject(xpathObj);
169
170 return should_block;
171 }
172
173
174
175
176
177
178
179
180
181
182 static inline char *
183 rsc_fail_name(const pe_resource_t *rsc)
184 {
185 const char *name = (rsc->clone_name? rsc->clone_name : rsc->id);
186
187 return pcmk_is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name);
188 }
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204 static int
205 generate_fail_regex(const char *prefix, const char *rsc_name,
206 gboolean is_legacy, gboolean is_unique, regex_t *re)
207 {
208 char *pattern;
209
210
211
212
213 const char *op_pattern = (is_legacy? "" : "#.+_[0-9]+");
214
215
216
217
218
219
220
221 const char *instance_pattern = (is_unique? "" : "(:[0-9]+)?");
222
223 pattern = crm_strdup_printf("^%s-%s%s%s$", prefix, rsc_name,
224 instance_pattern, op_pattern);
225 if (regcomp(re, pattern, REG_EXTENDED|REG_NOSUB) != 0) {
226 free(pattern);
227 return EINVAL;
228 }
229
230 free(pattern);
231 return pcmk_rc_ok;
232 }
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247 static int
248 generate_fail_regexes(const pe_resource_t *rsc,
249 const pe_working_set_t *data_set,
250 regex_t *failcount_re, regex_t *lastfailure_re)
251 {
252 char *rsc_name = rsc_fail_name(rsc);
253 const char *version = crm_element_value(data_set->input, XML_ATTR_CRM_VERSION);
254 gboolean is_legacy = (compare_version(version, "3.0.13") < 0);
255 int rc = pcmk_rc_ok;
256
257 if (generate_fail_regex(PCMK__FAIL_COUNT_PREFIX, rsc_name, is_legacy,
258 pcmk_is_set(rsc->flags, pe_rsc_unique),
259 failcount_re) != pcmk_rc_ok) {
260 rc = EINVAL;
261
262 } else if (generate_fail_regex(PCMK__LAST_FAILURE_PREFIX, rsc_name,
263 is_legacy,
264 pcmk_is_set(rsc->flags, pe_rsc_unique),
265 lastfailure_re) != pcmk_rc_ok) {
266 rc = EINVAL;
267 regfree(failcount_re);
268 }
269
270 free(rsc_name);
271 return rc;
272 }
273
274 int
275 pe_get_failcount(const pe_node_t *node, pe_resource_t *rsc,
276 time_t *last_failure, uint32_t flags, const xmlNode *xml_op)
277 {
278 char *key = NULL;
279 const char *value = NULL;
280 regex_t failcount_re, lastfailure_re;
281 int failcount = 0;
282 time_t last = 0;
283 GHashTableIter iter;
284
285 CRM_CHECK(generate_fail_regexes(rsc, rsc->cluster, &failcount_re,
286 &lastfailure_re) == pcmk_rc_ok,
287 return 0);
288
289
290 g_hash_table_iter_init(&iter, node->details->attrs);
291 while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
292 if (regexec(&failcount_re, key, 0, NULL, 0) == 0) {
293 failcount = pcmk__add_scores(failcount, char2score(value));
294 crm_trace("Added %s (%s) to %s fail count (now %s)",
295 key, value, rsc->id, pcmk_readable_score(failcount));
296 } else if (regexec(&lastfailure_re, key, 0, NULL, 0) == 0) {
297 long long last_ll;
298
299 if (pcmk__scan_ll(value, &last_ll, 0LL) == pcmk_rc_ok) {
300 last = (time_t) QB_MAX(last, last_ll);
301 }
302 }
303 }
304
305 regfree(&failcount_re);
306 regfree(&lastfailure_re);
307
308 if ((failcount > 0) && (last > 0) && (last_failure != NULL)) {
309 *last_failure = last;
310 }
311
312
313 if ((failcount > 0) && rsc->failure_timeout
314 && block_failure(node, rsc, xml_op)) {
315
316 pe_warn("Ignoring failure timeout %d for %s because it conflicts with on-fail=block",
317 rsc->failure_timeout, rsc->id);
318 rsc->failure_timeout = 0;
319 }
320
321
322 if (pcmk_is_set(flags, pe_fc_effective) && (failcount > 0) && (last > 0)
323 && rsc->failure_timeout) {
324
325 time_t now = get_effective_time(rsc->cluster);
326
327 if (now > (last + rsc->failure_timeout)) {
328 crm_debug("Failcount for %s on %s expired after %ds",
329 rsc->id, pe__node_name(node), rsc->failure_timeout);
330 failcount = 0;
331 }
332 }
333
334
335
336
337
338
339
340
341
342
343
344 if (pcmk_is_set(flags, pe_fc_fillers) && rsc->fillers
345 && !pe_rsc_is_bundled(rsc)) {
346
347 GList *gIter = NULL;
348
349 for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) {
350 pe_resource_t *filler = (pe_resource_t *) gIter->data;
351 time_t filler_last_failure = 0;
352
353 failcount += pe_get_failcount(node, filler, &filler_last_failure,
354 flags, xml_op);
355
356 if (last_failure && filler_last_failure > *last_failure) {
357 *last_failure = filler_last_failure;
358 }
359 }
360
361 if (failcount > 0) {
362 crm_info("Container %s and the resources within it "
363 "have failed %s time%s on %s",
364 rsc->id, pcmk_readable_score(failcount),
365 pcmk__plural_s(failcount), pe__node_name(node));
366 }
367
368 } else if (failcount > 0) {
369 crm_info("%s has failed %s time%s on %s",
370 rsc->id, pcmk_readable_score(failcount),
371 pcmk__plural_s(failcount), pe__node_name(node));
372 }
373
374 return failcount;
375 }
376
377
378
379
380
381
382
383
384
385
386
387 pe_action_t *
388 pe__clear_failcount(pe_resource_t *rsc, const pe_node_t *node,
389 const char *reason, pe_working_set_t *data_set)
390 {
391 char *key = NULL;
392 pe_action_t *clear = NULL;
393
394 CRM_CHECK(rsc && node && reason && data_set, return NULL);
395
396 key = pcmk__op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
397 clear = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE,
398 data_set);
399 add_hash_param(clear->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
400 crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s",
401 rsc->id, pe__node_name(node), reason, clear->uuid);
402 return clear;
403 }