This source file includes the following definitions.
- is_matched_failure
- block_failure
- rsc_fail_name
- generate_fail_regex
- generate_fail_regexes
- pe_get_failcount
1
2
3
4
5
6
7
#include <crm_internal.h>

#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include <glib.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <crm/pengine/internal.h>
19
20 static gboolean
21 is_matched_failure(const char *rsc_id, xmlNode *conf_op_xml,
22 xmlNode *lrm_op_xml)
23 {
24 gboolean matched = FALSE;
25 const char *conf_op_name = NULL;
26 int conf_op_interval = 0;
27 const char *lrm_op_task = NULL;
28 int lrm_op_interval = 0;
29 const char *lrm_op_id = NULL;
30 char *last_failure_key = NULL;
31
32 if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) {
33 return FALSE;
34 }
35
36 conf_op_name = crm_element_value(conf_op_xml, "name");
37 conf_op_interval = crm_get_msec(crm_element_value(conf_op_xml, "interval"));
38 lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK);
39 crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_INTERVAL, &lrm_op_interval);
40
41 if (safe_str_eq(conf_op_name, lrm_op_task) == FALSE
42 || conf_op_interval != lrm_op_interval) {
43 return FALSE;
44 }
45
46 lrm_op_id = ID(lrm_op_xml);
47 last_failure_key = generate_op_key(rsc_id, "last_failure", 0);
48
49 if (safe_str_eq(last_failure_key, lrm_op_id)) {
50 matched = TRUE;
51
52 } else {
53 char *expected_op_key = generate_op_key(rsc_id, conf_op_name,
54 conf_op_interval);
55
56 if (safe_str_eq(expected_op_key, lrm_op_id)) {
57 int rc = 0;
58 int target_rc = get_target_rc(lrm_op_xml);
59
60 crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc);
61 if (rc != target_rc) {
62 matched = TRUE;
63 }
64 }
65 free(expected_op_key);
66 }
67
68 free(last_failure_key);
69 return matched;
70 }
71
72 static gboolean
73 block_failure(node_t *node, resource_t *rsc, xmlNode *xml_op,
74 pe_working_set_t *data_set)
75 {
76 char *xml_name = clone_strip(rsc->id);
77 char *xpath = crm_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']",
78 xml_name);
79 xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath);
80 gboolean should_block = FALSE;
81
82 free(xpath);
83
84 #if 0
85
86 if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
87
88 return TRUE;
89 }
90 #endif
91
92 if (xpathObj) {
93 int max = numXpathResults(xpathObj);
94 int lpc = 0;
95
96 for (lpc = 0; lpc < max; lpc++) {
97 xmlNode *pref = getXpathResult(xpathObj, lpc);
98
99 if (xml_op) {
100 should_block = is_matched_failure(xml_name, pref, xml_op);
101 if (should_block) {
102 break;
103 }
104
105 } else {
106 const char *conf_op_name = NULL;
107 int conf_op_interval = 0;
108 char *lrm_op_xpath = NULL;
109 xmlXPathObject *lrm_op_xpathObj = NULL;
110
111 conf_op_name = crm_element_value(pref, "name");
112 conf_op_interval = crm_get_msec(crm_element_value(pref, "interval"));
113
114 lrm_op_xpath = crm_strdup_printf("//node_state[@uname='%s']"
115 "//lrm_resource[@id='%s']"
116 "/lrm_rsc_op[@operation='%s'][@interval='%d']",
117 node->details->uname, xml_name,
118 conf_op_name, conf_op_interval);
119 lrm_op_xpathObj = xpath_search(data_set->input, lrm_op_xpath);
120
121 free(lrm_op_xpath);
122
123 if (lrm_op_xpathObj) {
124 int max2 = numXpathResults(lrm_op_xpathObj);
125 int lpc2 = 0;
126
127 for (lpc2 = 0; lpc2 < max2; lpc2++) {
128 xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj,
129 lpc2);
130
131 should_block = is_matched_failure(xml_name, pref,
132 lrm_op_xml);
133 if (should_block) {
134 break;
135 }
136 }
137 }
138 freeXpathObject(lrm_op_xpathObj);
139
140 if (should_block) {
141 break;
142 }
143 }
144 }
145 }
146
147 free(xml_name);
148 freeXpathObject(xpathObj);
149
150 return should_block;
151 }
152
153
154
155
156
157
158
159
160
161
162 static inline char *
163 rsc_fail_name(resource_t *rsc)
164 {
165 const char *name = (rsc->clone_name? rsc->clone_name : rsc->id);
166
167 return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name);
168 }
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183 static void
184 generate_fail_regex(const char *prefix, const char *rsc_name,
185 gboolean is_legacy, gboolean is_unique, regex_t *re)
186 {
187 char *pattern;
188
189
190
191
192 const char *op_pattern = (is_legacy? "" : "#.+_[0-9]+");
193
194
195
196
197
198
199
200 const char *instance_pattern = (is_unique? "" : "(:[0-9]+)?");
201
202 pattern = crm_strdup_printf("^%s-%s%s%s$", prefix, rsc_name,
203 instance_pattern, op_pattern);
204 CRM_LOG_ASSERT(regcomp(re, pattern, REG_EXTENDED|REG_NOSUB) == 0);
205 free(pattern);
206 }
207
208
209
210
211
212
213
214
215
216
217
218
219 static void
220 generate_fail_regexes(resource_t *rsc, pe_working_set_t *data_set,
221 regex_t *failcount_re, regex_t *lastfailure_re)
222 {
223 char *rsc_name = rsc_fail_name(rsc);
224 const char *version = crm_element_value(data_set->input, XML_ATTR_CRM_VERSION);
225 gboolean is_legacy = (compare_version(version, "3.0.13") < 0);
226
227 generate_fail_regex(CRM_FAIL_COUNT_PREFIX, rsc_name, is_legacy,
228 is_set(rsc->flags, pe_rsc_unique), failcount_re);
229
230 generate_fail_regex(CRM_LAST_FAILURE_PREFIX, rsc_name, is_legacy,
231 is_set(rsc->flags, pe_rsc_unique), lastfailure_re);
232
233 free(rsc_name);
234 }
235
236 int
237 pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure,
238 uint32_t flags, xmlNode *xml_op, pe_working_set_t *data_set)
239 {
240 char *key = NULL;
241 const char *value = NULL;
242 regex_t failcount_re, lastfailure_re;
243 int failcount = 0;
244 time_t last = 0;
245 GHashTableIter iter;
246
247 generate_fail_regexes(rsc, data_set, &failcount_re, &lastfailure_re);
248
249
250 g_hash_table_iter_init(&iter, node->details->attrs);
251 while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) {
252 if (regexec(&failcount_re, key, 0, NULL, 0) == 0) {
253 failcount = merge_weights(failcount, char2score(value));
254 } else if (regexec(&lastfailure_re, key, 0, NULL, 0) == 0) {
255 last = QB_MAX(last, crm_int_helper(value, NULL));
256 }
257 }
258
259 regfree(&failcount_re);
260 regfree(&lastfailure_re);
261
262 if ((failcount > 0) && (last > 0) && (last_failure != NULL)) {
263 *last_failure = last;
264 }
265
266
267 if ((failcount > 0) && rsc->failure_timeout
268 && block_failure(node, rsc, xml_op, data_set)) {
269
270 pe_warn("Ignoring failure timeout %d for %s because it conflicts with on-fail=block",
271 rsc->id, rsc->failure_timeout);
272 rsc->failure_timeout = 0;
273 }
274
275
276 if (is_set(flags, pe_fc_effective) && (failcount > 0) && (last > 0)
277 && rsc->failure_timeout) {
278
279 time_t now = get_effective_time(data_set);
280
281 if (now > (last + rsc->failure_timeout)) {
282 crm_debug("Failcount for %s on %s expired after %ds",
283 rsc->id, node->details->uname, rsc->failure_timeout);
284 failcount = 0;
285 }
286 }
287
288 if (is_set(flags, pe_fc_fillers) && rsc->fillers) {
289 GListPtr gIter = NULL;
290
291 for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) {
292 resource_t *filler = (resource_t *) gIter->data;
293 time_t filler_last_failure = 0;
294
295 failcount += pe_get_failcount(node, filler, &filler_last_failure,
296 flags, xml_op, data_set);
297
298 if (last_failure && filler_last_failure > *last_failure) {
299 *last_failure = filler_last_failure;
300 }
301 }
302
303 if (failcount > 0) {
304 char *score = score2char(failcount);
305
306 crm_info("Container %s and the resources within it have failed %s times on %s",
307 rsc->id, score, node->details->uname);
308 free(score);
309 }
310
311 } else if (failcount > 0) {
312 char *score = score2char(failcount);
313
314 crm_info("%s has failed %s times on %s",
315 rsc->id, score, node->details->uname);
316 free(score);
317 }
318
319
320 return failcount;
321 }