This source file includes the following definitions:
- stop_te_timer
- te_graph_trigger
- controld_init_transition_trigger
- controld_destroy_transition_trigger
- controld_trigger_graph_as
- abort_timer_popped
- abort_after_delay
- free_node_pending_timer
- node_pending_timer_popped
- init_node_pending_timer
- remove_node_pending_timer
- controld_node_pending_timer
- controld_free_node_pending_timers
- abort2text
- update_abort_priority
- abort_transition_graph
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/common/xml.h>
13
14 #include <pacemaker-controld.h>
15
16
17 static crm_trigger_t *transition_trigger = NULL;
18
19 static GHashTable *node_pending_timers = NULL;
20
21 gboolean
22 stop_te_timer(pcmk__graph_action_t *action)
23 {
24 if (action == NULL) {
25 return FALSE;
26 }
27 if (action->timer != 0) {
28 crm_trace("Stopping action timer");
29 g_source_remove(action->timer);
30 action->timer = 0;
31 } else {
32 crm_trace("Action timer was already stopped");
33 return FALSE;
34 }
35 return TRUE;
36 }
37
38 static gboolean
39 te_graph_trigger(gpointer user_data)
40 {
41 if (controld_globals.transition_graph == NULL) {
42 crm_debug("Nothing to do");
43 return TRUE;
44 }
45
46 crm_trace("Invoking graph %d in state %s",
47 controld_globals.transition_graph->id,
48 fsa_state2string(controld_globals.fsa_state));
49
50 switch (controld_globals.fsa_state) {
51 case S_STARTING:
52 case S_PENDING:
53 case S_NOT_DC:
54 case S_HALT:
55 case S_ILLEGAL:
56 case S_STOPPING:
57 case S_TERMINATE:
58 return TRUE;
59 default:
60 break;
61 }
62
63 if (!controld_globals.transition_graph->complete) {
64 enum pcmk__graph_status graph_rc;
65 int orig_limit = controld_globals.transition_graph->batch_limit;
66 int throttled_limit = throttle_get_total_job_limit(orig_limit);
67
68 controld_globals.transition_graph->batch_limit = throttled_limit;
69 graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
70 controld_globals.transition_graph->batch_limit = orig_limit;
71
72 if (graph_rc == pcmk__graph_active) {
73 crm_trace("Transition not yet complete");
74 return TRUE;
75
76 } else if (graph_rc == pcmk__graph_pending) {
77 crm_trace("Transition not yet complete - no actions fired");
78 return TRUE;
79 }
80
81 if (graph_rc != pcmk__graph_complete) {
82 crm_warn("Transition failed: %s",
83 pcmk__graph_status2text(graph_rc));
84 pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
85 }
86 }
87
88 crm_debug("Transition %d is now complete",
89 controld_globals.transition_graph->id);
90 controld_globals.transition_graph->complete = true;
91 notify_crmd(controld_globals.transition_graph);
92
93 return TRUE;
94 }
95
96
97
98
99
100 void
101 controld_init_transition_trigger(void)
102 {
103 transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
104 NULL);
105 }
106
107
108
109
110
111 void
112 controld_destroy_transition_trigger(void)
113 {
114 mainloop_destroy_trigger(transition_trigger);
115 transition_trigger = NULL;
116 }
117
118 void
119 controld_trigger_graph_as(const char *fn, int line)
120 {
121 crm_trace("%s:%d - Triggered graph processing", fn, line);
122 mainloop_set_trigger(transition_trigger);
123 }
124
125 static struct abort_timer_s {
126 bool aborted;
127 guint id;
128 int priority;
129 enum pcmk__graph_next action;
130 const char *text;
131 } abort_timer = { 0, };
132
133 static gboolean
134 abort_timer_popped(gpointer data)
135 {
136 struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
137
138 if (AM_I_DC && (abort_timer->aborted == FALSE)) {
139 abort_transition(abort_timer->priority, abort_timer->action,
140 abort_timer->text, NULL);
141 }
142 abort_timer->id = 0;
143 return FALSE;
144 }
145
146
147
148
149
150
151
152 void
153 abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
154 const char *abort_text, guint delay_ms)
155 {
156 if (abort_timer.id) {
157
158 g_source_remove(abort_timer.id);
159 }
160 abort_timer.aborted = FALSE;
161 abort_timer.priority = abort_priority;
162 abort_timer.action = abort_action;
163 abort_timer.text = abort_text;
164 abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, &abort_timer);
165 }
166
167 static void
168 free_node_pending_timer(gpointer data)
169 {
170 struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
171
172 if (node_pending_timer->id != 0) {
173 g_source_remove(node_pending_timer->id);
174 node_pending_timer->id = 0;
175 }
176
177 free(node_pending_timer);
178 }
179
180 static gboolean
181 node_pending_timer_popped(gpointer key)
182 {
183 struct abort_timer_s *node_pending_timer = NULL;
184
185 if (node_pending_timers == NULL) {
186 return FALSE;
187 }
188
189 node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
190 if (node_pending_timer == NULL) {
191 return FALSE;
192 }
193
194 crm_warn("Node with " PCMK_XA_ID " '%s' pending timed out (%us) "
195 "on joining the process group",
196 (const char *) key, controld_globals.node_pending_timeout);
197
198 if (controld_globals.node_pending_timeout > 0) {
199 abort_timer_popped(node_pending_timer);
200 }
201
202 g_hash_table_remove(node_pending_timers, key);
203
204 return FALSE;
205 }
206
207 static void
208 init_node_pending_timer(const crm_node_t *node, guint timeout)
209 {
210 struct abort_timer_s *node_pending_timer = NULL;
211 char *key = NULL;
212
213 if (node->uuid == NULL) {
214 return;
215 }
216
217 if (node_pending_timers == NULL) {
218 node_pending_timers = pcmk__strikey_table(free,
219 free_node_pending_timer);
220
221
222 } else if (g_hash_table_lookup(node_pending_timers, node->uuid) != NULL) {
223 return;
224 }
225
226 crm_notice("Waiting for pending %s with " PCMK_XA_ID " '%s' "
227 "to join the process group (timeout=%us)",
228 node->uname ? node->uname : "node", node->uuid,
229 controld_globals.node_pending_timeout);
230
231 key = pcmk__str_copy(node->uuid);
232 node_pending_timer = pcmk__assert_alloc(1, sizeof(struct abort_timer_s));
233
234 node_pending_timer->aborted = FALSE;
235 node_pending_timer->priority = PCMK_SCORE_INFINITY;
236 node_pending_timer->action = pcmk__graph_restart;
237 node_pending_timer->text = "Node pending timed out";
238
239 g_hash_table_replace(node_pending_timers, key, node_pending_timer);
240
241 node_pending_timer->id = g_timeout_add_seconds(timeout,
242 node_pending_timer_popped,
243 key);
244 pcmk__assert(node_pending_timer->id != 0);
245 }
246
247 static void
248 remove_node_pending_timer(const char *node_uuid)
249 {
250 if (node_pending_timers == NULL) {
251 return;
252 }
253
254 g_hash_table_remove(node_pending_timers, node_uuid);
255 }
256
257 void
258 controld_node_pending_timer(const crm_node_t *node)
259 {
260 long long remaining_timeout = 0;
261
262
263
264
265
266 if (pcmk_is_set(node->flags, crm_remote_node)
267 || (node->when_member <= 1) || (node->when_online > 0)
268 || (controld_globals.node_pending_timeout == 0)) {
269 remove_node_pending_timer(node->uuid);
270 return;
271 }
272
273
274
275 remaining_timeout = node->when_member - time(NULL)
276 + controld_globals.node_pending_timeout;
277
278
279
280
281 if (remaining_timeout <= 0) {
282 remove_node_pending_timer(node->uuid);
283 return;
284 }
285
286 init_node_pending_timer(node, remaining_timeout);
287 }
288
289 void
290 controld_free_node_pending_timers(void)
291 {
292 if (node_pending_timers == NULL) {
293 return;
294 }
295
296 g_hash_table_destroy(node_pending_timers);
297 node_pending_timers = NULL;
298 }
299
300 static const char *
301 abort2text(enum pcmk__graph_next abort_action)
302 {
303 switch (abort_action) {
304 case pcmk__graph_done: return "done";
305 case pcmk__graph_wait: return "stop";
306 case pcmk__graph_restart: return "restart";
307 case pcmk__graph_shutdown: return "shutdown";
308 }
309 return "unknown";
310 }
311
312 static bool
313 update_abort_priority(pcmk__graph_t *graph, int priority,
314 enum pcmk__graph_next action, const char *abort_reason)
315 {
316 bool change = FALSE;
317
318 if (graph == NULL) {
319 return change;
320 }
321
322 if (graph->abort_priority < priority) {
323 crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
324 graph->abort_priority = priority;
325 if (graph->abort_reason != NULL) {
326 crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
327 }
328 graph->abort_reason = abort_reason;
329 change = TRUE;
330 }
331
332 if (graph->completion_action < action) {
333 crm_debug("Abort action %s superseded by %s: %s",
334 abort2text(graph->completion_action), abort2text(action), abort_reason);
335 graph->completion_action = action;
336 change = TRUE;
337 }
338
339 return change;
340 }
341
342 void
343 abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
344 const char *abort_text, const xmlNode *reason,
345 const char *fn, int line)
346 {
347 int add[] = { 0, 0, 0 };
348 int del[] = { 0, 0, 0 };
349 int level = LOG_INFO;
350 const xmlNode *diff = NULL;
351 const xmlNode *change = NULL;
352
353 CRM_CHECK(controld_globals.transition_graph != NULL, return);
354
355 switch (controld_globals.fsa_state) {
356 case S_STARTING:
357 case S_PENDING:
358 case S_NOT_DC:
359 case S_HALT:
360 case S_ILLEGAL:
361 case S_STOPPING:
362 case S_TERMINATE:
363 crm_info("Abort %s suppressed: state=%s (%scomplete)",
364 abort_text, fsa_state2string(controld_globals.fsa_state),
365 (controld_globals.transition_graph->complete? "" : "in"));
366 return;
367 default:
368 break;
369 }
370
371 abort_timer.aborted = TRUE;
372 controld_expect_sched_reply(NULL);
373
374 if (!controld_globals.transition_graph->complete
375 && update_abort_priority(controld_globals.transition_graph,
376 abort_priority, abort_action,
377 abort_text)) {
378 level = LOG_NOTICE;
379 }
380
381 if (reason != NULL) {
382 const xmlNode *search = NULL;
383
384 for(search = reason; search; search = search->parent) {
385 if (pcmk__xe_is(search, PCMK_XE_DIFF)) {
386 diff = search;
387 break;
388 }
389 }
390
391 if(diff) {
392 xml_patch_versions(diff, add, del);
393 for(search = reason; search; search = search->parent) {
394 if (pcmk__xe_is(search, PCMK_XE_CHANGE)) {
395 change = search;
396 break;
397 }
398 }
399 }
400 }
401
402 if (reason == NULL) {
403 do_crm_log(level,
404 "Transition %d aborted: %s " CRM_XS " source=%s:%d "
405 "complete=%s", controld_globals.transition_graph->id,
406 abort_text, fn, line,
407 pcmk__btoa(controld_globals.transition_graph->complete));
408
409 } else if(change == NULL) {
410 GString *local_path = pcmk__element_xpath(reason);
411 pcmk__assert(local_path != NULL);
412
413 do_crm_log(level, "Transition %d aborted by %s.%s: %s "
414 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
415 controld_globals.transition_graph->id, reason->name,
416 pcmk__xe_id(reason), abort_text, add[0], add[1], add[2], fn,
417 line, (const char *) local_path->str,
418 pcmk__btoa(controld_globals.transition_graph->complete));
419 g_string_free(local_path, TRUE);
420
421 } else {
422 const char *op = crm_element_value(change, PCMK_XA_OPERATION);
423 const char *path = crm_element_value(change, PCMK_XA_PATH);
424
425 if(change == reason) {
426 if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
427 reason = reason->children;
428
429 } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
430 reason = pcmk__xe_first_child(reason, PCMK_XE_CHANGE_RESULT,
431 NULL, NULL);
432 if(reason) {
433 reason = reason->children;
434 }
435 }
436 CRM_CHECK(reason != NULL, goto done);
437 }
438
439 if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
440 const char *shortpath = strrchr(path, '/');
441
442 do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
443 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
444 controld_globals.transition_graph->id,
445 (shortpath? (shortpath + 1) : path), abort_text,
446 add[0], add[1], add[2], fn, line, path,
447 pcmk__btoa(controld_globals.transition_graph->complete));
448
449 } else if (pcmk__xe_is(reason, PCMK_XE_NVPAIR)) {
450 do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
451 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
452 controld_globals.transition_graph->id,
453 crm_element_value(reason, PCMK_XA_ID), op,
454 crm_element_value(reason, PCMK_XA_NAME),
455 crm_element_value(reason, PCMK_XA_VALUE),
456 abort_text, add[0], add[1], add[2], fn, line, path,
457 pcmk__btoa(controld_globals.transition_graph->complete));
458
459 } else if (pcmk__xe_is(reason, PCMK__XE_LRM_RSC_OP)) {
460 const char *magic = crm_element_value(reason,
461 PCMK__XA_TRANSITION_MAGIC);
462
463 do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
464 CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
465 controld_globals.transition_graph->id,
466 crm_element_value(reason, PCMK__XA_OPERATION_KEY), op,
467 crm_element_value(reason, PCMK__META_ON_NODE),
468 abort_text,
469 magic, add[0], add[1], add[2], fn, line,
470 pcmk__btoa(controld_globals.transition_graph->complete));
471
472 } else if (pcmk__str_any_of((const char *) reason->name,
473 PCMK__XE_NODE_STATE, PCMK_XE_NODE, NULL)) {
474 const char *uname = pcmk__node_name_from_uuid(pcmk__xe_id(reason));
475
476 do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
477 CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
478 controld_globals.transition_graph->id,
479 reason->name, op, pcmk__s(uname, pcmk__xe_id(reason)),
480 abort_text, add[0], add[1], add[2], fn, line,
481 pcmk__btoa(controld_globals.transition_graph->complete));
482
483 } else {
484 const char *id = pcmk__xe_id(reason);
485
486 do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
487 CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
488 controld_globals.transition_graph->id,
489 reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
490 abort_text, add[0], add[1], add[2], fn, line, path,
491 pcmk__btoa(controld_globals.transition_graph->complete));
492 }
493 }
494
495 done:
496 if (controld_globals.transition_graph->complete) {
497 if (controld_get_period_transition_timer() > 0) {
498 controld_stop_transition_timer();
499 controld_start_transition_timer();
500 } else {
501 register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
502 }
503 return;
504 }
505
506 trigger_graph();
507 }