This source file includes the following definitions.
- te_start_action_timer
- te_pseudo_action
- get_target_rc
- te_crm_command
- synthesize_timeout_event
- controld_record_action_timeout
- te_rsc_command
- te_peer_free
- te_reset_job_counts
- te_update_job_count_on
- te_update_job_count
- te_should_perform_action_on
- te_should_perform_action
- te_action_confirmed
- notify_crmd
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/lrmd.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19
20 #include <pacemaker-internal.h>
21 #include <pacemaker-controld.h>
22
23 char *te_uuid = NULL;
24 GHashTable *te_targets = NULL;
25 void send_rsc_command(crm_action_t * action);
26 static void te_update_job_count(crm_action_t * action, int offset);
27
28 static void
29 te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
30 {
31 action->timer = calloc(1, sizeof(crm_action_timer_t));
32 action->timer->timeout = action->timeout;
33 action->timer->action = action;
34 action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay,
35 action_timer_callback, (void *)action->timer);
36
37 CRM_ASSERT(action->timer->source_id != 0);
38 }
39
40 static gboolean
41 te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
42 {
43 const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
44
45
46 if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
47 GHashTableIter iter;
48 crm_node_t *node = NULL;
49
50 g_hash_table_iter_init(&iter, crm_peer_cache);
51 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
52 xmlNode *cmd = NULL;
53
54 if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
55 continue;
56 }
57
58 cmd = create_request(task, pseudo->xml, node->uname,
59 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
60 send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
61 free_xml(cmd);
62 }
63
64 remote_ra_process_maintenance_nodes(pseudo->xml);
65 } else {
66
67 remote_ra_process_pseudo(pseudo->xml);
68 }
69
70 crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
71 crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
72 te_action_confirmed(pseudo, graph);
73 return TRUE;
74 }
75
76 static int
77 get_target_rc(crm_action_t * action)
78 {
79 int exit_status;
80
81 pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
82 &exit_status, 0);
83 return exit_status;
84 }
85
86 static gboolean
87 te_crm_command(crm_graph_t * graph, crm_action_t * action)
88 {
89 char *counter = NULL;
90 xmlNode *cmd = NULL;
91 gboolean is_local = FALSE;
92
93 const char *id = NULL;
94 const char *task = NULL;
95 const char *value = NULL;
96 const char *on_node = NULL;
97 const char *router_node = NULL;
98
99 gboolean rc = TRUE;
100 gboolean no_wait = FALSE;
101
102 id = ID(action->xml);
103 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
104 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
105 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
106
107 if (!router_node) {
108 router_node = on_node;
109 if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
110 const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
111
112 if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
113 router_node = fsa_our_uname;
114 }
115 }
116 }
117
118 CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
119 crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task));
120 return FALSE);
121
122 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
123 is_local = TRUE;
124 }
125
126 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
127 if (crm_is_true(value)) {
128 no_wait = TRUE;
129 }
130
131 crm_info("Executing crm-event (%s)%s%s: %s on %s",
132 crm_str(id), (is_local? " locally" : ""),
133 (no_wait? " without waiting" : ""), crm_str(task), on_node);
134
135 if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
136
137 crm_info("crm-event (%s) is a local shutdown", crm_str(id));
138 graph->completion_action = tg_shutdown;
139 graph->abort_reason = "local shutdown";
140 te_action_confirmed(action, graph);
141 return TRUE;
142
143 } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
144 crm_node_t *peer = crm_get_peer(0, router_node);
145
146 pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
147 }
148
149 cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
150
151 counter = pcmk__transition_key(transition_graph->id, action->id,
152 get_target_rc(action), te_uuid);
153 crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
154
155 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
156 free(counter);
157 free_xml(cmd);
158
159 if (rc == FALSE) {
160 crm_err("Action %d failed: send", action->id);
161 return FALSE;
162
163 } else if (no_wait) {
164 te_action_confirmed(action, graph);
165
166 } else {
167 if (action->timeout <= 0) {
168 crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
169 action->id, task, on_node, action->timeout, graph->network_delay);
170 action->timeout = (int) graph->network_delay;
171 }
172 te_start_action_timer(graph, action);
173 }
174
175 return TRUE;
176 }
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194 static lrmd_event_data_t *
195 synthesize_timeout_event(crm_action_t *action, int target_rc)
196 {
197 lrmd_event_data_t *op = NULL;
198 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
199 const char *reason = NULL;
200 char *dynamic_reason = NULL;
201
202 if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
203 reason = "Local executor did not return result in time";
204 } else {
205 const char *router_node = NULL;
206
207 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
208 if (router_node == NULL) {
209 router_node = target;
210 }
211 dynamic_reason = crm_strdup_printf("Controller on %s did not return "
212 "result in time", router_node);
213 reason = dynamic_reason;
214 }
215
216 op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
217 PCMK_OCF_UNKNOWN_ERROR, reason);
218 op->call_id = -1;
219 op->user_data = pcmk__transition_key(transition_graph->id, action->id,
220 target_rc, te_uuid);
221 free(dynamic_reason);
222 return op;
223 }
224
225 void
226 controld_record_action_timeout(crm_action_t *action)
227 {
228 lrmd_event_data_t *op = NULL;
229 xmlNode *state = NULL;
230 xmlNode *rsc = NULL;
231 xmlNode *action_rsc = NULL;
232
233 int rc = pcmk_ok;
234
235 const char *rsc_id = NULL;
236 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
237 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
238 const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
239
240 int call_options = cib_quorum_override | cib_scope_local;
241 int target_rc = get_target_rc(action);
242
243 crm_warn("%s %d: %s on %s timed out",
244 crm_element_name(action->xml), action->id, task_uuid, target);
245
246 action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
247 if (action_rsc == NULL) {
248 return;
249 }
250
251 rsc_id = ID(action_rsc);
252 CRM_CHECK(rsc_id != NULL,
253 crm_log_xml_err(action->xml, "Bad:action"); return);
254
255
256
257
258
259
260
261
262
263
264 state = create_xml_node(NULL, XML_CIB_TAG_STATE);
265
266 crm_xml_add(state, XML_ATTR_UUID, target_uuid);
267 crm_xml_add(state, XML_ATTR_UNAME, target);
268
269 rsc = create_xml_node(state, XML_CIB_TAG_LRM);
270 crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
271
272 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
273 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
274 crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
275
276
277 crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
278 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
279 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
280
281 op = synthesize_timeout_event(action, target_rc);
282 pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
283 __func__);
284 lrmd_free_event(op);
285
286 rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
287 fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
288 free_xml(state);
289
290 crm_trace("Sent CIB update (call ID %d) for timeout of action %d (%s on %s)",
291 rc, action->id, task_uuid, target);
292 crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
293 }
294
295 static gboolean
296 te_rsc_command(crm_graph_t * graph, crm_action_t * action)
297 {
298
299
300
301
302
303
304 xmlNode *cmd = NULL;
305 xmlNode *rsc_op = NULL;
306
307 gboolean rc = TRUE;
308 gboolean no_wait = FALSE;
309 gboolean is_local = FALSE;
310
311 char *counter = NULL;
312 const char *task = NULL;
313 const char *value = NULL;
314 const char *on_node = NULL;
315 const char *router_node = NULL;
316 const char *task_uuid = NULL;
317
318 CRM_ASSERT(action != NULL);
319 CRM_ASSERT(action->xml != NULL);
320
321 crm__clear_graph_action_flags(action, pcmk__graph_action_executed);
322 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
323
324 CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
325 crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task));
326 return FALSE);
327
328 rsc_op = action->xml;
329 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
330 task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
331 router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
332
333 if (!router_node) {
334 router_node = on_node;
335 }
336
337 counter = pcmk__transition_key(transition_graph->id, action->id,
338 get_target_rc(action), te_uuid);
339 crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
340
341 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
342 is_local = TRUE;
343 }
344
345 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
346 if (crm_is_true(value)) {
347 no_wait = TRUE;
348 }
349
350 crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
351 task, task_uuid, (is_local? " locally" : ""), on_node,
352 (no_wait? " without waiting" : ""), action->id);
353
354 cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
355 CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
356
357 if (is_local) {
358
359 ha_msg_input_t data = {
360 .msg = cmd,
361 .xml = rsc_op,
362 };
363
364 fsa_data_t msg = {
365 .id = 0,
366 .data = &data,
367 .data_type = fsa_dt_ha_msg,
368 .fsa_input = I_NULL,
369 .fsa_cause = C_FSA_INTERNAL,
370 .actions = A_LRM_INVOKE,
371 .origin = __func__,
372 };
373
374 do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
375
376 } else {
377 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
378 }
379
380 free(counter);
381 free_xml(cmd);
382
383 crm__set_graph_action_flags(action, pcmk__graph_action_executed);
384
385 if (rc == FALSE) {
386 crm_err("Action %d failed: send", action->id);
387 return FALSE;
388
389 } else if (no_wait) {
390 crm_info("Action %d confirmed - no wait", action->id);
391 crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
392
393
394 pcmk__update_graph(transition_graph, action);
395 trigger_graph();
396
397 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
398 crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
399 action->id, task, task_uuid, on_node, action->timeout);
400 } else {
401 if (action->timeout <= 0) {
402 crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
403 action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
404 action->timeout = (int) graph->network_delay;
405 }
406 te_update_job_count(action, 1);
407 te_start_action_timer(graph, action);
408 }
409
410 return TRUE;
411 }
412
/* Per-node job accounting entry stored in te_targets */
struct te_peer_s {
    char *name;         /* Node name; also used as the entry's hash table key */
    int jobs;           /* Job counter adjusted for every counted action */
    int migrate_jobs;   /* Job counter adjusted only for migration actions */
};
419
420 static void te_peer_free(gpointer p)
421 {
422 struct te_peer_s *peer = p;
423
424 free(peer->name);
425 free(peer);
426 }
427
428 void te_reset_job_counts(void)
429 {
430 GHashTableIter iter;
431 struct te_peer_s *peer = NULL;
432
433 if(te_targets == NULL) {
434 te_targets = pcmk__strkey_table(NULL, te_peer_free);
435 }
436
437 g_hash_table_iter_init(&iter, te_targets);
438 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
439 peer->jobs = 0;
440 peer->migrate_jobs = 0;
441 }
442 }
443
444 static void
445 te_update_job_count_on(const char *target, int offset, bool migrate)
446 {
447 struct te_peer_s *r = NULL;
448
449 if(target == NULL || te_targets == NULL) {
450 return;
451 }
452
453 r = g_hash_table_lookup(te_targets, target);
454 if(r == NULL) {
455 r = calloc(1, sizeof(struct te_peer_s));
456 r->name = strdup(target);
457 g_hash_table_insert(te_targets, r->name, r);
458 }
459
460 r->jobs += offset;
461 if(migrate) {
462 r->migrate_jobs += offset;
463 }
464 crm_trace("jobs[%s] = %d", target, r->jobs);
465 }
466
467 static void
468 te_update_job_count(crm_action_t * action, int offset)
469 {
470 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
471 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
472
473 if (action->type != action_type_rsc || target == NULL) {
474
475 return;
476 }
477
478
479
480
481
482 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
483
484 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
485 CRMD_ACTION_MIGRATED, NULL)) {
486
487 const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
488 const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
489
490 te_update_job_count_on(t1, offset, TRUE);
491 te_update_job_count_on(t2, offset, TRUE);
492 return;
493 } else if (target == NULL) {
494 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
495 }
496
497 te_update_job_count_on(target, offset, FALSE);
498 }
499
500 static gboolean
501 te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target)
502 {
503 int limit = 0;
504 struct te_peer_s *r = NULL;
505 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
506 const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
507
508 if(target == NULL) {
509
510 return TRUE;
511
512 } else if(te_targets == NULL) {
513 return FALSE;
514 }
515
516 r = g_hash_table_lookup(te_targets, target);
517 limit = throttle_get_job_limit(target);
518
519 if(r == NULL) {
520 r = calloc(1, sizeof(struct te_peer_s));
521 r->name = strdup(target);
522 g_hash_table_insert(te_targets, r->name, r);
523 }
524
525 if(limit <= r->jobs) {
526 crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
527 target, limit, r->jobs, id);
528 return FALSE;
529
530 } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
531 if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
532 crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
533 target, graph->migration_limit, r->migrate_jobs, id);
534 return FALSE;
535 }
536 }
537
538 crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
539
540 return TRUE;
541 }
542
543 static gboolean
544 te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
545 {
546 const char *target = NULL;
547 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
548
549 if (action->type != action_type_rsc) {
550
551 return TRUE;
552 }
553
554
555
556
557
558 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
559
560 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
561 CRMD_ACTION_MIGRATED, NULL)) {
562 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
563 if(te_should_perform_action_on(graph, action, target) == FALSE) {
564 return FALSE;
565 }
566
567 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
568
569 } else if (target == NULL) {
570 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
571 }
572
573 return te_should_perform_action_on(graph, action, target);
574 }
575
576
577
578
579
580
581
582 void
583 te_action_confirmed(crm_action_t *action, crm_graph_t *graph)
584 {
585 if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
586 if ((action->type == action_type_rsc)
587 && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
588 te_update_job_count(action, -1);
589 }
590 crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
591 }
592 if (graph) {
593 pcmk__update_graph(graph, action);
594 trigger_graph();
595 }
596 }
597
598
599 crm_graph_functions_t te_graph_fns = {
600 te_pseudo_action,
601 te_rsc_command,
602 te_crm_command,
603 te_fence_node,
604 te_should_perform_action,
605 };
606
607 void
608 notify_crmd(crm_graph_t * graph)
609 {
610 const char *type = "unknown";
611 enum crmd_fsa_input event = I_NULL;
612
613 crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
614
615 CRM_CHECK(graph->complete, graph->complete = TRUE);
616
617 switch (graph->completion_action) {
618 case tg_stop:
619 type = "stop";
620 if (fsa_state == S_TRANSITION_ENGINE) {
621 event = I_TE_SUCCESS;
622 }
623 break;
624 case tg_done:
625 type = "done";
626 if (fsa_state == S_TRANSITION_ENGINE) {
627 event = I_TE_SUCCESS;
628 }
629 break;
630
631 case tg_restart:
632 type = "restart";
633 if (fsa_state == S_TRANSITION_ENGINE) {
634 if (transition_timer->period_ms > 0) {
635 controld_stop_timer(transition_timer);
636 controld_start_timer(transition_timer);
637 } else {
638 event = I_PE_CALC;
639 }
640
641 } else if (fsa_state == S_POLICY_ENGINE) {
642 controld_set_fsa_action_flags(A_PE_INVOKE);
643 trigger_fsa();
644 }
645 break;
646
647 case tg_shutdown:
648 type = "shutdown";
649 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
650 event = I_STOP;
651
652 } else {
653 crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
654 event = I_TERMINATE;
655 }
656 }
657
658 crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason));
659
660 graph->abort_reason = NULL;
661 graph->completion_action = tg_done;
662 controld_clear_fsa_input_flags(R_IN_TRANSITION);
663
664 if (event != I_NULL) {
665 register_fsa_input(C_FSA_INTERNAL, event, NULL);
666
667 } else if (fsa_source) {
668 mainloop_set_trigger(fsa_source);
669 }
670 }