This source file includes the following definitions.
- te_start_action_timer
- execute_pseudo_action
- get_target_rc
- execute_cluster_action
- synthesize_timeout_event
- controld_record_action_event
- controld_record_action_timeout
- execute_rsc_action
- te_peer_free
- te_reset_job_counts
- te_update_job_count_on
- te_update_job_count
- allowed_on_node
- graph_action_allowed
- te_action_confirmed
- notify_crmd
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/lrmd.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19
20 #include <pacemaker-internal.h>
21 #include <pacemaker-controld.h>
22
/* Identifier string passed to pcmk__transition_key() whenever this file
 * builds a transition key for an action
 */
char *te_uuid = NULL;

/* Per-target job counters: struct te_peer_s values keyed by the entry's own
 * name string; created on demand by te_reset_job_counts()
 */
GHashTable *te_targets = NULL;

/* NOTE(review): no definition of send_rsc_command() is visible in this part
 * of the file -- confirm whether this prototype is still needed
 */
void send_rsc_command(pcmk__graph_action_t *action);

/* Forward declaration, needed because execute_rsc_action() calls this before
 * its definition
 */
static void te_update_job_count(pcmk__graph_action_t *action, int offset);
27
28 static void
29 te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
30 {
31 action->timer = g_timeout_add(action->timeout + graph->network_delay,
32 action_timer_callback, (void *) action);
33 CRM_ASSERT(action->timer != 0);
34 }
35
36
37
38
39
40
41
42
43
44
45 static int
46 execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
47 {
48 const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
49
50
51 if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
52 GHashTableIter iter;
53 crm_node_t *node = NULL;
54
55 g_hash_table_iter_init(&iter, crm_peer_cache);
56 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
57 xmlNode *cmd = NULL;
58
59 if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
60 continue;
61 }
62
63 cmd = create_request(task, pseudo->xml, node->uname,
64 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
65 send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
66 free_xml(cmd);
67 }
68
69 remote_ra_process_maintenance_nodes(pseudo->xml);
70 } else {
71
72 remote_ra_process_pseudo(pseudo->xml);
73 }
74
75 crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
76 crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
77 te_action_confirmed(pseudo, graph);
78 return pcmk_rc_ok;
79 }
80
81 static int
82 get_target_rc(pcmk__graph_action_t *action)
83 {
84 int exit_status;
85
86 pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
87 &exit_status, 0);
88 return exit_status;
89 }
90
91
92
93
94
95
96
97
98
99
100 static int
101 execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
102 {
103 char *counter = NULL;
104 xmlNode *cmd = NULL;
105 gboolean is_local = FALSE;
106
107 const char *id = NULL;
108 const char *task = NULL;
109 const char *value = NULL;
110 const char *on_node = NULL;
111 const char *router_node = NULL;
112
113 gboolean rc = TRUE;
114 gboolean no_wait = FALSE;
115
116 id = ID(action->xml);
117 CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
118
119 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
120 CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
121
122 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
123 CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
124
125 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
126 if (router_node == NULL) {
127 router_node = on_node;
128 if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
129 const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
130
131 if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
132 router_node = fsa_our_uname;
133 }
134 }
135 }
136
137 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
138 is_local = TRUE;
139 }
140
141 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
142 if (crm_is_true(value)) {
143 no_wait = TRUE;
144 }
145
146 crm_info("Handling controller request '%s' (%s on %s)%s%s",
147 id, task, on_node, (is_local? " locally" : ""),
148 (no_wait? " without waiting" : ""));
149
150 if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
151
152 crm_info("Controller request '%s' is a local shutdown", id);
153 graph->completion_action = pcmk__graph_shutdown;
154 graph->abort_reason = "local shutdown";
155 te_action_confirmed(action, graph);
156 return pcmk_rc_ok;
157
158 } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
159 crm_node_t *peer = crm_get_peer(0, router_node);
160
161 pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
162 }
163
164 cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
165
166 counter = pcmk__transition_key(transition_graph->id, action->id,
167 get_target_rc(action), te_uuid);
168 crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
169
170 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
171 free(counter);
172 free_xml(cmd);
173
174 if (rc == FALSE) {
175 crm_err("Action %d failed: send", action->id);
176 return ECOMM;
177
178 } else if (no_wait) {
179 te_action_confirmed(action, graph);
180
181 } else {
182 if (action->timeout <= 0) {
183 crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
184 action->id, task, on_node, action->timeout, graph->network_delay);
185 action->timeout = (int) graph->network_delay;
186 }
187 te_start_action_timer(graph, action);
188 }
189
190 return pcmk_rc_ok;
191 }
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209 static lrmd_event_data_t *
210 synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
211 {
212 lrmd_event_data_t *op = NULL;
213 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
214 const char *reason = NULL;
215 char *dynamic_reason = NULL;
216
217 if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
218 reason = "Local executor did not return result in time";
219 } else {
220 const char *router_node = NULL;
221
222 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
223 if (router_node == NULL) {
224 router_node = target;
225 }
226 dynamic_reason = crm_strdup_printf("Controller on %s did not return "
227 "result in time", router_node);
228 reason = dynamic_reason;
229 }
230
231 op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
232 PCMK_OCF_UNKNOWN_ERROR, reason);
233 op->call_id = -1;
234 op->user_data = pcmk__transition_key(transition_graph->id, action->id,
235 target_rc, te_uuid);
236 free(dynamic_reason);
237 return op;
238 }
239
240 static void
241 controld_record_action_event(pcmk__graph_action_t *action,
242 lrmd_event_data_t *op)
243 {
244 xmlNode *state = NULL;
245 xmlNode *rsc = NULL;
246 xmlNode *action_rsc = NULL;
247
248 int rc = pcmk_ok;
249
250 const char *rsc_id = NULL;
251 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
252 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
253 const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
254
255 int call_options = cib_quorum_override | cib_scope_local;
256 int target_rc = get_target_rc(action);
257
258 action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
259 if (action_rsc == NULL) {
260 return;
261 }
262
263 rsc_id = ID(action_rsc);
264 CRM_CHECK(rsc_id != NULL,
265 crm_log_xml_err(action->xml, "Bad:action"); return);
266
267
268
269
270
271
272
273
274
275
276 state = create_xml_node(NULL, XML_CIB_TAG_STATE);
277
278 crm_xml_add(state, XML_ATTR_UUID, target_uuid);
279 crm_xml_add(state, XML_ATTR_UNAME, target);
280
281 rsc = create_xml_node(state, XML_CIB_TAG_LRM);
282 crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
283
284 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
285 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
286 crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
287
288
289 crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
290 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
291 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
292
293 pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
294 __func__);
295
296 rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
297 fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
298 free_xml(state);
299
300 crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
301 rc, action->id, task_uuid, target);
302 pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
303 }
304
305 void
306 controld_record_action_timeout(pcmk__graph_action_t *action)
307 {
308 lrmd_event_data_t *op = NULL;
309
310 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
311 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
312
313 int target_rc = get_target_rc(action);
314
315 crm_warn("%s %d: %s on %s timed out",
316 crm_element_name(action->xml), action->id, task_uuid, target);
317
318 op = synthesize_timeout_event(action, target_rc);
319 controld_record_action_event(action, op);
320 lrmd_free_event(op);
321 }
322
323
324
325
326
327
328
329
330
331
332 static int
333 execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
334 {
335
336
337
338
339
340
341 xmlNode *cmd = NULL;
342 xmlNode *rsc_op = NULL;
343
344 gboolean rc = TRUE;
345 gboolean no_wait = FALSE;
346 gboolean is_local = FALSE;
347
348 char *counter = NULL;
349 const char *task = NULL;
350 const char *value = NULL;
351 const char *on_node = NULL;
352 const char *router_node = NULL;
353 const char *task_uuid = NULL;
354
355 CRM_ASSERT(action != NULL);
356 CRM_ASSERT(action->xml != NULL);
357
358 pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
359 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
360
361 CRM_CHECK(!pcmk__str_empty(on_node),
362 crm_err("Corrupted command(id=%s) %s: no node",
363 ID(action->xml), pcmk__s(task, "without task"));
364 return pcmk_rc_node_unknown);
365
366 rsc_op = action->xml;
367 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
368 task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
369 router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
370
371 if (!router_node) {
372 router_node = on_node;
373 }
374
375 counter = pcmk__transition_key(transition_graph->id, action->id,
376 get_target_rc(action), te_uuid);
377 crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
378
379 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
380 is_local = TRUE;
381 }
382
383 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
384 if (crm_is_true(value)) {
385 no_wait = TRUE;
386 }
387
388 crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
389 task, task_uuid, (is_local? " locally" : ""), on_node,
390 (no_wait? " without waiting" : ""), action->id);
391
392 cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
393 CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
394
395 if (is_local) {
396
397 ha_msg_input_t data = {
398 .msg = cmd,
399 .xml = rsc_op,
400 };
401
402 fsa_data_t msg = {
403 .id = 0,
404 .data = &data,
405 .data_type = fsa_dt_ha_msg,
406 .fsa_input = I_NULL,
407 .fsa_cause = C_FSA_INTERNAL,
408 .actions = A_LRM_INVOKE,
409 .origin = __func__,
410 };
411
412 do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
413
414 } else {
415 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
416 }
417
418 free(counter);
419 free_xml(cmd);
420
421 pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
422
423 if (rc == FALSE) {
424 crm_err("Action %d failed: send", action->id);
425 return ECOMM;
426
427 } else if (no_wait) {
428
429
430
431 crm_info("Action %d confirmed - no wait", action->id);
432 pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
433 pcmk__update_graph(transition_graph, action);
434 trigger_graph();
435
436 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
437 crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
438 action->id, task, task_uuid, on_node, action->timeout);
439 } else {
440 if (action->timeout <= 0) {
441 crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
442 action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
443 action->timeout = (int) graph->network_delay;
444 }
445 te_update_job_count(action, 1);
446 te_start_action_timer(graph, action);
447 }
448
449 return pcmk_rc_ok;
450 }
451
/* Job bookkeeping for one target, stored as a value in te_targets */
struct te_peer_s {
    char *name;         /* Target name; also used as the hash table key */
    int jobs;           /* Count adjusted by every job-count update */
    int migrate_jobs;   /* Count adjusted only by migration-related updates */
};
458
459 static void te_peer_free(gpointer p)
460 {
461 struct te_peer_s *peer = p;
462
463 free(peer->name);
464 free(peer);
465 }
466
467 void te_reset_job_counts(void)
468 {
469 GHashTableIter iter;
470 struct te_peer_s *peer = NULL;
471
472 if(te_targets == NULL) {
473 te_targets = pcmk__strkey_table(NULL, te_peer_free);
474 }
475
476 g_hash_table_iter_init(&iter, te_targets);
477 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
478 peer->jobs = 0;
479 peer->migrate_jobs = 0;
480 }
481 }
482
483 static void
484 te_update_job_count_on(const char *target, int offset, bool migrate)
485 {
486 struct te_peer_s *r = NULL;
487
488 if(target == NULL || te_targets == NULL) {
489 return;
490 }
491
492 r = g_hash_table_lookup(te_targets, target);
493 if(r == NULL) {
494 r = calloc(1, sizeof(struct te_peer_s));
495 r->name = strdup(target);
496 g_hash_table_insert(te_targets, r->name, r);
497 }
498
499 r->jobs += offset;
500 if(migrate) {
501 r->migrate_jobs += offset;
502 }
503 crm_trace("jobs[%s] = %d", target, r->jobs);
504 }
505
506 static void
507 te_update_job_count(pcmk__graph_action_t *action, int offset)
508 {
509 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
510 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
511
512 if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
513
514 return;
515 }
516
517
518
519
520
521 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
522
523 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
524 CRMD_ACTION_MIGRATED, NULL)) {
525
526 const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
527 const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
528
529 te_update_job_count_on(t1, offset, TRUE);
530 te_update_job_count_on(t2, offset, TRUE);
531 return;
532 } else if (target == NULL) {
533 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
534 }
535
536 te_update_job_count_on(target, offset, FALSE);
537 }
538
539
540
541
542
543
544
545
546
547
548
549 static bool
550 allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
551 const char *target)
552 {
553 int limit = 0;
554 struct te_peer_s *r = NULL;
555 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
556 const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
557
558 if(target == NULL) {
559
560 return true;
561
562 } else if(te_targets == NULL) {
563 return false;
564 }
565
566 r = g_hash_table_lookup(te_targets, target);
567 limit = throttle_get_job_limit(target);
568
569 if(r == NULL) {
570 r = calloc(1, sizeof(struct te_peer_s));
571 r->name = strdup(target);
572 g_hash_table_insert(te_targets, r->name, r);
573 }
574
575 if(limit <= r->jobs) {
576 crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
577 target, limit, r->jobs, id);
578 return false;
579
580 } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
581 if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
582 crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
583 target, graph->migration_limit, r->migrate_jobs, id);
584 return false;
585 }
586 }
587
588 crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
589
590 return true;
591 }
592
593
594
595
596
597
598
599
600
601
602 static bool
603 graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
604 {
605 const char *target = NULL;
606 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
607
608 if (action->type != pcmk__rsc_graph_action) {
609
610 return true;
611 }
612
613
614
615
616
617 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
618
619 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
620 CRMD_ACTION_MIGRATED, NULL)) {
621 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
622 if (!allowed_on_node(graph, action, target)) {
623 return false;
624 }
625
626 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
627
628 } else if (target == NULL) {
629 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
630 }
631
632 return allowed_on_node(graph, action, target);
633 }
634
635
636
637
638
639
640
641 void
642 te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
643 {
644 if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
645 if ((action->type == pcmk__rsc_graph_action)
646 && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
647 te_update_job_count(action, -1);
648 }
649 pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
650 }
651 if (graph) {
652 pcmk__update_graph(graph, action);
653 trigger_graph();
654 }
655 }
656
657
/* Callback table for transition graph processing. The initializers are
 * positional, so their order must match the member order of
 * pcmk__graph_functions_t (declared outside this file -- verify against that
 * declaration before reordering).
 */
pcmk__graph_functions_t te_graph_fns = {
    execute_pseudo_action,          /* handler for pseudo-actions */
    execute_rsc_action,             /* handler for resource actions */
    execute_cluster_action,         /* handler for cluster (controller) actions */
    controld_execute_fence_action,  /* handler for fencing actions (defined elsewhere) */
    graph_action_allowed,           /* check whether an action may start now */
};
665
666 void
667 notify_crmd(pcmk__graph_t *graph)
668 {
669 const char *type = "unknown";
670 enum crmd_fsa_input event = I_NULL;
671
672 crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
673
674 CRM_CHECK(graph->complete, graph->complete = true);
675
676 switch (graph->completion_action) {
677 case pcmk__graph_wait:
678 type = "stop";
679 if (fsa_state == S_TRANSITION_ENGINE) {
680 event = I_TE_SUCCESS;
681 }
682 break;
683 case pcmk__graph_done:
684 type = "done";
685 if (fsa_state == S_TRANSITION_ENGINE) {
686 event = I_TE_SUCCESS;
687 }
688 break;
689
690 case pcmk__graph_restart:
691 type = "restart";
692 if (fsa_state == S_TRANSITION_ENGINE) {
693 if (transition_timer->period_ms > 0) {
694 controld_stop_timer(transition_timer);
695 controld_start_timer(transition_timer);
696 } else {
697 event = I_PE_CALC;
698 }
699
700 } else if (fsa_state == S_POLICY_ENGINE) {
701 controld_set_fsa_action_flags(A_PE_INVOKE);
702 trigger_fsa();
703 }
704 break;
705
706 case pcmk__graph_shutdown:
707 type = "shutdown";
708 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
709 event = I_STOP;
710
711 } else {
712 crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
713 event = I_TERMINATE;
714 }
715 }
716
717 crm_debug("Transition %d status: %s - %s", graph->id, type,
718 pcmk__s(graph->abort_reason, "unspecified reason"));
719
720 graph->abort_reason = NULL;
721 graph->completion_action = pcmk__graph_done;
722 controld_clear_fsa_input_flags(R_IN_TRANSITION);
723
724 if (event != I_NULL) {
725 register_fsa_input(C_FSA_INTERNAL, event, NULL);
726
727 } else if (fsa_source) {
728 mainloop_set_trigger(fsa_source);
729 }
730 }