This source file includes the following definitions.
- te_start_action_timer
- execute_pseudo_action
- get_target_rc
- execute_cluster_action
- synthesize_timeout_event
- controld_record_action_event
- controld_record_action_timeout
- execute_rsc_action
- te_peer_free
- te_reset_job_counts
- te_update_job_count_on
- te_update_job_count
- allowed_on_node
- graph_action_allowed
- te_action_confirmed
- controld_register_graph_functions
- notify_crmd
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/lrmd.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19
20 #include <pacemaker-internal.h>
21 #include <pacemaker-controld.h>
22
23 static GHashTable *te_targets = NULL;
24 void send_rsc_command(pcmk__graph_action_t *action);
25 static void te_update_job_count(pcmk__graph_action_t *action, int offset);
26
27 static void
28 te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
29 {
30 action->timer = g_timeout_add(action->timeout + graph->network_delay,
31 action_timer_callback, (void *) action);
32 CRM_ASSERT(action->timer != 0);
33 }
34
35
36
37
38
39
40
41
42
43
44 static int
45 execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
46 {
47 const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
48
49
50 if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
51 GHashTableIter iter;
52 crm_node_t *node = NULL;
53
54 g_hash_table_iter_init(&iter, crm_peer_cache);
55 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
56 xmlNode *cmd = NULL;
57
58 if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
59 pcmk__str_casei)) {
60 continue;
61 }
62
63 cmd = create_request(task, pseudo->xml, node->uname,
64 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
65 send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
66 free_xml(cmd);
67 }
68
69 remote_ra_process_maintenance_nodes(pseudo->xml);
70 } else {
71
72 remote_ra_process_pseudo(pseudo->xml);
73 }
74
75 crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
76 crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
77 te_action_confirmed(pseudo, graph);
78 return pcmk_rc_ok;
79 }
80
81 static int
82 get_target_rc(pcmk__graph_action_t *action)
83 {
84 int exit_status;
85
86 pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
87 &exit_status, 0);
88 return exit_status;
89 }
90
91
92
93
94
95
96
97
98
99
100 static int
101 execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
102 {
103 char *counter = NULL;
104 xmlNode *cmd = NULL;
105 gboolean is_local = FALSE;
106
107 const char *id = NULL;
108 const char *task = NULL;
109 const char *value = NULL;
110 const char *on_node = NULL;
111 const char *router_node = NULL;
112
113 gboolean rc = TRUE;
114 gboolean no_wait = FALSE;
115
116 id = ID(action->xml);
117 CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
118
119 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
120 CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
121
122 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
123 CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
124
125 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
126 if (router_node == NULL) {
127 router_node = on_node;
128 if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
129 const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
130
131 if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
132 router_node = controld_globals.our_nodename;
133 }
134 }
135 }
136
137 if (pcmk__str_eq(router_node, controld_globals.our_nodename,
138 pcmk__str_casei)) {
139 is_local = TRUE;
140 }
141
142 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
143 if (crm_is_true(value)) {
144 no_wait = TRUE;
145 }
146
147 crm_info("Handling controller request '%s' (%s on %s)%s%s",
148 id, task, on_node, (is_local? " locally" : ""),
149 (no_wait? " without waiting" : ""));
150
151 if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
152
153 crm_info("Controller request '%s' is a local shutdown", id);
154 graph->completion_action = pcmk__graph_shutdown;
155 graph->abort_reason = "local shutdown";
156 te_action_confirmed(action, graph);
157 return pcmk_rc_ok;
158
159 } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
160 crm_node_t *peer = crm_get_peer(0, router_node);
161
162 pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
163 }
164
165 cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
166
167 counter = pcmk__transition_key(controld_globals.transition_graph->id,
168 action->id, get_target_rc(action),
169 controld_globals.te_uuid);
170 crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
171
172 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
173 free(counter);
174 free_xml(cmd);
175
176 if (rc == FALSE) {
177 crm_err("Action %d failed: send", action->id);
178 return ECOMM;
179
180 } else if (no_wait) {
181 te_action_confirmed(action, graph);
182
183 } else {
184 if (action->timeout <= 0) {
185 crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
186 action->id, task, on_node, action->timeout, graph->network_delay);
187 action->timeout = (int) graph->network_delay;
188 }
189 te_start_action_timer(graph, action);
190 }
191
192 return pcmk_rc_ok;
193 }
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211 static lrmd_event_data_t *
212 synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
213 {
214 lrmd_event_data_t *op = NULL;
215 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
216 const char *reason = NULL;
217 char *dynamic_reason = NULL;
218
219 if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
220 reason = "Local executor did not return result in time";
221 } else {
222 const char *router_node = NULL;
223
224 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
225 if (router_node == NULL) {
226 router_node = target;
227 }
228 dynamic_reason = crm_strdup_printf("Controller on %s did not return "
229 "result in time", router_node);
230 reason = dynamic_reason;
231 }
232
233 op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
234 PCMK_OCF_UNKNOWN_ERROR, reason);
235 op->call_id = -1;
236 op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
237 action->id, target_rc,
238 controld_globals.te_uuid);
239 free(dynamic_reason);
240 return op;
241 }
242
243 static void
244 controld_record_action_event(pcmk__graph_action_t *action,
245 lrmd_event_data_t *op)
246 {
247 cib_t *cib_conn = controld_globals.cib_conn;
248
249 xmlNode *state = NULL;
250 xmlNode *rsc = NULL;
251 xmlNode *action_rsc = NULL;
252
253 int rc = pcmk_ok;
254
255 const char *rsc_id = NULL;
256 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
257 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
258 const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
259
260 int target_rc = get_target_rc(action);
261
262 action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
263 if (action_rsc == NULL) {
264 return;
265 }
266
267 rsc_id = ID(action_rsc);
268 CRM_CHECK(rsc_id != NULL,
269 crm_log_xml_err(action->xml, "Bad:action"); return);
270
271
272
273
274
275
276
277
278
279
280 state = create_xml_node(NULL, XML_CIB_TAG_STATE);
281
282 crm_xml_add(state, XML_ATTR_ID, target_uuid);
283 crm_xml_add(state, XML_ATTR_UNAME, target);
284
285 rsc = create_xml_node(state, XML_CIB_TAG_LRM);
286 crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
287
288 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
289 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
290 crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
291
292
293 crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
294 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
295 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
296
297 pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
298 __func__);
299
300 rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state,
301 cib_scope_local);
302 fsa_register_cib_callback(rc, NULL, cib_action_updated);
303 free_xml(state);
304
305 crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
306 rc, action->id, task_uuid, target);
307 pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
308 }
309
310 void
311 controld_record_action_timeout(pcmk__graph_action_t *action)
312 {
313 lrmd_event_data_t *op = NULL;
314
315 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
316 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
317
318 int target_rc = get_target_rc(action);
319
320 crm_warn("%s %d: %s on %s timed out",
321 crm_element_name(action->xml), action->id, task_uuid, target);
322
323 op = synthesize_timeout_event(action, target_rc);
324 controld_record_action_event(action, op);
325 lrmd_free_event(op);
326 }
327
328
329
330
331
332
333
334
335
336
337 static int
338 execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
339 {
340
341
342
343
344
345
346 xmlNode *cmd = NULL;
347 xmlNode *rsc_op = NULL;
348
349 gboolean rc = TRUE;
350 gboolean no_wait = FALSE;
351 gboolean is_local = FALSE;
352
353 char *counter = NULL;
354 const char *task = NULL;
355 const char *value = NULL;
356 const char *on_node = NULL;
357 const char *router_node = NULL;
358 const char *task_uuid = NULL;
359
360 CRM_ASSERT(action != NULL);
361 CRM_ASSERT(action->xml != NULL);
362
363 pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
364 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
365
366 CRM_CHECK(!pcmk__str_empty(on_node),
367 crm_err("Corrupted command(id=%s) %s: no node",
368 ID(action->xml), pcmk__s(task, "without task"));
369 return pcmk_rc_node_unknown);
370
371 rsc_op = action->xml;
372 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
373 task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
374 router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
375
376 if (!router_node) {
377 router_node = on_node;
378 }
379
380 counter = pcmk__transition_key(controld_globals.transition_graph->id,
381 action->id, get_target_rc(action),
382 controld_globals.te_uuid);
383 crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
384
385 if (pcmk__str_eq(router_node, controld_globals.our_nodename,
386 pcmk__str_casei)) {
387 is_local = TRUE;
388 }
389
390 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
391 if (crm_is_true(value)) {
392 no_wait = TRUE;
393 }
394
395 crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
396 task, task_uuid, (is_local? " locally" : ""), on_node,
397 (no_wait? " without waiting" : ""), action->id);
398
399 cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
400 CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
401
402 if (is_local) {
403
404 ha_msg_input_t data = {
405 .msg = cmd,
406 .xml = rsc_op,
407 };
408
409 fsa_data_t msg = {
410 .id = 0,
411 .data = &data,
412 .data_type = fsa_dt_ha_msg,
413 .fsa_input = I_NULL,
414 .fsa_cause = C_FSA_INTERNAL,
415 .actions = A_LRM_INVOKE,
416 .origin = __func__,
417 };
418
419 do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
420 I_NULL, &msg);
421
422 } else {
423 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
424 }
425
426 free(counter);
427 free_xml(cmd);
428
429 pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
430
431 if (rc == FALSE) {
432 crm_err("Action %d failed: send", action->id);
433 return ECOMM;
434
435 } else if (no_wait) {
436
437
438
439 crm_info("Action %d confirmed - no wait", action->id);
440 pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
441 pcmk__update_graph(controld_globals.transition_graph, action);
442 trigger_graph();
443
444 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
445 crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
446 action->id, task, task_uuid, on_node, action->timeout);
447 } else {
448 if (action->timeout <= 0) {
449 crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
450 action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
451 action->timeout = (int) graph->network_delay;
452 }
453 te_update_job_count(action, 1);
454 te_start_action_timer(graph, action);
455 }
456
457 return pcmk_rc_ok;
458 }
459
/* Job accounting for one node; instances are the values stored in te_targets */
struct te_peer_s {
    char *name;         /* Node name; also used as the entry's hash table key */
    int jobs;           /* Jobs currently counted against this node */
    int migrate_jobs;   /* Subset of jobs counted as migrations */
};
466
467 static void te_peer_free(gpointer p)
468 {
469 struct te_peer_s *peer = p;
470
471 free(peer->name);
472 free(peer);
473 }
474
475 void te_reset_job_counts(void)
476 {
477 GHashTableIter iter;
478 struct te_peer_s *peer = NULL;
479
480 if(te_targets == NULL) {
481 te_targets = pcmk__strkey_table(NULL, te_peer_free);
482 }
483
484 g_hash_table_iter_init(&iter, te_targets);
485 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
486 peer->jobs = 0;
487 peer->migrate_jobs = 0;
488 }
489 }
490
491 static void
492 te_update_job_count_on(const char *target, int offset, bool migrate)
493 {
494 struct te_peer_s *r = NULL;
495
496 if(target == NULL || te_targets == NULL) {
497 return;
498 }
499
500 r = g_hash_table_lookup(te_targets, target);
501 if(r == NULL) {
502 r = calloc(1, sizeof(struct te_peer_s));
503 r->name = strdup(target);
504 g_hash_table_insert(te_targets, r->name, r);
505 }
506
507 r->jobs += offset;
508 if(migrate) {
509 r->migrate_jobs += offset;
510 }
511 crm_trace("jobs[%s] = %d", target, r->jobs);
512 }
513
514 static void
515 te_update_job_count(pcmk__graph_action_t *action, int offset)
516 {
517 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
518 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
519
520 if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
521
522 return;
523 }
524
525
526
527
528
529 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
530
531 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
532 CRMD_ACTION_MIGRATED, NULL)) {
533
534 const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
535 const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
536
537 te_update_job_count_on(t1, offset, TRUE);
538 te_update_job_count_on(t2, offset, TRUE);
539 return;
540 } else if (target == NULL) {
541 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
542 }
543
544 te_update_job_count_on(target, offset, FALSE);
545 }
546
547
548
549
550
551
552
553
554
555
556
557 static bool
558 allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
559 const char *target)
560 {
561 int limit = 0;
562 struct te_peer_s *r = NULL;
563 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
564 const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
565
566 if(target == NULL) {
567
568 return true;
569
570 } else if(te_targets == NULL) {
571 return false;
572 }
573
574 r = g_hash_table_lookup(te_targets, target);
575 limit = throttle_get_job_limit(target);
576
577 if(r == NULL) {
578 r = calloc(1, sizeof(struct te_peer_s));
579 r->name = strdup(target);
580 g_hash_table_insert(te_targets, r->name, r);
581 }
582
583 if(limit <= r->jobs) {
584 crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
585 target, limit, r->jobs, id);
586 return false;
587
588 } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
589 if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
590 crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
591 target, graph->migration_limit, r->migrate_jobs, id);
592 return false;
593 }
594 }
595
596 crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
597
598 return true;
599 }
600
601
602
603
604
605
606
607
608
609
610 static bool
611 graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
612 {
613 const char *target = NULL;
614 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
615
616 if (action->type != pcmk__rsc_graph_action) {
617
618 return true;
619 }
620
621
622
623
624
625 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
626
627 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
628 CRMD_ACTION_MIGRATED, NULL)) {
629 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
630 if (!allowed_on_node(graph, action, target)) {
631 return false;
632 }
633
634 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
635
636 } else if (target == NULL) {
637 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
638 }
639
640 return allowed_on_node(graph, action, target);
641 }
642
643
644
645
646
647
648
649 void
650 te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
651 {
652 if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
653 if ((action->type == pcmk__rsc_graph_action)
654 && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
655 te_update_job_count(action, -1);
656 }
657 pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
658 }
659 if (graph) {
660 pcmk__update_graph(graph, action);
661 trigger_graph();
662 }
663 }
664
665 static pcmk__graph_functions_t te_graph_fns = {
666 execute_pseudo_action,
667 execute_rsc_action,
668 execute_cluster_action,
669 controld_execute_fence_action,
670 graph_action_allowed,
671 };
672
673
674
675
676
677 void
678 controld_register_graph_functions(void)
679 {
680 pcmk__set_graph_functions(&te_graph_fns);
681 }
682
683 void
684 notify_crmd(pcmk__graph_t *graph)
685 {
686 const char *type = "unknown";
687 enum crmd_fsa_input event = I_NULL;
688
689 crm_debug("Processing transition completion in state %s",
690 fsa_state2string(controld_globals.fsa_state));
691
692 CRM_CHECK(graph->complete, graph->complete = true);
693
694 switch (graph->completion_action) {
695 case pcmk__graph_wait:
696 type = "stop";
697 if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
698 event = I_TE_SUCCESS;
699 }
700 break;
701 case pcmk__graph_done:
702 type = "done";
703 if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
704 event = I_TE_SUCCESS;
705 }
706 break;
707
708 case pcmk__graph_restart:
709 type = "restart";
710 if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
711 if (controld_get_period_transition_timer() > 0) {
712 controld_stop_transition_timer();
713 controld_start_transition_timer();
714 } else {
715 event = I_PE_CALC;
716 }
717
718 } else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
719 controld_set_fsa_action_flags(A_PE_INVOKE);
720 controld_trigger_fsa();
721 }
722 break;
723
724 case pcmk__graph_shutdown:
725 type = "shutdown";
726 if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
727 event = I_STOP;
728
729 } else {
730 crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
731 event = I_TERMINATE;
732 }
733 }
734
735 crm_debug("Transition %d status: %s - %s", graph->id, type,
736 pcmk__s(graph->abort_reason, "unspecified reason"));
737
738 graph->abort_reason = NULL;
739 graph->completion_action = pcmk__graph_done;
740
741 if (event != I_NULL) {
742 register_fsa_input(C_FSA_INTERNAL, event, NULL);
743 } else {
744 controld_trigger_fsa();
745 }
746 }