This source file includes the following definitions.
- te_start_action_timer
- execute_pseudo_action
- get_target_rc
- execute_cluster_action
- synthesize_timeout_event
- controld_record_action_event
- controld_record_action_timeout
- execute_rsc_action
- te_peer_free
- te_reset_job_counts
- te_update_job_count_on
- te_update_job_count
- allowed_on_node
- graph_action_allowed
- te_action_confirmed
- controld_register_graph_functions
- notify_crmd
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/lrmd.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19
20 #include <pacemaker-internal.h>
21 #include <pacemaker-controld.h>
22
/* Per-node job counters. te_reset_job_counts() creates this table on first
 * use; entries are struct te_peer_s values keyed by their own name string and
 * released through te_peer_free(). */
23 static GHashTable *te_targets = NULL;
/* Prototype only; no definition is visible in this part of the file. */
24 void send_rsc_command(pcmk__graph_action_t *action);
/* Forward declaration: execute_rsc_action() calls this before its definition. */
25 static void te_update_job_count(pcmk__graph_action_t *action, int offset);
26
27 static void
28 te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
29 {
30 action->timer = g_timeout_add(action->timeout + graph->network_delay,
31 action_timer_callback, (void *) action);
32 CRM_ASSERT(action->timer != 0);
33 }
34
35
36
37
38
39
40
41
42
43
44 static int
45 execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
46 {
47 const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
48
49
50 if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) {
51 GHashTableIter iter;
52 crm_node_t *node = NULL;
53
54 g_hash_table_iter_init(&iter, crm_peer_cache);
55 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
56 xmlNode *cmd = NULL;
57
58 if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
59 pcmk__str_casei)) {
60 continue;
61 }
62
63 cmd = create_request(task, pseudo->xml, node->uname,
64 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
65 send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
66 free_xml(cmd);
67 }
68
69 remote_ra_process_maintenance_nodes(pseudo->xml);
70 } else {
71
72 remote_ra_process_pseudo(pseudo->xml);
73 }
74
75 crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
76 crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
77 te_action_confirmed(pseudo, graph);
78 return pcmk_rc_ok;
79 }
80
81 static int
82 get_target_rc(pcmk__graph_action_t *action)
83 {
84 int exit_status;
85
86 pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
87 &exit_status, 0);
88 return exit_status;
89 }
90
91
92
93
94
95
96
97
98
99
100 static int
101 execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
102 {
103 char *counter = NULL;
104 xmlNode *cmd = NULL;
105 gboolean is_local = FALSE;
106
107 const char *id = NULL;
108 const char *task = NULL;
109 const char *value = NULL;
110 const char *on_node = NULL;
111 const char *router_node = NULL;
112
113 gboolean rc = TRUE;
114 gboolean no_wait = FALSE;
115
116 id = ID(action->xml);
117 CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
118
119 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
120 CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
121
122 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
123 CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
124
125 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
126 if (router_node == NULL) {
127 router_node = on_node;
128 if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
129 const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
130
131 if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
132 router_node = controld_globals.our_nodename;
133 }
134 }
135 }
136
137 if (pcmk__str_eq(router_node, controld_globals.our_nodename,
138 pcmk__str_casei)) {
139 is_local = TRUE;
140 }
141
142 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
143 if (crm_is_true(value)) {
144 no_wait = TRUE;
145 }
146
147 crm_info("Handling controller request '%s' (%s on %s)%s%s",
148 id, task, on_node, (is_local? " locally" : ""),
149 (no_wait? " without waiting" : ""));
150
151 if (is_local
152 && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
153
154 crm_info("Controller request '%s' is a local shutdown", id);
155 graph->completion_action = pcmk__graph_shutdown;
156 graph->abort_reason = "local shutdown";
157 te_action_confirmed(action, graph);
158 return pcmk_rc_ok;
159
160 } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
161 crm_node_t *peer = crm_get_peer(0, router_node);
162
163 pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
164 }
165
166 cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
167
168 counter = pcmk__transition_key(controld_globals.transition_graph->id,
169 action->id, get_target_rc(action),
170 controld_globals.te_uuid);
171 crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
172
173 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
174 free(counter);
175 free_xml(cmd);
176
177 if (rc == FALSE) {
178 crm_err("Action %d failed: send", action->id);
179 return ECOMM;
180
181 } else if (no_wait) {
182 te_action_confirmed(action, graph);
183
184 } else {
185 if (action->timeout <= 0) {
186 crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
187 action->id, task, on_node, action->timeout, graph->network_delay);
188 action->timeout = (int) graph->network_delay;
189 }
190 te_start_action_timer(graph, action);
191 }
192
193 return pcmk_rc_ok;
194 }
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212 static lrmd_event_data_t *
213 synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
214 {
215 lrmd_event_data_t *op = NULL;
216 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
217 const char *reason = NULL;
218 char *dynamic_reason = NULL;
219
220 if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
221 reason = "Local executor did not return result in time";
222 } else {
223 const char *router_node = NULL;
224
225 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
226 if (router_node == NULL) {
227 router_node = target;
228 }
229 dynamic_reason = crm_strdup_printf("Controller on %s did not return "
230 "result in time", router_node);
231 reason = dynamic_reason;
232 }
233
234 op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
235 PCMK_OCF_UNKNOWN_ERROR, reason);
236 op->call_id = -1;
237 op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
238 action->id, target_rc,
239 controld_globals.te_uuid);
240 free(dynamic_reason);
241 return op;
242 }
243
244 static void
245 controld_record_action_event(pcmk__graph_action_t *action,
246 lrmd_event_data_t *op)
247 {
248 cib_t *cib_conn = controld_globals.cib_conn;
249
250 xmlNode *state = NULL;
251 xmlNode *rsc = NULL;
252 xmlNode *action_rsc = NULL;
253
254 int rc = pcmk_ok;
255
256 const char *rsc_id = NULL;
257 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
258 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
259 const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
260
261 int target_rc = get_target_rc(action);
262
263 action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
264 if (action_rsc == NULL) {
265 return;
266 }
267
268 rsc_id = ID(action_rsc);
269 CRM_CHECK(rsc_id != NULL,
270 crm_log_xml_err(action->xml, "Bad:action"); return);
271
272
273
274
275
276
277
278
279
280
281 state = create_xml_node(NULL, XML_CIB_TAG_STATE);
282
283 crm_xml_add(state, XML_ATTR_ID, target_uuid);
284 crm_xml_add(state, XML_ATTR_UNAME, target);
285
286 rsc = create_xml_node(state, XML_CIB_TAG_LRM);
287 crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
288
289 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
290 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
291 crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
292
293
294 crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
295 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
296 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
297
298 pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
299 __func__);
300
301 rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state,
302 cib_scope_local);
303 fsa_register_cib_callback(rc, NULL, cib_action_updated);
304 free_xml(state);
305
306 crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
307 rc, action->id, task_uuid, target);
308 pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
309 }
310
311 void
312 controld_record_action_timeout(pcmk__graph_action_t *action)
313 {
314 lrmd_event_data_t *op = NULL;
315
316 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
317 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
318
319 int target_rc = get_target_rc(action);
320
321 crm_warn("%s %d: %s on %s timed out",
322 action->xml->name, action->id, task_uuid, target);
323
324 op = synthesize_timeout_event(action, target_rc);
325 controld_record_action_event(action, op);
326 lrmd_free_event(op);
327 }
328
329
330
331
332
333
334
335
336
337
338 static int
339 execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
340 {
341
342
343
344
345
346
347 xmlNode *cmd = NULL;
348 xmlNode *rsc_op = NULL;
349
350 gboolean rc = TRUE;
351 gboolean no_wait = FALSE;
352 gboolean is_local = FALSE;
353
354 char *counter = NULL;
355 const char *task = NULL;
356 const char *value = NULL;
357 const char *on_node = NULL;
358 const char *router_node = NULL;
359 const char *task_uuid = NULL;
360
361 CRM_ASSERT(action != NULL);
362 CRM_ASSERT(action->xml != NULL);
363
364 pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
365 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
366
367 CRM_CHECK(!pcmk__str_empty(on_node),
368 crm_err("Corrupted command(id=%s) %s: no node",
369 ID(action->xml), pcmk__s(task, "without task"));
370 return pcmk_rc_node_unknown);
371
372 rsc_op = action->xml;
373 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
374 task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
375 router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
376
377 if (!router_node) {
378 router_node = on_node;
379 }
380
381 counter = pcmk__transition_key(controld_globals.transition_graph->id,
382 action->id, get_target_rc(action),
383 controld_globals.te_uuid);
384 crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
385
386 if (pcmk__str_eq(router_node, controld_globals.our_nodename,
387 pcmk__str_casei)) {
388 is_local = TRUE;
389 }
390
391 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
392 if (crm_is_true(value)) {
393 no_wait = TRUE;
394 }
395
396 crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
397 task, task_uuid, (is_local? " locally" : ""), on_node,
398 (no_wait? " without waiting" : ""), action->id);
399
400 cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
401 CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
402
403 if (is_local) {
404
405 ha_msg_input_t data = {
406 .msg = cmd,
407 .xml = rsc_op,
408 };
409
410 fsa_data_t msg = {
411 .id = 0,
412 .data = &data,
413 .data_type = fsa_dt_ha_msg,
414 .fsa_input = I_NULL,
415 .fsa_cause = C_FSA_INTERNAL,
416 .actions = A_LRM_INVOKE,
417 .origin = __func__,
418 };
419
420 do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
421 I_NULL, &msg);
422
423 } else {
424 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
425 }
426
427 free(counter);
428 free_xml(cmd);
429
430 pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
431
432 if (rc == FALSE) {
433 crm_err("Action %d failed: send", action->id);
434 return ECOMM;
435
436 } else if (no_wait) {
437
438
439
440 crm_info("Action %d confirmed - no wait", action->id);
441 pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
442 pcmk__update_graph(controld_globals.transition_graph, action);
443 trigger_graph();
444
445 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
446 crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
447 action->id, task, task_uuid, on_node, action->timeout);
448 } else {
449 if (action->timeout <= 0) {
450 crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
451 action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
452 action->timeout = (int) graph->network_delay;
453 }
454 te_update_job_count(action, 1);
455 te_start_action_timer(graph, action);
456 }
457
458 return pcmk_rc_ok;
459 }
460
/* Per-node bookkeeping stored as the values of te_targets */
struct te_peer_s {
    char *name;         /* Node name; also used as the table key for the entry */
    int jobs;           /* Job counter adjusted by te_update_job_count_on() */
    int migrate_jobs;   /* Counter adjusted only for migration-related jobs */
};
467
468 static void te_peer_free(gpointer p)
469 {
470 struct te_peer_s *peer = p;
471
472 free(peer->name);
473 free(peer);
474 }
475
476 void te_reset_job_counts(void)
477 {
478 GHashTableIter iter;
479 struct te_peer_s *peer = NULL;
480
481 if(te_targets == NULL) {
482 te_targets = pcmk__strkey_table(NULL, te_peer_free);
483 }
484
485 g_hash_table_iter_init(&iter, te_targets);
486 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
487 peer->jobs = 0;
488 peer->migrate_jobs = 0;
489 }
490 }
491
492 static void
493 te_update_job_count_on(const char *target, int offset, bool migrate)
494 {
495 struct te_peer_s *r = NULL;
496
497 if(target == NULL || te_targets == NULL) {
498 return;
499 }
500
501 r = g_hash_table_lookup(te_targets, target);
502 if(r == NULL) {
503 r = calloc(1, sizeof(struct te_peer_s));
504 r->name = strdup(target);
505 g_hash_table_insert(te_targets, r->name, r);
506 }
507
508 r->jobs += offset;
509 if(migrate) {
510 r->migrate_jobs += offset;
511 }
512 crm_trace("jobs[%s] = %d", target, r->jobs);
513 }
514
515 static void
516 te_update_job_count(pcmk__graph_action_t *action, int offset)
517 {
518 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
519 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
520
521 if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
522
523 return;
524 }
525
526
527
528
529
530 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
531
532 if ((target == NULL)
533 && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
534 PCMK_ACTION_MIGRATE_FROM, NULL)) {
535 const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
536 const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
537
538 te_update_job_count_on(t1, offset, TRUE);
539 te_update_job_count_on(t2, offset, TRUE);
540 return;
541 } else if (target == NULL) {
542 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
543 }
544
545 te_update_job_count_on(target, offset, FALSE);
546 }
547
548
549
550
551
552
553
554
555
556
557
558 static bool
559 allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
560 const char *target)
561 {
562 int limit = 0;
563 struct te_peer_s *r = NULL;
564 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
565 const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
566
567 if(target == NULL) {
568
569 return true;
570
571 } else if(te_targets == NULL) {
572 return false;
573 }
574
575 r = g_hash_table_lookup(te_targets, target);
576 limit = throttle_get_job_limit(target);
577
578 if(r == NULL) {
579 r = calloc(1, sizeof(struct te_peer_s));
580 r->name = strdup(target);
581 g_hash_table_insert(te_targets, r->name, r);
582 }
583
584 if(limit <= r->jobs) {
585 crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
586 target, limit, r->jobs, id);
587 return false;
588
589 } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
590 if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
591 PCMK_ACTION_MIGRATE_FROM, NULL)) {
592 crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
593 target, graph->migration_limit, r->migrate_jobs, id);
594 return false;
595 }
596 }
597
598 crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
599
600 return true;
601 }
602
603
604
605
606
607
608
609
610
611
612 static bool
613 graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
614 {
615 const char *target = NULL;
616 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
617
618 if (action->type != pcmk__rsc_graph_action) {
619
620 return true;
621 }
622
623
624
625
626
627 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
628
629 if ((target == NULL)
630 && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
631 PCMK_ACTION_MIGRATE_FROM, NULL)) {
632 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
633 if (!allowed_on_node(graph, action, target)) {
634 return false;
635 }
636
637 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
638
639 } else if (target == NULL) {
640 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
641 }
642
643 return allowed_on_node(graph, action, target);
644 }
645
646
647
648
649
650
651
652 void
653 te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
654 {
655 if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
656 if ((action->type == pcmk__rsc_graph_action)
657 && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
658 te_update_job_count(action, -1);
659 }
660 pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
661 }
662 if (graph) {
663 pcmk__update_graph(graph, action);
664 trigger_graph();
665 }
666 }
667
/*
 * Graph callbacks implemented by the controller, registered by
 * controld_register_graph_functions(). The initializer is positional, so the
 * order presumably has to match the member order of pcmk__graph_functions_t,
 * which is declared outside this file -- confirm before reordering.
 * controld_execute_fence_action is not defined in this file.
 */
668 static pcmk__graph_functions_t te_graph_fns = {
669 execute_pseudo_action,
670 execute_rsc_action,
671 execute_cluster_action,
672 controld_execute_fence_action,
673 graph_action_allowed,
674 };
675
676
677
678
679
680 void
681 controld_register_graph_functions(void)
682 {
683 pcmk__set_graph_functions(&te_graph_fns);
684 }
685
686 void
687 notify_crmd(pcmk__graph_t *graph)
688 {
689 const char *type = "unknown";
690 enum crmd_fsa_input event = I_NULL;
691
692 crm_debug("Processing transition completion in state %s",
693 fsa_state2string(controld_globals.fsa_state));
694
695 CRM_CHECK(graph->complete, graph->complete = true);
696
697 switch (graph->completion_action) {
698 case pcmk__graph_wait:
699 type = "stop";
700 if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
701 event = I_TE_SUCCESS;
702 }
703 break;
704 case pcmk__graph_done:
705 type = "done";
706 if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
707 event = I_TE_SUCCESS;
708 }
709 break;
710
711 case pcmk__graph_restart:
712 type = "restart";
713 if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
714 if (controld_get_period_transition_timer() > 0) {
715 controld_stop_transition_timer();
716 controld_start_transition_timer();
717 } else {
718 event = I_PE_CALC;
719 }
720
721 } else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
722 controld_set_fsa_action_flags(A_PE_INVOKE);
723 controld_trigger_fsa();
724 }
725 break;
726
727 case pcmk__graph_shutdown:
728 type = "shutdown";
729 if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
730 event = I_STOP;
731
732 } else {
733 crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
734 event = I_TERMINATE;
735 }
736 }
737
738 crm_debug("Transition %d status: %s - %s", graph->id, type,
739 pcmk__s(graph->abort_reason, "unspecified reason"));
740
741 graph->abort_reason = NULL;
742 graph->completion_action = pcmk__graph_done;
743
744 if (event != I_NULL) {
745 register_fsa_input(C_FSA_INTERNAL, event, NULL);
746 } else {
747 controld_trigger_fsa();
748 }
749 }