This source file includes the following definitions.
- te_start_action_timer
- te_pseudo_action
- get_target_rc
- te_crm_command
- controld_record_action_timeout
- te_rsc_command
- te_peer_free
- te_reset_job_counts
- te_update_job_count_on
- te_update_job_count
- te_should_perform_action_on
- te_should_perform_action
- te_action_confirmed
- notify_crmd
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/lrmd.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19
20 #include <pacemaker-internal.h>
21 #include <pacemaker-controld.h>
22
23 char *te_uuid = NULL;
24 GHashTable *te_targets = NULL;
25 void send_rsc_command(crm_action_t * action);
26 static void te_update_job_count(crm_action_t * action, int offset);
27
28 static void
29 te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
30 {
31 action->timer = calloc(1, sizeof(crm_action_timer_t));
32 action->timer->timeout = action->timeout;
33 action->timer->action = action;
34 action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay,
35 action_timer_callback, (void *)action->timer);
36
37 CRM_ASSERT(action->timer->source_id != 0);
38 }
39
40 static gboolean
41 te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
42 {
43 const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
44
45
46 if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
47 GHashTableIter iter;
48 crm_node_t *node = NULL;
49
50 g_hash_table_iter_init(&iter, crm_peer_cache);
51 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
52 xmlNode *cmd = NULL;
53
54 if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
55 continue;
56 }
57
58 cmd = create_request(task, pseudo->xml, node->uname,
59 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
60 send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
61 free_xml(cmd);
62 }
63
64 remote_ra_process_maintenance_nodes(pseudo->xml);
65 } else {
66
67 remote_ra_process_pseudo(pseudo->xml);
68 }
69
70 crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
71 crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
72 te_action_confirmed(pseudo, graph);
73 return TRUE;
74 }
75
76 static int
77 get_target_rc(crm_action_t * action)
78 {
79 int exit_status;
80
81 pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
82 &exit_status, 0);
83 return exit_status;
84 }
85
86 static gboolean
87 te_crm_command(crm_graph_t * graph, crm_action_t * action)
88 {
89 char *counter = NULL;
90 xmlNode *cmd = NULL;
91 gboolean is_local = FALSE;
92
93 const char *id = NULL;
94 const char *task = NULL;
95 const char *value = NULL;
96 const char *on_node = NULL;
97 const char *router_node = NULL;
98
99 gboolean rc = TRUE;
100 gboolean no_wait = FALSE;
101
102 id = ID(action->xml);
103 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
104 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
105 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
106
107 if (!router_node) {
108 router_node = on_node;
109 if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
110 const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
111
112 if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
113 router_node = fsa_our_uname;
114 }
115 }
116 }
117
118 CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
119 crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task));
120 return FALSE);
121
122 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
123 is_local = TRUE;
124 }
125
126 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
127 if (crm_is_true(value)) {
128 no_wait = TRUE;
129 }
130
131 crm_info("Executing crm-event (%s)%s%s: %s on %s",
132 crm_str(id), (is_local? " locally" : ""),
133 (no_wait? " without waiting" : ""), crm_str(task), on_node);
134
135 if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
136
137 crm_info("crm-event (%s) is a local shutdown", crm_str(id));
138 graph->completion_action = tg_shutdown;
139 graph->abort_reason = "local shutdown";
140 te_action_confirmed(action, graph);
141 return TRUE;
142
143 } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
144 crm_node_t *peer = crm_get_peer(0, router_node);
145
146 pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
147 }
148
149 cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
150
151 counter = pcmk__transition_key(transition_graph->id, action->id,
152 get_target_rc(action), te_uuid);
153 crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
154
155 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
156 free(counter);
157 free_xml(cmd);
158
159 if (rc == FALSE) {
160 crm_err("Action %d failed: send", action->id);
161 return FALSE;
162
163 } else if (no_wait) {
164 te_action_confirmed(action, graph);
165
166 } else {
167 if (action->timeout <= 0) {
168 crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
169 action->id, task, on_node, action->timeout, graph->network_delay);
170 action->timeout = (int) graph->network_delay;
171 }
172 te_start_action_timer(graph, action);
173 }
174
175 return TRUE;
176 }
177
178 void
179 controld_record_action_timeout(crm_action_t *action)
180 {
181 lrmd_event_data_t *op = NULL;
182 xmlNode *state = NULL;
183 xmlNode *rsc = NULL;
184 xmlNode *xml_op = NULL;
185 xmlNode *action_rsc = NULL;
186
187 int rc = pcmk_ok;
188
189 const char *rsc_id = NULL;
190 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
191 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
192 const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
193
194 int call_options = cib_quorum_override | cib_scope_local;
195 int target_rc = get_target_rc(action);
196
197 crm_warn("%s %d: %s on %s timed out",
198 crm_element_name(action->xml), action->id, task_uuid, target);
199
200 action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
201 if (action_rsc == NULL) {
202 return;
203 }
204
205 rsc_id = ID(action_rsc);
206 CRM_CHECK(rsc_id != NULL,
207 crm_log_xml_err(action->xml, "Bad:action"); return);
208
209
210
211
212
213
214
215
216
217
218 state = create_xml_node(NULL, XML_CIB_TAG_STATE);
219
220 crm_xml_add(state, XML_ATTR_UUID, target_uuid);
221 crm_xml_add(state, XML_ATTR_UNAME, target);
222
223 rsc = create_xml_node(state, XML_CIB_TAG_LRM);
224 crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
225
226 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
227 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
228 crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
229
230
231 crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
232 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
233 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
234
235
236
237
238
239
240 op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
241 PCMK_OCF_UNKNOWN_ERROR,
242 "Cluster communication timeout "
243 "(no response from executor)");
244 op->call_id = -1;
245 op->user_data = pcmk__transition_key(transition_graph->id, action->id,
246 target_rc, te_uuid);
247
248 xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc,
249 target, __func__, LOG_INFO);
250 lrmd_free_event(op);
251
252 crm_log_xml_trace(xml_op, "Action timeout");
253
254 rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
255 fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
256 free_xml(state);
257
258 crm_trace("Sent CIB update (call ID %d) for timeout of action %d (%s on %s)",
259 rc, action->id, task_uuid, target);
260 crm__set_graph_action_flags(action, pcmk__graph_action_sent_update);
261 }
262
263 static gboolean
264 te_rsc_command(crm_graph_t * graph, crm_action_t * action)
265 {
266
267
268
269
270
271
272 xmlNode *cmd = NULL;
273 xmlNode *rsc_op = NULL;
274
275 gboolean rc = TRUE;
276 gboolean no_wait = FALSE;
277 gboolean is_local = FALSE;
278
279 char *counter = NULL;
280 const char *task = NULL;
281 const char *value = NULL;
282 const char *on_node = NULL;
283 const char *router_node = NULL;
284 const char *task_uuid = NULL;
285
286 CRM_ASSERT(action != NULL);
287 CRM_ASSERT(action->xml != NULL);
288
289 crm__clear_graph_action_flags(action, pcmk__graph_action_executed);
290 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
291
292 CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
293 crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task));
294 return FALSE);
295
296 rsc_op = action->xml;
297 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
298 task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
299 router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
300
301 if (!router_node) {
302 router_node = on_node;
303 }
304
305 counter = pcmk__transition_key(transition_graph->id, action->id,
306 get_target_rc(action), te_uuid);
307 crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
308
309 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
310 is_local = TRUE;
311 }
312
313 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
314 if (crm_is_true(value)) {
315 no_wait = TRUE;
316 }
317
318 crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
319 task, task_uuid, (is_local? " locally" : ""), on_node,
320 (no_wait? " without waiting" : ""), action->id);
321
322 cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
323 CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
324
325 if (is_local) {
326
327 ha_msg_input_t data = {
328 .msg = cmd,
329 .xml = rsc_op,
330 };
331
332 fsa_data_t msg = {
333 .id = 0,
334 .data = &data,
335 .data_type = fsa_dt_ha_msg,
336 .fsa_input = I_NULL,
337 .fsa_cause = C_FSA_INTERNAL,
338 .actions = A_LRM_INVOKE,
339 .origin = __func__,
340 };
341
342 do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
343
344 } else {
345 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
346 }
347
348 free(counter);
349 free_xml(cmd);
350
351 crm__set_graph_action_flags(action, pcmk__graph_action_executed);
352
353 if (rc == FALSE) {
354 crm_err("Action %d failed: send", action->id);
355 return FALSE;
356
357 } else if (no_wait) {
358 crm_info("Action %d confirmed - no wait", action->id);
359 crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
360
361
362 pcmk__update_graph(transition_graph, action);
363 trigger_graph();
364
365 } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
366 crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
367 action->id, task, task_uuid, on_node, action->timeout);
368 } else {
369 if (action->timeout <= 0) {
370 crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
371 action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
372 action->timeout = (int) graph->network_delay;
373 }
374 te_update_job_count(action, 1);
375 te_start_action_timer(graph, action);
376 }
377
378 return TRUE;
379 }
380
381 struct te_peer_s
382 {
383 char *name;
384 int jobs;
385 int migrate_jobs;
386 };
387
388 static void te_peer_free(gpointer p)
389 {
390 struct te_peer_s *peer = p;
391
392 free(peer->name);
393 free(peer);
394 }
395
396 void te_reset_job_counts(void)
397 {
398 GHashTableIter iter;
399 struct te_peer_s *peer = NULL;
400
401 if(te_targets == NULL) {
402 te_targets = pcmk__strkey_table(NULL, te_peer_free);
403 }
404
405 g_hash_table_iter_init(&iter, te_targets);
406 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
407 peer->jobs = 0;
408 peer->migrate_jobs = 0;
409 }
410 }
411
412 static void
413 te_update_job_count_on(const char *target, int offset, bool migrate)
414 {
415 struct te_peer_s *r = NULL;
416
417 if(target == NULL || te_targets == NULL) {
418 return;
419 }
420
421 r = g_hash_table_lookup(te_targets, target);
422 if(r == NULL) {
423 r = calloc(1, sizeof(struct te_peer_s));
424 r->name = strdup(target);
425 g_hash_table_insert(te_targets, r->name, r);
426 }
427
428 r->jobs += offset;
429 if(migrate) {
430 r->migrate_jobs += offset;
431 }
432 crm_trace("jobs[%s] = %d", target, r->jobs);
433 }
434
435 static void
436 te_update_job_count(crm_action_t * action, int offset)
437 {
438 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
439 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
440
441 if (action->type != action_type_rsc || target == NULL) {
442
443 return;
444 }
445
446
447
448
449
450 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
451
452 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
453 CRMD_ACTION_MIGRATED, NULL)) {
454
455 const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
456 const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
457
458 te_update_job_count_on(t1, offset, TRUE);
459 te_update_job_count_on(t2, offset, TRUE);
460 return;
461 } else if (target == NULL) {
462 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
463 }
464
465 te_update_job_count_on(target, offset, FALSE);
466 }
467
468 static gboolean
469 te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target)
470 {
471 int limit = 0;
472 struct te_peer_s *r = NULL;
473 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
474 const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
475
476 if(target == NULL) {
477
478 return TRUE;
479
480 } else if(te_targets == NULL) {
481 return FALSE;
482 }
483
484 r = g_hash_table_lookup(te_targets, target);
485 limit = throttle_get_job_limit(target);
486
487 if(r == NULL) {
488 r = calloc(1, sizeof(struct te_peer_s));
489 r->name = strdup(target);
490 g_hash_table_insert(te_targets, r->name, r);
491 }
492
493 if(limit <= r->jobs) {
494 crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
495 target, limit, r->jobs, id);
496 return FALSE;
497
498 } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
499 if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
500 crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
501 target, graph->migration_limit, r->migrate_jobs, id);
502 return FALSE;
503 }
504 }
505
506 crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
507
508 return TRUE;
509 }
510
511 static gboolean
512 te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
513 {
514 const char *target = NULL;
515 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
516
517 if (action->type != action_type_rsc) {
518
519 return TRUE;
520 }
521
522
523
524
525
526 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
527
528 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
529 CRMD_ACTION_MIGRATED, NULL)) {
530 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
531 if(te_should_perform_action_on(graph, action, target) == FALSE) {
532 return FALSE;
533 }
534
535 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
536
537 } else if (target == NULL) {
538 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
539 }
540
541 return te_should_perform_action_on(graph, action, target);
542 }
543
544
545
546
547
548
549
550 void
551 te_action_confirmed(crm_action_t *action, crm_graph_t *graph)
552 {
553 if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
554 if ((action->type == action_type_rsc)
555 && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
556 te_update_job_count(action, -1);
557 }
558 crm__set_graph_action_flags(action, pcmk__graph_action_confirmed);
559 }
560 if (graph) {
561 pcmk__update_graph(graph, action);
562 trigger_graph();
563 }
564 }
565
566
567 crm_graph_functions_t te_graph_fns = {
568 te_pseudo_action,
569 te_rsc_command,
570 te_crm_command,
571 te_fence_node,
572 te_should_perform_action,
573 };
574
575 void
576 notify_crmd(crm_graph_t * graph)
577 {
578 const char *type = "unknown";
579 enum crmd_fsa_input event = I_NULL;
580
581 crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
582
583 if (graph->complete == FALSE) {
584 CRM_CHECK(graph->complete,);
585 graph->complete = TRUE;
586 }
587
588 switch (graph->completion_action) {
589 case tg_stop:
590 type = "stop";
591 if (fsa_state == S_TRANSITION_ENGINE) {
592 event = I_TE_SUCCESS;
593 }
594 break;
595 case tg_done:
596 type = "done";
597 if (fsa_state == S_TRANSITION_ENGINE) {
598 event = I_TE_SUCCESS;
599 }
600 break;
601
602 case tg_restart:
603 type = "restart";
604 if (fsa_state == S_TRANSITION_ENGINE) {
605 if (transition_timer->period_ms > 0) {
606 controld_stop_timer(transition_timer);
607 controld_start_timer(transition_timer);
608 } else {
609 event = I_PE_CALC;
610 }
611
612 } else if (fsa_state == S_POLICY_ENGINE) {
613 controld_set_fsa_action_flags(A_PE_INVOKE);
614 trigger_fsa();
615 }
616 break;
617
618 case tg_shutdown:
619 type = "shutdown";
620 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
621 event = I_STOP;
622
623 } else {
624 crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
625 event = I_TERMINATE;
626 }
627 }
628
629 crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason));
630
631 graph->abort_reason = NULL;
632 graph->completion_action = tg_done;
633 controld_clear_fsa_input_flags(R_IN_TRANSITION);
634
635 if (event != I_NULL) {
636 register_fsa_input(C_FSA_INTERNAL, event, NULL);
637
638 } else if (fsa_source) {
639 mainloop_set_trigger(fsa_source);
640 }
641 }