This source file includes the following definitions.
- te_start_action_timer
- te_pseudo_action
- get_target_rc
- te_crm_command
- controld_record_action_timeout
- te_rsc_command
- te_peer_free
- te_reset_job_counts
- te_update_job_count_on
- te_update_job_count
- te_should_perform_action_on
- te_should_perform_action
- te_action_confirmed
- notify_crmd
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <crm/crm.h>
14 #include <crm/cib.h>
15 #include <crm/lrmd.h>
16 #include <crm/msg_xml.h>
17 #include <crm/common/xml.h>
18 #include <crm/cluster.h>
19
20 #include <pacemaker-internal.h>
21 #include <pacemaker-controld.h>
22
/* String passed to pcmk__transition_key() as the final component of every
 * transition key built in this file.
 * NOTE(review): presumably the UUID of this transitioner instance; it is
 * assigned outside this file -- confirm.
 */
char *te_uuid = NULL;

/* Per-node job counters, keyed by node name with struct te_peer_s values.
 * Created lazily by te_reset_job_counts(); the table owns its values and
 * releases them with te_peer_free().
 */
GHashTable *te_targets = NULL;

/* NOTE(review): no definition or caller of send_rsc_command() is visible in
 * this file -- confirm whether this prototype is still needed.
 */
void send_rsc_command(crm_action_t * action);

/* Forward declaration: te_rsc_command() calls this before its definition */
static void te_update_job_count(crm_action_t * action, int offset);
27
28 static void
29 te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
30 {
31 action->timer = calloc(1, sizeof(crm_action_timer_t));
32 action->timer->timeout = action->timeout;
33 action->timer->action = action;
34 action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay,
35 action_timer_callback, (void *)action->timer);
36
37 CRM_ASSERT(action->timer->source_id != 0);
38 }
39
40 static gboolean
41 te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
42 {
43 const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
44
45
46 if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
47 GHashTableIter iter;
48 crm_node_t *node = NULL;
49
50 g_hash_table_iter_init(&iter, crm_peer_cache);
51 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
52 xmlNode *cmd = NULL;
53
54 if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) {
55 continue;
56 }
57
58 cmd = create_request(task, pseudo->xml, node->uname,
59 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
60 send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
61 free_xml(cmd);
62 }
63
64 remote_ra_process_maintenance_nodes(pseudo->xml);
65 } else {
66
67 remote_ra_process_pseudo(pseudo->xml);
68 }
69
70 crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
71 crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
72 te_action_confirmed(pseudo, graph);
73 return TRUE;
74 }
75
76 static int
77 get_target_rc(crm_action_t * action)
78 {
79 const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);
80
81 if (target_rc_s != NULL) {
82 return crm_parse_int(target_rc_s, "0");
83 }
84 return 0;
85 }
86
87 static gboolean
88 te_crm_command(crm_graph_t * graph, crm_action_t * action)
89 {
90 char *counter = NULL;
91 xmlNode *cmd = NULL;
92 gboolean is_local = FALSE;
93
94 const char *id = NULL;
95 const char *task = NULL;
96 const char *value = NULL;
97 const char *on_node = NULL;
98 const char *router_node = NULL;
99
100 gboolean rc = TRUE;
101 gboolean no_wait = FALSE;
102
103 id = ID(action->xml);
104 task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
105 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
106 router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
107
108 if (!router_node) {
109 router_node = on_node;
110 if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
111 const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
112
113 if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
114 router_node = fsa_our_uname;
115 }
116 }
117 }
118
119 CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
120 crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task));
121 return FALSE);
122
123 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
124 is_local = TRUE;
125 }
126
127 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
128 if (crm_is_true(value)) {
129 no_wait = TRUE;
130 }
131
132 crm_info("Executing crm-event (%s)%s%s: %s on %s",
133 crm_str(id), (is_local? " locally" : ""),
134 (no_wait? " without waiting" : ""), crm_str(task), on_node);
135
136 if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
137
138 crm_info("crm-event (%s) is a local shutdown", crm_str(id));
139 graph->completion_action = tg_shutdown;
140 graph->abort_reason = "local shutdown";
141 te_action_confirmed(action, graph);
142 return TRUE;
143
144 } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
145 crm_node_t *peer = crm_get_peer(0, router_node);
146 crm_update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
147 }
148
149 cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
150
151 counter = pcmk__transition_key(transition_graph->id, action->id,
152 get_target_rc(action), te_uuid);
153 crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
154
155 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
156 free(counter);
157 free_xml(cmd);
158
159 if (rc == FALSE) {
160 crm_err("Action %d failed: send", action->id);
161 return FALSE;
162
163 } else if (no_wait) {
164 te_action_confirmed(action, graph);
165
166 } else {
167 if (action->timeout <= 0) {
168 crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
169 action->id, task, on_node, action->timeout, graph->network_delay);
170 action->timeout = (int) graph->network_delay;
171 }
172 te_start_action_timer(graph, action);
173 }
174
175 return TRUE;
176 }
177
178 void
179 controld_record_action_timeout(crm_action_t *action)
180 {
181 lrmd_event_data_t *op = NULL;
182 xmlNode *state = NULL;
183 xmlNode *rsc = NULL;
184 xmlNode *xml_op = NULL;
185 xmlNode *action_rsc = NULL;
186
187 int rc = pcmk_ok;
188
189 const char *rsc_id = NULL;
190 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
191 const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
192 const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
193
194 int call_options = cib_quorum_override | cib_scope_local;
195 int target_rc = get_target_rc(action);
196
197 crm_warn("%s %d: %s on %s timed out",
198 crm_element_name(action->xml), action->id, task_uuid, target);
199
200 action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
201 if (action_rsc == NULL) {
202 return;
203 }
204
205 rsc_id = ID(action_rsc);
206 CRM_CHECK(rsc_id != NULL,
207 crm_log_xml_err(action->xml, "Bad:action"); return);
208
209
210
211
212
213
214
215
216
217
218 state = create_xml_node(NULL, XML_CIB_TAG_STATE);
219
220 crm_xml_add(state, XML_ATTR_UUID, target_uuid);
221 crm_xml_add(state, XML_ATTR_UNAME, target);
222
223 rsc = create_xml_node(state, XML_CIB_TAG_LRM);
224 crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
225
226 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
227 rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
228 crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
229
230
231 crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
232 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
233 crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
234
235
236
237
238
239
240
241
242
243
244 op = convert_graph_action(NULL, action, PCMK_LRM_OP_TIMEOUT,
245 PCMK_OCF_UNKNOWN_ERROR);
246 op->call_id = -1;
247 op->user_data = pcmk__transition_key(transition_graph->id, action->id,
248 target_rc, te_uuid);
249
250 xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc,
251 target, __func__, LOG_INFO);
252 lrmd_free_event(op);
253
254 crm_log_xml_trace(xml_op, "Action timeout");
255
256 rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
257 fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
258 free_xml(state);
259
260 crm_trace("Sent CIB update (call ID %d) for timeout of action %d (%s on %s)",
261 rc, action->id, task_uuid, target);
262 action->sent_update = TRUE;
263 }
264
265 static gboolean
266 te_rsc_command(crm_graph_t * graph, crm_action_t * action)
267 {
268
269
270
271
272
273
274 xmlNode *cmd = NULL;
275 xmlNode *rsc_op = NULL;
276
277 gboolean rc = TRUE;
278 gboolean no_wait = FALSE;
279 gboolean is_local = FALSE;
280
281 char *counter = NULL;
282 const char *task = NULL;
283 const char *value = NULL;
284 const char *on_node = NULL;
285 const char *router_node = NULL;
286 const char *task_uuid = NULL;
287
288 CRM_ASSERT(action != NULL);
289 CRM_ASSERT(action->xml != NULL);
290
291 action->executed = FALSE;
292 on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
293
294 CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
295 crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task));
296 return FALSE);
297
298 rsc_op = action->xml;
299 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
300 task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
301 router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
302
303 if (!router_node) {
304 router_node = on_node;
305 }
306
307 counter = pcmk__transition_key(transition_graph->id, action->id,
308 get_target_rc(action), te_uuid);
309 crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
310
311 if (pcmk__str_eq(router_node, fsa_our_uname, pcmk__str_casei)) {
312 is_local = TRUE;
313 }
314
315 value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
316 if (crm_is_true(value)) {
317 no_wait = TRUE;
318 }
319
320 crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
321 task, task_uuid, (is_local? " locally" : ""), on_node,
322 (no_wait? " without waiting" : ""), action->id);
323
324 cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
325 CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
326
327 if (is_local) {
328
329 ha_msg_input_t data = {
330 .msg = cmd,
331 .xml = rsc_op,
332 };
333
334 fsa_data_t msg = {
335 .id = 0,
336 .data = &data,
337 .data_type = fsa_dt_ha_msg,
338 .fsa_input = I_NULL,
339 .fsa_cause = C_FSA_INTERNAL,
340 .actions = A_LRM_INVOKE,
341 .origin = __func__,
342 };
343
344 do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
345
346 } else {
347 rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
348 }
349
350 free(counter);
351 free_xml(cmd);
352
353 action->executed = TRUE;
354
355 if (rc == FALSE) {
356 crm_err("Action %d failed: send", action->id);
357 return FALSE;
358
359 } else if (no_wait) {
360 crm_info("Action %d confirmed - no wait", action->id);
361 action->confirmed = TRUE;
362
363
364 update_graph(transition_graph, action);
365 trigger_graph();
366
367 } else if (action->confirmed == TRUE) {
368 crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
369 action->id, task, task_uuid, on_node, action->timeout);
370 } else {
371 if (action->timeout <= 0) {
372 crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
373 action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
374 action->timeout = (int) graph->network_delay;
375 }
376 te_update_job_count(action, 1);
377 te_start_action_timer(graph, action);
378 }
379
380 return TRUE;
381 }
382
/* Per-node job accounting entry stored as a value in te_targets */
struct te_peer_s
{
    char *name;         /* Node name; heap-allocated, also used as the hash table key */
    int jobs;           /* Running total of job-count offsets applied for this node */
    int migrate_jobs;   /* Portion of that total contributed by migration actions */
};
389
390 static void te_peer_free(gpointer p)
391 {
392 struct te_peer_s *peer = p;
393
394 free(peer->name);
395 free(peer);
396 }
397
398 void te_reset_job_counts(void)
399 {
400 GHashTableIter iter;
401 struct te_peer_s *peer = NULL;
402
403 if(te_targets == NULL) {
404 te_targets = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, te_peer_free);
405 }
406
407 g_hash_table_iter_init(&iter, te_targets);
408 while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
409 peer->jobs = 0;
410 peer->migrate_jobs = 0;
411 }
412 }
413
414 static void
415 te_update_job_count_on(const char *target, int offset, bool migrate)
416 {
417 struct te_peer_s *r = NULL;
418
419 if(target == NULL || te_targets == NULL) {
420 return;
421 }
422
423 r = g_hash_table_lookup(te_targets, target);
424 if(r == NULL) {
425 r = calloc(1, sizeof(struct te_peer_s));
426 r->name = strdup(target);
427 g_hash_table_insert(te_targets, r->name, r);
428 }
429
430 r->jobs += offset;
431 if(migrate) {
432 r->migrate_jobs += offset;
433 }
434 crm_trace("jobs[%s] = %d", target, r->jobs);
435 }
436
437 static void
438 te_update_job_count(crm_action_t * action, int offset)
439 {
440 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
441 const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
442
443 if (action->type != action_type_rsc || target == NULL) {
444
445 return;
446 }
447
448
449
450
451
452 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
453
454 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
455 CRMD_ACTION_MIGRATED, NULL)) {
456
457 const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
458 const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
459
460 te_update_job_count_on(t1, offset, TRUE);
461 te_update_job_count_on(t2, offset, TRUE);
462 return;
463 } else if (target == NULL) {
464 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
465 }
466
467 te_update_job_count_on(target, offset, FALSE);
468 }
469
470 static gboolean
471 te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target)
472 {
473 int limit = 0;
474 struct te_peer_s *r = NULL;
475 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
476 const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
477
478 if(target == NULL) {
479
480 return TRUE;
481
482 } else if(te_targets == NULL) {
483 return FALSE;
484 }
485
486 r = g_hash_table_lookup(te_targets, target);
487 limit = throttle_get_job_limit(target);
488
489 if(r == NULL) {
490 r = calloc(1, sizeof(struct te_peer_s));
491 r->name = strdup(target);
492 g_hash_table_insert(te_targets, r->name, r);
493 }
494
495 if(limit <= r->jobs) {
496 crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
497 target, limit, r->jobs, id);
498 return FALSE;
499
500 } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
501 if (pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, NULL)) {
502 crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
503 target, graph->migration_limit, r->migrate_jobs, id);
504 return FALSE;
505 }
506 }
507
508 crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
509
510 return TRUE;
511 }
512
513 static gboolean
514 te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
515 {
516 const char *target = NULL;
517 const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
518
519 if (action->type != action_type_rsc) {
520
521 return TRUE;
522 }
523
524
525
526
527
528 target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
529
530 if ((target == NULL) && pcmk__strcase_any_of(task, CRMD_ACTION_MIGRATE,
531 CRMD_ACTION_MIGRATED, NULL)) {
532 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
533 if(te_should_perform_action_on(graph, action, target) == FALSE) {
534 return FALSE;
535 }
536
537 target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
538
539 } else if (target == NULL) {
540 target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
541 }
542
543 return te_should_perform_action_on(graph, action, target);
544 }
545
546
547
548
549
550
551
552 void
553 te_action_confirmed(crm_action_t *action, crm_graph_t *graph)
554 {
555 if (action->confirmed == FALSE) {
556 if ((action->type == action_type_rsc)
557 && (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
558 te_update_job_count(action, -1);
559 }
560 action->confirmed = TRUE;
561 }
562 if (graph) {
563 update_graph(graph, action);
564 trigger_graph();
565 }
566 }
567
568
/* Table of handlers this file supplies for transition graph processing.
 *
 * NOTE(review): the initializer is positional, so the order below must match
 * the member order of crm_graph_functions_t, which is declared outside this
 * file -- confirm before reordering. The per-entry notes are inferred from the
 * handler names and definitions in this file.
 */
crm_graph_functions_t te_graph_fns = {
    te_pseudo_action,           /* executes pseudo-actions */
    te_rsc_command,             /* executes resource actions */
    te_crm_command,             /* executes controller (crm-event) actions */
    te_fence_node,              /* defined elsewhere; presumably fencing actions */
    te_should_perform_action,   /* job-limit check before starting an action */
};
576
577 void
578 notify_crmd(crm_graph_t * graph)
579 {
580 const char *type = "unknown";
581 enum crmd_fsa_input event = I_NULL;
582
583 crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
584
585 if (graph->complete == FALSE) {
586 CRM_CHECK(graph->complete,);
587 graph->complete = TRUE;
588 }
589
590 switch (graph->completion_action) {
591 case tg_stop:
592 type = "stop";
593 if (fsa_state == S_TRANSITION_ENGINE) {
594 event = I_TE_SUCCESS;
595 }
596 break;
597 case tg_done:
598 type = "done";
599 if (fsa_state == S_TRANSITION_ENGINE) {
600 event = I_TE_SUCCESS;
601 }
602 break;
603
604 case tg_restart:
605 type = "restart";
606 if (fsa_state == S_TRANSITION_ENGINE) {
607 if (transition_timer->period_ms > 0) {
608 controld_stop_timer(transition_timer);
609 controld_start_timer(transition_timer);
610 } else {
611 event = I_PE_CALC;
612 }
613
614 } else if (fsa_state == S_POLICY_ENGINE) {
615 controld_set_fsa_action_flags(A_PE_INVOKE);
616 trigger_fsa();
617 }
618 break;
619
620 case tg_shutdown:
621 type = "shutdown";
622 if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
623 event = I_STOP;
624
625 } else {
626 crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
627 event = I_TERMINATE;
628 }
629 }
630
631 crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason));
632
633 graph->abort_reason = NULL;
634 graph->completion_action = tg_done;
635 controld_clear_fsa_input_flags(R_IN_TRANSITION);
636
637 if (event != I_NULL) {
638 register_fsa_input(C_FSA_INTERNAL, event, NULL);
639
640 } else if (fsa_source) {
641 mainloop_set_trigger(fsa_source);
642 }
643 }