This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- controld_execute_remote_agent
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
20
/* Agent type name used to recognize, and to describe, remote connection
 * resources (see is_remote_lrmd_ra() and remote_ra_get_rsc_info())
 */
#define REMOTE_LRMD_RA "remote"

/* Largest timeout, in milliseconds, handed to a single connection attempt
 * by handle_remote_ra_start(); longer action timeouts are capped to this
 */
#define MAX_START_TIMEOUT_MS 10000
25
26 typedef struct remote_ra_cmd_s {
27
28 char *owner;
29
30 char *rsc_id;
31
32 char *action;
33
34 char *userdata;
35
36 int start_delay;
37
38 int delay_id;
39
40 int timeout;
41 int remaining_timeout;
42
43 guint interval_ms;
44
45 int interval_id;
46 int reported_success;
47 int monitor_timeout_id;
48 int takeover_timeout_id;
49
50 lrmd_key_value_t *params;
51 pcmk__action_result_t result;
52 int call_id;
53 time_t start_time;
54 gboolean cancel;
55 } remote_ra_cmd_t;
56
/* Progress of handing a remote connection over to another node */
enum remote_migration_status {
    /* A "migrate_to" was executed; a new client is expected to connect */
    expect_takeover = 1,

    /* The expected new client has connected */
    takeover_complete = 2,
};
61
62 typedef struct remote_ra_data_s {
63 crm_trigger_t *work;
64 remote_ra_cmd_t *cur_cmd;
65 GList *cmds;
66 GList *recurring_cmds;
67
68 enum remote_migration_status migrate_status;
69
70 gboolean active;
71
72
73
74
75 gboolean is_maintenance;
76
77
78
79
80
81
82
83 gboolean controlling_guest;
84 } remote_ra_data_t;
85
86 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
87 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
88 static GList *fail_all_monitor_cmds(GList * list);
89
90 static void
91 free_cmd(gpointer user_data)
92 {
93 remote_ra_cmd_t *cmd = user_data;
94
95 if (!cmd) {
96 return;
97 }
98 if (cmd->delay_id) {
99 g_source_remove(cmd->delay_id);
100 }
101 if (cmd->interval_id) {
102 g_source_remove(cmd->interval_id);
103 }
104 if (cmd->monitor_timeout_id) {
105 g_source_remove(cmd->monitor_timeout_id);
106 }
107 if (cmd->takeover_timeout_id) {
108 g_source_remove(cmd->takeover_timeout_id);
109 }
110 free(cmd->owner);
111 free(cmd->rsc_id);
112 free(cmd->action);
113 free(cmd->userdata);
114 pcmk__reset_result(&(cmd->result));
115 lrmd_key_value_freeall(cmd->params);
116 free(cmd);
117 }
118
/*!
 * \internal
 * \brief Generate the next call ID for a remote connection command
 *
 * IDs count up from 1 and wrap back to 1 after INT_MAX, so the result is
 * always positive.
 *
 * \return Next call ID (always greater than 0)
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    /* Wrap explicitly *before* incrementing: incrementing INT_MAX would be
     * signed overflow, which is undefined behaviour, so the wrap cannot be
     * detected reliably after the fact.
     */
    if ((remote_ra_callid < 1) || (remote_ra_callid == INT_MAX)) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
131
132 static gboolean
133 recurring_helper(gpointer data)
134 {
135 remote_ra_cmd_t *cmd = data;
136 lrm_state_t *connection_rsc = NULL;
137
138 cmd->interval_id = 0;
139 connection_rsc = lrm_state_find(cmd->rsc_id);
140 if (connection_rsc && connection_rsc->remote_ra_data) {
141 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
142
143 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
144
145 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
146 mainloop_set_trigger(ra_data->work);
147 }
148 return FALSE;
149 }
150
151 static gboolean
152 start_delay_helper(gpointer data)
153 {
154 remote_ra_cmd_t *cmd = data;
155 lrm_state_t *connection_rsc = NULL;
156
157 cmd->delay_id = 0;
158 connection_rsc = lrm_state_find(cmd->rsc_id);
159 if (connection_rsc && connection_rsc->remote_ra_data) {
160 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
161
162 mainloop_set_trigger(ra_data->work);
163 }
164 return FALSE;
165 }
166
167
168
169
170
171
172
173 static void
174 remote_node_up(const char *node_name)
175 {
176 int call_opt, call_id = 0;
177 xmlNode *update, *state;
178 crm_node_t *node;
179 enum controld_section_e section = controld_section_all;
180
181 CRM_CHECK(node_name != NULL, return);
182 crm_info("Announcing Pacemaker Remote node %s", node_name);
183
184
185
186
187
188
189 call_opt = crmd_cib_smart_opt();
190 if (controld_shutdown_lock_enabled) {
191 section = controld_section_all_unlocked;
192 }
193
194 update_attrd_remote_node_removed(node_name, NULL);
195
196 controld_delete_node_state(node_name, section, call_opt);
197
198
199 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
200
201
202 node = crm_remote_peer_get(node_name);
203 CRM_CHECK(node != NULL, return);
204 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
205
206
207
208
209
210
211
212 send_remote_state_message(node_name, TRUE);
213
214 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
215 state = create_node_state_update(node, node_update_cluster, update,
216 __func__);
217
218
219
220
221
222 crm_xml_add(state, XML_NODE_IS_FENCED, "0");
223
224
225
226
227
228
229
230
231 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
232 if (call_id < 0) {
233 crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
234 }
235 free_xml(update);
236 }
237
/* What remote_node_down() should erase from the node's CIB state */
enum down_opts {
    /* Erase only the attributes section (resource history is kept) */
    DOWN_KEEP_LRM,

    /* Erase the node's entire state */
    DOWN_ERASE_LRM
};
242
243
244
245
246
247
248
249
250 static void
251 remote_node_down(const char *node_name, const enum down_opts opts)
252 {
253 xmlNode *update;
254 int call_id = 0;
255 int call_opt = crmd_cib_smart_opt();
256 crm_node_t *node;
257
258
259 update_attrd_remote_node_removed(node_name, NULL);
260
261
262
263
264
265
266 if (opts == DOWN_ERASE_LRM) {
267 controld_delete_node_state(node_name, controld_section_all, call_opt);
268 } else {
269 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
270 }
271
272
273 node = crm_remote_peer_get(node_name);
274 CRM_CHECK(node != NULL, return);
275 pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
276
277
278 send_remote_state_message(node_name, FALSE);
279
280
281 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
282 create_node_state_update(node, node_update_cluster, update, __func__);
283 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
284 if (call_id < 0) {
285 crm_perror(LOG_ERR, "%s CIB node state update", node_name);
286 }
287 free_xml(update);
288 }
289
290
291
292
293
294
295
296 static void
297 check_remote_node_state(remote_ra_cmd_t *cmd)
298 {
299
300 if (cmd->result.exit_status != PCMK_OCF_OK) {
301 return;
302 }
303
304 if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
305 remote_node_up(cmd->rsc_id);
306
307 } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
308
309
310
311
312
313
314
315 crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
316
317 CRM_CHECK(node != NULL, return);
318 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
319
320 } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
321 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
322 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
323
324 if (ra_data) {
325 if (ra_data->migrate_status != takeover_complete) {
326
327 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
328 } else if (AM_I_DC == FALSE) {
329
330
331
332
333 crm_remote_peer_cache_remove(cmd->rsc_id);
334 }
335 }
336 }
337
338
339
340
341
342
343
344
345
346
347 }
348
349 static void
350 report_remote_ra_result(remote_ra_cmd_t * cmd)
351 {
352 lrmd_event_data_t op = { 0, };
353
354 check_remote_node_state(cmd);
355
356 op.type = lrmd_event_exec_complete;
357 op.rsc_id = cmd->rsc_id;
358 op.op_type = cmd->action;
359 op.user_data = cmd->userdata;
360 op.timeout = cmd->timeout;
361 op.interval_ms = cmd->interval_ms;
362 op.t_run = (unsigned int) cmd->start_time;
363 op.t_rcchange = (unsigned int) cmd->start_time;
364
365 lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
366 cmd->result.exit_reason);
367
368 if (cmd->reported_success && (cmd->result.exit_status != PCMK_OCF_OK)) {
369 op.t_rcchange = (unsigned int) time(NULL);
370
371
372
373
374
375
376
377
378
379 if (op.t_rcchange == op.t_run) {
380 op.t_rcchange++;
381 }
382 }
383
384 if (cmd->params) {
385 lrmd_key_value_t *tmp;
386
387 op.params = pcmk__strkey_table(free, free);
388 for (tmp = cmd->params; tmp; tmp = tmp->next) {
389 g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
390 }
391
392 }
393 op.call_id = cmd->call_id;
394 op.remote_nodename = cmd->owner;
395
396 lrm_op_callback(&op);
397
398 if (op.params) {
399 g_hash_table_destroy(op.params);
400 }
401 lrmd__reset_result(&op);
402 }
403
404 static void
405 update_remaining_timeout(remote_ra_cmd_t * cmd)
406 {
407 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
408 }
409
410 static gboolean
411 retry_start_cmd_cb(gpointer data)
412 {
413 lrm_state_t *lrm_state = data;
414 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
415 remote_ra_cmd_t *cmd = NULL;
416 int rc = ETIME;
417
418 if (!ra_data || !ra_data->cur_cmd) {
419 return FALSE;
420 }
421 cmd = ra_data->cur_cmd;
422 if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
423 return FALSE;
424 }
425 update_remaining_timeout(cmd);
426
427 if (cmd->remaining_timeout > 0) {
428 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
429 } else {
430 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
431 PCMK_EXEC_TIMEOUT,
432 "Not enough time remains to retry remote connection");
433 }
434
435 if (rc != pcmk_rc_ok) {
436 report_remote_ra_result(cmd);
437
438 if (ra_data->cmds) {
439 mainloop_set_trigger(ra_data->work);
440 }
441 ra_data->cur_cmd = NULL;
442 free_cmd(cmd);
443 } else {
444
445 }
446
447 return FALSE;
448 }
449
450
451 static gboolean
452 connection_takeover_timeout_cb(gpointer data)
453 {
454 lrm_state_t *lrm_state = NULL;
455 remote_ra_cmd_t *cmd = data;
456
457 crm_info("takeover event timed out for node %s", cmd->rsc_id);
458 cmd->takeover_timeout_id = 0;
459
460 lrm_state = lrm_state_find(cmd->rsc_id);
461
462 handle_remote_ra_stop(lrm_state, cmd);
463 free_cmd(cmd);
464
465 return FALSE;
466 }
467
468 static gboolean
469 monitor_timeout_cb(gpointer data)
470 {
471 lrm_state_t *lrm_state = NULL;
472 remote_ra_cmd_t *cmd = data;
473
474 lrm_state = lrm_state_find(cmd->rsc_id);
475
476 crm_info("Timed out waiting for remote poke response from %s%s",
477 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
478 cmd->monitor_timeout_id = 0;
479 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
480 "Remote executor did not respond");
481
482 if (lrm_state && lrm_state->remote_ra_data) {
483 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
484
485 if (ra_data->cur_cmd == cmd) {
486 ra_data->cur_cmd = NULL;
487 }
488 if (ra_data->cmds) {
489 mainloop_set_trigger(ra_data->work);
490 }
491 }
492
493 report_remote_ra_result(cmd);
494 free_cmd(cmd);
495
496 if(lrm_state) {
497 lrm_state_disconnect(lrm_state);
498 }
499 return FALSE;
500 }
501
502 static void
503 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
504 {
505 lrmd_event_data_t op = { 0, };
506
507 if (lrm_state == NULL) {
508
509 lrm_state = lrm_state_find(fsa_our_uname);
510 }
511 CRM_ASSERT(lrm_state != NULL);
512
513 op.type = lrmd_event_exec_complete;
514 op.rsc_id = rsc_id;
515 op.op_type = op_type;
516 op.t_run = (unsigned int) time(NULL);
517 op.t_rcchange = op.t_run;
518 op.call_id = generate_callid();
519 lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
520 process_lrm_event(lrm_state, &op, NULL, NULL);
521 }
522
523 void
524 remote_lrm_op_callback(lrmd_event_data_t * op)
525 {
526 gboolean cmd_handled = FALSE;
527 lrm_state_t *lrm_state = NULL;
528 remote_ra_data_t *ra_data = NULL;
529 remote_ra_cmd_t *cmd = NULL;
530
531 crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
532 "(%d) status=%s (%d)",
533 (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
534 lrmd_event_type2str(op->type), op->remote_nodename,
535 services_ocf_exitcode_str(op->rc), op->rc,
536 pcmk_exec_status_str(op->op_status), op->op_status);
537
538 lrm_state = lrm_state_find(op->remote_nodename);
539 if (!lrm_state || !lrm_state->remote_ra_data) {
540 crm_debug("No state information found for remote connection event");
541 return;
542 }
543 ra_data = lrm_state->remote_ra_data;
544
545 if (op->type == lrmd_event_new_client) {
546
547
548 if (ra_data->migrate_status == expect_takeover) {
549
550 ra_data->migrate_status = takeover_complete;
551
552 } else {
553 crm_err("Disconnecting from Pacemaker Remote node %s due to "
554 "unexpected client takeover", op->remote_nodename);
555
556
557
558 lrm_state_disconnect_only(lrm_state);
559 }
560 return;
561 }
562
563
564 if (op->type == lrmd_event_exec_complete) {
565 if (ra_data->migrate_status == takeover_complete) {
566 crm_debug("ignoring event, this connection is taken over by another node");
567 } else {
568 lrm_op_callback(op);
569 }
570 return;
571 }
572
573 if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
574
575 if (ra_data->active == FALSE) {
576 crm_debug("Disconnection from Pacemaker Remote node %s complete",
577 lrm_state->node_name);
578
579 } else if (!remote_ra_is_in_maintenance(lrm_state)) {
580 crm_err("Lost connection to Pacemaker Remote node %s",
581 lrm_state->node_name);
582 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
583 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
584
585 } else {
586 crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
587 lrm_state->node_name);
588
589 handle_remote_ra_stop(lrm_state, NULL);
590 remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
591
592 synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
593 }
594 return;
595 }
596
597 if (!ra_data->cur_cmd) {
598 crm_debug("no event to match");
599 return;
600 }
601
602 cmd = ra_data->cur_cmd;
603
604
605
606 if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
607 "migrate_from", NULL)) {
608 if (op->connection_rc < 0) {
609 update_remaining_timeout(cmd);
610
611 if (op->connection_rc == -ENOKEY) {
612
613 pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
614 PCMK_EXEC_ERROR,
615 "Authentication key not readable");
616
617 } else if (cmd->remaining_timeout > 3000) {
618 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
619 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
620 return;
621
622 } else {
623 crm_trace("can't reschedule start, remaining timeout too small %d",
624 cmd->remaining_timeout);
625 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
626 PCMK_EXEC_TIMEOUT,
627 pcmk_strerror(op->connection_rc));
628 }
629
630 } else {
631 lrm_state_reset_tables(lrm_state, TRUE);
632 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
633 ra_data->active = TRUE;
634 }
635
636 crm_debug("Remote connection event matched %s action", cmd->action);
637 report_remote_ra_result(cmd);
638 cmd_handled = TRUE;
639
640 } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
641
642 if (cmd->monitor_timeout_id) {
643 g_source_remove(cmd->monitor_timeout_id);
644 cmd->monitor_timeout_id = 0;
645 }
646
647
648
649
650 if (!cmd->reported_success) {
651 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
652 report_remote_ra_result(cmd);
653 cmd->reported_success = 1;
654 }
655
656 crm_debug("Remote poke event matched %s action", cmd->action);
657
658
659 if (cmd->interval_ms && (cmd->cancel == FALSE)) {
660 ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
661 cmd->interval_id = g_timeout_add(cmd->interval_ms,
662 recurring_helper, cmd);
663 cmd = NULL;
664 }
665 cmd_handled = TRUE;
666
667 } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
668 if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
669 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
670 PCMK_EXEC_ERROR,
671 "Remote connection unexpectedly dropped "
672 "during monitor");
673 report_remote_ra_result(cmd);
674 crm_err("Remote connection to %s unexpectedly dropped during monitor",
675 lrm_state->node_name);
676 }
677 cmd_handled = TRUE;
678
679 } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
680
681 handle_remote_ra_stop(lrm_state, cmd);
682 cmd_handled = TRUE;
683
684 } else {
685 crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
686 }
687
688 if (cmd_handled) {
689 ra_data->cur_cmd = NULL;
690 if (ra_data->cmds) {
691 mainloop_set_trigger(ra_data->work);
692 }
693 free_cmd(cmd);
694 }
695 }
696
697 static void
698 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
699 {
700 remote_ra_data_t *ra_data = NULL;
701
702 CRM_ASSERT(lrm_state);
703 ra_data = lrm_state->remote_ra_data;
704
705 if (ra_data->migrate_status != takeover_complete) {
706
707 g_hash_table_remove_all(lrm_state->pending_ops);
708 } else {
709
710
711 lrm_state_reset_tables(lrm_state, FALSE);
712 }
713
714 ra_data->active = FALSE;
715 lrm_state_disconnect(lrm_state);
716
717 if (ra_data->cmds) {
718 g_list_free_full(ra_data->cmds, free_cmd);
719 }
720 if (ra_data->recurring_cmds) {
721 g_list_free_full(ra_data->recurring_cmds, free_cmd);
722 }
723 ra_data->cmds = NULL;
724 ra_data->recurring_cmds = NULL;
725 ra_data->cur_cmd = NULL;
726
727 if (cmd) {
728 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
729 report_remote_ra_result(cmd);
730 }
731 }
732
733
734 static int
735 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
736 {
737 const char *server = NULL;
738 lrmd_key_value_t *tmp = NULL;
739 int port = 0;
740 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
741 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
742 int rc = pcmk_rc_ok;
743
744 for (tmp = cmd->params; tmp; tmp = tmp->next) {
745 if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
746 XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
747 server = tmp->value;
748 } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
749 port = atoi(tmp->value);
750 } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
751 ra_data->controlling_guest = TRUE;
752 }
753 }
754
755 rc = controld_connect_remote_executor(lrm_state, server, port,
756 timeout_used);
757 if (rc != pcmk_rc_ok) {
758 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
759 PCMK_EXEC_ERROR, pcmk_rc_str(rc));
760 }
761 return rc;
762 }
763
764 static gboolean
765 handle_remote_ra_exec(gpointer user_data)
766 {
767 int rc = 0;
768 lrm_state_t *lrm_state = user_data;
769 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
770 remote_ra_cmd_t *cmd;
771 GList *first = NULL;
772
773 if (ra_data->cur_cmd) {
774
775 return TRUE;
776 }
777
778 while (ra_data->cmds) {
779 first = ra_data->cmds;
780 cmd = first->data;
781 if (cmd->delay_id) {
782
783 return TRUE;
784 }
785
786 ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
787 g_list_free_1(first);
788
789 if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
790 ra_data->migrate_status = 0;
791 if (handle_remote_ra_start(lrm_state, cmd,
792 cmd->timeout) == pcmk_rc_ok) {
793
794 crm_debug("Initiated async remote connection, %s action will complete after connect event",
795 cmd->action);
796 ra_data->cur_cmd = cmd;
797 return TRUE;
798 }
799 report_remote_ra_result(cmd);
800
801 } else if (!strcmp(cmd->action, "monitor")) {
802
803 if (lrm_state_is_connected(lrm_state) == TRUE) {
804 rc = lrm_state_poke_connection(lrm_state);
805 if (rc < 0) {
806 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
807 PCMK_EXEC_ERROR, pcmk_strerror(rc));
808 }
809 } else {
810 rc = -1;
811 pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
812 PCMK_EXEC_DONE, "Remote connection inactive");
813 }
814
815 if (rc == 0) {
816 crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
817 cmd->rsc_id);
818 ra_data->cur_cmd = cmd;
819 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
820 return TRUE;
821 }
822 report_remote_ra_result(cmd);
823
824 } else if (!strcmp(cmd->action, "stop")) {
825
826 if (ra_data->migrate_status == expect_takeover) {
827
828
829
830
831
832
833 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
834 ra_data->cur_cmd = cmd;
835 return TRUE;
836 }
837
838 handle_remote_ra_stop(lrm_state, cmd);
839
840 } else if (!strcmp(cmd->action, "migrate_to")) {
841 ra_data->migrate_status = expect_takeover;
842 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
843 report_remote_ra_result(cmd);
844 } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
845 CRMD_ACTION_RELOAD_AGENT, NULL)) {
846
847
848
849
850
851
852
853
854
855 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
856 report_remote_ra_result(cmd);
857 }
858
859 free_cmd(cmd);
860 }
861
862 return TRUE;
863 }
864
865 static void
866 remote_ra_data_init(lrm_state_t * lrm_state)
867 {
868 remote_ra_data_t *ra_data = NULL;
869
870 if (lrm_state->remote_ra_data) {
871 return;
872 }
873
874 ra_data = calloc(1, sizeof(remote_ra_data_t));
875 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
876 lrm_state->remote_ra_data = ra_data;
877 }
878
879 void
880 remote_ra_cleanup(lrm_state_t * lrm_state)
881 {
882 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
883
884 if (!ra_data) {
885 return;
886 }
887
888 if (ra_data->cmds) {
889 g_list_free_full(ra_data->cmds, free_cmd);
890 }
891
892 if (ra_data->recurring_cmds) {
893 g_list_free_full(ra_data->recurring_cmds, free_cmd);
894 }
895 mainloop_destroy_trigger(ra_data->work);
896 free(ra_data);
897 lrm_state->remote_ra_data = NULL;
898 }
899
900 gboolean
901 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
902 {
903 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
904 return TRUE;
905 }
906 if (id && lrm_state_find(id) && !pcmk__str_eq(id, fsa_our_uname, pcmk__str_casei)) {
907 return TRUE;
908 }
909
910 return FALSE;
911 }
912
913 lrmd_rsc_info_t *
914 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
915 {
916 lrmd_rsc_info_t *info = NULL;
917
918 if ((lrm_state_find(rsc_id))) {
919 info = calloc(1, sizeof(lrmd_rsc_info_t));
920
921 info->id = strdup(rsc_id);
922 info->type = strdup(REMOTE_LRMD_RA);
923 info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
924 info->provider = strdup("pacemaker");
925 }
926
927 return info;
928 }
929
930 static gboolean
931 is_remote_ra_supported_action(const char *action)
932 {
933 return pcmk__str_any_of(action,
934 CRMD_ACTION_START,
935 CRMD_ACTION_STOP,
936 CRMD_ACTION_STATUS,
937 CRMD_ACTION_MIGRATE,
938 CRMD_ACTION_MIGRATED,
939 CRMD_ACTION_RELOAD_AGENT,
940 CRMD_ACTION_RELOAD,
941 NULL);
942 }
943
944 static GList *
945 fail_all_monitor_cmds(GList * list)
946 {
947 GList *rm_list = NULL;
948 remote_ra_cmd_t *cmd = NULL;
949 GList *gIter = NULL;
950
951 for (gIter = list; gIter != NULL; gIter = gIter->next) {
952 cmd = gIter->data;
953 if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
954 rm_list = g_list_append(rm_list, cmd);
955 }
956 }
957
958 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
959 cmd = gIter->data;
960
961 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
962 PCMK_EXEC_ERROR, "Lost connection to remote executor");
963 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
964 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
965 report_remote_ra_result(cmd);
966
967 list = g_list_remove(list, cmd);
968 free_cmd(cmd);
969 }
970
971
972 g_list_free(rm_list);
973 return list;
974 }
975
976 static GList *
977 remove_cmd(GList * list, const char *action, guint interval_ms)
978 {
979 remote_ra_cmd_t *cmd = NULL;
980 GList *gIter = NULL;
981
982 for (gIter = list; gIter != NULL; gIter = gIter->next) {
983 cmd = gIter->data;
984 if ((cmd->interval_ms == interval_ms)
985 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
986 break;
987 }
988 cmd = NULL;
989 }
990 if (cmd) {
991 list = g_list_remove(list, cmd);
992 free_cmd(cmd);
993 }
994 return list;
995 }
996
997 int
998 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
999 const char *action, guint interval_ms)
1000 {
1001 lrm_state_t *connection_rsc = NULL;
1002 remote_ra_data_t *ra_data = NULL;
1003
1004 connection_rsc = lrm_state_find(rsc_id);
1005 if (!connection_rsc || !connection_rsc->remote_ra_data) {
1006 return -EINVAL;
1007 }
1008
1009 ra_data = connection_rsc->remote_ra_data;
1010 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1011 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1012 interval_ms);
1013 if (ra_data->cur_cmd &&
1014 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1015 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1016
1017 ra_data->cur_cmd->cancel = TRUE;
1018 }
1019
1020 return 0;
1021 }
1022
1023 static remote_ra_cmd_t *
1024 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1025 const char *userdata)
1026 {
1027 GList *gIter = NULL;
1028 remote_ra_cmd_t *cmd = NULL;
1029
1030
1031
1032
1033
1034
1035
1036 if (interval_ms == 0) {
1037 return NULL;
1038 }
1039
1040 if (ra_data->cur_cmd &&
1041 ra_data->cur_cmd->cancel == FALSE &&
1042 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1043 pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
1044
1045 cmd = ra_data->cur_cmd;
1046 goto handle_dup;
1047 }
1048
1049 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1050 cmd = gIter->data;
1051 if ((cmd->interval_ms == interval_ms)
1052 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1053 goto handle_dup;
1054 }
1055 }
1056
1057 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1058 cmd = gIter->data;
1059 if ((cmd->interval_ms == interval_ms)
1060 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1061 goto handle_dup;
1062 }
1063 }
1064
1065 return NULL;
1066
1067 handle_dup:
1068
1069 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1070 cmd->rsc_id, "monitor", interval_ms);
1071
1072
1073 if (userdata) {
1074 free(cmd->userdata);
1075 cmd->userdata = strdup(userdata);
1076 }
1077
1078
1079 if (cmd->reported_success) {
1080 cmd->start_time = time(NULL);
1081 cmd->call_id = generate_callid();
1082 cmd->reported_success = 0;
1083 }
1084
1085
1086
1087
1088 if (cmd->interval_id) {
1089 g_source_remove(cmd->interval_id);
1090 cmd->interval_id = 0;
1091 recurring_helper(cmd);
1092 }
1093
1094 return cmd;
1095 }
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 int
1116 controld_execute_remote_agent(lrm_state_t *lrm_state, const char *rsc_id,
1117 const char *action, const char *userdata,
1118 guint interval_ms, int timeout_ms,
1119 int start_delay_ms, lrmd_key_value_t *params,
1120 int *call_id)
1121 {
1122 lrm_state_t *connection_rsc = NULL;
1123 remote_ra_cmd_t *cmd = NULL;
1124 remote_ra_data_t *ra_data = NULL;
1125
1126 *call_id = 0;
1127
1128 CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
1129 && (userdata != NULL) && (call_id != NULL),
1130 lrmd_key_value_freeall(params); return EINVAL);
1131
1132 if (!is_remote_ra_supported_action(action)) {
1133 lrmd_key_value_freeall(params);
1134 return EOPNOTSUPP;
1135 }
1136
1137 connection_rsc = lrm_state_find(rsc_id);
1138 if (connection_rsc == NULL) {
1139 lrmd_key_value_freeall(params);
1140 return ENOTCONN;
1141 }
1142
1143 remote_ra_data_init(connection_rsc);
1144 ra_data = connection_rsc->remote_ra_data;
1145
1146 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1147 if (cmd) {
1148 *call_id = cmd->call_id;
1149 lrmd_key_value_freeall(params);
1150 return pcmk_rc_ok;
1151 }
1152
1153 cmd = calloc(1, sizeof(remote_ra_cmd_t));
1154 if (cmd == NULL) {
1155 lrmd_key_value_freeall(params);
1156 return ENOMEM;
1157 }
1158
1159 cmd->owner = strdup(lrm_state->node_name);
1160 cmd->rsc_id = strdup(rsc_id);
1161 cmd->action = strdup(action);
1162 cmd->userdata = strdup(userdata);
1163 if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
1164 || (cmd->userdata == NULL)) {
1165 free_cmd(cmd);
1166 lrmd_key_value_freeall(params);
1167 return ENOMEM;
1168 }
1169
1170 cmd->interval_ms = interval_ms;
1171 cmd->timeout = timeout_ms;
1172 cmd->start_delay = start_delay_ms;
1173 cmd->params = params;
1174 cmd->start_time = time(NULL);
1175
1176 cmd->call_id = generate_callid();
1177
1178 if (cmd->start_delay) {
1179 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1180 }
1181
1182 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1183 mainloop_set_trigger(ra_data->work);
1184
1185 *call_id = cmd->call_id;
1186 return pcmk_rc_ok;
1187 }
1188
1189
1190
1191
1192
1193
1194
1195 void
1196 remote_ra_fail(const char *node_name)
1197 {
1198 lrm_state_t *lrm_state = lrm_state_find(node_name);
1199
1200 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1201 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1202
1203 crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
1204 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1205 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1206 }
1207 }
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
/* XPath matching the node entries listed as downed by a "stonith"
 * pseudo-event
 */
#define XPATH_PSEUDO_FENCE                                      \
    "//" XML_GRAPH_TAG_PSEUDO_EVENT                             \
    "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED  \
    "/" XML_CIB_TAG_NODE
1223
1224
1225
1226
1227
1228
1229
1230 void
1231 remote_ra_process_pseudo(xmlNode *xml)
1232 {
1233 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1234
1235 if (numXpathResults(search) == 1) {
1236 xmlNode *result = getXpathResult(search, 0);
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252 if (result) {
1253 const char *remote = ID(result);
1254
1255 if (remote) {
1256 remote_node_down(remote, DOWN_ERASE_LRM);
1257 }
1258 }
1259 }
1260 freeXpathObject(search);
1261 }
1262
1263 static void
1264 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1265 {
1266 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1267 xmlNode *update, *state;
1268 int call_opt, call_id = 0;
1269 crm_node_t *node;
1270
1271 call_opt = crmd_cib_smart_opt();
1272 node = crm_remote_peer_get(lrm_state->node_name);
1273 CRM_CHECK(node != NULL, return);
1274 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
1275 state = create_node_state_update(node, node_update_none, update,
1276 __func__);
1277 crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
1278 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
1279 if (call_id < 0) {
1280 crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
1281 } else {
1282
1283 ra_data->is_maintenance = maintenance;
1284 }
1285 free_xml(update);
1286 }
1287
/* XPath matching the maintenance element of a maintenance-nodes
 * pseudo-event
 */
#define XPATH_PSEUDO_MAINTENANCE                                    \
    "//" XML_GRAPH_TAG_PSEUDO_EVENT                                 \
    "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/"      \
    XML_GRAPH_TAG_MAINTENANCE
1291
1292
1293
1294
1295
1296
1297
1298
1299 void
1300 remote_ra_process_maintenance_nodes(xmlNode *xml)
1301 {
1302 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1303
1304 if (numXpathResults(search) == 1) {
1305 xmlNode *node;
1306 int cnt = 0, cnt_remote = 0;
1307
1308 for (node =
1309 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1310 node != NULL; node = pcmk__xml_next(node)) {
1311 lrm_state_t *lrm_state = lrm_state_find(ID(node));
1312
1313 cnt++;
1314 if (lrm_state && lrm_state->remote_ra_data &&
1315 ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
1316 int is_maint;
1317
1318 cnt_remote++;
1319 pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
1320 &is_maint, 0);
1321 remote_ra_maintenance(lrm_state, is_maint);
1322 }
1323 }
1324 crm_trace("Action holds %d nodes (%d remotes found) "
1325 "adjusting maintenance-mode", cnt, cnt_remote);
1326 }
1327 freeXpathObject(search);
1328 }
1329
1330 gboolean
1331 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1332 {
1333 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1334
1335 return ra_data->is_maintenance;
1336 }
1337
1338 gboolean
1339 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1340 {
1341 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1342
1343 return ra_data->controlling_guest;
1344 }