This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- remote_ra_exec
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
19
/* Agent type name used for remote connection resources; compared against the
 * agent name in is_remote_lrmd_ra() and reported as the resource type by
 * remote_ra_get_rsc_info()
 */
#define REMOTE_LRMD_RA "remote"

/* Upper bound, in milliseconds, on the timeout handed to a single asynchronous
 * connection attempt in handle_remote_ra_start()
 */
#define MAX_START_TIMEOUT_MS 10000
24
/* One queued, running, or recurring action for a remote connection resource.
 * All strings and the params list are owned by the command and released by
 * free_cmd().
 */
typedef struct remote_ra_cmd_s {
    /* Node name of the lrm_state that requested the action (copied from
     * lrm_state->node_name in remote_ra_exec(); reported as remote_nodename) */
    char *owner;
    /* ID of the connection resource; also used to look up its lrm_state */
    char *rsc_id;
    /* Action name ("start", "stop", "monitor", "migrate_to", ...) */
    char *action;
    /* Caller-supplied data passed back unchanged in the result event */
    char *userdata;
    /* Optional human-readable failure reason included in the result event */
    char *exit_reason;
    /* Requested delay before the command may run (passed to g_timeout_add()) */
    int start_delay;
    /* GLib source ID of the start-delay timer, or 0 if none is pending */
    int delay_id;
    /* Action timeout in milliseconds */
    int timeout;
    /* Milliseconds left of the timeout, as computed by
     * update_remaining_timeout() */
    int remaining_timeout;
    /* Recurrence interval in milliseconds (0 means not recurring) */
    guint interval_ms;
    /* GLib source ID of the recurrence timer, or 0 if none is pending */
    int interval_id;
    /* Nonzero once a successful monitor result has been reported */
    int reported_success;
    /* GLib source ID of the monitor (poke) response timer, or 0 */
    int monitor_timeout_id;
    /* GLib source ID of the migration takeover timer, or 0 */
    int takeover_timeout_id;
    /* Action parameters as a key/value list */
    lrmd_key_value_t *params;
    /* Result fields copied into the reported event */
    int rc;
    int op_status;
    int call_id;
    /* Time the command was created (or last reset by handle_dup_monitor()) */
    time_t start_time;
    /* TRUE if the command was cancelled while it was the current command */
    gboolean cancel;
} remote_ra_cmd_t;
58
/* Progress of a connection migration away from this node. The zero value
 * (neither enumerator) is what handle_remote_ra_exec() resets the status to
 * when a start or migrate_from begins.
 */
enum remote_migration_status {
    /* Set after a migrate_to action: another client is expected to connect */
    expect_takeover = 1,
    /* Set when a new-client event arrives while a takeover was expected */
    takeover_complete,
};
63
/* Per-connection state stored in lrm_state->remote_ra_data */
typedef struct remote_ra_data_s {
    /* Mainloop trigger that runs handle_remote_ra_exec() for this connection */
    crm_trigger_t *work;
    /* Command currently awaiting an asynchronous event, or NULL */
    remote_ra_cmd_t *cur_cmd;
    /* Commands waiting to be executed, in submission order */
    GList *cmds;
    /* Recurring commands waiting for their interval timer to fire */
    GList *recurring_cmds;

    /* Migration progress (0 when no migration is in progress) */
    enum remote_migration_status migrate_status;

    /* TRUE after a successful connect event, FALSE again after a stop */
    gboolean active;

    /* Last maintenance-mode value successfully submitted by
     * remote_ra_maintenance() */
    gboolean is_maintenance;

    /* Set to TRUE by handle_remote_ra_start() when the start parameters
     * include the container meta-attribute (i.e. presumably a guest node
     * -- TODO confirm) */
    gboolean controlling_guest;
} remote_ra_data_t;
87
/* Forward declarations for helpers that are used before their definitions */
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
91
92 static void
93 free_cmd(gpointer user_data)
94 {
95 remote_ra_cmd_t *cmd = user_data;
96
97 if (!cmd) {
98 return;
99 }
100 if (cmd->delay_id) {
101 g_source_remove(cmd->delay_id);
102 }
103 if (cmd->interval_id) {
104 g_source_remove(cmd->interval_id);
105 }
106 if (cmd->monitor_timeout_id) {
107 g_source_remove(cmd->monitor_timeout_id);
108 }
109 if (cmd->takeover_timeout_id) {
110 g_source_remove(cmd->takeover_timeout_id);
111 }
112 free(cmd->owner);
113 free(cmd->rsc_id);
114 free(cmd->action);
115 free(cmd->userdata);
116 free(cmd->exit_reason);
117 lrmd_key_value_freeall(cmd->params);
118 free(cmd);
119 }
120
/*!
 * \internal
 * \brief Produce the next call ID for a remote RA command
 *
 * IDs start at 1 and increase by one per call. After INT_MAX has been handed
 * out, the sequence restarts at 1; the wrap is performed before incrementing
 * so the counter never overflows (signed overflow is undefined behavior).
 *
 * \return Positive call ID
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    if ((remote_ra_callid <= 0) || (remote_ra_callid == INT_MAX)) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
133
134 static gboolean
135 recurring_helper(gpointer data)
136 {
137 remote_ra_cmd_t *cmd = data;
138 lrm_state_t *connection_rsc = NULL;
139
140 cmd->interval_id = 0;
141 connection_rsc = lrm_state_find(cmd->rsc_id);
142 if (connection_rsc && connection_rsc->remote_ra_data) {
143 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
144
145 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
146
147 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
148 mainloop_set_trigger(ra_data->work);
149 }
150 return FALSE;
151 }
152
153 static gboolean
154 start_delay_helper(gpointer data)
155 {
156 remote_ra_cmd_t *cmd = data;
157 lrm_state_t *connection_rsc = NULL;
158
159 cmd->delay_id = 0;
160 connection_rsc = lrm_state_find(cmd->rsc_id);
161 if (connection_rsc && connection_rsc->remote_ra_data) {
162 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
163
164 mainloop_set_trigger(ra_data->work);
165 }
166 return FALSE;
167 }
168
169
170
171
172
173
174
175 static void
176 remote_node_up(const char *node_name)
177 {
178 int call_opt, call_id = 0;
179 xmlNode *update, *state;
180 crm_node_t *node;
181 enum controld_section_e section = controld_section_all;
182
183 CRM_CHECK(node_name != NULL, return);
184 crm_info("Announcing pacemaker_remote node %s", node_name);
185
186
187
188
189
190
191 call_opt = crmd_cib_smart_opt();
192 if (controld_shutdown_lock_enabled) {
193 section = controld_section_all_unlocked;
194 }
195
196 update_attrd_remote_node_removed(node_name, NULL);
197
198 controld_delete_node_state(node_name, section, call_opt);
199
200
201 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
202
203
204 node = crm_remote_peer_get(node_name);
205 CRM_CHECK(node != NULL, return);
206 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
207
208
209
210
211
212
213
214 send_remote_state_message(node_name, TRUE);
215
216 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
217 state = create_node_state_update(node, node_update_cluster, update,
218 __func__);
219
220
221
222
223
224 crm_xml_add(state, XML_NODE_IS_FENCED, "0");
225
226
227
228
229
230
231
232
233 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
234 if (call_id < 0) {
235 crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
236 }
237 free_xml(update);
238 }
239
/* Selects how much recorded node state remote_node_down() deletes */
enum down_opts {
    /* Delete only the attributes section (controld_section_attrs) */
    DOWN_KEEP_LRM,
    /* Delete all node state (controld_section_all) */
    DOWN_ERASE_LRM
};
244
245
246
247
248
249
250
251
252 static void
253 remote_node_down(const char *node_name, const enum down_opts opts)
254 {
255 xmlNode *update;
256 int call_id = 0;
257 int call_opt = crmd_cib_smart_opt();
258 crm_node_t *node;
259
260
261 update_attrd_remote_node_removed(node_name, NULL);
262
263
264
265
266
267
268 if (opts == DOWN_ERASE_LRM) {
269 controld_delete_node_state(node_name, controld_section_all, call_opt);
270 } else {
271 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
272 }
273
274
275 node = crm_remote_peer_get(node_name);
276 CRM_CHECK(node != NULL, return);
277 pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
278
279
280 send_remote_state_message(node_name, FALSE);
281
282
283 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
284 create_node_state_update(node, node_update_cluster, update, __func__);
285 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
286 if (call_id < 0) {
287 crm_perror(LOG_ERR, "%s CIB node state update", node_name);
288 }
289 free_xml(update);
290 }
291
292
293
294
295
296
297
298 static void
299 check_remote_node_state(remote_ra_cmd_t *cmd)
300 {
301
302 if (cmd->rc != PCMK_OCF_OK) {
303 return;
304 }
305
306 if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
307 remote_node_up(cmd->rsc_id);
308
309 } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
310
311
312
313
314
315
316
317 crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
318
319 CRM_CHECK(node != NULL, return);
320 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
321
322 } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
323 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
324 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
325
326 if (ra_data) {
327 if (ra_data->migrate_status != takeover_complete) {
328
329 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
330 } else if (AM_I_DC == FALSE) {
331
332
333
334
335 crm_remote_peer_cache_remove(cmd->rsc_id);
336 }
337 }
338 }
339
340
341
342
343
344
345
346
347
348
349 }
350
351 static void
352 report_remote_ra_result(remote_ra_cmd_t * cmd)
353 {
354 lrmd_event_data_t op = { 0, };
355
356 check_remote_node_state(cmd);
357
358 op.type = lrmd_event_exec_complete;
359 op.rsc_id = cmd->rsc_id;
360 op.op_type = cmd->action;
361 op.user_data = cmd->userdata;
362 op.exit_reason = cmd->exit_reason;
363 op.timeout = cmd->timeout;
364 op.interval_ms = cmd->interval_ms;
365 op.rc = cmd->rc;
366 op.op_status = cmd->op_status;
367 op.t_run = (unsigned int) cmd->start_time;
368 op.t_rcchange = (unsigned int) cmd->start_time;
369 if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
370 op.t_rcchange = (unsigned int) time(NULL);
371
372
373
374
375
376
377
378
379
380 if (op.t_rcchange == op.t_run) {
381 op.t_rcchange++;
382 }
383 }
384
385 if (cmd->params) {
386 lrmd_key_value_t *tmp;
387
388 op.params = pcmk__strkey_table(free, free);
389 for (tmp = cmd->params; tmp; tmp = tmp->next) {
390 g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
391 }
392
393 }
394 op.call_id = cmd->call_id;
395 op.remote_nodename = cmd->owner;
396
397 lrm_op_callback(&op);
398
399 if (op.params) {
400 g_hash_table_destroy(op.params);
401 }
402 }
403
404 static void
405 update_remaining_timeout(remote_ra_cmd_t * cmd)
406 {
407 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
408 }
409
410 static gboolean
411 retry_start_cmd_cb(gpointer data)
412 {
413 lrm_state_t *lrm_state = data;
414 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
415 remote_ra_cmd_t *cmd = NULL;
416 int rc = -1;
417
418 if (!ra_data || !ra_data->cur_cmd) {
419 return FALSE;
420 }
421 cmd = ra_data->cur_cmd;
422 if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
423 return FALSE;
424 }
425 update_remaining_timeout(cmd);
426
427 if (cmd->remaining_timeout > 0) {
428 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
429 }
430
431 if (rc != 0) {
432 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
433 cmd->op_status = PCMK_LRM_OP_ERROR;
434 report_remote_ra_result(cmd);
435
436 if (ra_data->cmds) {
437 mainloop_set_trigger(ra_data->work);
438 }
439 ra_data->cur_cmd = NULL;
440 free_cmd(cmd);
441 } else {
442
443 }
444
445 return FALSE;
446 }
447
448
449 static gboolean
450 connection_takeover_timeout_cb(gpointer data)
451 {
452 lrm_state_t *lrm_state = NULL;
453 remote_ra_cmd_t *cmd = data;
454
455 crm_info("takeover event timed out for node %s", cmd->rsc_id);
456 cmd->takeover_timeout_id = 0;
457
458 lrm_state = lrm_state_find(cmd->rsc_id);
459
460 handle_remote_ra_stop(lrm_state, cmd);
461 free_cmd(cmd);
462
463 return FALSE;
464 }
465
466 static gboolean
467 monitor_timeout_cb(gpointer data)
468 {
469 lrm_state_t *lrm_state = NULL;
470 remote_ra_cmd_t *cmd = data;
471
472 lrm_state = lrm_state_find(cmd->rsc_id);
473
474 crm_info("Timed out waiting for remote poke response from %s%s",
475 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
476 cmd->monitor_timeout_id = 0;
477 cmd->op_status = PCMK_LRM_OP_TIMEOUT;
478 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
479
480 if (lrm_state && lrm_state->remote_ra_data) {
481 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
482
483 if (ra_data->cur_cmd == cmd) {
484 ra_data->cur_cmd = NULL;
485 }
486 if (ra_data->cmds) {
487 mainloop_set_trigger(ra_data->work);
488 }
489 }
490
491 report_remote_ra_result(cmd);
492 free_cmd(cmd);
493
494 if(lrm_state) {
495 lrm_state_disconnect(lrm_state);
496 }
497 return FALSE;
498 }
499
500 static void
501 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
502 {
503 lrmd_event_data_t op = { 0, };
504
505 if (lrm_state == NULL) {
506
507 lrm_state = lrm_state_find(fsa_our_uname);
508 }
509 CRM_ASSERT(lrm_state != NULL);
510
511 op.type = lrmd_event_exec_complete;
512 op.rsc_id = rsc_id;
513 op.op_type = op_type;
514 op.rc = PCMK_OCF_OK;
515 op.op_status = PCMK_LRM_OP_DONE;
516 op.t_run = (unsigned int) time(NULL);
517 op.t_rcchange = op.t_run;
518 op.call_id = generate_callid();
519 process_lrm_event(lrm_state, &op, NULL, NULL);
520 }
521
/*!
 * \brief Handle an executor event received for a remote connection
 *
 * Events are handled in this order:
 * - new-client events update (or reject) a migration takeover
 * - exec-complete events are forwarded to lrm_op_callback() unless the
 *   connection has been taken over
 * - disconnect events with no command in progress are treated as an expected
 *   disconnect, a lost connection, or (in maintenance mode) an implicit stop
 * - anything else is matched against the connection's current command
 *
 * \param[in] op  Event to process
 */
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
    gboolean cmd_handled = FALSE;
    lrm_state_t *lrm_state = NULL;
    remote_ra_data_t *ra_data = NULL;
    remote_ra_cmd_t *cmd = NULL;

    crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
              "(%d) status=%s (%d)",
              (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
              lrmd_event_type2str(op->type), op->remote_nodename,
              services_ocf_exitcode_str(op->rc), op->rc,
              services_lrm_status_str(op->op_status), op->op_status);

    lrm_state = lrm_state_find(op->remote_nodename);
    if (!lrm_state || !lrm_state->remote_ra_data) {
        crm_debug("No state information found for remote connection event");
        return;
    }
    ra_data = lrm_state->remote_ra_data;

    if (op->type == lrmd_event_new_client) {
        /* Another client connected to the remote node */

        if (ra_data->migrate_status == expect_takeover) {
            /* Expected: a migrate_to was processed earlier */
            ra_data->migrate_status = takeover_complete;

        } else {
            crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);

            /* NOTE(review): the "_only" variant is used here, presumably to
             * drop the connection without freeing it -- confirm
             */
            lrm_state_disconnect_only(lrm_state);
        }
        return;
    }

    /* Results of actions executed on the remote node are passed through */
    if (op->type == lrmd_event_exec_complete) {
        if (ra_data->migrate_status == takeover_complete) {
            crm_debug("ignoring event, this connection is taken over by another node");
        } else {
            lrm_op_callback(op);
        }
        return;
    }

    /* Disconnect while no command is waiting for an event */
    if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {

        if (ra_data->active == FALSE) {
            crm_debug("Disconnection from Pacemaker Remote node %s complete",
                      lrm_state->node_name);

        } else if (!remote_ra_is_in_maintenance(lrm_state)) {
            crm_err("Lost connection to Pacemaker Remote node %s",
                    lrm_state->node_name);
            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);

        } else {
            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
                       lrm_state->node_name);

            /* In maintenance mode, treat the disconnect as a stop: tear down
             * the connection, mark the node down, and synthesize a successful
             * stop result
             */
            handle_remote_ra_stop(lrm_state, NULL);
            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);

            synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
        }
        return;
    }

    if (!ra_data->cur_cmd) {
        crm_debug("no event to match");
        return;
    }

    cmd = ra_data->cur_cmd;

    /* From here on, the event is matched against the command in progress */

    if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
                                                               "migrate_from", NULL)) {
        if (op->connection_rc < 0) {
            update_remaining_timeout(cmd);

            if (op->connection_rc == -ENOKEY) {
                /* Retrying cannot help with an unreadable key; fail now */
                cmd->op_status = PCMK_LRM_OP_ERROR;
                cmd->rc = PCMK_OCF_INVALID_PARAM;
                cmd->exit_reason = strdup("Authentication key not readable");

            } else if (cmd->remaining_timeout > 3000) {
                /* Enough time left: retry in one second. The command stays
                 * the current command until the retry resolves it.
                 */
                crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                return;

            } else {
                crm_trace("can't reschedule start, remaining timeout too small %d",
                          cmd->remaining_timeout);
                cmd->op_status = PCMK_LRM_OP_TIMEOUT;
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            }

        } else {
            /* Connected */
            lrm_state_reset_tables(lrm_state, TRUE);
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            ra_data->active = TRUE;
        }

        crm_debug("Remote connection event matched %s action", cmd->action);
        report_remote_ra_result(cmd);
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {

        /* The poke was answered, so the response timer is no longer needed */
        if (cmd->monitor_timeout_id) {
            g_source_remove(cmd->monitor_timeout_id);
            cmd->monitor_timeout_id = 0;
        }

        /* Success is reported only once per command; later successful
         * recurrences are silent
         */
        if (!cmd->reported_success) {
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
            cmd->reported_success = 1;
        }

        crm_debug("Remote poke event matched %s action", cmd->action);

        /* A recurring, uncancelled monitor is parked on the recurring list
         * until its interval timer fires; clearing cmd keeps it from being
         * freed below
         */
        if (cmd->interval_ms && (cmd->cancel == FALSE)) {
            ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
            cmd->interval_id = g_timeout_add(cmd->interval_ms,
                                             recurring_helper, cmd);
            cmd = NULL;
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
        /* Connection dropped while a monitor was waiting for its poke */
        if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
            cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
            cmd->op_status = PCMK_LRM_OP_ERROR;
            report_remote_ra_result(cmd);
            crm_err("Remote connection to %s unexpectedly dropped during monitor",
                    lrm_state->node_name);
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
        /* NOTE(review): new-client events always return near the top of this
         * function, so this branch looks unreachable -- confirm whether the
         * stop is meant to complete here instead of waiting for the takeover
         * timer
         */
        handle_remote_ra_stop(lrm_state, cmd);
        cmd_handled = TRUE;

    } else {
        crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
    }

    if (cmd_handled) {
        /* Current command is finished; let the next queued one run.
         * cmd is NULL here if it was parked as a recurring monitor.
         */
        ra_data->cur_cmd = NULL;
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
        free_cmd(cmd);
    }
}
693
694 static void
695 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
696 {
697 remote_ra_data_t *ra_data = NULL;
698
699 CRM_ASSERT(lrm_state);
700 ra_data = lrm_state->remote_ra_data;
701
702 if (ra_data->migrate_status != takeover_complete) {
703
704 g_hash_table_remove_all(lrm_state->pending_ops);
705 } else {
706
707
708 lrm_state_reset_tables(lrm_state, FALSE);
709 }
710
711 ra_data->active = FALSE;
712 lrm_state_disconnect(lrm_state);
713
714 if (ra_data->cmds) {
715 g_list_free_full(ra_data->cmds, free_cmd);
716 }
717 if (ra_data->recurring_cmds) {
718 g_list_free_full(ra_data->recurring_cmds, free_cmd);
719 }
720 ra_data->cmds = NULL;
721 ra_data->recurring_cmds = NULL;
722 ra_data->cur_cmd = NULL;
723
724 if (cmd) {
725 cmd->rc = PCMK_OCF_OK;
726 cmd->op_status = PCMK_LRM_OP_DONE;
727
728 report_remote_ra_result(cmd);
729 }
730 }
731
732 static int
733 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
734 {
735 const char *server = NULL;
736 lrmd_key_value_t *tmp = NULL;
737 int port = 0;
738 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
739 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
740
741 for (tmp = cmd->params; tmp; tmp = tmp->next) {
742 if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
743 XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
744 server = tmp->value;
745 } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
746 port = atoi(tmp->value);
747 } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
748 ra_data->controlling_guest = TRUE;
749 }
750 }
751
752 return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
753 }
754
/*!
 * \internal
 * \brief Mainloop trigger callback that executes queued connection commands
 *
 * Commands are taken from the head of the pending list one at a time.
 * Actions that complete immediately are reported and freed here; actions that
 * must wait for an asynchronous event become the connection's current command
 * and processing stops until that event (or a timer) resolves them.
 *
 * \param[in] user_data  Connection state (lrm_state_t *)
 *
 * \return TRUE always
 */
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
    int rc = 0;
    lrm_state_t *lrm_state = user_data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *cmd;
    GList *first = NULL;

    if (ra_data->cur_cmd) {
        /* Still waiting on an asynchronous result for the current command */
        return TRUE;
    }

    while (ra_data->cmds) {
        first = ra_data->cmds;
        cmd = first->data;
        if (cmd->delay_id) {
            /* Head of the queue is still in its start delay; nothing behind
             * it runs until start_delay_helper() fires
             */
            return TRUE;
        }

        /* Dequeue the command; from here it is owned by this function until
         * it is either freed below or handed off as the current command
         */
        ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
        g_list_free_1(first);

        if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
            /* A new connection begins with no migration in progress */
            ra_data->migrate_status = 0;
            rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
            if (rc == 0) {
                /* Result will come from the connect event */
                crm_debug("Initiated async remote connection, %s action will complete after connect event",
                          cmd->action);
                ra_data->cur_cmd = cmd;
                return TRUE;
            } else {
                crm_debug("Could not initiate remote connection for %s action",
                          cmd->action);
                cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                cmd->op_status = PCMK_LRM_OP_ERROR;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "monitor")) {

            if (lrm_state_is_connected(lrm_state) == TRUE) {
                rc = lrm_state_poke_connection(lrm_state);
                if (rc < 0) {
                    cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
                    cmd->op_status = PCMK_LRM_OP_ERROR;
                }
            } else {
                /* Not connected: the monitor completes as "not running" */
                rc = -1;
                cmd->op_status = PCMK_LRM_OP_DONE;
                cmd->rc = PCMK_OCF_NOT_RUNNING;
            }

            if (rc == 0) {
                /* Result will come from the poke event, or from the timer if
                 * no response arrives within the command's timeout
                 */
                crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
                          cmd->rsc_id);
                ra_data->cur_cmd = cmd;
                cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                return TRUE;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "stop")) {

            if (ra_data->migrate_status == expect_takeover) {
                /* A migrate_to was processed and the new client has not
                 * connected yet: hold the stop for up to half its timeout;
                 * connection_takeover_timeout_cb() completes it if the timer
                 * expires
                 */
                cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }

            /* This also frees every remaining queued command, so the loop
             * ends after this iteration
             */
            handle_remote_ra_stop(lrm_state, cmd);

        } else if (!strcmp(cmd->action, "migrate_to")) {
            /* Nothing to do locally except expect the takeover */
            ra_data->migrate_status = expect_takeover;
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
                                    CRMD_ACTION_RELOAD_AGENT, NULL)) {
            /* Reloads are reported as successful without doing anything */
            cmd->rc = PCMK_OCF_OK;
            cmd->op_status = PCMK_LRM_OP_DONE;
            report_remote_ra_result(cmd);
        }

        /* Reached only for commands that completed synchronously above */
        free_cmd(cmd);
    }

    return TRUE;
}
862
863 static void
864 remote_ra_data_init(lrm_state_t * lrm_state)
865 {
866 remote_ra_data_t *ra_data = NULL;
867
868 if (lrm_state->remote_ra_data) {
869 return;
870 }
871
872 ra_data = calloc(1, sizeof(remote_ra_data_t));
873 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
874 lrm_state->remote_ra_data = ra_data;
875 }
876
877 void
878 remote_ra_cleanup(lrm_state_t * lrm_state)
879 {
880 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
881
882 if (!ra_data) {
883 return;
884 }
885
886 if (ra_data->cmds) {
887 g_list_free_full(ra_data->cmds, free_cmd);
888 }
889
890 if (ra_data->recurring_cmds) {
891 g_list_free_full(ra_data->recurring_cmds, free_cmd);
892 }
893 mainloop_destroy_trigger(ra_data->work);
894 free(ra_data);
895 lrm_state->remote_ra_data = NULL;
896 }
897
898 gboolean
899 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
900 {
901 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
902 return TRUE;
903 }
904 if (id && lrm_state_find(id) && !pcmk__str_eq(id, fsa_our_uname, pcmk__str_casei)) {
905 return TRUE;
906 }
907
908 return FALSE;
909 }
910
911 lrmd_rsc_info_t *
912 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
913 {
914 lrmd_rsc_info_t *info = NULL;
915
916 if ((lrm_state_find(rsc_id))) {
917 info = calloc(1, sizeof(lrmd_rsc_info_t));
918
919 info->id = strdup(rsc_id);
920 info->type = strdup(REMOTE_LRMD_RA);
921 info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
922 info->provider = strdup("pacemaker");
923 }
924
925 return info;
926 }
927
928 static gboolean
929 is_remote_ra_supported_action(const char *action)
930 {
931 return pcmk__str_any_of(action,
932 CRMD_ACTION_START,
933 CRMD_ACTION_STOP,
934 CRMD_ACTION_STATUS,
935 CRMD_ACTION_MIGRATE,
936 CRMD_ACTION_MIGRATED,
937 CRMD_ACTION_RELOAD_AGENT,
938 CRMD_ACTION_RELOAD,
939 NULL);
940 }
941
942 static GList *
943 fail_all_monitor_cmds(GList * list)
944 {
945 GList *rm_list = NULL;
946 remote_ra_cmd_t *cmd = NULL;
947 GList *gIter = NULL;
948
949 for (gIter = list; gIter != NULL; gIter = gIter->next) {
950 cmd = gIter->data;
951 if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
952 rm_list = g_list_append(rm_list, cmd);
953 }
954 }
955
956 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
957 cmd = gIter->data;
958
959 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
960 cmd->op_status = PCMK_LRM_OP_ERROR;
961 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
962 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
963 report_remote_ra_result(cmd);
964
965 list = g_list_remove(list, cmd);
966 free_cmd(cmd);
967 }
968
969
970 g_list_free(rm_list);
971 return list;
972 }
973
974 static GList *
975 remove_cmd(GList * list, const char *action, guint interval_ms)
976 {
977 remote_ra_cmd_t *cmd = NULL;
978 GList *gIter = NULL;
979
980 for (gIter = list; gIter != NULL; gIter = gIter->next) {
981 cmd = gIter->data;
982 if ((cmd->interval_ms == interval_ms)
983 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
984 break;
985 }
986 cmd = NULL;
987 }
988 if (cmd) {
989 list = g_list_remove(list, cmd);
990 free_cmd(cmd);
991 }
992 return list;
993 }
994
995 int
996 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
997 const char *action, guint interval_ms)
998 {
999 lrm_state_t *connection_rsc = NULL;
1000 remote_ra_data_t *ra_data = NULL;
1001
1002 connection_rsc = lrm_state_find(rsc_id);
1003 if (!connection_rsc || !connection_rsc->remote_ra_data) {
1004 return -EINVAL;
1005 }
1006
1007 ra_data = connection_rsc->remote_ra_data;
1008 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1009 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1010 interval_ms);
1011 if (ra_data->cur_cmd &&
1012 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1013 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1014
1015 ra_data->cur_cmd->cancel = TRUE;
1016 }
1017
1018 return 0;
1019 }
1020
1021 static remote_ra_cmd_t *
1022 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1023 const char *userdata)
1024 {
1025 GList *gIter = NULL;
1026 remote_ra_cmd_t *cmd = NULL;
1027
1028
1029
1030
1031
1032
1033
1034 if (interval_ms == 0) {
1035 return NULL;
1036 }
1037
1038 if (ra_data->cur_cmd &&
1039 ra_data->cur_cmd->cancel == FALSE &&
1040 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1041 pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
1042
1043 cmd = ra_data->cur_cmd;
1044 goto handle_dup;
1045 }
1046
1047 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1048 cmd = gIter->data;
1049 if ((cmd->interval_ms == interval_ms)
1050 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1051 goto handle_dup;
1052 }
1053 }
1054
1055 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1056 cmd = gIter->data;
1057 if ((cmd->interval_ms == interval_ms)
1058 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1059 goto handle_dup;
1060 }
1061 }
1062
1063 return NULL;
1064
1065 handle_dup:
1066
1067 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1068 cmd->rsc_id, "monitor", interval_ms);
1069
1070
1071 if (userdata) {
1072 free(cmd->userdata);
1073 cmd->userdata = strdup(userdata);
1074 }
1075
1076
1077 if (cmd->reported_success) {
1078 cmd->start_time = time(NULL);
1079 cmd->call_id = generate_callid();
1080 cmd->reported_success = 0;
1081 }
1082
1083
1084
1085
1086 if (cmd->interval_id) {
1087 g_source_remove(cmd->interval_id);
1088 cmd->interval_id = 0;
1089 recurring_helper(cmd);
1090 }
1091
1092 return cmd;
1093 }
1094
1095 int
1096 remote_ra_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
1097 const char *userdata, guint interval_ms,
1098 int timeout,
1099 int start_delay,
1100 lrmd_key_value_t * params)
1101 {
1102 int rc = 0;
1103 lrm_state_t *connection_rsc = NULL;
1104 remote_ra_cmd_t *cmd = NULL;
1105 remote_ra_data_t *ra_data = NULL;
1106
1107 if (is_remote_ra_supported_action(action) == FALSE) {
1108 rc = -EINVAL;
1109 goto exec_done;
1110 }
1111
1112 connection_rsc = lrm_state_find(rsc_id);
1113 if (!connection_rsc) {
1114 rc = -EINVAL;
1115 goto exec_done;
1116 }
1117
1118 remote_ra_data_init(connection_rsc);
1119 ra_data = connection_rsc->remote_ra_data;
1120
1121 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1122 if (cmd) {
1123 rc = cmd->call_id;
1124 goto exec_done;
1125 }
1126
1127 cmd = calloc(1, sizeof(remote_ra_cmd_t));
1128 cmd->owner = strdup(lrm_state->node_name);
1129 cmd->rsc_id = strdup(rsc_id);
1130 cmd->action = strdup(action);
1131 cmd->userdata = strdup(userdata);
1132 cmd->interval_ms = interval_ms;
1133 cmd->timeout = timeout;
1134 cmd->start_delay = start_delay;
1135 cmd->params = params;
1136 cmd->start_time = time(NULL);
1137
1138 cmd->call_id = generate_callid();
1139
1140 if (cmd->start_delay) {
1141 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1142 }
1143
1144 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1145 mainloop_set_trigger(ra_data->work);
1146
1147 return cmd->call_id;
1148 exec_done:
1149
1150 lrmd_key_value_freeall(params);
1151 return rc;
1152 }
1153
1154
1155
1156
1157
1158
1159
1160 void
1161 remote_ra_fail(const char *node_name)
1162 {
1163 lrm_state_t *lrm_state = lrm_state_find(node_name);
1164
1165 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1166 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1167
1168 crm_info("Failing monitors on pacemaker_remote node %s", node_name);
1169 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1170 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1171 }
1172 }
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185 #define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1186 "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
1187 "/" XML_CIB_TAG_NODE
1188
1189
1190
1191
1192
1193
1194
1195 void
1196 remote_ra_process_pseudo(xmlNode *xml)
1197 {
1198 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1199
1200 if (numXpathResults(search) == 1) {
1201 xmlNode *result = getXpathResult(search, 0);
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217 if (result) {
1218 const char *remote = ID(result);
1219
1220 if (remote) {
1221 remote_node_down(remote, DOWN_ERASE_LRM);
1222 }
1223 }
1224 }
1225 freeXpathObject(search);
1226 }
1227
1228 static void
1229 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1230 {
1231 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1232 xmlNode *update, *state;
1233 int call_opt, call_id = 0;
1234 crm_node_t *node;
1235
1236 call_opt = crmd_cib_smart_opt();
1237 node = crm_remote_peer_get(lrm_state->node_name);
1238 CRM_CHECK(node != NULL, return);
1239 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
1240 state = create_node_state_update(node, node_update_none, update,
1241 __func__);
1242 crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
1243 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
1244 if (call_id < 0) {
1245 crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
1246 } else {
1247
1248 ra_data->is_maintenance = maintenance;
1249 }
1250 free_xml(update);
1251 }
1252
1253 #define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1254 "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
1255 XML_GRAPH_TAG_MAINTENANCE
1256
1257
1258
1259
1260
1261
1262
1263
1264 void
1265 remote_ra_process_maintenance_nodes(xmlNode *xml)
1266 {
1267 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1268
1269 if (numXpathResults(search) == 1) {
1270 xmlNode *node;
1271 int cnt = 0, cnt_remote = 0;
1272
1273 for (node =
1274 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1275 node != NULL; node = pcmk__xml_next(node)) {
1276 lrm_state_t *lrm_state = lrm_state_find(ID(node));
1277
1278 cnt++;
1279 if (lrm_state && lrm_state->remote_ra_data &&
1280 ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
1281 int is_maint;
1282
1283 cnt_remote++;
1284 pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
1285 &is_maint, 0);
1286 remote_ra_maintenance(lrm_state, is_maint);
1287 }
1288 }
1289 crm_trace("Action holds %d nodes (%d remotes found) "
1290 "adjusting maintenance-mode", cnt, cnt_remote);
1291 }
1292 freeXpathObject(search);
1293 }
1294
1295 gboolean
1296 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1297 {
1298 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1299
1300 return ra_data->is_maintenance;
1301 }
1302
1303 gboolean
1304 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1305 {
1306 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1307
1308 return ra_data->controlling_guest;
1309 }