This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- remote_ra_exec
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
19
/* Agent type name of remote connection resources (compared against the agent
 * in is_remote_lrmd_ra() and reported as the type in remote_ra_get_rsc_info())
 */
#define REMOTE_LRMD_RA "remote"

/* Upper bound, in milliseconds, on the timeout passed to a single asynchronous
 * connection attempt in handle_remote_ra_start()
 */
#define MAX_START_TIMEOUT_MS 10000
24
/* One action requested against a remote connection resource */
typedef struct remote_ra_cmd_s {
    /* Name of the node the request came in on (copied from the lrm_state
     * passed to remote_ra_exec()); reported back as the remote node name
     */
    char *owner;
    /* ID of the connection resource; also used to look up its lrm_state */
    char *rsc_id;
    /* Action name, e.g. "start", "stop", "monitor" */
    char *action;
    /* Caller-supplied data handed back unchanged in the result event */
    char *userdata;
    /* Optional explanation reported with the result */
    char *exit_reason;
    /* Delay before the command may run, as passed to g_timeout_add() */
    int start_delay;
    /* GLib source ID of the start-delay timer, or 0 if none is pending */
    int delay_id;
    /* Action timeout in milliseconds */
    int timeout;
    /* Milliseconds of timeout left, as computed by update_remaining_timeout() */
    int remaining_timeout;
    /* Recurring interval in milliseconds (0 for a one-time action) */
    guint interval_ms;
    /* GLib source ID of the timer that re-queues a recurring command, or 0 */
    int interval_id;
    /* Nonzero once a successful monitor result has been reported */
    int reported_success;
    /* GLib source ID of the poke-response timeout timer, or 0 */
    int monitor_timeout_id;
    /* GLib source ID of the takeover wait timer used by stop, or 0 */
    int takeover_timeout_id;
    /* Action parameters; owned by the command and freed with it */
    lrmd_key_value_t *params;
    /* Result code to report (PCMK_OCF_*) */
    int rc;
    /* Execution status to report (PCMK_LRM_OP_*) */
    int op_status;
    /* Call ID returned to the requester (from generate_callid()) */
    int call_id;
    /* Time the command was created (or last merged with a duplicate) */
    time_t start_time;
    /* TRUE if the command was cancelled while it was the current command */
    gboolean cancel;
} remote_ra_cmd_t;
58
/* Progress of handing a remote connection over to another node.
 * The value 0 (neither enumerator) is stored when a start or migrate_from
 * is executed.
 */
enum remote_migration_status {
    /* Set after a successful migrate_to: a new client is expected to connect */
    expect_takeover = 1,
    /* Set when the new-client event arrives while a takeover was expected */
    takeover_complete,
};
63
/* Per-connection state kept in lrm_state->remote_ra_data */
typedef struct remote_ra_data_s {
    /* Main loop trigger that runs handle_remote_ra_exec() */
    crm_trigger_t *work;
    /* Command currently waiting for an asynchronous event, or NULL */
    remote_ra_cmd_t *cur_cmd;
    /* Commands queued for execution */
    GList *cmds;
    /* Recurring commands waiting for their interval timer */
    GList *recurring_cmds;

    /* Takeover progress; see enum remote_migration_status */
    enum remote_migration_status migrate_status;

    /* TRUE after a successful connect, FALSE again after a stop */
    gboolean active;

    /* Last maintenance state successfully submitted by remote_ra_maintenance() */
    gboolean is_maintenance;

    /* Set to TRUE when a start's parameters include the container meta-attribute */
    gboolean controlling_guest;
} remote_ra_data_t;
87
88 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
89 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
90 static GList *fail_all_monitor_cmds(GList * list);
91
92 static void
93 free_cmd(gpointer user_data)
94 {
95 remote_ra_cmd_t *cmd = user_data;
96
97 if (!cmd) {
98 return;
99 }
100 if (cmd->delay_id) {
101 g_source_remove(cmd->delay_id);
102 }
103 if (cmd->interval_id) {
104 g_source_remove(cmd->interval_id);
105 }
106 if (cmd->monitor_timeout_id) {
107 g_source_remove(cmd->monitor_timeout_id);
108 }
109 if (cmd->takeover_timeout_id) {
110 g_source_remove(cmd->takeover_timeout_id);
111 }
112 free(cmd->owner);
113 free(cmd->rsc_id);
114 free(cmd->action);
115 free(cmd->userdata);
116 free(cmd->exit_reason);
117 lrmd_key_value_freeall(cmd->params);
118 free(cmd);
119 }
120
/*!
 * \internal
 * \brief Produce the next call ID for a remote RA command
 *
 * IDs start at 1 and increase by one per call. After INT_MAX the sequence
 * restarts at 1, so the result is always positive.
 *
 * \return Next call ID (always greater than 0)
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    /* Wrap explicitly: incrementing INT_MAX would be signed overflow, which
     * is undefined behavior, so it must not be relied on to detect the wrap.
     */
    if ((remote_ra_callid <= 0) || (remote_ra_callid == INT_MAX)) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
133
134 static gboolean
135 recurring_helper(gpointer data)
136 {
137 remote_ra_cmd_t *cmd = data;
138 lrm_state_t *connection_rsc = NULL;
139
140 cmd->interval_id = 0;
141 connection_rsc = lrm_state_find(cmd->rsc_id);
142 if (connection_rsc && connection_rsc->remote_ra_data) {
143 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
144
145 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
146
147 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
148 mainloop_set_trigger(ra_data->work);
149 }
150 return FALSE;
151 }
152
153 static gboolean
154 start_delay_helper(gpointer data)
155 {
156 remote_ra_cmd_t *cmd = data;
157 lrm_state_t *connection_rsc = NULL;
158
159 cmd->delay_id = 0;
160 connection_rsc = lrm_state_find(cmd->rsc_id);
161 if (connection_rsc && connection_rsc->remote_ra_data) {
162 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
163
164 mainloop_set_trigger(ra_data->work);
165 }
166 return FALSE;
167 }
168
169
170
171
172
173
174
175 static void
176 remote_node_up(const char *node_name)
177 {
178 int call_opt, call_id = 0;
179 xmlNode *update, *state;
180 crm_node_t *node;
181 enum controld_section_e section = controld_section_all;
182
183 CRM_CHECK(node_name != NULL, return);
184 crm_info("Announcing pacemaker_remote node %s", node_name);
185
186
187
188
189
190
191 call_opt = crmd_cib_smart_opt();
192 if (controld_shutdown_lock_enabled) {
193 section = controld_section_all_unlocked;
194 }
195 controld_delete_node_state(node_name, section, call_opt);
196
197
198 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
199
200
201 node = crm_remote_peer_get(node_name);
202 CRM_CHECK(node != NULL, return);
203 crm_update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
204
205
206
207
208
209
210
211 send_remote_state_message(node_name, TRUE);
212
213 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
214 state = create_node_state_update(node, node_update_cluster, update,
215 __func__);
216
217
218
219
220
221 crm_xml_add(state, XML_NODE_IS_FENCED, "0");
222
223
224
225
226
227
228
229
230 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
231 if (call_id < 0) {
232 crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
233 }
234 free_xml(update);
235 }
236
/* Selects how much stored node state remote_node_down() deletes */
enum down_opts {
    /* Delete only the attributes section (controld_section_attrs) */
    DOWN_KEEP_LRM,
    /* Delete all sections (controld_section_all) */
    DOWN_ERASE_LRM
};
241
242
243
244
245
246
247
248
249 static void
250 remote_node_down(const char *node_name, const enum down_opts opts)
251 {
252 xmlNode *update;
253 int call_id = 0;
254 int call_opt = crmd_cib_smart_opt();
255 crm_node_t *node;
256
257
258 update_attrd_remote_node_removed(node_name, NULL);
259
260
261
262
263
264
265 if (opts == DOWN_ERASE_LRM) {
266 controld_delete_node_state(node_name, controld_section_all, call_opt);
267 } else {
268 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
269 }
270
271
272 node = crm_remote_peer_get(node_name);
273 CRM_CHECK(node != NULL, return);
274 crm_update_peer_state(__func__, node, CRM_NODE_LOST, 0);
275
276
277 send_remote_state_message(node_name, FALSE);
278
279
280 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
281 create_node_state_update(node, node_update_cluster, update, __func__);
282 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
283 if (call_id < 0) {
284 crm_perror(LOG_ERR, "%s CIB node state update", node_name);
285 }
286 free_xml(update);
287 }
288
289
290
291
292
293
294
295 static void
296 check_remote_node_state(remote_ra_cmd_t *cmd)
297 {
298
299 if (cmd->rc != PCMK_OCF_OK) {
300 return;
301 }
302
303 if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
304 remote_node_up(cmd->rsc_id);
305
306 } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
307
308
309
310
311
312
313
314 crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
315
316 CRM_CHECK(node != NULL, return);
317 crm_update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
318
319 } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
320 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
321 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
322
323 if (ra_data) {
324 if (ra_data->migrate_status != takeover_complete) {
325
326 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
327 } else if (AM_I_DC == FALSE) {
328
329
330
331
332 crm_remote_peer_cache_remove(cmd->rsc_id);
333 }
334 }
335 }
336
337
338
339
340
341
342
343
344
345
346 }
347
348 static void
349 report_remote_ra_result(remote_ra_cmd_t * cmd)
350 {
351 lrmd_event_data_t op = { 0, };
352
353 check_remote_node_state(cmd);
354
355 op.type = lrmd_event_exec_complete;
356 op.rsc_id = cmd->rsc_id;
357 op.op_type = cmd->action;
358 op.user_data = cmd->userdata;
359 op.exit_reason = cmd->exit_reason;
360 op.timeout = cmd->timeout;
361 op.interval_ms = cmd->interval_ms;
362 op.rc = cmd->rc;
363 op.op_status = cmd->op_status;
364 op.t_run = (unsigned int) cmd->start_time;
365 op.t_rcchange = (unsigned int) cmd->start_time;
366 if (cmd->reported_success && cmd->rc != PCMK_OCF_OK) {
367 op.t_rcchange = (unsigned int) time(NULL);
368
369
370
371
372
373
374
375
376
377 if (op.t_rcchange == op.t_run) {
378 op.t_rcchange++;
379 }
380 }
381
382 if (cmd->params) {
383 lrmd_key_value_t *tmp;
384
385 op.params = crm_str_table_new();
386 for (tmp = cmd->params; tmp; tmp = tmp->next) {
387 g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
388 }
389
390 }
391 op.call_id = cmd->call_id;
392 op.remote_nodename = cmd->owner;
393
394 lrm_op_callback(&op);
395
396 if (op.params) {
397 g_hash_table_destroy(op.params);
398 }
399 }
400
401 static void
402 update_remaining_timeout(remote_ra_cmd_t * cmd)
403 {
404 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
405 }
406
407 static gboolean
408 retry_start_cmd_cb(gpointer data)
409 {
410 lrm_state_t *lrm_state = data;
411 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
412 remote_ra_cmd_t *cmd = NULL;
413 int rc = -1;
414
415 if (!ra_data || !ra_data->cur_cmd) {
416 return FALSE;
417 }
418 cmd = ra_data->cur_cmd;
419 if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
420 return FALSE;
421 }
422 update_remaining_timeout(cmd);
423
424 if (cmd->remaining_timeout > 0) {
425 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
426 }
427
428 if (rc != 0) {
429 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
430 cmd->op_status = PCMK_LRM_OP_ERROR;
431 report_remote_ra_result(cmd);
432
433 if (ra_data->cmds) {
434 mainloop_set_trigger(ra_data->work);
435 }
436 ra_data->cur_cmd = NULL;
437 free_cmd(cmd);
438 } else {
439
440 }
441
442 return FALSE;
443 }
444
445
446 static gboolean
447 connection_takeover_timeout_cb(gpointer data)
448 {
449 lrm_state_t *lrm_state = NULL;
450 remote_ra_cmd_t *cmd = data;
451
452 crm_info("takeover event timed out for node %s", cmd->rsc_id);
453 cmd->takeover_timeout_id = 0;
454
455 lrm_state = lrm_state_find(cmd->rsc_id);
456
457 handle_remote_ra_stop(lrm_state, cmd);
458 free_cmd(cmd);
459
460 return FALSE;
461 }
462
463 static gboolean
464 monitor_timeout_cb(gpointer data)
465 {
466 lrm_state_t *lrm_state = NULL;
467 remote_ra_cmd_t *cmd = data;
468
469 lrm_state = lrm_state_find(cmd->rsc_id);
470
471 crm_info("Timed out waiting for remote poke response from %s%s",
472 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
473 cmd->monitor_timeout_id = 0;
474 cmd->op_status = PCMK_LRM_OP_TIMEOUT;
475 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
476
477 if (lrm_state && lrm_state->remote_ra_data) {
478 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
479
480 if (ra_data->cur_cmd == cmd) {
481 ra_data->cur_cmd = NULL;
482 }
483 if (ra_data->cmds) {
484 mainloop_set_trigger(ra_data->work);
485 }
486 }
487
488 report_remote_ra_result(cmd);
489 free_cmd(cmd);
490
491 if(lrm_state) {
492 lrm_state_disconnect(lrm_state);
493 }
494 return FALSE;
495 }
496
497 static void
498 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
499 {
500 lrmd_event_data_t op = { 0, };
501
502 if (lrm_state == NULL) {
503
504 lrm_state = lrm_state_find(fsa_our_uname);
505 }
506 CRM_ASSERT(lrm_state != NULL);
507
508 op.type = lrmd_event_exec_complete;
509 op.rsc_id = rsc_id;
510 op.op_type = op_type;
511 op.rc = PCMK_OCF_OK;
512 op.op_status = PCMK_LRM_OP_DONE;
513 op.t_run = (unsigned int) time(NULL);
514 op.t_rcchange = op.t_run;
515 op.call_id = generate_callid();
516 process_lrm_event(lrm_state, &op, NULL, NULL);
517 }
518
519 void
520 remote_lrm_op_callback(lrmd_event_data_t * op)
521 {
522 gboolean cmd_handled = FALSE;
523 lrm_state_t *lrm_state = NULL;
524 remote_ra_data_t *ra_data = NULL;
525 remote_ra_cmd_t *cmd = NULL;
526
527 crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
528 "(%d) status=%s (%d)",
529 (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
530 lrmd_event_type2str(op->type), op->remote_nodename,
531 services_ocf_exitcode_str(op->rc), op->rc,
532 services_lrm_status_str(op->op_status), op->op_status);
533
534 lrm_state = lrm_state_find(op->remote_nodename);
535 if (!lrm_state || !lrm_state->remote_ra_data) {
536 crm_debug("No state information found for remote connection event");
537 return;
538 }
539 ra_data = lrm_state->remote_ra_data;
540
541 if (op->type == lrmd_event_new_client) {
542
543
544 if (ra_data->migrate_status == expect_takeover) {
545
546 ra_data->migrate_status = takeover_complete;
547
548 } else {
549 crm_err("Unexpected pacemaker_remote client takeover for %s. Disconnecting", op->remote_nodename);
550
551
552
553 lrm_state_disconnect_only(lrm_state);
554 }
555 return;
556 }
557
558
559 if (op->type == lrmd_event_exec_complete) {
560 if (ra_data->migrate_status == takeover_complete) {
561 crm_debug("ignoring event, this connection is taken over by another node");
562 } else {
563 lrm_op_callback(op);
564 }
565 return;
566 }
567
568 if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
569
570 if (ra_data->active == FALSE) {
571 crm_debug("Disconnection from Pacemaker Remote node %s complete",
572 lrm_state->node_name);
573
574 } else if (!remote_ra_is_in_maintenance(lrm_state)) {
575 crm_err("Lost connection to Pacemaker Remote node %s",
576 lrm_state->node_name);
577 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
578 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
579
580 } else {
581 crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
582 lrm_state->node_name);
583
584 handle_remote_ra_stop(lrm_state, NULL);
585 remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
586
587 synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
588 }
589 return;
590 }
591
592 if (!ra_data->cur_cmd) {
593 crm_debug("no event to match");
594 return;
595 }
596
597 cmd = ra_data->cur_cmd;
598
599
600
601 if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
602 "migrate_from", NULL)) {
603 if (op->connection_rc < 0) {
604 update_remaining_timeout(cmd);
605
606 if (op->connection_rc == -ENOKEY) {
607
608 cmd->op_status = PCMK_LRM_OP_ERROR;
609 cmd->rc = PCMK_OCF_INVALID_PARAM;
610 cmd->exit_reason = strdup("Authentication key not readable");
611
612 } else if (cmd->remaining_timeout > 3000) {
613 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
614 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
615 return;
616
617 } else {
618 crm_trace("can't reschedule start, remaining timeout too small %d",
619 cmd->remaining_timeout);
620 cmd->op_status = PCMK_LRM_OP_TIMEOUT;
621 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
622 }
623
624 } else {
625 lrm_state_reset_tables(lrm_state, TRUE);
626 cmd->rc = PCMK_OCF_OK;
627 cmd->op_status = PCMK_LRM_OP_DONE;
628 ra_data->active = TRUE;
629 }
630
631 crm_debug("Remote connection event matched %s action", cmd->action);
632 report_remote_ra_result(cmd);
633 cmd_handled = TRUE;
634
635 } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
636
637 if (cmd->monitor_timeout_id) {
638 g_source_remove(cmd->monitor_timeout_id);
639 cmd->monitor_timeout_id = 0;
640 }
641
642
643
644
645 if (!cmd->reported_success) {
646 cmd->rc = PCMK_OCF_OK;
647 cmd->op_status = PCMK_LRM_OP_DONE;
648 report_remote_ra_result(cmd);
649 cmd->reported_success = 1;
650 }
651
652 crm_debug("Remote poke event matched %s action", cmd->action);
653
654
655 if (cmd->interval_ms && (cmd->cancel == FALSE)) {
656 ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
657 cmd->interval_id = g_timeout_add(cmd->interval_ms,
658 recurring_helper, cmd);
659 cmd = NULL;
660 }
661 cmd_handled = TRUE;
662
663 } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
664 if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
665 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
666 cmd->op_status = PCMK_LRM_OP_ERROR;
667 report_remote_ra_result(cmd);
668 crm_err("Remote connection to %s unexpectedly dropped during monitor",
669 lrm_state->node_name);
670 }
671 cmd_handled = TRUE;
672
673 } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
674
675 handle_remote_ra_stop(lrm_state, cmd);
676 cmd_handled = TRUE;
677
678 } else {
679 crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
680 }
681
682 if (cmd_handled) {
683 ra_data->cur_cmd = NULL;
684 if (ra_data->cmds) {
685 mainloop_set_trigger(ra_data->work);
686 }
687 free_cmd(cmd);
688 }
689 }
690
691 static void
692 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
693 {
694 remote_ra_data_t *ra_data = NULL;
695
696 CRM_ASSERT(lrm_state);
697 ra_data = lrm_state->remote_ra_data;
698
699 if (ra_data->migrate_status != takeover_complete) {
700
701 g_hash_table_remove_all(lrm_state->pending_ops);
702 } else {
703
704
705 lrm_state_reset_tables(lrm_state, FALSE);
706 }
707
708 ra_data->active = FALSE;
709 lrm_state_disconnect(lrm_state);
710
711 if (ra_data->cmds) {
712 g_list_free_full(ra_data->cmds, free_cmd);
713 }
714 if (ra_data->recurring_cmds) {
715 g_list_free_full(ra_data->recurring_cmds, free_cmd);
716 }
717 ra_data->cmds = NULL;
718 ra_data->recurring_cmds = NULL;
719 ra_data->cur_cmd = NULL;
720
721 if (cmd) {
722 cmd->rc = PCMK_OCF_OK;
723 cmd->op_status = PCMK_LRM_OP_DONE;
724
725 report_remote_ra_result(cmd);
726 }
727 }
728
729 static int
730 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
731 {
732 const char *server = NULL;
733 lrmd_key_value_t *tmp = NULL;
734 int port = 0;
735 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
736 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
737
738 for (tmp = cmd->params; tmp; tmp = tmp->next) {
739 if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
740 XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
741 server = tmp->value;
742 } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
743 port = atoi(tmp->value);
744 } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
745 ra_data->controlling_guest = TRUE;
746 }
747 }
748
749 return lrm_state_remote_connect_async(lrm_state, server, port, timeout_used);
750 }
751
752 static gboolean
753 handle_remote_ra_exec(gpointer user_data)
754 {
755 int rc = 0;
756 lrm_state_t *lrm_state = user_data;
757 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
758 remote_ra_cmd_t *cmd;
759 GList *first = NULL;
760
761 if (ra_data->cur_cmd) {
762
763 return TRUE;
764 }
765
766 while (ra_data->cmds) {
767 first = ra_data->cmds;
768 cmd = first->data;
769 if (cmd->delay_id) {
770
771 return TRUE;
772 }
773
774 ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
775 g_list_free_1(first);
776
777 if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
778 ra_data->migrate_status = 0;
779 rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
780 if (rc == 0) {
781
782 crm_debug("Initiated async remote connection, %s action will complete after connect event",
783 cmd->action);
784 ra_data->cur_cmd = cmd;
785 return TRUE;
786 } else {
787 crm_debug("Could not initiate remote connection for %s action",
788 cmd->action);
789 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
790 cmd->op_status = PCMK_LRM_OP_ERROR;
791 }
792 report_remote_ra_result(cmd);
793
794 } else if (!strcmp(cmd->action, "monitor")) {
795
796 if (lrm_state_is_connected(lrm_state) == TRUE) {
797 rc = lrm_state_poke_connection(lrm_state);
798 if (rc < 0) {
799 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
800 cmd->op_status = PCMK_LRM_OP_ERROR;
801 }
802 } else {
803 rc = -1;
804 cmd->op_status = PCMK_LRM_OP_DONE;
805 cmd->rc = PCMK_OCF_NOT_RUNNING;
806 }
807
808 if (rc == 0) {
809 crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
810 cmd->rsc_id);
811 ra_data->cur_cmd = cmd;
812 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
813 return TRUE;
814 }
815 report_remote_ra_result(cmd);
816
817 } else if (!strcmp(cmd->action, "stop")) {
818
819 if (ra_data->migrate_status == expect_takeover) {
820
821
822
823
824
825
826 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
827 ra_data->cur_cmd = cmd;
828 return TRUE;
829 }
830
831 handle_remote_ra_stop(lrm_state, cmd);
832
833 } else if (!strcmp(cmd->action, "migrate_to")) {
834 ra_data->migrate_status = expect_takeover;
835 cmd->rc = PCMK_OCF_OK;
836 cmd->op_status = PCMK_LRM_OP_DONE;
837 report_remote_ra_result(cmd);
838 } else if (!strcmp(cmd->action, "reload")) {
839
840 cmd->rc = PCMK_OCF_OK;
841 cmd->op_status = PCMK_LRM_OP_DONE;
842 report_remote_ra_result(cmd);
843 }
844
845 free_cmd(cmd);
846 }
847
848 return TRUE;
849 }
850
851 static void
852 remote_ra_data_init(lrm_state_t * lrm_state)
853 {
854 remote_ra_data_t *ra_data = NULL;
855
856 if (lrm_state->remote_ra_data) {
857 return;
858 }
859
860 ra_data = calloc(1, sizeof(remote_ra_data_t));
861 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
862 lrm_state->remote_ra_data = ra_data;
863 }
864
865 void
866 remote_ra_cleanup(lrm_state_t * lrm_state)
867 {
868 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
869
870 if (!ra_data) {
871 return;
872 }
873
874 if (ra_data->cmds) {
875 g_list_free_full(ra_data->cmds, free_cmd);
876 }
877
878 if (ra_data->recurring_cmds) {
879 g_list_free_full(ra_data->recurring_cmds, free_cmd);
880 }
881 mainloop_destroy_trigger(ra_data->work);
882 free(ra_data);
883 lrm_state->remote_ra_data = NULL;
884 }
885
886 gboolean
887 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
888 {
889 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
890 return TRUE;
891 }
892 if (id && lrm_state_find(id) && !pcmk__str_eq(id, fsa_our_uname, pcmk__str_casei)) {
893 return TRUE;
894 }
895
896 return FALSE;
897 }
898
899 lrmd_rsc_info_t *
900 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
901 {
902 lrmd_rsc_info_t *info = NULL;
903
904 if ((lrm_state_find(rsc_id))) {
905 info = calloc(1, sizeof(lrmd_rsc_info_t));
906
907 info->id = strdup(rsc_id);
908 info->type = strdup(REMOTE_LRMD_RA);
909 info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
910 info->provider = strdup("pacemaker");
911 }
912
913 return info;
914 }
915
916 static gboolean
917 is_remote_ra_supported_action(const char *action)
918 {
919 if (!action) {
920 return FALSE;
921 } else if (strcmp(action, "start") &&
922 strcmp(action, "stop") &&
923 strcmp(action, "reload") &&
924 strcmp(action, "migrate_to") &&
925 strcmp(action, "migrate_from") && strcmp(action, "monitor")) {
926 return FALSE;
927 }
928
929 return TRUE;
930 }
931
932 static GList *
933 fail_all_monitor_cmds(GList * list)
934 {
935 GList *rm_list = NULL;
936 remote_ra_cmd_t *cmd = NULL;
937 GListPtr gIter = NULL;
938
939 for (gIter = list; gIter != NULL; gIter = gIter->next) {
940 cmd = gIter->data;
941 if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
942 rm_list = g_list_append(rm_list, cmd);
943 }
944 }
945
946 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
947 cmd = gIter->data;
948
949 cmd->rc = PCMK_OCF_UNKNOWN_ERROR;
950 cmd->op_status = PCMK_LRM_OP_ERROR;
951 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
952 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
953 report_remote_ra_result(cmd);
954
955 list = g_list_remove(list, cmd);
956 free_cmd(cmd);
957 }
958
959
960 g_list_free(rm_list);
961 return list;
962 }
963
964 static GList *
965 remove_cmd(GList * list, const char *action, guint interval_ms)
966 {
967 remote_ra_cmd_t *cmd = NULL;
968 GListPtr gIter = NULL;
969
970 for (gIter = list; gIter != NULL; gIter = gIter->next) {
971 cmd = gIter->data;
972 if ((cmd->interval_ms == interval_ms)
973 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
974 break;
975 }
976 cmd = NULL;
977 }
978 if (cmd) {
979 list = g_list_remove(list, cmd);
980 free_cmd(cmd);
981 }
982 return list;
983 }
984
985 int
986 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
987 const char *action, guint interval_ms)
988 {
989 lrm_state_t *connection_rsc = NULL;
990 remote_ra_data_t *ra_data = NULL;
991
992 connection_rsc = lrm_state_find(rsc_id);
993 if (!connection_rsc || !connection_rsc->remote_ra_data) {
994 return -EINVAL;
995 }
996
997 ra_data = connection_rsc->remote_ra_data;
998 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
999 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1000 interval_ms);
1001 if (ra_data->cur_cmd &&
1002 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1003 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1004
1005 ra_data->cur_cmd->cancel = TRUE;
1006 }
1007
1008 return 0;
1009 }
1010
1011 static remote_ra_cmd_t *
1012 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1013 const char *userdata)
1014 {
1015 GList *gIter = NULL;
1016 remote_ra_cmd_t *cmd = NULL;
1017
1018
1019
1020
1021
1022
1023
1024 if (interval_ms == 0) {
1025 return NULL;
1026 }
1027
1028 if (ra_data->cur_cmd &&
1029 ra_data->cur_cmd->cancel == FALSE &&
1030 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1031 pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
1032
1033 cmd = ra_data->cur_cmd;
1034 goto handle_dup;
1035 }
1036
1037 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1038 cmd = gIter->data;
1039 if ((cmd->interval_ms == interval_ms)
1040 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1041 goto handle_dup;
1042 }
1043 }
1044
1045 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1046 cmd = gIter->data;
1047 if ((cmd->interval_ms == interval_ms)
1048 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1049 goto handle_dup;
1050 }
1051 }
1052
1053 return NULL;
1054
1055 handle_dup:
1056
1057 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1058 cmd->rsc_id, "monitor", interval_ms);
1059
1060
1061 if (userdata) {
1062 free(cmd->userdata);
1063 cmd->userdata = strdup(userdata);
1064 }
1065
1066
1067 if (cmd->reported_success) {
1068 cmd->start_time = time(NULL);
1069 cmd->call_id = generate_callid();
1070 cmd->reported_success = 0;
1071 }
1072
1073
1074
1075
1076 if (cmd->interval_id) {
1077 g_source_remove(cmd->interval_id);
1078 cmd->interval_id = 0;
1079 recurring_helper(cmd);
1080 }
1081
1082 return cmd;
1083 }
1084
1085 int
1086 remote_ra_exec(lrm_state_t *lrm_state, const char *rsc_id, const char *action,
1087 const char *userdata, guint interval_ms,
1088 int timeout,
1089 int start_delay,
1090 lrmd_key_value_t * params)
1091 {
1092 int rc = 0;
1093 lrm_state_t *connection_rsc = NULL;
1094 remote_ra_cmd_t *cmd = NULL;
1095 remote_ra_data_t *ra_data = NULL;
1096
1097 if (is_remote_ra_supported_action(action) == FALSE) {
1098 rc = -EINVAL;
1099 goto exec_done;
1100 }
1101
1102 connection_rsc = lrm_state_find(rsc_id);
1103 if (!connection_rsc) {
1104 rc = -EINVAL;
1105 goto exec_done;
1106 }
1107
1108 remote_ra_data_init(connection_rsc);
1109 ra_data = connection_rsc->remote_ra_data;
1110
1111 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1112 if (cmd) {
1113 rc = cmd->call_id;
1114 goto exec_done;
1115 }
1116
1117 cmd = calloc(1, sizeof(remote_ra_cmd_t));
1118 cmd->owner = strdup(lrm_state->node_name);
1119 cmd->rsc_id = strdup(rsc_id);
1120 cmd->action = strdup(action);
1121 cmd->userdata = strdup(userdata);
1122 cmd->interval_ms = interval_ms;
1123 cmd->timeout = timeout;
1124 cmd->start_delay = start_delay;
1125 cmd->params = params;
1126 cmd->start_time = time(NULL);
1127
1128 cmd->call_id = generate_callid();
1129
1130 if (cmd->start_delay) {
1131 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1132 }
1133
1134 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1135 mainloop_set_trigger(ra_data->work);
1136
1137 return cmd->call_id;
1138 exec_done:
1139
1140 lrmd_key_value_freeall(params);
1141 return rc;
1142 }
1143
1144
1145
1146
1147
1148
1149
1150 void
1151 remote_ra_fail(const char *node_name)
1152 {
1153 lrm_state_t *lrm_state = lrm_state_find(node_name);
1154
1155 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1156 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1157
1158 crm_info("Failing monitors on pacemaker_remote node %s", node_name);
1159 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1160 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1161 }
1162 }
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176 #define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1177 "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
1178 "/" XML_CIB_TAG_NODE
1179
1180
1181
1182
1183
1184
1185
1186 void
1187 remote_ra_process_pseudo(xmlNode *xml)
1188 {
1189 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1190
1191 if (numXpathResults(search) == 1) {
1192 xmlNode *result = getXpathResult(search, 0);
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208 if (result) {
1209 const char *remote = ID(result);
1210
1211 if (remote) {
1212 remote_node_down(remote, DOWN_ERASE_LRM);
1213 }
1214 }
1215 }
1216 freeXpathObject(search);
1217 }
1218
1219 static void
1220 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1221 {
1222 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1223 xmlNode *update, *state;
1224 int call_opt, call_id = 0;
1225 crm_node_t *node;
1226
1227 call_opt = crmd_cib_smart_opt();
1228 node = crm_remote_peer_get(lrm_state->node_name);
1229 CRM_CHECK(node != NULL, return);
1230 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
1231 state = create_node_state_update(node, node_update_none, update,
1232 __func__);
1233 crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
1234 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
1235 if (call_id < 0) {
1236 crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
1237 } else {
1238
1239 ra_data->is_maintenance = maintenance;
1240 }
1241 free_xml(update);
1242 }
1243
1244 #define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1245 "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
1246 XML_GRAPH_TAG_MAINTENANCE
1247
1248
1249
1250
1251
1252
1253
1254
1255 void
1256 remote_ra_process_maintenance_nodes(xmlNode *xml)
1257 {
1258 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1259
1260 if (numXpathResults(search) == 1) {
1261 xmlNode *node;
1262 int cnt = 0, cnt_remote = 0;
1263
1264 for (node =
1265 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1266 node != NULL; node = pcmk__xml_next(node)) {
1267 lrm_state_t *lrm_state = lrm_state_find(ID(node));
1268
1269 cnt++;
1270 if (lrm_state && lrm_state->remote_ra_data &&
1271 ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
1272 cnt_remote++;
1273 remote_ra_maintenance(lrm_state,
1274 crm_atoi(crm_element_value(node,
1275 XML_NODE_IS_MAINTENANCE), "0"));
1276
1277 }
1278 }
1279 crm_trace("Action holds %d nodes (%d remotes found) "
1280 "adjusting maintenance-mode", cnt, cnt_remote);
1281 }
1282 freeXpathObject(search);
1283 }
1284
1285 gboolean
1286 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1287 {
1288 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1289
1290 return ra_data->is_maintenance;
1291 }
1292
1293 gboolean
1294 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1295 {
1296 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1297
1298 return ra_data->controlling_guest;
1299 }