This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- controld_execute_remote_agent
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
20
21 #define REMOTE_LRMD_RA "remote"
22
23
24 #define MAX_START_TIMEOUT_MS 10000
25
/* One action queued for, or in progress on, a remote connection resource */
typedef struct remote_ra_cmd_s {
    /* Name of the node whose executor state requested the action; passed back
     * as the result's remote_nodename
     */
    char *owner;
    /* ID of the connection resource; also used to look up its executor state */
    char *rsc_id;
    /* Name of the action to perform (e.g. "start", "monitor", "stop") */
    char *action;
    /* Caller-supplied string handed back unchanged with the result */
    char *userdata;
    /* Milliseconds to wait before the command may be executed (0 = no delay) */
    int start_delay;
    /* GLib source ID of the start-delay timer (0 when no timer is pending) */
    int delay_id;
    /* Timeout for the action, in milliseconds */
    int timeout;
    /* Time left for the action as last computed by update_remaining_timeout() */
    int remaining_timeout;
    /* Recurring interval in milliseconds (0 for a one-shot action) */
    guint interval_ms;
    /* GLib source ID of the timer scheduling the next recurrence (0 if none) */
    int interval_id;
    /* Nonzero once a successful monitor result has been reported */
    int reported_success;
    /* GLib source ID of the timer waiting for a poke response (0 if none) */
    int monitor_timeout_id;
    /* GLib source ID of the timer waiting for a takeover event (0 if none) */
    int takeover_timeout_id;
    /* Action parameters; freed along with the command */
    lrmd_key_value_t *params;
    /* Result that will be reported for the action */
    pcmk__action_result_t result;
    /* Call ID handed back to the requester */
    int call_id;
    /* When the command was created (or last restarted as a merged duplicate) */
    time_t start_time;
    /* Set when an in-progress command is cancelled, so it is not rescheduled */
    gboolean cancel;
} remote_ra_cmd_t;
56
/* Progress of handing a remote connection over to another node */
enum remote_migration_status {
    /* A "migrate_to" was processed; a new-client event is now expected */
    expect_takeover = 1,
    /* The expected new-client event arrived */
    takeover_complete,
};
61
/* Per-connection bookkeeping stored in an executor state's remote_ra_data */
typedef struct remote_ra_data_s {
    /* Main-loop trigger that runs handle_remote_ra_exec() */
    crm_trigger_t *work;
    /* Command currently waiting for an asynchronous event (NULL if none) */
    remote_ra_cmd_t *cur_cmd;
    /* Commands waiting to be executed, in order */
    GList *cmds;
    /* Recurring commands waiting for their next interval to elapse */
    GList *recurring_cmds;

    /* 0 when no migration is in progress (reset on start/migrate_from) */
    enum remote_migration_status migrate_status;

    /* TRUE after a successful connect, FALSE once the connection is stopped */
    gboolean active;

    /* Last maintenance setting successfully submitted by
     * remote_ra_maintenance()
     */
    gboolean is_maintenance;

    /* Set when the start parameters include the container meta-attribute */
    gboolean controlling_guest;
} remote_ra_data_t;
85
86 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
87 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
88 static GList *fail_all_monitor_cmds(GList * list);
89
90 static void
91 free_cmd(gpointer user_data)
92 {
93 remote_ra_cmd_t *cmd = user_data;
94
95 if (!cmd) {
96 return;
97 }
98 if (cmd->delay_id) {
99 g_source_remove(cmd->delay_id);
100 }
101 if (cmd->interval_id) {
102 g_source_remove(cmd->interval_id);
103 }
104 if (cmd->monitor_timeout_id) {
105 g_source_remove(cmd->monitor_timeout_id);
106 }
107 if (cmd->takeover_timeout_id) {
108 g_source_remove(cmd->takeover_timeout_id);
109 }
110 free(cmd->owner);
111 free(cmd->rsc_id);
112 free(cmd->action);
113 free(cmd->userdata);
114 pcmk__reset_result(&(cmd->result));
115 lrmd_key_value_freeall(cmd->params);
116 free(cmd);
117 }
118
/*!
 * \internal
 * \brief Generate the next call ID for a remote connection command
 *
 * IDs start at 1 and increase by one per call. After INT_MAX has been handed
 * out, numbering restarts at 1. The wrap is done explicitly, because
 * incrementing a signed int past INT_MAX is undefined behavior.
 *
 * \return Positive call ID
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    if ((remote_ra_callid <= 0) || (remote_ra_callid == INT_MAX)) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
131
132 static gboolean
133 recurring_helper(gpointer data)
134 {
135 remote_ra_cmd_t *cmd = data;
136 lrm_state_t *connection_rsc = NULL;
137
138 cmd->interval_id = 0;
139 connection_rsc = lrm_state_find(cmd->rsc_id);
140 if (connection_rsc && connection_rsc->remote_ra_data) {
141 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
142
143 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
144
145 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
146 mainloop_set_trigger(ra_data->work);
147 }
148 return FALSE;
149 }
150
151 static gboolean
152 start_delay_helper(gpointer data)
153 {
154 remote_ra_cmd_t *cmd = data;
155 lrm_state_t *connection_rsc = NULL;
156
157 cmd->delay_id = 0;
158 connection_rsc = lrm_state_find(cmd->rsc_id);
159 if (connection_rsc && connection_rsc->remote_ra_data) {
160 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
161
162 mainloop_set_trigger(ra_data->work);
163 }
164 return FALSE;
165 }
166
167
168
169
170
171
172
173 static void
174 remote_node_up(const char *node_name)
175 {
176 int call_opt, call_id = 0;
177 xmlNode *update, *state;
178 crm_node_t *node;
179 enum controld_section_e section = controld_section_all;
180
181 CRM_CHECK(node_name != NULL, return);
182 crm_info("Announcing Pacemaker Remote node %s", node_name);
183
184
185
186
187
188
189 call_opt = crmd_cib_smart_opt();
190 if (controld_shutdown_lock_enabled) {
191 section = controld_section_all_unlocked;
192 }
193
194 update_attrd_remote_node_removed(node_name, NULL);
195
196 controld_delete_node_state(node_name, section, call_opt);
197
198
199
200
201
202
203
204 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
205
206
207 node = crm_remote_peer_get(node_name);
208 CRM_CHECK(node != NULL, return);
209 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
210
211
212
213
214
215
216
217 send_remote_state_message(node_name, TRUE);
218
219 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
220 state = create_node_state_update(node, node_update_cluster, update,
221 __func__);
222
223
224
225
226
227 crm_xml_add(state, XML_NODE_IS_FENCED, "0");
228
229
230
231
232
233
234
235
236 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
237 if (call_id < 0) {
238 crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
239 }
240 free_xml(update);
241 }
242
/* How much recorded node state remote_node_down() should delete */
enum down_opts {
    /* Delete only the controld_section_attrs portion of the node's state */
    DOWN_KEEP_LRM,
    /* Delete the node's entire state (controld_section_all) */
    DOWN_ERASE_LRM
};
247
248
249
250
251
252
253
254
255 static void
256 remote_node_down(const char *node_name, const enum down_opts opts)
257 {
258 xmlNode *update;
259 int call_id = 0;
260 int call_opt = crmd_cib_smart_opt();
261 crm_node_t *node;
262
263
264 update_attrd_remote_node_removed(node_name, NULL);
265
266
267
268
269
270
271 if (opts == DOWN_ERASE_LRM) {
272 controld_delete_node_state(node_name, controld_section_all, call_opt);
273 } else {
274 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
275 }
276
277
278 node = crm_remote_peer_get(node_name);
279 CRM_CHECK(node != NULL, return);
280 pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
281
282
283 send_remote_state_message(node_name, FALSE);
284
285
286 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
287 create_node_state_update(node, node_update_cluster, update, __func__);
288 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
289 if (call_id < 0) {
290 crm_perror(LOG_ERR, "%s CIB node state update", node_name);
291 }
292 free_xml(update);
293 }
294
295
296
297
298
299
300
301 static void
302 check_remote_node_state(const remote_ra_cmd_t *cmd)
303 {
304
305 if (!pcmk__result_ok(&(cmd->result))) {
306 return;
307 }
308
309 if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
310 remote_node_up(cmd->rsc_id);
311
312 } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
313
314
315
316
317
318
319
320 crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
321
322 CRM_CHECK(node != NULL, return);
323 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
324
325 } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
326 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
327 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
328
329 if (ra_data) {
330 if (ra_data->migrate_status != takeover_complete) {
331
332 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
333 } else if (AM_I_DC == FALSE) {
334
335
336
337
338 crm_remote_peer_cache_remove(cmd->rsc_id);
339 }
340 }
341 }
342
343
344
345
346
347
348
349
350
351
352 }
353
354 static void
355 report_remote_ra_result(remote_ra_cmd_t * cmd)
356 {
357 lrmd_event_data_t op = { 0, };
358
359 check_remote_node_state(cmd);
360
361 op.type = lrmd_event_exec_complete;
362 op.rsc_id = cmd->rsc_id;
363 op.op_type = cmd->action;
364 op.user_data = cmd->userdata;
365 op.timeout = cmd->timeout;
366 op.interval_ms = cmd->interval_ms;
367 op.t_run = (unsigned int) cmd->start_time;
368 op.t_rcchange = (unsigned int) cmd->start_time;
369
370 lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
371 cmd->result.exit_reason);
372
373 if (cmd->reported_success && !pcmk__result_ok(&(cmd->result))) {
374 op.t_rcchange = (unsigned int) time(NULL);
375
376
377
378
379
380
381
382
383
384 if (op.t_rcchange == op.t_run) {
385 op.t_rcchange++;
386 }
387 }
388
389 if (cmd->params) {
390 lrmd_key_value_t *tmp;
391
392 op.params = pcmk__strkey_table(free, free);
393 for (tmp = cmd->params; tmp; tmp = tmp->next) {
394 g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
395 }
396
397 }
398 op.call_id = cmd->call_id;
399 op.remote_nodename = cmd->owner;
400
401 lrm_op_callback(&op);
402
403 if (op.params) {
404 g_hash_table_destroy(op.params);
405 }
406 lrmd__reset_result(&op);
407 }
408
409 static void
410 update_remaining_timeout(remote_ra_cmd_t * cmd)
411 {
412 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
413 }
414
415 static gboolean
416 retry_start_cmd_cb(gpointer data)
417 {
418 lrm_state_t *lrm_state = data;
419 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
420 remote_ra_cmd_t *cmd = NULL;
421 int rc = ETIME;
422
423 if (!ra_data || !ra_data->cur_cmd) {
424 return FALSE;
425 }
426 cmd = ra_data->cur_cmd;
427 if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
428 return FALSE;
429 }
430 update_remaining_timeout(cmd);
431
432 if (cmd->remaining_timeout > 0) {
433 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
434 } else {
435 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
436 PCMK_EXEC_TIMEOUT,
437 "Not enough time remains to retry remote connection");
438 }
439
440 if (rc != pcmk_rc_ok) {
441 report_remote_ra_result(cmd);
442
443 if (ra_data->cmds) {
444 mainloop_set_trigger(ra_data->work);
445 }
446 ra_data->cur_cmd = NULL;
447 free_cmd(cmd);
448 } else {
449
450 }
451
452 return FALSE;
453 }
454
455
456 static gboolean
457 connection_takeover_timeout_cb(gpointer data)
458 {
459 lrm_state_t *lrm_state = NULL;
460 remote_ra_cmd_t *cmd = data;
461
462 crm_info("takeover event timed out for node %s", cmd->rsc_id);
463 cmd->takeover_timeout_id = 0;
464
465 lrm_state = lrm_state_find(cmd->rsc_id);
466
467 handle_remote_ra_stop(lrm_state, cmd);
468 free_cmd(cmd);
469
470 return FALSE;
471 }
472
473 static gboolean
474 monitor_timeout_cb(gpointer data)
475 {
476 lrm_state_t *lrm_state = NULL;
477 remote_ra_cmd_t *cmd = data;
478
479 lrm_state = lrm_state_find(cmd->rsc_id);
480
481 crm_info("Timed out waiting for remote poke response from %s%s",
482 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
483 cmd->monitor_timeout_id = 0;
484 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
485 "Remote executor did not respond");
486
487 if (lrm_state && lrm_state->remote_ra_data) {
488 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
489
490 if (ra_data->cur_cmd == cmd) {
491 ra_data->cur_cmd = NULL;
492 }
493 if (ra_data->cmds) {
494 mainloop_set_trigger(ra_data->work);
495 }
496 }
497
498 report_remote_ra_result(cmd);
499 free_cmd(cmd);
500
501 if(lrm_state) {
502 lrm_state_disconnect(lrm_state);
503 }
504 return FALSE;
505 }
506
507 static void
508 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
509 {
510 lrmd_event_data_t op = { 0, };
511
512 if (lrm_state == NULL) {
513
514 lrm_state = lrm_state_find(fsa_our_uname);
515 }
516 CRM_ASSERT(lrm_state != NULL);
517
518 op.type = lrmd_event_exec_complete;
519 op.rsc_id = rsc_id;
520 op.op_type = op_type;
521 op.t_run = (unsigned int) time(NULL);
522 op.t_rcchange = op.t_run;
523 op.call_id = generate_callid();
524 lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
525 process_lrm_event(lrm_state, &op, NULL, NULL);
526 }
527
528 void
529 remote_lrm_op_callback(lrmd_event_data_t * op)
530 {
531 gboolean cmd_handled = FALSE;
532 lrm_state_t *lrm_state = NULL;
533 remote_ra_data_t *ra_data = NULL;
534 remote_ra_cmd_t *cmd = NULL;
535
536 crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
537 "(%d) status=%s (%d)",
538 (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
539 lrmd_event_type2str(op->type), op->remote_nodename,
540 services_ocf_exitcode_str(op->rc), op->rc,
541 pcmk_exec_status_str(op->op_status), op->op_status);
542
543 lrm_state = lrm_state_find(op->remote_nodename);
544 if (!lrm_state || !lrm_state->remote_ra_data) {
545 crm_debug("No state information found for remote connection event");
546 return;
547 }
548 ra_data = lrm_state->remote_ra_data;
549
550 if (op->type == lrmd_event_new_client) {
551
552
553 if (ra_data->migrate_status == expect_takeover) {
554
555 ra_data->migrate_status = takeover_complete;
556
557 } else {
558 crm_err("Disconnecting from Pacemaker Remote node %s due to "
559 "unexpected client takeover", op->remote_nodename);
560
561
562
563 lrm_state_disconnect_only(lrm_state);
564 }
565 return;
566 }
567
568
569 if (op->type == lrmd_event_exec_complete) {
570 if (ra_data->migrate_status == takeover_complete) {
571 crm_debug("ignoring event, this connection is taken over by another node");
572 } else {
573 lrm_op_callback(op);
574 }
575 return;
576 }
577
578 if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
579
580 if (ra_data->active == FALSE) {
581 crm_debug("Disconnection from Pacemaker Remote node %s complete",
582 lrm_state->node_name);
583
584 } else if (!remote_ra_is_in_maintenance(lrm_state)) {
585 crm_err("Lost connection to Pacemaker Remote node %s",
586 lrm_state->node_name);
587 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
588 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
589
590 } else {
591 crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
592 lrm_state->node_name);
593
594 handle_remote_ra_stop(lrm_state, NULL);
595 remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
596
597 synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
598 }
599 return;
600 }
601
602 if (!ra_data->cur_cmd) {
603 crm_debug("no event to match");
604 return;
605 }
606
607 cmd = ra_data->cur_cmd;
608
609
610
611 if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
612 "migrate_from", NULL)) {
613 if (op->connection_rc < 0) {
614 update_remaining_timeout(cmd);
615
616 if ((op->connection_rc == -ENOKEY)
617 || (op->connection_rc == -EKEYREJECTED)) {
618
619 pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
620 PCMK_EXEC_ERROR,
621 pcmk_strerror(op->connection_rc));
622
623 } else if (cmd->remaining_timeout > 3000) {
624 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
625 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
626 return;
627
628 } else {
629 crm_trace("can't reschedule start, remaining timeout too small %d",
630 cmd->remaining_timeout);
631 pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
632 PCMK_EXEC_TIMEOUT,
633 "%s without enough time to retry",
634 pcmk_strerror(op->connection_rc));
635 }
636
637 } else {
638 lrm_state_reset_tables(lrm_state, TRUE);
639 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
640 ra_data->active = TRUE;
641 }
642
643 crm_debug("Remote connection event matched %s action", cmd->action);
644 report_remote_ra_result(cmd);
645 cmd_handled = TRUE;
646
647 } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
648
649 if (cmd->monitor_timeout_id) {
650 g_source_remove(cmd->monitor_timeout_id);
651 cmd->monitor_timeout_id = 0;
652 }
653
654
655
656
657 if (!cmd->reported_success) {
658 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
659 report_remote_ra_result(cmd);
660 cmd->reported_success = 1;
661 }
662
663 crm_debug("Remote poke event matched %s action", cmd->action);
664
665
666 if (cmd->interval_ms && (cmd->cancel == FALSE)) {
667 ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
668 cmd->interval_id = g_timeout_add(cmd->interval_ms,
669 recurring_helper, cmd);
670 cmd = NULL;
671 }
672 cmd_handled = TRUE;
673
674 } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
675 if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
676 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
677 PCMK_EXEC_ERROR,
678 "Remote connection unexpectedly dropped "
679 "during monitor");
680 report_remote_ra_result(cmd);
681 crm_err("Remote connection to %s unexpectedly dropped during monitor",
682 lrm_state->node_name);
683 }
684 cmd_handled = TRUE;
685
686 } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
687
688 handle_remote_ra_stop(lrm_state, cmd);
689 cmd_handled = TRUE;
690
691 } else {
692 crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
693 }
694
695 if (cmd_handled) {
696 ra_data->cur_cmd = NULL;
697 if (ra_data->cmds) {
698 mainloop_set_trigger(ra_data->work);
699 }
700 free_cmd(cmd);
701 }
702 }
703
704 static void
705 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
706 {
707 remote_ra_data_t *ra_data = NULL;
708
709 CRM_ASSERT(lrm_state);
710 ra_data = lrm_state->remote_ra_data;
711
712 if (ra_data->migrate_status != takeover_complete) {
713
714 g_hash_table_remove_all(lrm_state->pending_ops);
715 } else {
716
717
718 lrm_state_reset_tables(lrm_state, FALSE);
719 }
720
721 ra_data->active = FALSE;
722 lrm_state_disconnect(lrm_state);
723
724 if (ra_data->cmds) {
725 g_list_free_full(ra_data->cmds, free_cmd);
726 }
727 if (ra_data->recurring_cmds) {
728 g_list_free_full(ra_data->recurring_cmds, free_cmd);
729 }
730 ra_data->cmds = NULL;
731 ra_data->recurring_cmds = NULL;
732 ra_data->cur_cmd = NULL;
733
734 if (cmd) {
735 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
736 report_remote_ra_result(cmd);
737 }
738 }
739
740
741 static int
742 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
743 {
744 const char *server = NULL;
745 lrmd_key_value_t *tmp = NULL;
746 int port = 0;
747 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
748 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
749 int rc = pcmk_rc_ok;
750
751 for (tmp = cmd->params; tmp; tmp = tmp->next) {
752 if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
753 XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
754 server = tmp->value;
755 } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
756 port = atoi(tmp->value);
757 } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
758 ra_data->controlling_guest = TRUE;
759 }
760 }
761
762 rc = controld_connect_remote_executor(lrm_state, server, port,
763 timeout_used);
764 if (rc != pcmk_rc_ok) {
765 pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
766 PCMK_EXEC_ERROR,
767 "Could not connect to Pacemaker Remote node %s: %s",
768 lrm_state->node_name, pcmk_rc_str(rc));
769 }
770 return rc;
771 }
772
773 static gboolean
774 handle_remote_ra_exec(gpointer user_data)
775 {
776 int rc = 0;
777 lrm_state_t *lrm_state = user_data;
778 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
779 remote_ra_cmd_t *cmd;
780 GList *first = NULL;
781
782 if (ra_data->cur_cmd) {
783
784 return TRUE;
785 }
786
787 while (ra_data->cmds) {
788 first = ra_data->cmds;
789 cmd = first->data;
790 if (cmd->delay_id) {
791
792 return TRUE;
793 }
794
795 ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
796 g_list_free_1(first);
797
798 if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
799 ra_data->migrate_status = 0;
800 if (handle_remote_ra_start(lrm_state, cmd,
801 cmd->timeout) == pcmk_rc_ok) {
802
803 crm_debug("Initiated async remote connection, %s action will complete after connect event",
804 cmd->action);
805 ra_data->cur_cmd = cmd;
806 return TRUE;
807 }
808 report_remote_ra_result(cmd);
809
810 } else if (!strcmp(cmd->action, "monitor")) {
811
812 if (lrm_state_is_connected(lrm_state) == TRUE) {
813 rc = lrm_state_poke_connection(lrm_state);
814 if (rc < 0) {
815 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
816 PCMK_EXEC_ERROR, pcmk_strerror(rc));
817 }
818 } else {
819 rc = -1;
820 pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
821 PCMK_EXEC_DONE, "Remote connection inactive");
822 }
823
824 if (rc == 0) {
825 crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
826 cmd->rsc_id);
827 ra_data->cur_cmd = cmd;
828 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
829 return TRUE;
830 }
831 report_remote_ra_result(cmd);
832
833 } else if (!strcmp(cmd->action, "stop")) {
834
835 if (ra_data->migrate_status == expect_takeover) {
836
837
838
839
840
841
842 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
843 ra_data->cur_cmd = cmd;
844 return TRUE;
845 }
846
847 handle_remote_ra_stop(lrm_state, cmd);
848
849 } else if (!strcmp(cmd->action, "migrate_to")) {
850 ra_data->migrate_status = expect_takeover;
851 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
852 report_remote_ra_result(cmd);
853 } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
854 CRMD_ACTION_RELOAD_AGENT, NULL)) {
855
856
857
858
859
860
861
862
863
864 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
865 report_remote_ra_result(cmd);
866 }
867
868 free_cmd(cmd);
869 }
870
871 return TRUE;
872 }
873
874 static void
875 remote_ra_data_init(lrm_state_t * lrm_state)
876 {
877 remote_ra_data_t *ra_data = NULL;
878
879 if (lrm_state->remote_ra_data) {
880 return;
881 }
882
883 ra_data = calloc(1, sizeof(remote_ra_data_t));
884 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
885 lrm_state->remote_ra_data = ra_data;
886 }
887
888 void
889 remote_ra_cleanup(lrm_state_t * lrm_state)
890 {
891 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
892
893 if (!ra_data) {
894 return;
895 }
896
897 if (ra_data->cmds) {
898 g_list_free_full(ra_data->cmds, free_cmd);
899 }
900
901 if (ra_data->recurring_cmds) {
902 g_list_free_full(ra_data->recurring_cmds, free_cmd);
903 }
904 mainloop_destroy_trigger(ra_data->work);
905 free(ra_data);
906 lrm_state->remote_ra_data = NULL;
907 }
908
909 gboolean
910 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
911 {
912 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
913 return TRUE;
914 }
915 if (id && lrm_state_find(id) && !pcmk__str_eq(id, fsa_our_uname, pcmk__str_casei)) {
916 return TRUE;
917 }
918
919 return FALSE;
920 }
921
922 lrmd_rsc_info_t *
923 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
924 {
925 lrmd_rsc_info_t *info = NULL;
926
927 if ((lrm_state_find(rsc_id))) {
928 info = calloc(1, sizeof(lrmd_rsc_info_t));
929
930 info->id = strdup(rsc_id);
931 info->type = strdup(REMOTE_LRMD_RA);
932 info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
933 info->provider = strdup("pacemaker");
934 }
935
936 return info;
937 }
938
939 static gboolean
940 is_remote_ra_supported_action(const char *action)
941 {
942 return pcmk__str_any_of(action,
943 CRMD_ACTION_START,
944 CRMD_ACTION_STOP,
945 CRMD_ACTION_STATUS,
946 CRMD_ACTION_MIGRATE,
947 CRMD_ACTION_MIGRATED,
948 CRMD_ACTION_RELOAD_AGENT,
949 CRMD_ACTION_RELOAD,
950 NULL);
951 }
952
953 static GList *
954 fail_all_monitor_cmds(GList * list)
955 {
956 GList *rm_list = NULL;
957 remote_ra_cmd_t *cmd = NULL;
958 GList *gIter = NULL;
959
960 for (gIter = list; gIter != NULL; gIter = gIter->next) {
961 cmd = gIter->data;
962 if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
963 rm_list = g_list_append(rm_list, cmd);
964 }
965 }
966
967 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
968 cmd = gIter->data;
969
970 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
971 PCMK_EXEC_ERROR, "Lost connection to remote executor");
972 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
973 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
974 report_remote_ra_result(cmd);
975
976 list = g_list_remove(list, cmd);
977 free_cmd(cmd);
978 }
979
980
981 g_list_free(rm_list);
982 return list;
983 }
984
985 static GList *
986 remove_cmd(GList * list, const char *action, guint interval_ms)
987 {
988 remote_ra_cmd_t *cmd = NULL;
989 GList *gIter = NULL;
990
991 for (gIter = list; gIter != NULL; gIter = gIter->next) {
992 cmd = gIter->data;
993 if ((cmd->interval_ms == interval_ms)
994 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
995 break;
996 }
997 cmd = NULL;
998 }
999 if (cmd) {
1000 list = g_list_remove(list, cmd);
1001 free_cmd(cmd);
1002 }
1003 return list;
1004 }
1005
1006 int
1007 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
1008 const char *action, guint interval_ms)
1009 {
1010 lrm_state_t *connection_rsc = NULL;
1011 remote_ra_data_t *ra_data = NULL;
1012
1013 connection_rsc = lrm_state_find(rsc_id);
1014 if (!connection_rsc || !connection_rsc->remote_ra_data) {
1015 return -EINVAL;
1016 }
1017
1018 ra_data = connection_rsc->remote_ra_data;
1019 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1020 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1021 interval_ms);
1022 if (ra_data->cur_cmd &&
1023 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1024 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1025
1026 ra_data->cur_cmd->cancel = TRUE;
1027 }
1028
1029 return 0;
1030 }
1031
1032 static remote_ra_cmd_t *
1033 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1034 const char *userdata)
1035 {
1036 GList *gIter = NULL;
1037 remote_ra_cmd_t *cmd = NULL;
1038
1039
1040
1041
1042
1043
1044
1045 if (interval_ms == 0) {
1046 return NULL;
1047 }
1048
1049 if (ra_data->cur_cmd &&
1050 ra_data->cur_cmd->cancel == FALSE &&
1051 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1052 pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
1053
1054 cmd = ra_data->cur_cmd;
1055 goto handle_dup;
1056 }
1057
1058 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1059 cmd = gIter->data;
1060 if ((cmd->interval_ms == interval_ms)
1061 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1062 goto handle_dup;
1063 }
1064 }
1065
1066 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1067 cmd = gIter->data;
1068 if ((cmd->interval_ms == interval_ms)
1069 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1070 goto handle_dup;
1071 }
1072 }
1073
1074 return NULL;
1075
1076 handle_dup:
1077
1078 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1079 cmd->rsc_id, "monitor", interval_ms);
1080
1081
1082 if (userdata) {
1083 free(cmd->userdata);
1084 cmd->userdata = strdup(userdata);
1085 }
1086
1087
1088 if (cmd->reported_success) {
1089 cmd->start_time = time(NULL);
1090 cmd->call_id = generate_callid();
1091 cmd->reported_success = 0;
1092 }
1093
1094
1095
1096
1097 if (cmd->interval_id) {
1098 g_source_remove(cmd->interval_id);
1099 cmd->interval_id = 0;
1100 recurring_helper(cmd);
1101 }
1102
1103 return cmd;
1104 }
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124 int
1125 controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
1126 const char *action, const char *userdata,
1127 guint interval_ms, int timeout_ms,
1128 int start_delay_ms, lrmd_key_value_t *params,
1129 int *call_id)
1130 {
1131 lrm_state_t *connection_rsc = NULL;
1132 remote_ra_cmd_t *cmd = NULL;
1133 remote_ra_data_t *ra_data = NULL;
1134
1135 *call_id = 0;
1136
1137 CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
1138 && (userdata != NULL) && (call_id != NULL),
1139 lrmd_key_value_freeall(params); return EINVAL);
1140
1141 if (!is_remote_ra_supported_action(action)) {
1142 lrmd_key_value_freeall(params);
1143 return EOPNOTSUPP;
1144 }
1145
1146 connection_rsc = lrm_state_find(rsc_id);
1147 if (connection_rsc == NULL) {
1148 lrmd_key_value_freeall(params);
1149 return ENOTCONN;
1150 }
1151
1152 remote_ra_data_init(connection_rsc);
1153 ra_data = connection_rsc->remote_ra_data;
1154
1155 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1156 if (cmd) {
1157 *call_id = cmd->call_id;
1158 lrmd_key_value_freeall(params);
1159 return pcmk_rc_ok;
1160 }
1161
1162 cmd = calloc(1, sizeof(remote_ra_cmd_t));
1163 if (cmd == NULL) {
1164 lrmd_key_value_freeall(params);
1165 return ENOMEM;
1166 }
1167
1168 cmd->owner = strdup(lrm_state->node_name);
1169 cmd->rsc_id = strdup(rsc_id);
1170 cmd->action = strdup(action);
1171 cmd->userdata = strdup(userdata);
1172 if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
1173 || (cmd->userdata == NULL)) {
1174 free_cmd(cmd);
1175 lrmd_key_value_freeall(params);
1176 return ENOMEM;
1177 }
1178
1179 cmd->interval_ms = interval_ms;
1180 cmd->timeout = timeout_ms;
1181 cmd->start_delay = start_delay_ms;
1182 cmd->params = params;
1183 cmd->start_time = time(NULL);
1184
1185 cmd->call_id = generate_callid();
1186
1187 if (cmd->start_delay) {
1188 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1189 }
1190
1191 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1192 mainloop_set_trigger(ra_data->work);
1193
1194 *call_id = cmd->call_id;
1195 return pcmk_rc_ok;
1196 }
1197
1198
1199
1200
1201
1202
1203
1204 void
1205 remote_ra_fail(const char *node_name)
1206 {
1207 lrm_state_t *lrm_state = lrm_state_find(node_name);
1208
1209 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1210 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1211
1212 crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
1213 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1214 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1215 }
1216 }
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229 #define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1230 "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
1231 "/" XML_CIB_TAG_NODE
1232
1233
1234
1235
1236
1237
1238
1239 void
1240 remote_ra_process_pseudo(xmlNode *xml)
1241 {
1242 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1243
1244 if (numXpathResults(search) == 1) {
1245 xmlNode *result = getXpathResult(search, 0);
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261 if (result) {
1262 const char *remote = ID(result);
1263
1264 if (remote) {
1265 remote_node_down(remote, DOWN_ERASE_LRM);
1266 }
1267 }
1268 }
1269 freeXpathObject(search);
1270 }
1271
1272 static void
1273 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1274 {
1275 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1276 xmlNode *update, *state;
1277 int call_opt, call_id = 0;
1278 crm_node_t *node;
1279
1280 call_opt = crmd_cib_smart_opt();
1281 node = crm_remote_peer_get(lrm_state->node_name);
1282 CRM_CHECK(node != NULL, return);
1283 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
1284 state = create_node_state_update(node, node_update_none, update,
1285 __func__);
1286 crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
1287 fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
1288 if (call_id < 0) {
1289 crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
1290 } else {
1291
1292 ra_data->is_maintenance = maintenance;
1293 }
1294 free_xml(update);
1295 }
1296
1297 #define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
1298 "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
1299 XML_GRAPH_TAG_MAINTENANCE
1300
1301
1302
1303
1304
1305
1306
1307 void
1308 remote_ra_process_maintenance_nodes(xmlNode *xml)
1309 {
1310 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1311
1312 if (numXpathResults(search) == 1) {
1313 xmlNode *node;
1314 int cnt = 0, cnt_remote = 0;
1315
1316 for (node =
1317 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1318 node != NULL; node = pcmk__xml_next(node)) {
1319 lrm_state_t *lrm_state = lrm_state_find(ID(node));
1320
1321 cnt++;
1322 if (lrm_state && lrm_state->remote_ra_data &&
1323 ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
1324 int is_maint;
1325
1326 cnt_remote++;
1327 pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
1328 &is_maint, 0);
1329 remote_ra_maintenance(lrm_state, is_maint);
1330 }
1331 }
1332 crm_trace("Action holds %d nodes (%d remotes found) "
1333 "adjusting maintenance-mode", cnt, cnt_remote);
1334 }
1335 freeXpathObject(search);
1336 }
1337
1338 gboolean
1339 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1340 {
1341 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1342
1343 return ra_data->is_maintenance;
1344 }
1345
1346 gboolean
1347 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1348 {
1349 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1350
1351 return ra_data->controlling_guest;
1352 }