This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- should_purge_attributes
- section_to_delete
- purge_remote_node_attrs
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- controld_execute_remote_agent
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>                 // INT_MAX

#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
20
// Agent type name that identifies (and is reported for) a remote connection
#define REMOTE_LRMD_RA "remote"

// Cap (in milliseconds) on the timeout handed to a single connection attempt
#define MAX_START_TIMEOUT_MS 10000
25
/* Set bits from enum remote_cmd_status in a command's status word.
 *
 * The flag argument is stringified (#flags_to_set) and passed to
 * pcmk__set_flags_as() together with the calling function and line.
 * Note that (cmd) is evaluated more than once.
 */
#define cmd_set_flags(cmd, flags_to_set) do { \
        (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
                                           "Remote command", (cmd)->rsc_id, (cmd)->status, \
                                           (flags_to_set), #flags_to_set); \
    } while (0)

/* Clear bits from enum remote_cmd_status in a command's status word.
 *
 * Counterpart of cmd_set_flags(); (cmd) is evaluated more than once.
 */
#define cmd_clear_flags(cmd, flags_to_clear) do { \
        (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
                                             "Remote command", (cmd)->rsc_id, (cmd)->status, \
                                             (flags_to_clear), #flags_to_clear); \
    } while (0)
37
// Bit flags stored in remote_ra_cmd_t:status
enum remote_cmd_status {
    // A successful result has already been reported for this command
    cmd_reported_success    = (1 << 0),

    // Cancellation was requested while the command was the one in progress
    cmd_cancel              = (1 << 1),
};
42
// One queued, running, or recurring action on a remote connection resource
typedef struct remote_ra_cmd_s {
    // Name of the executor state that requested the action (heap copy)
    char *owner;

    // ID of the connection resource; also used to look up its executor state
    char *rsc_id;

    // Action name (start, stop, monitor, ...), heap copy
    char *action;

    // Opaque caller data handed back when the result is reported (heap copy)
    char *userdata;

    // Delay before the action may run, as passed to g_timeout_add()
    int start_delay;

    // GLib source ID of the start-delay timer (0 when none is pending)
    int delay_id;

    // Total action timeout in milliseconds
    int timeout;
    // Milliseconds left, as computed by update_remaining_timeout()
    int remaining_timeout;

    // Recurring interval in milliseconds (0 for a one-shot action)
    guint interval_ms;

    // GLib source ID of the timer that re-queues a recurring action
    int interval_id;
    // GLib source ID of the timer waiting for a poke response
    int monitor_timeout_id;
    // GLib source ID of the timer waiting for a takeover during stop
    int takeover_timeout_id;

    // Action parameters; owned by the command and freed with it
    lrmd_key_value_t *params;
    // Outcome to report
    pcmk__action_result_t result;
    // Call ID assigned by generate_callid()
    int call_id;
    // Time the command was created (or last restarted as a merged duplicate)
    time_t start_time;
    // Group of enum remote_cmd_status flags
    uint32_t status;
} remote_ra_cmd_t;
72
/* Set bits from enum remote_status on a connection's remote_ra_data.
 *
 * The macro declares block-local variables named "lrm" and "ra", so the
 * lrm_state argument must not itself be an expression using those names.
 * lrm_state->remote_ra_data is dereferenced without a NULL check.
 */
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
        lrm_state_t *lrm = (lrm_state); \
        remote_ra_data_t *ra = lrm->remote_ra_data; \
        ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
                                        lrm->node_name, ra->status, \
                                        (flags_to_set), #flags_to_set); \
    } while (0)

/* Clear bits from enum remote_status on a connection's remote_ra_data.
 *
 * Same caveats as lrm_remote_set_flags().
 */
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
        lrm_state_t *lrm = (lrm_state); \
        remote_ra_data_t *ra = lrm->remote_ra_data; \
        ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
                                          lrm->node_name, ra->status, \
                                          (flags_to_clear), #flags_to_clear); \
    } while (0)
88
// Bit flags stored in remote_ra_data_t:status
enum remote_status {
    // Set by a migrate_to action: another client is expected to connect
    expect_takeover     = (1 << 0),

    // Set when a new client connected while expect_takeover was set
    takeover_complete   = (1 << 1),

    // Set after a successful connect event, cleared when the connection stops
    remote_active       = (1 << 2),

    // NOTE(review): presumably "node is in maintenance mode"; the code that
    // sets and reads this flag is outside this part of the file
    remote_in_maint     = (1 << 3),

    // Set at start when the parameters include the container meta-attribute
    controlling_guest   = (1 << 4),
};
105
// Per-connection bookkeeping attached to lrm_state_t:remote_ra_data
typedef struct remote_ra_data_s {
    // Main-loop trigger that runs handle_remote_ra_exec()
    crm_trigger_t *work;
    // Command awaiting an asynchronous event, if any
    remote_ra_cmd_t *cur_cmd;
    // Commands waiting to be executed, in order
    GList *cmds;
    // Recurring commands waiting for their interval timer to fire
    GList *recurring_cmds;
    // Group of enum remote_status flags
    uint32_t status;
} remote_ra_data_t;
113
// Forward declarations for helpers that are used before they are defined
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
117
118 static void
119 free_cmd(gpointer user_data)
120 {
121 remote_ra_cmd_t *cmd = user_data;
122
123 if (!cmd) {
124 return;
125 }
126 if (cmd->delay_id) {
127 g_source_remove(cmd->delay_id);
128 }
129 if (cmd->interval_id) {
130 g_source_remove(cmd->interval_id);
131 }
132 if (cmd->monitor_timeout_id) {
133 g_source_remove(cmd->monitor_timeout_id);
134 }
135 if (cmd->takeover_timeout_id) {
136 g_source_remove(cmd->takeover_timeout_id);
137 }
138 free(cmd->owner);
139 free(cmd->rsc_id);
140 free(cmd->action);
141 free(cmd->userdata);
142 pcmk__reset_result(&(cmd->result));
143 lrmd_key_value_freeall(cmd->params);
144 free(cmd);
145 }
146
/*!
 * \internal
 * \brief Generate the next call ID for a remote connection action
 *
 * IDs start at 1 and increase by one per call. After INT_MAX has been handed
 * out, numbering restarts at 1, so the result is always positive.
 *
 * \return Positive call ID
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    /* Wrap explicitly instead of incrementing first and testing for a
     * non-positive value afterward: incrementing a signed int past INT_MAX
     * is undefined behavior.
     */
    if (remote_ra_callid >= INT_MAX) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
159
160 static gboolean
161 recurring_helper(gpointer data)
162 {
163 remote_ra_cmd_t *cmd = data;
164 lrm_state_t *connection_rsc = NULL;
165
166 cmd->interval_id = 0;
167 connection_rsc = lrm_state_find(cmd->rsc_id);
168 if (connection_rsc && connection_rsc->remote_ra_data) {
169 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
170
171 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
172
173 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
174 mainloop_set_trigger(ra_data->work);
175 }
176 return FALSE;
177 }
178
179 static gboolean
180 start_delay_helper(gpointer data)
181 {
182 remote_ra_cmd_t *cmd = data;
183 lrm_state_t *connection_rsc = NULL;
184
185 cmd->delay_id = 0;
186 connection_rsc = lrm_state_find(cmd->rsc_id);
187 if (connection_rsc && connection_rsc->remote_ra_data) {
188 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
189
190 mainloop_set_trigger(ra_data->work);
191 }
192 return FALSE;
193 }
194
195 static bool
196 should_purge_attributes(crm_node_t *node)
197 {
198 bool purge = true;
199 crm_node_t *conn_node = NULL;
200 lrm_state_t *connection_rsc = NULL;
201
202 if (!node->conn_host) {
203 return purge;
204 }
205
206
207
208
209 conn_node = pcmk__get_node(0, node->conn_host, NULL,
210 pcmk__node_search_cluster_member);
211 if (conn_node == NULL) {
212 return purge;
213 }
214
215
216
217
218
219 connection_rsc = lrm_state_find(node->uname);
220
221 if (connection_rsc != NULL) {
222 lrmd_t *lrm = connection_rsc->conn;
223 time_t uptime = lrmd__uptime(lrm);
224 time_t now = time(NULL);
225
226
227
228
229
230 if (uptime > 0 &&
231 conn_node->peer_lost > 0 &&
232 uptime + 20 >= now - conn_node->peer_lost) {
233 purge = false;
234 }
235 }
236
237 return purge;
238 }
239
240 static enum controld_section_e
241 section_to_delete(bool purge)
242 {
243 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
244 if (purge) {
245 return controld_section_all_unlocked;
246 } else {
247 return controld_section_lrm_unlocked;
248 }
249 } else {
250 if (purge) {
251 return controld_section_all;
252 } else {
253 return controld_section_lrm;
254 }
255 }
256 }
257
258 static void
259 purge_remote_node_attrs(int call_opt, crm_node_t *node)
260 {
261 bool purge = should_purge_attributes(node);
262 enum controld_section_e section = section_to_delete(purge);
263
264
265 if (purge) {
266 update_attrd_remote_node_removed(node->uname, NULL);
267 }
268
269 controld_delete_node_state(node->uname, section, call_opt);
270 }
271
272
273
274
275
276
277
278 static void
279 remote_node_up(const char *node_name)
280 {
281 int call_opt;
282 xmlNode *update, *state;
283 crm_node_t *node;
284 lrm_state_t *connection_rsc = NULL;
285
286 CRM_CHECK(node_name != NULL, return);
287 crm_info("Announcing Pacemaker Remote node %s", node_name);
288
289 call_opt = crmd_cib_smart_opt();
290
291
292
293
294
295
296
297 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
298
299
300 node = pcmk__cluster_lookup_remote_node(node_name);
301 CRM_CHECK(node != NULL, return);
302
303 purge_remote_node_attrs(call_opt, node);
304 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
305
306
307
308
309 connection_rsc = lrm_state_find(node->uname);
310
311 if (connection_rsc != NULL) {
312 lrmd_t *lrm = connection_rsc->conn;
313 const char *start_state = lrmd__node_start_state(lrm);
314
315 if (start_state) {
316 set_join_state(start_state, node->uname, node->uuid, true);
317 }
318 }
319
320
321
322
323
324
325
326 broadcast_remote_state_message(node_name, true);
327
328 update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
329 state = create_node_state_update(node, node_update_cluster, update,
330 __func__);
331
332
333
334
335
336 crm_xml_add(state, PCMK__XA_NODE_FENCED, "0");
337
338
339
340
341
342
343
344
345 controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
346 free_xml(update);
347 }
348
// Options for remote_node_down(): how much node state to delete
enum down_opts {
    // Delete only the attributes section (controld_section_attrs)
    DOWN_KEEP_LRM,

    // Delete all node state (controld_section_all)
    DOWN_ERASE_LRM
};
353
354
355
356
357
358
359
360
361 static void
362 remote_node_down(const char *node_name, const enum down_opts opts)
363 {
364 xmlNode *update;
365 int call_opt = crmd_cib_smart_opt();
366 crm_node_t *node;
367
368
369 update_attrd_remote_node_removed(node_name, NULL);
370
371
372
373
374
375
376 if (opts == DOWN_ERASE_LRM) {
377 controld_delete_node_state(node_name, controld_section_all, call_opt);
378 } else {
379 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
380 }
381
382
383 node = pcmk__cluster_lookup_remote_node(node_name);
384 CRM_CHECK(node != NULL, return);
385 pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
386
387
388 broadcast_remote_state_message(node_name, false);
389
390
391 update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
392 create_node_state_update(node, node_update_cluster, update, __func__);
393 controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
394 free_xml(update);
395 }
396
397
398
399
400
401
402
403 static void
404 check_remote_node_state(const remote_ra_cmd_t *cmd)
405 {
406
407 if (!pcmk__result_ok(&(cmd->result))) {
408 return;
409 }
410
411 if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
412 remote_node_up(cmd->rsc_id);
413
414 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
415 pcmk__str_casei)) {
416
417
418
419
420
421
422
423 crm_node_t *node = pcmk__cluster_lookup_remote_node(cmd->rsc_id);
424
425 CRM_CHECK(node != NULL, return);
426 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
427
428 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
429 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
430 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
431
432 if (ra_data) {
433 if (!pcmk_is_set(ra_data->status, takeover_complete)) {
434
435 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
436 } else if (AM_I_DC == FALSE) {
437
438
439
440
441 pcmk__cluster_forget_remote_node(cmd->rsc_id);
442 }
443 }
444 }
445
446
447
448
449
450
451
452
453
454
455 }
456
457 static void
458 report_remote_ra_result(remote_ra_cmd_t * cmd)
459 {
460 lrmd_event_data_t op = { 0, };
461
462 check_remote_node_state(cmd);
463
464 op.type = lrmd_event_exec_complete;
465 op.rsc_id = cmd->rsc_id;
466 op.op_type = cmd->action;
467 op.user_data = cmd->userdata;
468 op.timeout = cmd->timeout;
469 op.interval_ms = cmd->interval_ms;
470
471 op.t_run = (unsigned int) cmd->start_time;
472
473 op.t_rcchange = (unsigned int) cmd->start_time;
474
475 lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
476 cmd->result.exit_reason);
477
478 if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
479
480 op.t_rcchange = (unsigned int) time(NULL);
481
482
483
484
485
486
487
488
489
490 if (op.t_rcchange == op.t_run) {
491 op.t_rcchange++;
492 }
493 }
494
495 if (cmd->params) {
496 lrmd_key_value_t *tmp;
497
498 op.params = pcmk__strkey_table(free, free);
499 for (tmp = cmd->params; tmp; tmp = tmp->next) {
500 pcmk__insert_dup(op.params, tmp->key, tmp->value);
501 }
502
503 }
504 op.call_id = cmd->call_id;
505 op.remote_nodename = cmd->owner;
506
507 lrm_op_callback(&op);
508
509 if (op.params) {
510 g_hash_table_destroy(op.params);
511 }
512 lrmd__reset_result(&op);
513 }
514
515 static void
516 update_remaining_timeout(remote_ra_cmd_t * cmd)
517 {
518 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
519 }
520
521 static gboolean
522 retry_start_cmd_cb(gpointer data)
523 {
524 lrm_state_t *lrm_state = data;
525 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
526 remote_ra_cmd_t *cmd = NULL;
527 int rc = ETIME;
528
529 if (!ra_data || !ra_data->cur_cmd) {
530 return FALSE;
531 }
532 cmd = ra_data->cur_cmd;
533 if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
534 PCMK_ACTION_MIGRATE_FROM, NULL)) {
535 return FALSE;
536 }
537 update_remaining_timeout(cmd);
538
539 if (cmd->remaining_timeout > 0) {
540 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
541 } else {
542 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
543 PCMK_EXEC_TIMEOUT,
544 "Not enough time remains to retry remote connection");
545 }
546
547 if (rc != pcmk_rc_ok) {
548 report_remote_ra_result(cmd);
549
550 if (ra_data->cmds) {
551 mainloop_set_trigger(ra_data->work);
552 }
553 ra_data->cur_cmd = NULL;
554 free_cmd(cmd);
555 } else {
556
557 }
558
559 return FALSE;
560 }
561
562
563 static gboolean
564 connection_takeover_timeout_cb(gpointer data)
565 {
566 lrm_state_t *lrm_state = NULL;
567 remote_ra_cmd_t *cmd = data;
568
569 crm_info("takeover event timed out for node %s", cmd->rsc_id);
570 cmd->takeover_timeout_id = 0;
571
572 lrm_state = lrm_state_find(cmd->rsc_id);
573
574 handle_remote_ra_stop(lrm_state, cmd);
575 free_cmd(cmd);
576
577 return FALSE;
578 }
579
580 static gboolean
581 monitor_timeout_cb(gpointer data)
582 {
583 lrm_state_t *lrm_state = NULL;
584 remote_ra_cmd_t *cmd = data;
585
586 lrm_state = lrm_state_find(cmd->rsc_id);
587
588 crm_info("Timed out waiting for remote poke response from %s%s",
589 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
590 cmd->monitor_timeout_id = 0;
591 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
592 "Remote executor did not respond");
593
594 if (lrm_state && lrm_state->remote_ra_data) {
595 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
596
597 if (ra_data->cur_cmd == cmd) {
598 ra_data->cur_cmd = NULL;
599 }
600 if (ra_data->cmds) {
601 mainloop_set_trigger(ra_data->work);
602 }
603 }
604
605 report_remote_ra_result(cmd);
606 free_cmd(cmd);
607
608 if(lrm_state) {
609 lrm_state_disconnect(lrm_state);
610 }
611 return FALSE;
612 }
613
614 static void
615 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
616 {
617 lrmd_event_data_t op = { 0, };
618
619 if (lrm_state == NULL) {
620
621 lrm_state = lrm_state_find(controld_globals.our_nodename);
622 }
623 pcmk__assert(lrm_state != NULL);
624
625 op.type = lrmd_event_exec_complete;
626 op.rsc_id = rsc_id;
627 op.op_type = op_type;
628
629 op.t_run = (unsigned int) time(NULL);
630 op.t_rcchange = op.t_run;
631 op.call_id = generate_callid();
632 lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
633 process_lrm_event(lrm_state, &op, NULL, NULL);
634 }
635
/*!
 * \brief Handle an executor event received on a remote connection
 *
 * Events are handled in this order: new-client (takeover) events,
 * action-completion events, disconnects with no command in progress, and
 * finally events that are matched against the command in progress (connect
 * for start/migrate_from, poke for monitor, disconnect for monitor).
 *
 * \param[in,out] op  Event to handle
 */
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
    gboolean cmd_handled = FALSE;
    lrm_state_t *lrm_state = NULL;
    remote_ra_data_t *ra_data = NULL;
    remote_ra_cmd_t *cmd = NULL;

    crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
              "(%d) status=%s (%d)",
              (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
              lrmd_event_type2str(op->type), op->remote_nodename,
              services_ocf_exitcode_str(op->rc), op->rc,
              pcmk_exec_status_str(op->op_status), op->op_status);

    // Events for connections we have no bookkeeping for are ignored
    lrm_state = lrm_state_find(op->remote_nodename);
    if (!lrm_state || !lrm_state->remote_ra_data) {
        crm_debug("No state information found for remote connection event");
        return;
    }
    ra_data = lrm_state->remote_ra_data;

    // Another client connected to the remote executor
    if (op->type == lrmd_event_new_client) {

        if (pcmk_is_set(ra_data->status, expect_takeover)) {
            // Expected (a migrate_to was processed earlier): record completion
            lrm_remote_clear_flags(lrm_state, expect_takeover);
            lrm_remote_set_flags(lrm_state, takeover_complete);

        } else {
            crm_err("Disconnecting from Pacemaker Remote node %s due to "
                    "unexpected client takeover", op->remote_nodename);

            lrm_state_disconnect_only(lrm_state);
        }
        return;
    }

    // Results of actions run on the remote node go to the regular callback
    if (op->type == lrmd_event_exec_complete) {
        if (pcmk_is_set(ra_data->status, takeover_complete)) {
            crm_debug("ignoring event, this connection is taken over by another node");
        } else {
            lrm_op_callback(op);
        }
        return;
    }

    // Disconnect while no connection command is waiting for an event
    if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {

        if (!pcmk_is_set(ra_data->status, remote_active)) {
            // Connection was not marked active, so nothing was lost
            crm_debug("Disconnection from Pacemaker Remote node %s complete",
                      lrm_state->node_name);

        } else if (!remote_ra_is_in_maintenance(lrm_state)) {
            // Fail pending recurring monitors so the loss gets reported
            crm_err("Lost connection to Pacemaker Remote node %s",
                    lrm_state->node_name);
            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);

        } else {
            /* In maintenance: treat the disconnect as a clean stop, mark the
             * node down, and process a synthesized successful stop (against
             * the local node's executor state, since NULL is passed).
             */
            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
                       lrm_state->node_name);

            handle_remote_ra_stop(lrm_state, NULL);
            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);

            synthesize_lrmd_success(NULL, lrm_state->node_name,
                                    PCMK_ACTION_STOP);
        }
        return;
    }

    // Everything below matches the event against the command in progress
    if (!ra_data->cur_cmd) {
        crm_debug("no event to match");
        return;
    }

    cmd = ra_data->cur_cmd;

    // Connection attempt finished for a start or migrate_from
    if ((op->type == lrmd_event_connect)
        && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
                                PCMK_ACTION_MIGRATE_FROM, NULL)) {
        if (op->connection_rc < 0) {
            update_remaining_timeout(cmd);

            if ((op->connection_rc == -ENOKEY)
                || (op->connection_rc == -EKEYREJECTED)) {
                // Key problems are reported immediately rather than retried
                pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
                                 PCMK_EXEC_ERROR,
                                 pcmk_strerror(op->connection_rc));

            } else if (cmd->remaining_timeout > 3000) {
                /* Enough time left: retry in one second. The command stays
                 * current (and unreported) until the retry resolves.
                 */
                crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                return;

            } else {
                crm_trace("can't reschedule start, remaining timeout too small %d",
                          cmd->remaining_timeout);
                pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                                    PCMK_EXEC_TIMEOUT,
                                    "%s without enough time to retry",
                                    pcmk_strerror(op->connection_rc));
            }

        } else {
            // Connected: reset the state tables and mark the connection active
            lrm_state_reset_tables(lrm_state, TRUE);
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            lrm_remote_set_flags(lrm_state, remote_active);
        }

        crm_debug("Remote connection event matched %s action", cmd->action);
        report_remote_ra_result(cmd);
        cmd_handled = TRUE;

    // Poke response arrived for a monitor
    } else if ((op->type == lrmd_event_poke)
               && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
                               pcmk__str_casei)) {

        if (cmd->monitor_timeout_id) {
            g_source_remove(cmd->monitor_timeout_id);
            cmd->monitor_timeout_id = 0;
        }

        // Success is reported only once per command (see cmd_reported_success)
        if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            report_remote_ra_result(cmd);
            cmd_set_flags(cmd, cmd_reported_success);
        }

        crm_debug("Remote poke event matched %s action", cmd->action);

        /* A recurring monitor that was not cancelled is kept: it moves to the
         * recurring list with a new interval timer, and cmd is cleared so the
         * free_cmd() call at the end of this function does not free it.
         */
        if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
            ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
            cmd->interval_id = g_timeout_add(cmd->interval_ms,
                                             recurring_helper, cmd);
            cmd = NULL;
        }
        cmd_handled = TRUE;

    // Connection dropped while a monitor was waiting for its poke response
    } else if ((op->type == lrmd_event_disconnect)
               && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
                               pcmk__str_casei)) {
        if (pcmk_is_set(ra_data->status, remote_active) &&
            !pcmk_is_set(cmd->status, cmd_cancel)) {
            pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                             PCMK_EXEC_ERROR,
                             "Remote connection unexpectedly dropped "
                             "during monitor");
            report_remote_ra_result(cmd);
            crm_err("Remote connection to %s unexpectedly dropped during monitor",
                    lrm_state->node_name);
        }
        cmd_handled = TRUE;

    /* NOTE(review): this branch looks unreachable, because every
     * lrmd_event_new_client event already returned near the top of this
     * function -- confirm whether a pending stop should complete on takeover.
     */
    } else if ((op->type == lrmd_event_new_client)
               && pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
                               pcmk__str_casei)) {

        handle_remote_ra_stop(lrm_state, cmd);
        cmd_handled = TRUE;

    } else {
        crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
    }

    // A finished command is freed (no-op if cmd is NULL); wake up the queue
    if (cmd_handled) {
        ra_data->cur_cmd = NULL;
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
        free_cmd(cmd);
    }
}
821
822 static void
823 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
824 {
825 remote_ra_data_t *ra_data = NULL;
826
827 pcmk__assert(lrm_state != NULL);
828 ra_data = lrm_state->remote_ra_data;
829
830 if (!pcmk_is_set(ra_data->status, takeover_complete)) {
831
832 g_hash_table_remove_all(lrm_state->active_ops);
833 } else {
834
835
836 lrm_state_reset_tables(lrm_state, FALSE);
837 }
838
839 lrm_remote_clear_flags(lrm_state, remote_active);
840 lrm_state_disconnect(lrm_state);
841
842 if (ra_data->cmds) {
843 g_list_free_full(ra_data->cmds, free_cmd);
844 }
845 if (ra_data->recurring_cmds) {
846 g_list_free_full(ra_data->recurring_cmds, free_cmd);
847 }
848 ra_data->cmds = NULL;
849 ra_data->recurring_cmds = NULL;
850 ra_data->cur_cmd = NULL;
851
852 if (cmd) {
853 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
854 report_remote_ra_result(cmd);
855 }
856 }
857
858
859 static int
860 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
861 {
862 const char *server = NULL;
863 lrmd_key_value_t *tmp = NULL;
864 int port = 0;
865 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
866 int rc = pcmk_rc_ok;
867
868 for (tmp = cmd->params; tmp; tmp = tmp->next) {
869 if (pcmk__strcase_any_of(tmp->key,
870 PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER,
871 NULL)) {
872 server = tmp->value;
873
874 } else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT,
875 pcmk__str_none)) {
876 port = atoi(tmp->value);
877
878 } else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
879 pcmk__str_none)) {
880 lrm_remote_set_flags(lrm_state, controlling_guest);
881 }
882 }
883
884 rc = controld_connect_remote_executor(lrm_state, server, port,
885 timeout_used);
886 if (rc != pcmk_rc_ok) {
887 pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
888 PCMK_EXEC_ERROR,
889 "Could not connect to Pacemaker Remote node %s: %s",
890 lrm_state->node_name, pcmk_rc_str(rc));
891 }
892 return rc;
893 }
894
/*!
 * \internal
 * \brief Trigger callback that executes queued remote connection commands
 *
 * Commands are taken from the front of the queue one at a time. A command
 * that needs an asynchronous event to complete becomes the current command
 * and stops processing; any other command is reported (where applicable)
 * and freed before the next one is taken.
 *
 * \param[in,out] user_data  Executor state (lrm_state_t *) of the connection
 *
 * \return TRUE always
 */
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
    int rc = 0;
    lrm_state_t *lrm_state = user_data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *cmd;
    GList *first = NULL;

    if (ra_data->cur_cmd) {
        // Still waiting for the current command's event
        return TRUE;
    }

    while (ra_data->cmds) {
        first = ra_data->cmds;
        cmd = first->data;
        if (cmd->delay_id) {
            // First command's start delay has not elapsed; it blocks the queue
            return TRUE;
        }

        // Dequeue: from here on, the command must be kept as current or freed
        ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
        g_list_free_1(first);

        if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
                             PCMK_ACTION_MIGRATE_FROM, NULL)) {
            // A new connection starts with a clean slate regarding takeovers
            lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
            if (handle_remote_ra_start(lrm_state, cmd,
                                       cmd->timeout) == pcmk_rc_ok) {
                // Result will be reported when the connect event arrives
                crm_debug("Initiated async remote connection, %s action will complete after connect event",
                          cmd->action);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }
            // handle_remote_ra_start() already set the failure result
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {

            if (lrm_state_is_connected(lrm_state) == TRUE) {
                rc = lrm_state_poke_connection(lrm_state);
                if (rc < 0) {
                    pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                                     PCMK_EXEC_ERROR, pcmk_strerror(rc));
                }
            } else {
                // Not connected: report "not running" without poking
                rc = -1;
                pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
                                 PCMK_EXEC_DONE, "Remote connection inactive");
            }

            if (rc == 0) {
                // Poke sent; wait for the response, but no longer than the timeout
                crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
                          cmd->rsc_id);
                ra_data->cur_cmd = cmd;
                cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                return TRUE;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {

            if (pcmk_is_set(ra_data->status, expect_takeover)) {
                /* A takeover is expected (migrate_to was processed earlier):
                 * hold the stop for up to half its timeout;
                 * connection_takeover_timeout_cb() completes it if the timer
                 * fires.
                 */
                cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }

            // This also frees all remaining queued commands, ending the loop
            handle_remote_ra_stop(lrm_state, cmd);

        } else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
            // Nothing to do locally except to expect the takeover
            lrm_remote_clear_flags(lrm_state, takeover_complete);
            lrm_remote_set_flags(lrm_state, expect_takeover);
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            report_remote_ra_result(cmd);

        } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
                                    PCMK_ACTION_RELOAD_AGENT, NULL)) {
            // Reloads do nothing here and are simply reported as successful
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            report_remote_ra_result(cmd);
        }

        /* Any other action falls through without a report; such actions are
         * rejected by is_remote_ra_supported_action() before being queued.
         */
        free_cmd(cmd);
    }

    return TRUE;
}
998
999 static void
1000 remote_ra_data_init(lrm_state_t * lrm_state)
1001 {
1002 remote_ra_data_t *ra_data = NULL;
1003
1004 if (lrm_state->remote_ra_data) {
1005 return;
1006 }
1007
1008 ra_data = pcmk__assert_alloc(1, sizeof(remote_ra_data_t));
1009 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
1010 lrm_state->remote_ra_data = ra_data;
1011 }
1012
1013 void
1014 remote_ra_cleanup(lrm_state_t * lrm_state)
1015 {
1016 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1017
1018 if (!ra_data) {
1019 return;
1020 }
1021
1022 if (ra_data->cmds) {
1023 g_list_free_full(ra_data->cmds, free_cmd);
1024 }
1025
1026 if (ra_data->recurring_cmds) {
1027 g_list_free_full(ra_data->recurring_cmds, free_cmd);
1028 }
1029 mainloop_destroy_trigger(ra_data->work);
1030 free(ra_data);
1031 lrm_state->remote_ra_data = NULL;
1032 }
1033
1034 gboolean
1035 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
1036 {
1037 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
1038 return TRUE;
1039 }
1040 if ((id != NULL) && (lrm_state_find(id) != NULL)
1041 && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
1042 return TRUE;
1043 }
1044
1045 return FALSE;
1046 }
1047
1048 lrmd_rsc_info_t *
1049 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
1050 {
1051 lrmd_rsc_info_t *info = NULL;
1052
1053 if ((lrm_state_find(rsc_id))) {
1054 info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t));
1055
1056 info->id = pcmk__str_copy(rsc_id);
1057 info->type = pcmk__str_copy(REMOTE_LRMD_RA);
1058 info->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_OCF);
1059 info->provider = pcmk__str_copy("pacemaker");
1060 }
1061
1062 return info;
1063 }
1064
1065 static gboolean
1066 is_remote_ra_supported_action(const char *action)
1067 {
1068 return pcmk__str_any_of(action,
1069 PCMK_ACTION_START,
1070 PCMK_ACTION_STOP,
1071 PCMK_ACTION_MONITOR,
1072 PCMK_ACTION_MIGRATE_TO,
1073 PCMK_ACTION_MIGRATE_FROM,
1074 PCMK_ACTION_RELOAD_AGENT,
1075 PCMK_ACTION_RELOAD,
1076 NULL);
1077 }
1078
1079 static GList *
1080 fail_all_monitor_cmds(GList * list)
1081 {
1082 GList *rm_list = NULL;
1083 remote_ra_cmd_t *cmd = NULL;
1084 GList *gIter = NULL;
1085
1086 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1087 cmd = gIter->data;
1088 if ((cmd->interval_ms > 0)
1089 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1090 pcmk__str_casei)) {
1091 rm_list = g_list_append(rm_list, cmd);
1092 }
1093 }
1094
1095 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
1096 cmd = gIter->data;
1097
1098 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1099 PCMK_EXEC_ERROR, "Lost connection to remote executor");
1100 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
1101 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
1102 report_remote_ra_result(cmd);
1103
1104 list = g_list_remove(list, cmd);
1105 free_cmd(cmd);
1106 }
1107
1108
1109 g_list_free(rm_list);
1110 return list;
1111 }
1112
1113 static GList *
1114 remove_cmd(GList * list, const char *action, guint interval_ms)
1115 {
1116 remote_ra_cmd_t *cmd = NULL;
1117 GList *gIter = NULL;
1118
1119 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1120 cmd = gIter->data;
1121 if ((cmd->interval_ms == interval_ms)
1122 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
1123 break;
1124 }
1125 cmd = NULL;
1126 }
1127 if (cmd) {
1128 list = g_list_remove(list, cmd);
1129 free_cmd(cmd);
1130 }
1131 return list;
1132 }
1133
1134 int
1135 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
1136 const char *action, guint interval_ms)
1137 {
1138 lrm_state_t *connection_rsc = NULL;
1139 remote_ra_data_t *ra_data = NULL;
1140
1141 connection_rsc = lrm_state_find(rsc_id);
1142 if (!connection_rsc || !connection_rsc->remote_ra_data) {
1143 return -EINVAL;
1144 }
1145
1146 ra_data = connection_rsc->remote_ra_data;
1147 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1148 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1149 interval_ms);
1150 if (ra_data->cur_cmd &&
1151 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1152 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1153
1154 cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
1155 }
1156
1157 return 0;
1158 }
1159
1160 static remote_ra_cmd_t *
1161 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1162 const char *userdata)
1163 {
1164 GList *gIter = NULL;
1165 remote_ra_cmd_t *cmd = NULL;
1166
1167
1168
1169
1170
1171
1172
1173 if (interval_ms == 0) {
1174 return NULL;
1175 }
1176
1177 if (ra_data->cur_cmd &&
1178 !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
1179 (ra_data->cur_cmd->interval_ms == interval_ms)
1180 && pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR,
1181 pcmk__str_casei)) {
1182
1183 cmd = ra_data->cur_cmd;
1184 goto handle_dup;
1185 }
1186
1187 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1188 cmd = gIter->data;
1189 if ((cmd->interval_ms == interval_ms)
1190 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1191 pcmk__str_casei)) {
1192 goto handle_dup;
1193 }
1194 }
1195
1196 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1197 cmd = gIter->data;
1198 if ((cmd->interval_ms == interval_ms)
1199 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1200 pcmk__str_casei)) {
1201 goto handle_dup;
1202 }
1203 }
1204
1205 return NULL;
1206
1207 handle_dup:
1208
1209 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1210 cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms);
1211
1212
1213 if (userdata) {
1214 free(cmd->userdata);
1215 cmd->userdata = pcmk__str_copy(userdata);
1216 }
1217
1218
1219 if (pcmk_is_set(cmd->status, cmd_reported_success)) {
1220 cmd->start_time = time(NULL);
1221 cmd->call_id = generate_callid();
1222 cmd_clear_flags(cmd, cmd_reported_success);
1223 }
1224
1225
1226
1227
1228 if (cmd->interval_id) {
1229 g_source_remove(cmd->interval_id);
1230 cmd->interval_id = 0;
1231 recurring_helper(cmd);
1232 }
1233
1234 return cmd;
1235 }
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255 int
1256 controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
1257 const char *action, const char *userdata,
1258 guint interval_ms, int timeout_ms,
1259 int start_delay_ms, lrmd_key_value_t *params,
1260 int *call_id)
1261 {
1262 lrm_state_t *connection_rsc = NULL;
1263 remote_ra_cmd_t *cmd = NULL;
1264 remote_ra_data_t *ra_data = NULL;
1265
1266 *call_id = 0;
1267
1268 CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
1269 && (userdata != NULL) && (call_id != NULL),
1270 lrmd_key_value_freeall(params); return EINVAL);
1271
1272 if (!is_remote_ra_supported_action(action)) {
1273 lrmd_key_value_freeall(params);
1274 return EOPNOTSUPP;
1275 }
1276
1277 connection_rsc = lrm_state_find(rsc_id);
1278 if (connection_rsc == NULL) {
1279 lrmd_key_value_freeall(params);
1280 return ENOTCONN;
1281 }
1282
1283 remote_ra_data_init(connection_rsc);
1284 ra_data = connection_rsc->remote_ra_data;
1285
1286 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1287 if (cmd) {
1288 *call_id = cmd->call_id;
1289 lrmd_key_value_freeall(params);
1290 return pcmk_rc_ok;
1291 }
1292
1293 cmd = pcmk__assert_alloc(1, sizeof(remote_ra_cmd_t));
1294
1295 cmd->owner = pcmk__str_copy(lrm_state->node_name);
1296 cmd->rsc_id = pcmk__str_copy(rsc_id);
1297 cmd->action = pcmk__str_copy(action);
1298 cmd->userdata = pcmk__str_copy(userdata);
1299 cmd->interval_ms = interval_ms;
1300 cmd->timeout = timeout_ms;
1301 cmd->start_delay = start_delay_ms;
1302 cmd->params = params;
1303 cmd->start_time = time(NULL);
1304
1305 cmd->call_id = generate_callid();
1306
1307 if (cmd->start_delay) {
1308 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1309 }
1310
1311 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1312 mainloop_set_trigger(ra_data->work);
1313
1314 *call_id = cmd->call_id;
1315 return pcmk_rc_ok;
1316 }
1317
1318
1319
1320
1321
1322
1323
1324 void
1325 remote_ra_fail(const char *node_name)
1326 {
1327 lrm_state_t *lrm_state = lrm_state_find(node_name);
1328
1329 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1330 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1331
1332 crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
1333 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1334 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1335 }
1336 }
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
/* XPath used by remote_ra_process_pseudo(): selects PCMK_XE_NODE elements
 * under the PCMK__XE_DOWNED child of a PCMK__XE_PSEUDO_EVENT whose
 * PCMK_XA_OPERATION attribute equals 'stonith'
 */
#define XPATH_PSEUDO_FENCE "/" PCMK__XE_PSEUDO_EVENT \
        "[@" PCMK_XA_OPERATION "='stonith']/" PCMK__XE_DOWNED "/" PCMK_XE_NODE
1351
1352
1353
1354
1355
1356
1357
1358 void
1359 remote_ra_process_pseudo(xmlNode *xml)
1360 {
1361 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1362
1363 if (numXpathResults(search) == 1) {
1364 xmlNode *result = getXpathResult(search, 0);
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380 if (result) {
1381 const char *remote = pcmk__xe_id(result);
1382
1383 if (remote) {
1384 remote_node_down(remote, DOWN_ERASE_LRM);
1385 }
1386 }
1387 }
1388 freeXpathObject(search);
1389 }
1390
1391 static void
1392 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1393 {
1394 xmlNode *update, *state;
1395 int call_opt;
1396 crm_node_t *node;
1397
1398 call_opt = crmd_cib_smart_opt();
1399 node = pcmk__cluster_lookup_remote_node(lrm_state->node_name);
1400 CRM_CHECK(node != NULL, return);
1401 update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
1402 state = create_node_state_update(node, node_update_none, update,
1403 __func__);
1404 crm_xml_add(state, PCMK__XA_NODE_IN_MAINTENANCE, (maintenance? "1" : "0"));
1405 if (controld_update_cib(PCMK_XE_STATUS, update, call_opt,
1406 NULL) == pcmk_rc_ok) {
1407
1408 if (maintenance) {
1409 lrm_remote_set_flags(lrm_state, remote_in_maint);
1410 } else {
1411 lrm_remote_clear_flags(lrm_state, remote_in_maint);
1412 }
1413 }
1414 free_xml(update);
1415 }
1416
/* XPath used by remote_ra_process_maintenance_nodes(): selects the
 * PCMK__XE_MAINTENANCE child of any PCMK__XE_PSEUDO_EVENT whose
 * PCMK_XA_OPERATION attribute equals PCMK_ACTION_MAINTENANCE_NODES
 */
#define XPATH_PSEUDO_MAINTENANCE "//" PCMK__XE_PSEUDO_EVENT \
        "[@" PCMK_XA_OPERATION "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
        PCMK__XE_MAINTENANCE
1420
1421
1422
1423
1424
1425
1426
1427 void
1428 remote_ra_process_maintenance_nodes(xmlNode *xml)
1429 {
1430 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1431
1432 if (numXpathResults(search) == 1) {
1433 xmlNode *node;
1434 int cnt = 0, cnt_remote = 0;
1435
1436 for (node = pcmk__xe_first_child(getXpathResult(search, 0),
1437 PCMK_XE_NODE, NULL, NULL);
1438 node != NULL; node = pcmk__xe_next_same(node)) {
1439
1440 lrm_state_t *lrm_state = lrm_state_find(pcmk__xe_id(node));
1441
1442 cnt++;
1443 if (lrm_state && lrm_state->remote_ra_data &&
1444 pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
1445
1446 const char *in_maint_s = NULL;
1447 int in_maint;
1448
1449 cnt_remote++;
1450 in_maint_s = crm_element_value(node,
1451 PCMK__XA_NODE_IN_MAINTENANCE);
1452 pcmk__scan_min_int(in_maint_s, &in_maint, 0);
1453 remote_ra_maintenance(lrm_state, in_maint);
1454 }
1455 }
1456 crm_trace("Action holds %d nodes (%d remotes found) adjusting "
1457 PCMK_OPT_MAINTENANCE_MODE,
1458 cnt, cnt_remote);
1459 }
1460 freeXpathObject(search);
1461 }
1462
1463 gboolean
1464 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1465 {
1466 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1467 return pcmk_is_set(ra_data->status, remote_in_maint);
1468 }
1469
1470 gboolean
1471 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1472 {
1473 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1474 return pcmk_is_set(ra_data->status, controlling_guest);
1475 }