This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- should_purge_attributes
- section_to_delete
- purge_remote_node_attrs
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- controld_execute_remote_agent
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
20
/* Agent type name of the remote connection resource; compared against the
 * agent in is_remote_lrmd_ra() and reported as the resource type in
 * remote_ra_get_rsc_info()
 */
#define REMOTE_LRMD_RA "remote"

/* Upper bound (in milliseconds) on the timeout handed to a single connection
 * attempt in handle_remote_ra_start(); larger timeouts are capped to this
 */
#define MAX_START_TIMEOUT_MS 10000
25
/* Set the given enum remote_cmd_status flags in a remote command's status,
 * going through pcmk__set_flags_as() with the caller's function and line
 * and the command's resource ID as the target name
 */
#define cmd_set_flags(cmd, flags_to_set) do { \
    (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
                                       "Remote command", (cmd)->rsc_id, (cmd)->status, \
                                       (flags_to_set), #flags_to_set); \
    } while (0)
31
/* Clear the given enum remote_cmd_status flags in a remote command's status,
 * going through pcmk__clear_flags_as() with the caller's function and line
 * and the command's resource ID as the target name
 */
#define cmd_clear_flags(cmd, flags_to_clear) do { \
    (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
                                         "Remote command", (cmd)->rsc_id, (cmd)->status, \
                                         (flags_to_clear), #flags_to_clear); \
    } while (0)
37
/* Bit flags stored in remote_ra_cmd_t:status */
enum remote_cmd_status {
    /* Set once a monitor's success has been reported to the caller; cleared
     * again when a duplicate monitor request is merged into the command
     */
    cmd_reported_success    = (1 << 0),

    /* Set by remote_ra_cancel() on the in-flight command so that it is not
     * rescheduled once it completes
     */
    cmd_cancel              = (1 << 1),
};
42
/* One queued, in-flight, or recurring action on a remote connection resource */
typedef struct remote_ra_cmd_s {
    /* Name of the node whose executor state requested the action (copied from
     * lrm_state->node_name); reported back as the event's remote_nodename
     */
    char *owner;

    /* ID of the connection resource; also used to look up its lrm_state */
    char *rsc_id;

    /* Name of the action to perform (e.g. "start", "monitor", "stop") */
    char *action;

    /* Caller-supplied data handed back in the result event (op.user_data) */
    char *userdata;

    /* Delay (ms) requested before the command may be executed */
    int start_delay;

    /* glib source ID of the start-delay timer; 0 when no delay is pending */
    int delay_id;

    /* Action timeout (ms) */
    int timeout;
    /* Time left (ms), as last computed by update_remaining_timeout() */
    int remaining_timeout;

    /* Recurrence interval (ms); 0 for a one-shot action */
    guint interval_ms;

    /* glib source ID of the timer that requeues a recurring command */
    int interval_id;
    /* glib source ID of the timer waiting for a poke response */
    int monitor_timeout_id;
    /* glib source ID of the timer waiting for a takeover before a stop */
    int takeover_timeout_id;

    /* Action parameters; owned by the command and freed with it */
    lrmd_key_value_t *params;
    /* Result that will be (or was) reported for the action */
    pcmk__action_result_t result;
    /* Call ID returned to the requester (see generate_callid()) */
    int call_id;
    /* Time the command was created, or restarted by a merged duplicate */
    time_t start_time;
    /* Group of enum remote_cmd_status flags */
    uint32_t status;
} remote_ra_cmd_t;
72
/* Set the given enum remote_status flags in the remote_ra_data_t attached to
 * an executor state object; lrm_state->remote_ra_data must not be NULL
 */
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
    lrm_state_t *lrm = (lrm_state); \
    remote_ra_data_t *ra = lrm->remote_ra_data; \
    ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
                                    lrm->node_name, ra->status, \
                                    (flags_to_set), #flags_to_set); \
    } while (0)
80
/* Clear the given enum remote_status flags in the remote_ra_data_t attached to
 * an executor state object; lrm_state->remote_ra_data must not be NULL
 */
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
    lrm_state_t *lrm = (lrm_state); \
    remote_ra_data_t *ra = lrm->remote_ra_data; \
    ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
                                      lrm->node_name, ra->status, \
                                      (flags_to_clear), #flags_to_clear); \
    } while (0)
88
/* Bit flags stored in remote_ra_data_t:status */
enum remote_status {
    /* Set when a "migrate_to" is processed: another client is expected to
     * connect to the remote daemon
     */
    expect_takeover     = (1 << 0),

    /* Set when a new-client event arrives while expect_takeover is set */
    takeover_complete   = (1 << 1),

    /* Set after a successful connection; cleared when the connection is
     * stopped
     */
    remote_active       = (1 << 2),

    /* NOTE(review): presumably set while the remote node is in maintenance
     * mode; the code that sets it is outside this section -- confirm
     */
    remote_in_maint     = (1 << 3),

    /* Set in handle_remote_ra_start() when the command's parameters include
     * the container meta-attribute
     */
    controlling_guest   = (1 << 4),
};
105
/* Per-connection state attached to lrm_state_t:remote_ra_data */
typedef struct remote_ra_data_s {
    /* Mainloop trigger that runs handle_remote_ra_exec() */
    crm_trigger_t *work;
    /* Command currently waiting for an asynchronous event, if any */
    remote_ra_cmd_t *cur_cmd;
    /* Commands queued for execution */
    GList *cmds;
    /* Recurring commands waiting for their next interval to elapse */
    GList *recurring_cmds;
    /* Group of enum remote_status flags */
    uint32_t status;
} remote_ra_data_t;
113
/* Forward declarations for helpers that are used before they are defined */
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
117
118 static void
119 free_cmd(gpointer user_data)
120 {
121 remote_ra_cmd_t *cmd = user_data;
122
123 if (!cmd) {
124 return;
125 }
126 if (cmd->delay_id) {
127 g_source_remove(cmd->delay_id);
128 }
129 if (cmd->interval_id) {
130 g_source_remove(cmd->interval_id);
131 }
132 if (cmd->monitor_timeout_id) {
133 g_source_remove(cmd->monitor_timeout_id);
134 }
135 if (cmd->takeover_timeout_id) {
136 g_source_remove(cmd->takeover_timeout_id);
137 }
138 free(cmd->owner);
139 free(cmd->rsc_id);
140 free(cmd->action);
141 free(cmd->userdata);
142 pcmk__reset_result(&(cmd->result));
143 lrmd_key_value_freeall(cmd->params);
144 free(cmd);
145 }
146
/*!
 * \internal
 * \brief Generate the next call ID for a remote connection action
 *
 * IDs are strictly positive. After INT_MAX has been handed out, numbering
 * restarts at 1. The wrap is detected before incrementing, because
 * overflowing a signed int is undefined behavior and cannot be relied on to
 * yield a negative value.
 *
 * \return Next call ID (always greater than 0)
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    if (remote_ra_callid >= INT_MAX) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
159
160 static gboolean
161 recurring_helper(gpointer data)
162 {
163 remote_ra_cmd_t *cmd = data;
164 lrm_state_t *connection_rsc = NULL;
165
166 cmd->interval_id = 0;
167 connection_rsc = lrm_state_find(cmd->rsc_id);
168 if (connection_rsc && connection_rsc->remote_ra_data) {
169 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
170
171 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
172
173 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
174 mainloop_set_trigger(ra_data->work);
175 }
176 return FALSE;
177 }
178
179 static gboolean
180 start_delay_helper(gpointer data)
181 {
182 remote_ra_cmd_t *cmd = data;
183 lrm_state_t *connection_rsc = NULL;
184
185 cmd->delay_id = 0;
186 connection_rsc = lrm_state_find(cmd->rsc_id);
187 if (connection_rsc && connection_rsc->remote_ra_data) {
188 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
189
190 mainloop_set_trigger(ra_data->work);
191 }
192 return FALSE;
193 }
194
195 static bool
196 should_purge_attributes(crm_node_t *node)
197 {
198 bool purge = true;
199 crm_node_t *conn_node = NULL;
200 lrm_state_t *connection_rsc = NULL;
201
202 if (!node->conn_host) {
203 return purge;
204 }
205
206
207
208
209 conn_node = crm_get_peer(0, node->conn_host);
210 if (conn_node == NULL) {
211 return purge;
212 }
213
214
215
216
217
218 connection_rsc = lrm_state_find(node->uname);
219
220 if (connection_rsc != NULL) {
221 lrmd_t *lrm = connection_rsc->conn;
222 time_t uptime = lrmd__uptime(lrm);
223 time_t now = time(NULL);
224
225
226
227
228
229 if (uptime > 0 &&
230 conn_node->peer_lost > 0 &&
231 uptime + 20 >= now - conn_node->peer_lost) {
232 purge = false;
233 }
234 }
235
236 return purge;
237 }
238
239 static enum controld_section_e
240 section_to_delete(bool purge)
241 {
242 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
243 if (purge) {
244 return controld_section_all_unlocked;
245 } else {
246 return controld_section_lrm_unlocked;
247 }
248 } else {
249 if (purge) {
250 return controld_section_all;
251 } else {
252 return controld_section_lrm;
253 }
254 }
255 }
256
257 static void
258 purge_remote_node_attrs(int call_opt, crm_node_t *node)
259 {
260 bool purge = should_purge_attributes(node);
261 enum controld_section_e section = section_to_delete(purge);
262
263
264 if (purge) {
265 update_attrd_remote_node_removed(node->uname, NULL);
266 }
267
268 controld_delete_node_state(node->uname, section, call_opt);
269 }
270
271
272
273
274
275
276
277 static void
278 remote_node_up(const char *node_name)
279 {
280 int call_opt;
281 xmlNode *update, *state;
282 crm_node_t *node;
283
284 CRM_CHECK(node_name != NULL, return);
285 crm_info("Announcing Pacemaker Remote node %s", node_name);
286
287 call_opt = crmd_cib_smart_opt();
288
289
290
291
292
293
294
295 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
296
297
298 node = crm_remote_peer_get(node_name);
299 CRM_CHECK(node != NULL, return);
300
301 purge_remote_node_attrs(call_opt, node);
302 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
303
304
305
306
307
308
309
310 broadcast_remote_state_message(node_name, true);
311
312 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
313 state = create_node_state_update(node, node_update_cluster, update,
314 __func__);
315
316
317
318
319
320 crm_xml_add(state, XML_NODE_IS_FENCED, "0");
321
322
323
324
325
326
327
328
329 controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL);
330 free_xml(update);
331 }
332
/* How much of a remote node's recorded state remote_node_down() deletes */
enum down_opts {
    /* Delete only the controld_section_attrs part of the node state */
    DOWN_KEEP_LRM,
    /* Delete the whole node state (controld_section_all) */
    DOWN_ERASE_LRM
};
337
338
339
340
341
342
343
344
345 static void
346 remote_node_down(const char *node_name, const enum down_opts opts)
347 {
348 xmlNode *update;
349 int call_opt = crmd_cib_smart_opt();
350 crm_node_t *node;
351
352
353 update_attrd_remote_node_removed(node_name, NULL);
354
355
356
357
358
359
360 if (opts == DOWN_ERASE_LRM) {
361 controld_delete_node_state(node_name, controld_section_all, call_opt);
362 } else {
363 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
364 }
365
366
367 node = crm_remote_peer_get(node_name);
368 CRM_CHECK(node != NULL, return);
369 pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
370
371
372 broadcast_remote_state_message(node_name, false);
373
374
375 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
376 create_node_state_update(node, node_update_cluster, update, __func__);
377 controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL);
378 free_xml(update);
379 }
380
381
382
383
384
385
386
387 static void
388 check_remote_node_state(const remote_ra_cmd_t *cmd)
389 {
390
391 if (!pcmk__result_ok(&(cmd->result))) {
392 return;
393 }
394
395 if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
396 remote_node_up(cmd->rsc_id);
397
398 } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
399
400
401
402
403
404
405
406 crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
407
408 CRM_CHECK(node != NULL, return);
409 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
410
411 } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
412 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
413 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
414
415 if (ra_data) {
416 if (!pcmk_is_set(ra_data->status, takeover_complete)) {
417
418 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
419 } else if (AM_I_DC == FALSE) {
420
421
422
423
424 crm_remote_peer_cache_remove(cmd->rsc_id);
425 }
426 }
427 }
428
429
430
431
432
433
434
435
436
437
438 }
439
440 static void
441 report_remote_ra_result(remote_ra_cmd_t * cmd)
442 {
443 lrmd_event_data_t op = { 0, };
444
445 check_remote_node_state(cmd);
446
447 op.type = lrmd_event_exec_complete;
448 op.rsc_id = cmd->rsc_id;
449 op.op_type = cmd->action;
450 op.user_data = cmd->userdata;
451 op.timeout = cmd->timeout;
452 op.interval_ms = cmd->interval_ms;
453 op.t_run = (unsigned int) cmd->start_time;
454 op.t_rcchange = (unsigned int) cmd->start_time;
455
456 lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
457 cmd->result.exit_reason);
458
459 if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
460 op.t_rcchange = (unsigned int) time(NULL);
461
462
463
464
465
466
467
468
469
470 if (op.t_rcchange == op.t_run) {
471 op.t_rcchange++;
472 }
473 }
474
475 if (cmd->params) {
476 lrmd_key_value_t *tmp;
477
478 op.params = pcmk__strkey_table(free, free);
479 for (tmp = cmd->params; tmp; tmp = tmp->next) {
480 g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
481 }
482
483 }
484 op.call_id = cmd->call_id;
485 op.remote_nodename = cmd->owner;
486
487 lrm_op_callback(&op);
488
489 if (op.params) {
490 g_hash_table_destroy(op.params);
491 }
492 lrmd__reset_result(&op);
493 }
494
495 static void
496 update_remaining_timeout(remote_ra_cmd_t * cmd)
497 {
498 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
499 }
500
501 static gboolean
502 retry_start_cmd_cb(gpointer data)
503 {
504 lrm_state_t *lrm_state = data;
505 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
506 remote_ra_cmd_t *cmd = NULL;
507 int rc = ETIME;
508
509 if (!ra_data || !ra_data->cur_cmd) {
510 return FALSE;
511 }
512 cmd = ra_data->cur_cmd;
513 if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
514 return FALSE;
515 }
516 update_remaining_timeout(cmd);
517
518 if (cmd->remaining_timeout > 0) {
519 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
520 } else {
521 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
522 PCMK_EXEC_TIMEOUT,
523 "Not enough time remains to retry remote connection");
524 }
525
526 if (rc != pcmk_rc_ok) {
527 report_remote_ra_result(cmd);
528
529 if (ra_data->cmds) {
530 mainloop_set_trigger(ra_data->work);
531 }
532 ra_data->cur_cmd = NULL;
533 free_cmd(cmd);
534 } else {
535
536 }
537
538 return FALSE;
539 }
540
541
542 static gboolean
543 connection_takeover_timeout_cb(gpointer data)
544 {
545 lrm_state_t *lrm_state = NULL;
546 remote_ra_cmd_t *cmd = data;
547
548 crm_info("takeover event timed out for node %s", cmd->rsc_id);
549 cmd->takeover_timeout_id = 0;
550
551 lrm_state = lrm_state_find(cmd->rsc_id);
552
553 handle_remote_ra_stop(lrm_state, cmd);
554 free_cmd(cmd);
555
556 return FALSE;
557 }
558
559 static gboolean
560 monitor_timeout_cb(gpointer data)
561 {
562 lrm_state_t *lrm_state = NULL;
563 remote_ra_cmd_t *cmd = data;
564
565 lrm_state = lrm_state_find(cmd->rsc_id);
566
567 crm_info("Timed out waiting for remote poke response from %s%s",
568 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
569 cmd->monitor_timeout_id = 0;
570 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
571 "Remote executor did not respond");
572
573 if (lrm_state && lrm_state->remote_ra_data) {
574 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
575
576 if (ra_data->cur_cmd == cmd) {
577 ra_data->cur_cmd = NULL;
578 }
579 if (ra_data->cmds) {
580 mainloop_set_trigger(ra_data->work);
581 }
582 }
583
584 report_remote_ra_result(cmd);
585 free_cmd(cmd);
586
587 if(lrm_state) {
588 lrm_state_disconnect(lrm_state);
589 }
590 return FALSE;
591 }
592
593 static void
594 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
595 {
596 lrmd_event_data_t op = { 0, };
597
598 if (lrm_state == NULL) {
599
600 lrm_state = lrm_state_find(controld_globals.our_nodename);
601 }
602 CRM_ASSERT(lrm_state != NULL);
603
604 op.type = lrmd_event_exec_complete;
605 op.rsc_id = rsc_id;
606 op.op_type = op_type;
607 op.t_run = (unsigned int) time(NULL);
608 op.t_rcchange = op.t_run;
609 op.call_id = generate_callid();
610 lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
611 process_lrm_event(lrm_state, &op, NULL, NULL);
612 }
613
/*!
 * \brief Process an event received on a remote executor connection
 *
 * New-client, execution-complete, and (when no command is in flight)
 * disconnect events are handled on their own. Any other event is matched
 * against the connection's current command; if it matches, the command is
 * completed (or, for a recurring monitor, scheduled for its next run).
 *
 * \param[in,out] op  Event received from the remote connection
 */
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
    gboolean cmd_handled = FALSE;
    lrm_state_t *lrm_state = NULL;
    remote_ra_data_t *ra_data = NULL;
    remote_ra_cmd_t *cmd = NULL;

    crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
              "(%d) status=%s (%d)",
              (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
              lrmd_event_type2str(op->type), op->remote_nodename,
              services_ocf_exitcode_str(op->rc), op->rc,
              pcmk_exec_status_str(op->op_status), op->op_status);

    lrm_state = lrm_state_find(op->remote_nodename);
    if (!lrm_state || !lrm_state->remote_ra_data) {
        crm_debug("No state information found for remote connection event");
        return;
    }
    ra_data = lrm_state->remote_ra_data;

    if (op->type == lrmd_event_new_client) {
        // Another client has connected to the remote daemon

        if (pcmk_is_set(ra_data->status, expect_takeover)) {
            // A takeover was expected (flag is set by "migrate_to")
            lrm_remote_clear_flags(lrm_state, expect_takeover);
            lrm_remote_set_flags(lrm_state, takeover_complete);

        } else {
            crm_err("Disconnecting from Pacemaker Remote node %s due to "
                    "unexpected client takeover", op->remote_nodename);

            /* NOTE(review): uses the "disconnect only" variant rather than
             * lrm_state_disconnect(); the difference is not visible in this
             * section -- confirm before changing
             */
            lrm_state_disconnect_only(lrm_state);
        }
        return;
    }

    // Results of actions run on the remote node go to the regular callback
    if (op->type == lrmd_event_exec_complete) {
        if (pcmk_is_set(ra_data->status, takeover_complete)) {
            crm_debug("ignoring event, this connection is taken over by another node");
        } else {
            lrm_op_callback(op);
        }
        return;
    }

    if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
        // Disconnected while no connection command was in flight

        if (!pcmk_is_set(ra_data->status, remote_active)) {
            // The connection was not marked active, so nothing was lost
            crm_debug("Disconnection from Pacemaker Remote node %s complete",
                      lrm_state->node_name);

        } else if (!remote_ra_is_in_maintenance(lrm_state)) {
            // Active and not in maintenance: fail all recurring monitors
            crm_err("Lost connection to Pacemaker Remote node %s",
                    lrm_state->node_name);
            ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
            ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);

        } else {
            // Active but in maintenance: handle it like a successful stop
            crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
                       lrm_state->node_name);

            handle_remote_ra_stop(lrm_state, NULL);
            remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);

            // NULL makes synthesize_lrmd_success() use the local node's state
            synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
        }
        return;
    }

    if (!ra_data->cur_cmd) {
        crm_debug("no event to match");
        return;
    }

    cmd = ra_data->cur_cmd;

    // Match the event against the command that is currently in flight

    if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
                                                               "migrate_from", NULL)) {
        if (op->connection_rc < 0) {
            update_remaining_timeout(cmd);

            if ((op->connection_rc == -ENOKEY)
                || (op->connection_rc == -EKEYREJECTED)) {
                // Key problems are reported at once instead of being retried
                pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
                                 PCMK_EXEC_ERROR,
                                 pcmk_strerror(op->connection_rc));

            } else if (cmd->remaining_timeout > 3000) {
                // Enough time is left: retry in a second, keeping cmd current
                crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
                g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
                return;

            } else {
                crm_trace("can't reschedule start, remaining timeout too small %d",
                          cmd->remaining_timeout);
                pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                                    PCMK_EXEC_TIMEOUT,
                                    "%s without enough time to retry",
                                    pcmk_strerror(op->connection_rc));
            }

        } else {
            // Connected: reset the state tables and mark the connection active
            lrm_state_reset_tables(lrm_state, TRUE);
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            lrm_remote_set_flags(lrm_state, remote_active);
        }

        crm_debug("Remote connection event matched %s action", cmd->action);
        report_remote_ra_result(cmd);
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {

        // The poke was answered, so the response timer is no longer needed
        if (cmd->monitor_timeout_id) {
            g_source_remove(cmd->monitor_timeout_id);
            cmd->monitor_timeout_id = 0;
        }

        // Success is reported only once per command; later pokes are silent
        if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            report_remote_ra_result(cmd);
            cmd_set_flags(cmd, cmd_reported_success);
        }

        crm_debug("Remote poke event matched %s action", cmd->action);

        /* A recurring monitor that was not cancelled is scheduled for its
         * next run; clearing cmd keeps it from being freed below
         */
        if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
            ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
            cmd->interval_id = g_timeout_add(cmd->interval_ms,
                                             recurring_helper, cmd);
            cmd = NULL;
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
        // The connection dropped while a monitor was waiting for its poke
        if (pcmk_is_set(ra_data->status, remote_active) &&
            !pcmk_is_set(cmd->status, cmd_cancel)) {
            pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                             PCMK_EXEC_ERROR,
                             "Remote connection unexpectedly dropped "
                             "during monitor");
            report_remote_ra_result(cmd);
            crm_err("Remote connection to %s unexpectedly dropped during monitor",
                    lrm_state->node_name);
        }
        cmd_handled = TRUE;

    } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {

        /* NOTE(review): new-client events always return near the top of this
         * function, so this branch appears to be unreachable
         */
        handle_remote_ra_stop(lrm_state, cmd);
        cmd_handled = TRUE;

    } else {
        crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
    }

    if (cmd_handled) {
        // The in-flight command is finished; let queued commands proceed
        ra_data->cur_cmd = NULL;
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
        // cmd is NULL here if a recurring monitor was rescheduled above
        free_cmd(cmd);
    }
}
791
792 static void
793 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
794 {
795 remote_ra_data_t *ra_data = NULL;
796
797 CRM_ASSERT(lrm_state);
798 ra_data = lrm_state->remote_ra_data;
799
800 if (!pcmk_is_set(ra_data->status, takeover_complete)) {
801
802 g_hash_table_remove_all(lrm_state->active_ops);
803 } else {
804
805
806 lrm_state_reset_tables(lrm_state, FALSE);
807 }
808
809 lrm_remote_clear_flags(lrm_state, remote_active);
810 lrm_state_disconnect(lrm_state);
811
812 if (ra_data->cmds) {
813 g_list_free_full(ra_data->cmds, free_cmd);
814 }
815 if (ra_data->recurring_cmds) {
816 g_list_free_full(ra_data->recurring_cmds, free_cmd);
817 }
818 ra_data->cmds = NULL;
819 ra_data->recurring_cmds = NULL;
820 ra_data->cur_cmd = NULL;
821
822 if (cmd) {
823 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
824 report_remote_ra_result(cmd);
825 }
826 }
827
828
829 static int
830 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
831 {
832 const char *server = NULL;
833 lrmd_key_value_t *tmp = NULL;
834 int port = 0;
835 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
836 int rc = pcmk_rc_ok;
837
838 for (tmp = cmd->params; tmp; tmp = tmp->next) {
839 if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
840 XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
841 server = tmp->value;
842 } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
843 port = atoi(tmp->value);
844 } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
845 lrm_remote_set_flags(lrm_state, controlling_guest);
846 }
847 }
848
849 rc = controld_connect_remote_executor(lrm_state, server, port,
850 timeout_used);
851 if (rc != pcmk_rc_ok) {
852 pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
853 PCMK_EXEC_ERROR,
854 "Could not connect to Pacemaker Remote node %s: %s",
855 lrm_state->node_name, pcmk_rc_str(rc));
856 }
857 return rc;
858 }
859
/*!
 * \internal
 * \brief Trigger callback that executes queued remote connection commands
 *
 * Commands are taken from the front of the queue one at a time. A command
 * that must wait for an asynchronous event becomes the connection's current
 * command and processing stops; every other command is completed and freed
 * immediately.
 *
 * \param[in,out] user_data  Executor state (lrm_state_t *) of the connection
 *
 * \return TRUE always
 */
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
    int rc = 0;
    lrm_state_t *lrm_state = user_data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *cmd;
    GList *first = NULL;

    if (ra_data->cur_cmd) {
        // Still waiting on a command; nothing else can run until it is done
        return TRUE;
    }

    while (ra_data->cmds) {
        first = ra_data->cmds;
        cmd = first->data;
        if (cmd->delay_id) {
            // The first command's start delay has not elapsed yet, so wait
            return TRUE;
        }

        // Take the command off the queue; it is now owned by this function
        ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
        g_list_free_1(first);

        if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
            lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
            if (handle_remote_ra_start(lrm_state, cmd,
                                       cmd->timeout) == pcmk_rc_ok) {
                // Connection initiated; ownership of cmd passes to cur_cmd
                crm_debug("Initiated async remote connection, %s action will complete after connect event",
                          cmd->action);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }
            // handle_remote_ra_start() already set the failure result
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "monitor")) {

            if (lrm_state_is_connected(lrm_state) == TRUE) {
                rc = lrm_state_poke_connection(lrm_state);
                if (rc < 0) {
                    pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                                     PCMK_EXEC_ERROR, pcmk_strerror(rc));
                }
            } else {
                // Not connected: report "not running" without poking
                rc = -1;
                pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
                                 PCMK_EXEC_DONE, "Remote connection inactive");
            }

            if (rc == 0) {
                // Poke sent; the monitor completes when the response arrives
                crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
                          cmd->rsc_id);
                ra_data->cur_cmd = cmd;
                cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
                return TRUE;
            }
            report_remote_ra_result(cmd);

        } else if (!strcmp(cmd->action, "stop")) {

            if (pcmk_is_set(ra_data->status, expect_takeover)) {
                /* A takeover is expected, so the stop is held as the current
                 * command; if nothing completes it within half its timeout,
                 * connection_takeover_timeout_cb() performs a regular stop
                 */
                cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
                ra_data->cur_cmd = cmd;
                return TRUE;
            }

            // handle_remote_ra_stop() reports the result for cmd itself
            handle_remote_ra_stop(lrm_state, cmd);

        } else if (!strcmp(cmd->action, "migrate_to")) {
            // Only records that a takeover by another client is expected
            lrm_remote_clear_flags(lrm_state, takeover_complete);
            lrm_remote_set_flags(lrm_state, expect_takeover);
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            report_remote_ra_result(cmd);
        } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
                                    CRMD_ACTION_RELOAD_AGENT, NULL)) {
            // Reloads do nothing here and are reported as successful
            pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
            report_remote_ra_result(cmd);
        }

        // The command completed synchronously (or was not recognized)
        free_cmd(cmd);
    }

    return TRUE;
}
961
962 static void
963 remote_ra_data_init(lrm_state_t * lrm_state)
964 {
965 remote_ra_data_t *ra_data = NULL;
966
967 if (lrm_state->remote_ra_data) {
968 return;
969 }
970
971 ra_data = calloc(1, sizeof(remote_ra_data_t));
972 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
973 lrm_state->remote_ra_data = ra_data;
974 }
975
976 void
977 remote_ra_cleanup(lrm_state_t * lrm_state)
978 {
979 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
980
981 if (!ra_data) {
982 return;
983 }
984
985 if (ra_data->cmds) {
986 g_list_free_full(ra_data->cmds, free_cmd);
987 }
988
989 if (ra_data->recurring_cmds) {
990 g_list_free_full(ra_data->recurring_cmds, free_cmd);
991 }
992 mainloop_destroy_trigger(ra_data->work);
993 free(ra_data);
994 lrm_state->remote_ra_data = NULL;
995 }
996
997 gboolean
998 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
999 {
1000 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
1001 return TRUE;
1002 }
1003 if ((id != NULL) && (lrm_state_find(id) != NULL)
1004 && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
1005 return TRUE;
1006 }
1007
1008 return FALSE;
1009 }
1010
1011 lrmd_rsc_info_t *
1012 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
1013 {
1014 lrmd_rsc_info_t *info = NULL;
1015
1016 if ((lrm_state_find(rsc_id))) {
1017 info = calloc(1, sizeof(lrmd_rsc_info_t));
1018
1019 info->id = strdup(rsc_id);
1020 info->type = strdup(REMOTE_LRMD_RA);
1021 info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
1022 info->provider = strdup("pacemaker");
1023 }
1024
1025 return info;
1026 }
1027
1028 static gboolean
1029 is_remote_ra_supported_action(const char *action)
1030 {
1031 return pcmk__str_any_of(action,
1032 CRMD_ACTION_START,
1033 CRMD_ACTION_STOP,
1034 CRMD_ACTION_STATUS,
1035 CRMD_ACTION_MIGRATE,
1036 CRMD_ACTION_MIGRATED,
1037 CRMD_ACTION_RELOAD_AGENT,
1038 CRMD_ACTION_RELOAD,
1039 NULL);
1040 }
1041
1042 static GList *
1043 fail_all_monitor_cmds(GList * list)
1044 {
1045 GList *rm_list = NULL;
1046 remote_ra_cmd_t *cmd = NULL;
1047 GList *gIter = NULL;
1048
1049 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1050 cmd = gIter->data;
1051 if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1052 rm_list = g_list_append(rm_list, cmd);
1053 }
1054 }
1055
1056 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
1057 cmd = gIter->data;
1058
1059 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1060 PCMK_EXEC_ERROR, "Lost connection to remote executor");
1061 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
1062 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
1063 report_remote_ra_result(cmd);
1064
1065 list = g_list_remove(list, cmd);
1066 free_cmd(cmd);
1067 }
1068
1069
1070 g_list_free(rm_list);
1071 return list;
1072 }
1073
1074 static GList *
1075 remove_cmd(GList * list, const char *action, guint interval_ms)
1076 {
1077 remote_ra_cmd_t *cmd = NULL;
1078 GList *gIter = NULL;
1079
1080 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1081 cmd = gIter->data;
1082 if ((cmd->interval_ms == interval_ms)
1083 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
1084 break;
1085 }
1086 cmd = NULL;
1087 }
1088 if (cmd) {
1089 list = g_list_remove(list, cmd);
1090 free_cmd(cmd);
1091 }
1092 return list;
1093 }
1094
1095 int
1096 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
1097 const char *action, guint interval_ms)
1098 {
1099 lrm_state_t *connection_rsc = NULL;
1100 remote_ra_data_t *ra_data = NULL;
1101
1102 connection_rsc = lrm_state_find(rsc_id);
1103 if (!connection_rsc || !connection_rsc->remote_ra_data) {
1104 return -EINVAL;
1105 }
1106
1107 ra_data = connection_rsc->remote_ra_data;
1108 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1109 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1110 interval_ms);
1111 if (ra_data->cur_cmd &&
1112 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1113 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1114
1115 cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
1116 }
1117
1118 return 0;
1119 }
1120
1121 static remote_ra_cmd_t *
1122 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1123 const char *userdata)
1124 {
1125 GList *gIter = NULL;
1126 remote_ra_cmd_t *cmd = NULL;
1127
1128
1129
1130
1131
1132
1133
1134 if (interval_ms == 0) {
1135 return NULL;
1136 }
1137
1138 if (ra_data->cur_cmd &&
1139 !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
1140 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1141 pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
1142
1143 cmd = ra_data->cur_cmd;
1144 goto handle_dup;
1145 }
1146
1147 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1148 cmd = gIter->data;
1149 if ((cmd->interval_ms == interval_ms)
1150 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1151 goto handle_dup;
1152 }
1153 }
1154
1155 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1156 cmd = gIter->data;
1157 if ((cmd->interval_ms == interval_ms)
1158 && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1159 goto handle_dup;
1160 }
1161 }
1162
1163 return NULL;
1164
1165 handle_dup:
1166
1167 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1168 cmd->rsc_id, "monitor", interval_ms);
1169
1170
1171 if (userdata) {
1172 free(cmd->userdata);
1173 cmd->userdata = strdup(userdata);
1174 }
1175
1176
1177 if (pcmk_is_set(cmd->status, cmd_reported_success)) {
1178 cmd->start_time = time(NULL);
1179 cmd->call_id = generate_callid();
1180 cmd_clear_flags(cmd, cmd_reported_success);
1181 }
1182
1183
1184
1185
1186 if (cmd->interval_id) {
1187 g_source_remove(cmd->interval_id);
1188 cmd->interval_id = 0;
1189 recurring_helper(cmd);
1190 }
1191
1192 return cmd;
1193 }
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213 int
1214 controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
1215 const char *action, const char *userdata,
1216 guint interval_ms, int timeout_ms,
1217 int start_delay_ms, lrmd_key_value_t *params,
1218 int *call_id)
1219 {
1220 lrm_state_t *connection_rsc = NULL;
1221 remote_ra_cmd_t *cmd = NULL;
1222 remote_ra_data_t *ra_data = NULL;
1223
1224 *call_id = 0;
1225
1226 CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
1227 && (userdata != NULL) && (call_id != NULL),
1228 lrmd_key_value_freeall(params); return EINVAL);
1229
1230 if (!is_remote_ra_supported_action(action)) {
1231 lrmd_key_value_freeall(params);
1232 return EOPNOTSUPP;
1233 }
1234
1235 connection_rsc = lrm_state_find(rsc_id);
1236 if (connection_rsc == NULL) {
1237 lrmd_key_value_freeall(params);
1238 return ENOTCONN;
1239 }
1240
1241 remote_ra_data_init(connection_rsc);
1242 ra_data = connection_rsc->remote_ra_data;
1243
1244 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1245 if (cmd) {
1246 *call_id = cmd->call_id;
1247 lrmd_key_value_freeall(params);
1248 return pcmk_rc_ok;
1249 }
1250
1251 cmd = calloc(1, sizeof(remote_ra_cmd_t));
1252 if (cmd == NULL) {
1253 lrmd_key_value_freeall(params);
1254 return ENOMEM;
1255 }
1256
1257 cmd->owner = strdup(lrm_state->node_name);
1258 cmd->rsc_id = strdup(rsc_id);
1259 cmd->action = strdup(action);
1260 cmd->userdata = strdup(userdata);
1261 if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
1262 || (cmd->userdata == NULL)) {
1263 free_cmd(cmd);
1264 lrmd_key_value_freeall(params);
1265 return ENOMEM;
1266 }
1267
1268 cmd->interval_ms = interval_ms;
1269 cmd->timeout = timeout_ms;
1270 cmd->start_delay = start_delay_ms;
1271 cmd->params = params;
1272 cmd->start_time = time(NULL);
1273
1274 cmd->call_id = generate_callid();
1275
1276 if (cmd->start_delay) {
1277 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1278 }
1279
1280 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1281 mainloop_set_trigger(ra_data->work);
1282
1283 *call_id = cmd->call_id;
1284 return pcmk_rc_ok;
1285 }
1286
1287
1288
1289
1290
1291
1292
1293 void
1294 remote_ra_fail(const char *node_name)
1295 {
1296 lrm_state_t *lrm_state = lrm_state_find(node_name);
1297
1298 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1299 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1300
1301 crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
1302 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1303 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1304 }
1305 }
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
/* XPath expression matching XML_CIB_TAG_NODE elements that sit beneath an
 * XML_GRAPH_TAG_DOWNED element, which itself is a child of an
 * XML_GRAPH_TAG_PSEUDO_EVENT element whose XML_LRM_ATTR_TASK attribute is
 * 'stonith'. The expression starts with a single "/".
 */
#define XPATH_PSEUDO_FENCE                                              \
    "/" XML_GRAPH_TAG_PSEUDO_EVENT                                      \
    "[@" XML_LRM_ATTR_TASK "='stonith']/"                               \
    XML_GRAPH_TAG_DOWNED                                                \
    "/" XML_CIB_TAG_NODE
1321
1322
1323
1324
1325
1326
1327
1328 void
1329 remote_ra_process_pseudo(xmlNode *xml)
1330 {
1331 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1332
1333 if (numXpathResults(search) == 1) {
1334 xmlNode *result = getXpathResult(search, 0);
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350 if (result) {
1351 const char *remote = ID(result);
1352
1353 if (remote) {
1354 remote_node_down(remote, DOWN_ERASE_LRM);
1355 }
1356 }
1357 }
1358 freeXpathObject(search);
1359 }
1360
1361 static void
1362 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1363 {
1364 xmlNode *update, *state;
1365 int call_opt;
1366 crm_node_t *node;
1367
1368 call_opt = crmd_cib_smart_opt();
1369 node = crm_remote_peer_get(lrm_state->node_name);
1370 CRM_CHECK(node != NULL, return);
1371 update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
1372 state = create_node_state_update(node, node_update_none, update,
1373 __func__);
1374 crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
1375 if (controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt,
1376 NULL) == pcmk_rc_ok) {
1377
1378 if (maintenance) {
1379 lrm_remote_set_flags(lrm_state, remote_in_maint);
1380 } else {
1381 lrm_remote_clear_flags(lrm_state, remote_in_maint);
1382 }
1383 }
1384 free_xml(update);
1385 }
1386
/* XPath expression matching XML_GRAPH_TAG_MAINTENANCE elements that are
 * children of an XML_GRAPH_TAG_PSEUDO_EVENT element (at any depth, because of
 * the leading "//") whose XML_LRM_ATTR_TASK attribute equals
 * CRM_OP_MAINTENANCE_NODES.
 */
#define XPATH_PSEUDO_MAINTENANCE                                        \
    "//" XML_GRAPH_TAG_PSEUDO_EVENT                                     \
    "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/"          \
    XML_GRAPH_TAG_MAINTENANCE
1390
1391
1392
1393
1394
1395
1396
1397 void
1398 remote_ra_process_maintenance_nodes(xmlNode *xml)
1399 {
1400 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1401
1402 if (numXpathResults(search) == 1) {
1403 xmlNode *node;
1404 int cnt = 0, cnt_remote = 0;
1405
1406 for (node =
1407 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1408 node != NULL; node = pcmk__xml_next(node)) {
1409 lrm_state_t *lrm_state = lrm_state_find(ID(node));
1410
1411 cnt++;
1412 if (lrm_state && lrm_state->remote_ra_data &&
1413 pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
1414 int is_maint;
1415
1416 cnt_remote++;
1417 pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
1418 &is_maint, 0);
1419 remote_ra_maintenance(lrm_state, is_maint);
1420 }
1421 }
1422 crm_trace("Action holds %d nodes (%d remotes found) "
1423 "adjusting maintenance-mode", cnt, cnt_remote);
1424 }
1425 freeXpathObject(search);
1426 }
1427
1428 gboolean
1429 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1430 {
1431 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1432 return pcmk_is_set(ra_data->status, remote_in_maint);
1433 }
1434
1435 gboolean
1436 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1437 {
1438 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1439 return pcmk_is_set(ra_data->status, controlling_guest);
1440 }