This source file includes the following definitions.
- free_cmd
- generate_callid
- recurring_helper
- start_delay_helper
- should_purge_attributes
- section_to_delete
- purge_remote_node_attrs
- remote_node_up
- remote_node_down
- check_remote_node_state
- report_remote_ra_result
- update_remaining_timeout
- retry_start_cmd_cb
- connection_takeover_timeout_cb
- monitor_timeout_cb
- synthesize_lrmd_success
- remote_lrm_op_callback
- handle_remote_ra_stop
- handle_remote_ra_start
- handle_remote_ra_exec
- remote_ra_data_init
- remote_ra_cleanup
- is_remote_lrmd_ra
- remote_ra_get_rsc_info
- is_remote_ra_supported_action
- fail_all_monitor_cmds
- remove_cmd
- remote_ra_cancel
- handle_dup_monitor
- controld_execute_remote_agent
- remote_ra_fail
- remote_ra_process_pseudo
- remote_ra_maintenance
- remote_ra_process_maintenance_nodes
- remote_ra_is_in_maintenance
- remote_ra_controlling_guest
1
2
3
4
5
6
7
8
9
#include <crm_internal.h>

#include <limits.h>                 // INT_MAX

#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>

#include <pacemaker-controld.h>
20
/* Agent type name used for Pacemaker Remote connection resources (compared
 * against the agent name in is_remote_lrmd_ra() and reported as the resource
 * type in remote_ra_get_rsc_info())
 */
#define REMOTE_LRMD_RA          "remote"

/* Upper bound (in milliseconds) on the timeout handed to a single connection
 * attempt in handle_remote_ra_start(); longer action timeouts are capped to
 * this value per attempt
 */
#define MAX_START_TIMEOUT_MS    10000
25
/*!
 * \internal
 * \brief Set status flags on a remote command
 *
 * The update goes through pcmk__set_flags_as(), which is given the calling
 * function and line, the LOG_TRACE level, and the command's resource ID.
 *
 * \param[in,out] cmd           Command (remote_ra_cmd_t *) whose status to update
 * \param[in]     flags_to_set  Group of enum remote_cmd_status flags to set
 */
#define cmd_set_flags(cmd, flags_to_set) do {                               \
        (cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,   \
                                           "Remote command", (cmd)->rsc_id, \
                                           (cmd)->status, (flags_to_set),   \
                                           #flags_to_set);                  \
    } while (0)
31
/*!
 * \internal
 * \brief Clear status flags on a remote command
 *
 * The update goes through pcmk__clear_flags_as(), which is given the calling
 * function and line, the LOG_TRACE level, and the command's resource ID.
 *
 * \param[in,out] cmd             Command (remote_ra_cmd_t *) whose status to update
 * \param[in]     flags_to_clear  Group of enum remote_cmd_status flags to clear
 */
#define cmd_clear_flags(cmd, flags_to_clear) do {                             \
        (cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,   \
                                             "Remote command", (cmd)->rsc_id, \
                                             (cmd)->status, (flags_to_clear), \
                                             #flags_to_clear);                \
    } while (0)
37
//! Bit flags stored in remote_ra_cmd_t:status
enum remote_cmd_status {
    //! A successful result has already been reported for this command
    cmd_reported_success    = (1 << 0),

    //! Cancellation was requested while the command was the current command
    cmd_cancel              = (1 << 1),
};
42
43 typedef struct remote_ra_cmd_s {
44
45 char *owner;
46
47 char *rsc_id;
48
49 char *action;
50
51 char *userdata;
52
53 int start_delay;
54
55 int delay_id;
56
57 int timeout;
58 int remaining_timeout;
59
60 guint interval_ms;
61
62 int interval_id;
63 int monitor_timeout_id;
64 int takeover_timeout_id;
65
66 lrmd_key_value_t *params;
67 pcmk__action_result_t result;
68 int call_id;
69 time_t start_time;
70 uint32_t status;
71 } remote_ra_cmd_t;
72
/*!
 * \internal
 * \brief Set status flags on a remote connection's private data
 *
 * \param[in,out] lrm_state     Executor state (lrm_state_t *) whose
 *                              remote_ra_data status should be updated
 *                              (remote_ra_data must not be NULL)
 * \param[in]     flags_to_set  Group of enum remote_status flags to set
 */
#define lrm_remote_set_flags(lrm_state, flags_to_set) do {                  \
        lrm_state_t *lrm = (lrm_state);                                     \
        remote_ra_data_t *ra = lrm->remote_ra_data;                         \
                                                                            \
        ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,      \
                                        "Remote", lrm->node_name,           \
                                        ra->status, (flags_to_set),         \
                                        #flags_to_set);                     \
    } while (0)
80
/*!
 * \internal
 * \brief Clear status flags on a remote connection's private data
 *
 * \param[in,out] lrm_state       Executor state (lrm_state_t *) whose
 *                                remote_ra_data status should be updated
 *                                (remote_ra_data must not be NULL)
 * \param[in]     flags_to_clear  Group of enum remote_status flags to clear
 */
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do {              \
        lrm_state_t *lrm = (lrm_state);                                     \
        remote_ra_data_t *ra = lrm->remote_ra_data;                         \
                                                                            \
        ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,    \
                                          "Remote", lrm->node_name,         \
                                          ra->status, (flags_to_clear),     \
                                          #flags_to_clear);                 \
    } while (0)
88
//! Bit flags stored in remote_ra_data_t:status
enum remote_status {
    //! Set after a successful migrate_to: another client is expected to take
    //! over the connection
    expect_takeover     = (1 << 0),

    //! Set when a new client connected while a takeover was expected
    takeover_complete   = (1 << 1),

    //! Set after a successful connection, cleared when the connection is
    //! stopped
    remote_active       = (1 << 2),

    //! NOTE(review): presumably marks the remote node as being in maintenance
    //! mode; the code that sets it is outside this part of the file
    remote_in_maint     = (1 << 3),

    //! Set when the start parameters include the container meta-attribute
    //! (presumably meaning the connection is to a guest node -- confirm)
    controlling_guest   = (1 << 4),
};
105
106 typedef struct remote_ra_data_s {
107 crm_trigger_t *work;
108 remote_ra_cmd_t *cur_cmd;
109 GList *cmds;
110 GList *recurring_cmds;
111 uint32_t status;
112 } remote_ra_data_t;
113
114 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
115 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
116 static GList *fail_all_monitor_cmds(GList * list);
117
118 static void
119 free_cmd(gpointer user_data)
120 {
121 remote_ra_cmd_t *cmd = user_data;
122
123 if (!cmd) {
124 return;
125 }
126 if (cmd->delay_id) {
127 g_source_remove(cmd->delay_id);
128 }
129 if (cmd->interval_id) {
130 g_source_remove(cmd->interval_id);
131 }
132 if (cmd->monitor_timeout_id) {
133 g_source_remove(cmd->monitor_timeout_id);
134 }
135 if (cmd->takeover_timeout_id) {
136 g_source_remove(cmd->takeover_timeout_id);
137 }
138 free(cmd->owner);
139 free(cmd->rsc_id);
140 free(cmd->action);
141 free(cmd->userdata);
142 pcmk__reset_result(&(cmd->result));
143 lrmd_key_value_freeall(cmd->params);
144 free(cmd);
145 }
146
/*!
 * \internal
 * \brief Generate the next call ID for a remote connection action
 *
 * Call IDs count up from 1 and wrap back to 1 after INT_MAX, so the returned
 * value is always positive.
 *
 * \return Next call ID (always greater than 0)
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    /* Wrap explicitly *before* incrementing: incrementing a signed int past
     * INT_MAX is undefined behavior, so the overflow can't be detected after
     * the fact.
     */
    if ((remote_ra_callid < 1) || (remote_ra_callid == INT_MAX)) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }

    return remote_ra_callid;
}
159
160 static gboolean
161 recurring_helper(gpointer data)
162 {
163 remote_ra_cmd_t *cmd = data;
164 lrm_state_t *connection_rsc = NULL;
165
166 cmd->interval_id = 0;
167 connection_rsc = lrm_state_find(cmd->rsc_id);
168 if (connection_rsc && connection_rsc->remote_ra_data) {
169 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
170
171 ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
172
173 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
174 mainloop_set_trigger(ra_data->work);
175 }
176 return FALSE;
177 }
178
179 static gboolean
180 start_delay_helper(gpointer data)
181 {
182 remote_ra_cmd_t *cmd = data;
183 lrm_state_t *connection_rsc = NULL;
184
185 cmd->delay_id = 0;
186 connection_rsc = lrm_state_find(cmd->rsc_id);
187 if (connection_rsc && connection_rsc->remote_ra_data) {
188 remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
189
190 mainloop_set_trigger(ra_data->work);
191 }
192 return FALSE;
193 }
194
195 static bool
196 should_purge_attributes(crm_node_t *node)
197 {
198 bool purge = true;
199 crm_node_t *conn_node = NULL;
200 lrm_state_t *connection_rsc = NULL;
201
202 if (!node->conn_host) {
203 return purge;
204 }
205
206
207
208
209 conn_node = pcmk__get_node(0, node->conn_host, NULL,
210 pcmk__node_search_cluster_member);
211 if (conn_node == NULL) {
212 return purge;
213 }
214
215
216
217
218
219 connection_rsc = lrm_state_find(node->uname);
220
221 if (connection_rsc != NULL) {
222 lrmd_t *lrm = connection_rsc->conn;
223 time_t uptime = lrmd__uptime(lrm);
224 time_t now = time(NULL);
225
226
227
228
229
230 if (uptime > 0 &&
231 conn_node->peer_lost > 0 &&
232 uptime + 20 >= now - conn_node->peer_lost) {
233 purge = false;
234 }
235 }
236
237 return purge;
238 }
239
240 static enum controld_section_e
241 section_to_delete(bool purge)
242 {
243 if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
244 if (purge) {
245 return controld_section_all_unlocked;
246 } else {
247 return controld_section_lrm_unlocked;
248 }
249 } else {
250 if (purge) {
251 return controld_section_all;
252 } else {
253 return controld_section_lrm;
254 }
255 }
256 }
257
258 static void
259 purge_remote_node_attrs(int call_opt, crm_node_t *node)
260 {
261 bool purge = should_purge_attributes(node);
262 enum controld_section_e section = section_to_delete(purge);
263
264
265 if (purge) {
266 update_attrd_remote_node_removed(node->uname, NULL);
267 }
268
269 controld_delete_node_state(node->uname, section, call_opt);
270 }
271
272
273
274
275
276
277
278 static void
279 remote_node_up(const char *node_name)
280 {
281 int call_opt;
282 xmlNode *update, *state;
283 crm_node_t *node;
284 lrm_state_t *connection_rsc = NULL;
285
286 CRM_CHECK(node_name != NULL, return);
287 crm_info("Announcing Pacemaker Remote node %s", node_name);
288
289 call_opt = crmd_cib_smart_opt();
290
291
292
293
294
295
296
297 update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
298
299
300 node = pcmk__cluster_lookup_remote_node(node_name);
301 CRM_CHECK(node != NULL, return);
302
303 purge_remote_node_attrs(call_opt, node);
304 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
305
306
307
308
309 connection_rsc = lrm_state_find(node->uname);
310
311 if (connection_rsc != NULL) {
312 lrmd_t *lrm = connection_rsc->conn;
313 const char *start_state = lrmd__node_start_state(lrm);
314
315 if (start_state) {
316 set_join_state(start_state, node->uname, node->uuid, true);
317 }
318 }
319
320
321
322
323
324
325
326 broadcast_remote_state_message(node_name, true);
327
328 update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
329 state = create_node_state_update(node, node_update_cluster, update,
330 __func__);
331
332
333
334
335
336 crm_xml_add(state, PCMK__XA_NODE_FENCED, "0");
337
338
339
340
341
342
343
344
345 controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
346 free_xml(update);
347 }
348
//! How much recorded state remote_node_down() should erase
enum down_opts {
    DOWN_KEEP_LRM,      //!< Delete only the node's attributes section
    DOWN_ERASE_LRM      //!< Delete all of the node's state sections
};
353
354
355
356
357
358
359
360
361 static void
362 remote_node_down(const char *node_name, const enum down_opts opts)
363 {
364 xmlNode *update;
365 int call_opt = crmd_cib_smart_opt();
366 crm_node_t *node;
367
368
369 update_attrd_remote_node_removed(node_name, NULL);
370
371
372
373
374
375
376 if (opts == DOWN_ERASE_LRM) {
377 controld_delete_node_state(node_name, controld_section_all, call_opt);
378 } else {
379 controld_delete_node_state(node_name, controld_section_attrs, call_opt);
380 }
381
382
383 node = pcmk__cluster_lookup_remote_node(node_name);
384 CRM_CHECK(node != NULL, return);
385 pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
386
387
388 broadcast_remote_state_message(node_name, false);
389
390
391 update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
392 create_node_state_update(node, node_update_cluster, update, __func__);
393 controld_update_cib(PCMK_XE_STATUS, update, call_opt, NULL);
394 free_xml(update);
395 }
396
397
398
399
400
401
402
403 static void
404 check_remote_node_state(const remote_ra_cmd_t *cmd)
405 {
406
407 if (!pcmk__result_ok(&(cmd->result))) {
408 return;
409 }
410
411 if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
412 remote_node_up(cmd->rsc_id);
413
414 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
415 pcmk__str_casei)) {
416
417
418
419
420
421
422
423 crm_node_t *node = pcmk__cluster_lookup_remote_node(cmd->rsc_id);
424
425 CRM_CHECK(node != NULL, return);
426 pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
427
428 } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
429 lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
430 remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
431
432 if (ra_data) {
433 if (!pcmk_is_set(ra_data->status, takeover_complete)) {
434
435 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
436 } else if (AM_I_DC == FALSE) {
437
438
439
440
441 pcmk__cluster_forget_remote_node(cmd->rsc_id);
442 }
443 }
444 }
445
446
447
448
449
450
451
452
453
454
455 }
456
457 static void
458 report_remote_ra_result(remote_ra_cmd_t * cmd)
459 {
460 lrmd_event_data_t op = { 0, };
461
462 check_remote_node_state(cmd);
463
464 op.type = lrmd_event_exec_complete;
465 op.rsc_id = cmd->rsc_id;
466 op.op_type = cmd->action;
467 op.user_data = cmd->userdata;
468 op.timeout = cmd->timeout;
469 op.interval_ms = cmd->interval_ms;
470 op.t_run = (unsigned int) cmd->start_time;
471 op.t_rcchange = (unsigned int) cmd->start_time;
472
473 lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
474 cmd->result.exit_reason);
475
476 if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
477 op.t_rcchange = (unsigned int) time(NULL);
478
479
480
481
482
483
484
485
486
487 if (op.t_rcchange == op.t_run) {
488 op.t_rcchange++;
489 }
490 }
491
492 if (cmd->params) {
493 lrmd_key_value_t *tmp;
494
495 op.params = pcmk__strkey_table(free, free);
496 for (tmp = cmd->params; tmp; tmp = tmp->next) {
497 pcmk__insert_dup(op.params, tmp->key, tmp->value);
498 }
499
500 }
501 op.call_id = cmd->call_id;
502 op.remote_nodename = cmd->owner;
503
504 lrm_op_callback(&op);
505
506 if (op.params) {
507 g_hash_table_destroy(op.params);
508 }
509 lrmd__reset_result(&op);
510 }
511
512 static void
513 update_remaining_timeout(remote_ra_cmd_t * cmd)
514 {
515 cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
516 }
517
518 static gboolean
519 retry_start_cmd_cb(gpointer data)
520 {
521 lrm_state_t *lrm_state = data;
522 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
523 remote_ra_cmd_t *cmd = NULL;
524 int rc = ETIME;
525
526 if (!ra_data || !ra_data->cur_cmd) {
527 return FALSE;
528 }
529 cmd = ra_data->cur_cmd;
530 if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
531 PCMK_ACTION_MIGRATE_FROM, NULL)) {
532 return FALSE;
533 }
534 update_remaining_timeout(cmd);
535
536 if (cmd->remaining_timeout > 0) {
537 rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
538 } else {
539 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
540 PCMK_EXEC_TIMEOUT,
541 "Not enough time remains to retry remote connection");
542 }
543
544 if (rc != pcmk_rc_ok) {
545 report_remote_ra_result(cmd);
546
547 if (ra_data->cmds) {
548 mainloop_set_trigger(ra_data->work);
549 }
550 ra_data->cur_cmd = NULL;
551 free_cmd(cmd);
552 } else {
553
554 }
555
556 return FALSE;
557 }
558
559
560 static gboolean
561 connection_takeover_timeout_cb(gpointer data)
562 {
563 lrm_state_t *lrm_state = NULL;
564 remote_ra_cmd_t *cmd = data;
565
566 crm_info("takeover event timed out for node %s", cmd->rsc_id);
567 cmd->takeover_timeout_id = 0;
568
569 lrm_state = lrm_state_find(cmd->rsc_id);
570
571 handle_remote_ra_stop(lrm_state, cmd);
572 free_cmd(cmd);
573
574 return FALSE;
575 }
576
577 static gboolean
578 monitor_timeout_cb(gpointer data)
579 {
580 lrm_state_t *lrm_state = NULL;
581 remote_ra_cmd_t *cmd = data;
582
583 lrm_state = lrm_state_find(cmd->rsc_id);
584
585 crm_info("Timed out waiting for remote poke response from %s%s",
586 cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
587 cmd->monitor_timeout_id = 0;
588 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
589 "Remote executor did not respond");
590
591 if (lrm_state && lrm_state->remote_ra_data) {
592 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
593
594 if (ra_data->cur_cmd == cmd) {
595 ra_data->cur_cmd = NULL;
596 }
597 if (ra_data->cmds) {
598 mainloop_set_trigger(ra_data->work);
599 }
600 }
601
602 report_remote_ra_result(cmd);
603 free_cmd(cmd);
604
605 if(lrm_state) {
606 lrm_state_disconnect(lrm_state);
607 }
608 return FALSE;
609 }
610
611 static void
612 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
613 {
614 lrmd_event_data_t op = { 0, };
615
616 if (lrm_state == NULL) {
617
618 lrm_state = lrm_state_find(controld_globals.our_nodename);
619 }
620 CRM_ASSERT(lrm_state != NULL);
621
622 op.type = lrmd_event_exec_complete;
623 op.rsc_id = rsc_id;
624 op.op_type = op_type;
625 op.t_run = (unsigned int) time(NULL);
626 op.t_rcchange = op.t_run;
627 op.call_id = generate_callid();
628 lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
629 process_lrm_event(lrm_state, &op, NULL, NULL);
630 }
631
632 void
633 remote_lrm_op_callback(lrmd_event_data_t * op)
634 {
635 gboolean cmd_handled = FALSE;
636 lrm_state_t *lrm_state = NULL;
637 remote_ra_data_t *ra_data = NULL;
638 remote_ra_cmd_t *cmd = NULL;
639
640 crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
641 "(%d) status=%s (%d)",
642 (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
643 lrmd_event_type2str(op->type), op->remote_nodename,
644 services_ocf_exitcode_str(op->rc), op->rc,
645 pcmk_exec_status_str(op->op_status), op->op_status);
646
647 lrm_state = lrm_state_find(op->remote_nodename);
648 if (!lrm_state || !lrm_state->remote_ra_data) {
649 crm_debug("No state information found for remote connection event");
650 return;
651 }
652 ra_data = lrm_state->remote_ra_data;
653
654 if (op->type == lrmd_event_new_client) {
655
656
657 if (pcmk_is_set(ra_data->status, expect_takeover)) {
658
659 lrm_remote_clear_flags(lrm_state, expect_takeover);
660 lrm_remote_set_flags(lrm_state, takeover_complete);
661
662 } else {
663 crm_err("Disconnecting from Pacemaker Remote node %s due to "
664 "unexpected client takeover", op->remote_nodename);
665
666
667
668 lrm_state_disconnect_only(lrm_state);
669 }
670 return;
671 }
672
673
674 if (op->type == lrmd_event_exec_complete) {
675 if (pcmk_is_set(ra_data->status, takeover_complete)) {
676 crm_debug("ignoring event, this connection is taken over by another node");
677 } else {
678 lrm_op_callback(op);
679 }
680 return;
681 }
682
683 if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
684
685 if (!pcmk_is_set(ra_data->status, remote_active)) {
686 crm_debug("Disconnection from Pacemaker Remote node %s complete",
687 lrm_state->node_name);
688
689 } else if (!remote_ra_is_in_maintenance(lrm_state)) {
690 crm_err("Lost connection to Pacemaker Remote node %s",
691 lrm_state->node_name);
692 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
693 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
694
695 } else {
696 crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
697 lrm_state->node_name);
698
699 handle_remote_ra_stop(lrm_state, NULL);
700 remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
701
702 synthesize_lrmd_success(NULL, lrm_state->node_name,
703 PCMK_ACTION_STOP);
704 }
705 return;
706 }
707
708 if (!ra_data->cur_cmd) {
709 crm_debug("no event to match");
710 return;
711 }
712
713 cmd = ra_data->cur_cmd;
714
715
716
717 if ((op->type == lrmd_event_connect)
718 && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
719 PCMK_ACTION_MIGRATE_FROM, NULL)) {
720 if (op->connection_rc < 0) {
721 update_remaining_timeout(cmd);
722
723 if ((op->connection_rc == -ENOKEY)
724 || (op->connection_rc == -EKEYREJECTED)) {
725
726 pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
727 PCMK_EXEC_ERROR,
728 pcmk_strerror(op->connection_rc));
729
730 } else if (cmd->remaining_timeout > 3000) {
731 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
732 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
733 return;
734
735 } else {
736 crm_trace("can't reschedule start, remaining timeout too small %d",
737 cmd->remaining_timeout);
738 pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
739 PCMK_EXEC_TIMEOUT,
740 "%s without enough time to retry",
741 pcmk_strerror(op->connection_rc));
742 }
743
744 } else {
745 lrm_state_reset_tables(lrm_state, TRUE);
746 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
747 lrm_remote_set_flags(lrm_state, remote_active);
748 }
749
750 crm_debug("Remote connection event matched %s action", cmd->action);
751 report_remote_ra_result(cmd);
752 cmd_handled = TRUE;
753
754 } else if ((op->type == lrmd_event_poke)
755 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
756 pcmk__str_casei)) {
757
758 if (cmd->monitor_timeout_id) {
759 g_source_remove(cmd->monitor_timeout_id);
760 cmd->monitor_timeout_id = 0;
761 }
762
763
764
765
766 if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
767 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
768 report_remote_ra_result(cmd);
769 cmd_set_flags(cmd, cmd_reported_success);
770 }
771
772 crm_debug("Remote poke event matched %s action", cmd->action);
773
774
775 if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
776 ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
777 cmd->interval_id = g_timeout_add(cmd->interval_ms,
778 recurring_helper, cmd);
779 cmd = NULL;
780 }
781 cmd_handled = TRUE;
782
783 } else if ((op->type == lrmd_event_disconnect)
784 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
785 pcmk__str_casei)) {
786 if (pcmk_is_set(ra_data->status, remote_active) &&
787 !pcmk_is_set(cmd->status, cmd_cancel)) {
788 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
789 PCMK_EXEC_ERROR,
790 "Remote connection unexpectedly dropped "
791 "during monitor");
792 report_remote_ra_result(cmd);
793 crm_err("Remote connection to %s unexpectedly dropped during monitor",
794 lrm_state->node_name);
795 }
796 cmd_handled = TRUE;
797
798 } else if ((op->type == lrmd_event_new_client)
799 && pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
800 pcmk__str_casei)) {
801
802 handle_remote_ra_stop(lrm_state, cmd);
803 cmd_handled = TRUE;
804
805 } else {
806 crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
807 }
808
809 if (cmd_handled) {
810 ra_data->cur_cmd = NULL;
811 if (ra_data->cmds) {
812 mainloop_set_trigger(ra_data->work);
813 }
814 free_cmd(cmd);
815 }
816 }
817
818 static void
819 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
820 {
821 remote_ra_data_t *ra_data = NULL;
822
823 CRM_ASSERT(lrm_state);
824 ra_data = lrm_state->remote_ra_data;
825
826 if (!pcmk_is_set(ra_data->status, takeover_complete)) {
827
828 g_hash_table_remove_all(lrm_state->active_ops);
829 } else {
830
831
832 lrm_state_reset_tables(lrm_state, FALSE);
833 }
834
835 lrm_remote_clear_flags(lrm_state, remote_active);
836 lrm_state_disconnect(lrm_state);
837
838 if (ra_data->cmds) {
839 g_list_free_full(ra_data->cmds, free_cmd);
840 }
841 if (ra_data->recurring_cmds) {
842 g_list_free_full(ra_data->recurring_cmds, free_cmd);
843 }
844 ra_data->cmds = NULL;
845 ra_data->recurring_cmds = NULL;
846 ra_data->cur_cmd = NULL;
847
848 if (cmd) {
849 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
850 report_remote_ra_result(cmd);
851 }
852 }
853
854
855 static int
856 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
857 {
858 const char *server = NULL;
859 lrmd_key_value_t *tmp = NULL;
860 int port = 0;
861 int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
862 int rc = pcmk_rc_ok;
863
864 for (tmp = cmd->params; tmp; tmp = tmp->next) {
865 if (pcmk__strcase_any_of(tmp->key,
866 PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER,
867 NULL)) {
868 server = tmp->value;
869
870 } else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT,
871 pcmk__str_none)) {
872 port = atoi(tmp->value);
873
874 } else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
875 pcmk__str_none)) {
876 lrm_remote_set_flags(lrm_state, controlling_guest);
877 }
878 }
879
880 rc = controld_connect_remote_executor(lrm_state, server, port,
881 timeout_used);
882 if (rc != pcmk_rc_ok) {
883 pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
884 PCMK_EXEC_ERROR,
885 "Could not connect to Pacemaker Remote node %s: %s",
886 lrm_state->node_name, pcmk_rc_str(rc));
887 }
888 return rc;
889 }
890
891 static gboolean
892 handle_remote_ra_exec(gpointer user_data)
893 {
894 int rc = 0;
895 lrm_state_t *lrm_state = user_data;
896 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
897 remote_ra_cmd_t *cmd;
898 GList *first = NULL;
899
900 if (ra_data->cur_cmd) {
901
902 return TRUE;
903 }
904
905 while (ra_data->cmds) {
906 first = ra_data->cmds;
907 cmd = first->data;
908 if (cmd->delay_id) {
909
910 return TRUE;
911 }
912
913 ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
914 g_list_free_1(first);
915
916 if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
917 PCMK_ACTION_MIGRATE_FROM, NULL)) {
918 lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
919 if (handle_remote_ra_start(lrm_state, cmd,
920 cmd->timeout) == pcmk_rc_ok) {
921
922 crm_debug("Initiated async remote connection, %s action will complete after connect event",
923 cmd->action);
924 ra_data->cur_cmd = cmd;
925 return TRUE;
926 }
927 report_remote_ra_result(cmd);
928
929 } else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
930
931 if (lrm_state_is_connected(lrm_state) == TRUE) {
932 rc = lrm_state_poke_connection(lrm_state);
933 if (rc < 0) {
934 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
935 PCMK_EXEC_ERROR, pcmk_strerror(rc));
936 }
937 } else {
938 rc = -1;
939 pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
940 PCMK_EXEC_DONE, "Remote connection inactive");
941 }
942
943 if (rc == 0) {
944 crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
945 cmd->rsc_id);
946 ra_data->cur_cmd = cmd;
947 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
948 return TRUE;
949 }
950 report_remote_ra_result(cmd);
951
952 } else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
953
954 if (pcmk_is_set(ra_data->status, expect_takeover)) {
955
956
957
958
959
960
961 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
962 ra_data->cur_cmd = cmd;
963 return TRUE;
964 }
965
966 handle_remote_ra_stop(lrm_state, cmd);
967
968 } else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
969 lrm_remote_clear_flags(lrm_state, takeover_complete);
970 lrm_remote_set_flags(lrm_state, expect_takeover);
971 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
972 report_remote_ra_result(cmd);
973
974 } else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
975 PCMK_ACTION_RELOAD_AGENT, NULL)) {
976
977
978
979
980
981
982
983
984
985 pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
986 report_remote_ra_result(cmd);
987 }
988
989 free_cmd(cmd);
990 }
991
992 return TRUE;
993 }
994
995 static void
996 remote_ra_data_init(lrm_state_t * lrm_state)
997 {
998 remote_ra_data_t *ra_data = NULL;
999
1000 if (lrm_state->remote_ra_data) {
1001 return;
1002 }
1003
1004 ra_data = pcmk__assert_alloc(1, sizeof(remote_ra_data_t));
1005 ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
1006 lrm_state->remote_ra_data = ra_data;
1007 }
1008
1009 void
1010 remote_ra_cleanup(lrm_state_t * lrm_state)
1011 {
1012 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1013
1014 if (!ra_data) {
1015 return;
1016 }
1017
1018 if (ra_data->cmds) {
1019 g_list_free_full(ra_data->cmds, free_cmd);
1020 }
1021
1022 if (ra_data->recurring_cmds) {
1023 g_list_free_full(ra_data->recurring_cmds, free_cmd);
1024 }
1025 mainloop_destroy_trigger(ra_data->work);
1026 free(ra_data);
1027 lrm_state->remote_ra_data = NULL;
1028 }
1029
1030 gboolean
1031 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
1032 {
1033 if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
1034 return TRUE;
1035 }
1036 if ((id != NULL) && (lrm_state_find(id) != NULL)
1037 && !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
1038 return TRUE;
1039 }
1040
1041 return FALSE;
1042 }
1043
1044 lrmd_rsc_info_t *
1045 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
1046 {
1047 lrmd_rsc_info_t *info = NULL;
1048
1049 if ((lrm_state_find(rsc_id))) {
1050 info = pcmk__assert_alloc(1, sizeof(lrmd_rsc_info_t));
1051
1052 info->id = pcmk__str_copy(rsc_id);
1053 info->type = pcmk__str_copy(REMOTE_LRMD_RA);
1054 info->standard = pcmk__str_copy(PCMK_RESOURCE_CLASS_OCF);
1055 info->provider = pcmk__str_copy("pacemaker");
1056 }
1057
1058 return info;
1059 }
1060
1061 static gboolean
1062 is_remote_ra_supported_action(const char *action)
1063 {
1064 return pcmk__str_any_of(action,
1065 PCMK_ACTION_START,
1066 PCMK_ACTION_STOP,
1067 PCMK_ACTION_MONITOR,
1068 PCMK_ACTION_MIGRATE_TO,
1069 PCMK_ACTION_MIGRATE_FROM,
1070 PCMK_ACTION_RELOAD_AGENT,
1071 PCMK_ACTION_RELOAD,
1072 NULL);
1073 }
1074
1075 static GList *
1076 fail_all_monitor_cmds(GList * list)
1077 {
1078 GList *rm_list = NULL;
1079 remote_ra_cmd_t *cmd = NULL;
1080 GList *gIter = NULL;
1081
1082 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1083 cmd = gIter->data;
1084 if ((cmd->interval_ms > 0)
1085 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1086 pcmk__str_casei)) {
1087 rm_list = g_list_append(rm_list, cmd);
1088 }
1089 }
1090
1091 for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
1092 cmd = gIter->data;
1093
1094 pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
1095 PCMK_EXEC_ERROR, "Lost connection to remote executor");
1096 crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
1097 cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
1098 report_remote_ra_result(cmd);
1099
1100 list = g_list_remove(list, cmd);
1101 free_cmd(cmd);
1102 }
1103
1104
1105 g_list_free(rm_list);
1106 return list;
1107 }
1108
1109 static GList *
1110 remove_cmd(GList * list, const char *action, guint interval_ms)
1111 {
1112 remote_ra_cmd_t *cmd = NULL;
1113 GList *gIter = NULL;
1114
1115 for (gIter = list; gIter != NULL; gIter = gIter->next) {
1116 cmd = gIter->data;
1117 if ((cmd->interval_ms == interval_ms)
1118 && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
1119 break;
1120 }
1121 cmd = NULL;
1122 }
1123 if (cmd) {
1124 list = g_list_remove(list, cmd);
1125 free_cmd(cmd);
1126 }
1127 return list;
1128 }
1129
1130 int
1131 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
1132 const char *action, guint interval_ms)
1133 {
1134 lrm_state_t *connection_rsc = NULL;
1135 remote_ra_data_t *ra_data = NULL;
1136
1137 connection_rsc = lrm_state_find(rsc_id);
1138 if (!connection_rsc || !connection_rsc->remote_ra_data) {
1139 return -EINVAL;
1140 }
1141
1142 ra_data = connection_rsc->remote_ra_data;
1143 ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1144 ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1145 interval_ms);
1146 if (ra_data->cur_cmd &&
1147 (ra_data->cur_cmd->interval_ms == interval_ms) &&
1148 (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1149
1150 cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
1151 }
1152
1153 return 0;
1154 }
1155
1156 static remote_ra_cmd_t *
1157 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
1158 const char *userdata)
1159 {
1160 GList *gIter = NULL;
1161 remote_ra_cmd_t *cmd = NULL;
1162
1163
1164
1165
1166
1167
1168
1169 if (interval_ms == 0) {
1170 return NULL;
1171 }
1172
1173 if (ra_data->cur_cmd &&
1174 !pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
1175 (ra_data->cur_cmd->interval_ms == interval_ms)
1176 && pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR,
1177 pcmk__str_casei)) {
1178
1179 cmd = ra_data->cur_cmd;
1180 goto handle_dup;
1181 }
1182
1183 for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1184 cmd = gIter->data;
1185 if ((cmd->interval_ms == interval_ms)
1186 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1187 pcmk__str_casei)) {
1188 goto handle_dup;
1189 }
1190 }
1191
1192 for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1193 cmd = gIter->data;
1194 if ((cmd->interval_ms == interval_ms)
1195 && pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
1196 pcmk__str_casei)) {
1197 goto handle_dup;
1198 }
1199 }
1200
1201 return NULL;
1202
1203 handle_dup:
1204
1205 crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1206 cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms);
1207
1208
1209 if (userdata) {
1210 free(cmd->userdata);
1211 cmd->userdata = pcmk__str_copy(userdata);
1212 }
1213
1214
1215 if (pcmk_is_set(cmd->status, cmd_reported_success)) {
1216 cmd->start_time = time(NULL);
1217 cmd->call_id = generate_callid();
1218 cmd_clear_flags(cmd, cmd_reported_success);
1219 }
1220
1221
1222
1223
1224 if (cmd->interval_id) {
1225 g_source_remove(cmd->interval_id);
1226 cmd->interval_id = 0;
1227 recurring_helper(cmd);
1228 }
1229
1230 return cmd;
1231 }
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251 int
1252 controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
1253 const char *action, const char *userdata,
1254 guint interval_ms, int timeout_ms,
1255 int start_delay_ms, lrmd_key_value_t *params,
1256 int *call_id)
1257 {
1258 lrm_state_t *connection_rsc = NULL;
1259 remote_ra_cmd_t *cmd = NULL;
1260 remote_ra_data_t *ra_data = NULL;
1261
1262 *call_id = 0;
1263
1264 CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
1265 && (userdata != NULL) && (call_id != NULL),
1266 lrmd_key_value_freeall(params); return EINVAL);
1267
1268 if (!is_remote_ra_supported_action(action)) {
1269 lrmd_key_value_freeall(params);
1270 return EOPNOTSUPP;
1271 }
1272
1273 connection_rsc = lrm_state_find(rsc_id);
1274 if (connection_rsc == NULL) {
1275 lrmd_key_value_freeall(params);
1276 return ENOTCONN;
1277 }
1278
1279 remote_ra_data_init(connection_rsc);
1280 ra_data = connection_rsc->remote_ra_data;
1281
1282 cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1283 if (cmd) {
1284 *call_id = cmd->call_id;
1285 lrmd_key_value_freeall(params);
1286 return pcmk_rc_ok;
1287 }
1288
1289 cmd = pcmk__assert_alloc(1, sizeof(remote_ra_cmd_t));
1290
1291 cmd->owner = pcmk__str_copy(lrm_state->node_name);
1292 cmd->rsc_id = pcmk__str_copy(rsc_id);
1293 cmd->action = pcmk__str_copy(action);
1294 cmd->userdata = pcmk__str_copy(userdata);
1295 cmd->interval_ms = interval_ms;
1296 cmd->timeout = timeout_ms;
1297 cmd->start_delay = start_delay_ms;
1298 cmd->params = params;
1299 cmd->start_time = time(NULL);
1300
1301 cmd->call_id = generate_callid();
1302
1303 if (cmd->start_delay) {
1304 cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1305 }
1306
1307 ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1308 mainloop_set_trigger(ra_data->work);
1309
1310 *call_id = cmd->call_id;
1311 return pcmk_rc_ok;
1312 }
1313
1314
1315
1316
1317
1318
1319
1320 void
1321 remote_ra_fail(const char *node_name)
1322 {
1323 lrm_state_t *lrm_state = lrm_state_find(node_name);
1324
1325 if (lrm_state && lrm_state_is_connected(lrm_state)) {
1326 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1327
1328 crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
1329 ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1330 ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1331 }
1332 }
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
/* XPath expression that remote_ra_process_pseudo() passes to xpath_search().
 *
 * It is built by string-literal concatenation and selects PCMK_XE_NODE
 * elements that are children of a PCMK__XE_DOWNED element, itself a child of a
 * root-level PCMK__XE_PSEUDO_EVENT element whose PCMK_XA_OPERATION attribute
 * equals 'stonith'.
 */
#define XPATH_PSEUDO_FENCE "/" PCMK__XE_PSEUDO_EVENT \
    "[@" PCMK_XA_OPERATION "='stonith']/" PCMK__XE_DOWNED "/" PCMK_XE_NODE
1347
1348
1349
1350
1351
1352
1353
1354 void
1355 remote_ra_process_pseudo(xmlNode *xml)
1356 {
1357 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1358
1359 if (numXpathResults(search) == 1) {
1360 xmlNode *result = getXpathResult(search, 0);
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376 if (result) {
1377 const char *remote = pcmk__xe_id(result);
1378
1379 if (remote) {
1380 remote_node_down(remote, DOWN_ERASE_LRM);
1381 }
1382 }
1383 }
1384 freeXpathObject(search);
1385 }
1386
1387 static void
1388 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
1389 {
1390 xmlNode *update, *state;
1391 int call_opt;
1392 crm_node_t *node;
1393
1394 call_opt = crmd_cib_smart_opt();
1395 node = pcmk__cluster_lookup_remote_node(lrm_state->node_name);
1396 CRM_CHECK(node != NULL, return);
1397 update = pcmk__xe_create(NULL, PCMK_XE_STATUS);
1398 state = create_node_state_update(node, node_update_none, update,
1399 __func__);
1400 crm_xml_add(state, PCMK__XA_NODE_IN_MAINTENANCE, (maintenance? "1" : "0"));
1401 if (controld_update_cib(PCMK_XE_STATUS, update, call_opt,
1402 NULL) == pcmk_rc_ok) {
1403
1404 if (maintenance) {
1405 lrm_remote_set_flags(lrm_state, remote_in_maint);
1406 } else {
1407 lrm_remote_clear_flags(lrm_state, remote_in_maint);
1408 }
1409 }
1410 free_xml(update);
1411 }
1412
/* XPath expression that remote_ra_process_maintenance_nodes() passes to
 * xpath_search().
 *
 * It is built by string-literal concatenation and selects
 * PCMK__XE_MAINTENANCE elements that are children of a PCMK__XE_PSEUDO_EVENT
 * element whose PCMK_XA_OPERATION attribute equals
 * PCMK_ACTION_MAINTENANCE_NODES. The leading "//" lets the pseudo-event match
 * at any depth.
 */
#define XPATH_PSEUDO_MAINTENANCE "//" PCMK__XE_PSEUDO_EVENT \
    "[@" PCMK_XA_OPERATION "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
    PCMK__XE_MAINTENANCE
1416
1417
1418
1419
1420
1421
1422
1423 void
1424 remote_ra_process_maintenance_nodes(xmlNode *xml)
1425 {
1426 xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1427
1428 if (numXpathResults(search) == 1) {
1429 xmlNode *node;
1430 int cnt = 0, cnt_remote = 0;
1431
1432 for (node = pcmk__xe_first_child(getXpathResult(search, 0),
1433 PCMK_XE_NODE, NULL, NULL);
1434 node != NULL; node = pcmk__xe_next_same(node)) {
1435
1436 lrm_state_t *lrm_state = lrm_state_find(pcmk__xe_id(node));
1437
1438 cnt++;
1439 if (lrm_state && lrm_state->remote_ra_data &&
1440 pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
1441
1442 const char *in_maint_s = NULL;
1443 int in_maint;
1444
1445 cnt_remote++;
1446 in_maint_s = crm_element_value(node,
1447 PCMK__XA_NODE_IN_MAINTENANCE);
1448 pcmk__scan_min_int(in_maint_s, &in_maint, 0);
1449 remote_ra_maintenance(lrm_state, in_maint);
1450 }
1451 }
1452 crm_trace("Action holds %d nodes (%d remotes found) adjusting "
1453 PCMK_OPT_MAINTENANCE_MODE,
1454 cnt, cnt_remote);
1455 }
1456 freeXpathObject(search);
1457 }
1458
1459 gboolean
1460 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
1461 {
1462 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1463 return pcmk_is_set(ra_data->status, remote_in_maint);
1464 }
1465
1466 gboolean
1467 remote_ra_controlling_guest(lrm_state_t * lrm_state)
1468 {
1469 remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1470 return pcmk_is_set(ra_data->status, controlling_guest);
1471 }