This source file includes the following definitions.
- free_cmd
 
- generate_callid
 
- recurring_helper
 
- start_delay_helper
 
- remote_node_up
 
- remote_node_down
 
- check_remote_node_state
 
- report_remote_ra_result
 
- update_remaining_timeout
 
- retry_start_cmd_cb
 
- connection_takeover_timeout_cb
 
- monitor_timeout_cb
 
- synthesize_lrmd_success
 
- remote_lrm_op_callback
 
- handle_remote_ra_stop
 
- handle_remote_ra_start
 
- handle_remote_ra_exec
 
- remote_ra_data_init
 
- remote_ra_cleanup
 
- is_remote_lrmd_ra
 
- remote_ra_get_rsc_info
 
- is_remote_ra_supported_action
 
- fail_all_monitor_cmds
 
- remove_cmd
 
- remote_ra_cancel
 
- handle_dup_monitor
 
- controld_execute_remote_agent
 
- remote_ra_fail
 
- remote_ra_process_pseudo
 
- remote_ra_maintenance
 
- remote_ra_process_maintenance_nodes
 
- remote_ra_is_in_maintenance
 
- remote_ra_controlling_guest
 
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <crm/crm.h>
  13 #include <crm/msg_xml.h>
  14 #include <crm/common/xml_internal.h>
  15 #include <crm/lrmd.h>
  16 #include <crm/lrmd_internal.h>
  17 #include <crm/services.h>
  18 
  19 #include <pacemaker-controld.h>
  20 
  21 #define REMOTE_LRMD_RA "remote" /* agent (type) name used for remote connection resources */
  22 
  23 /* Upper bound, in milliseconds, on the timeout used for one connection attempt */
  24 #define MAX_START_TIMEOUT_MS 10000
  25 
  26 typedef struct remote_ra_cmd_s { /* one queued or in-flight action on a remote connection resource */
  27     /* reported back as the event's remote_nodename; presumably the node that issued the command -- TODO confirm */
  28     char *owner;
  29     /* ID of the connection resource; also used to look up its lrm_state_t */
  30     char *rsc_id;
  31     /* action name ("start", "stop", "monitor", "migrate_to", "migrate_from", ...) */
  32     char *action;
  33     /* opaque string handed back to the caller as the event's user_data */
  34     char *userdata;
  35     /* NOTE(review): not read in this part of the file; presumably a delay before first execution -- confirm */
  36     int start_delay;
  37     /* GLib source ID of the start-delay timer; nonzero while the command must not run yet */
  38     int delay_id;
  39     /* action timeout, in milliseconds */
  40     int timeout;
  41     int remaining_timeout; /* milliseconds left, as last computed by update_remaining_timeout() */
  42     /* recurrence interval in milliseconds; 0 means the command is not recurring */
  43     guint interval_ms;
  44     /* GLib source ID of the timer that re-queues a recurring command */
  45     int interval_id;
  46     int reported_success; /* nonzero once a successful monitor result has been reported */
  47     int monitor_timeout_id; /* GLib source ID of the timer waiting for a poke response */
  48     int takeover_timeout_id; /* GLib source ID of the timer waiting for a takeover during stop */
  49     /* action parameters; copied into the event's params table when reporting */
  50     lrmd_key_value_t *params;
  51     pcmk__action_result_t result; /* outcome that will be reported for the command */
  52     int call_id; /* reported back as the event's call_id */
  53     time_t start_time; /* reported as t_run; also the base for the remaining-timeout calculation */
  54     gboolean cancel; /* set by remote_ra_cancel() when the in-flight command has been cancelled */
  55 } remote_ra_cmd_t;
  56 
  57 enum remote_migration_status { /* 0 (no enumerator) is stored when a start or migrate_from begins */
  58     expect_takeover = 1, /* a migrate_to succeeded here; another client is expected to connect */
  59     takeover_complete, /* the expected new-client event has been received */
  60 };
  61 
  62 typedef struct remote_ra_data_s { /* per-connection state hung off lrm_state_t::remote_ra_data */
  63     crm_trigger_t *work; /* mainloop trigger that runs handle_remote_ra_exec() */
  64     remote_ra_cmd_t *cur_cmd; /* command waiting for an asynchronous event, or NULL */
  65     GList *cmds; /* commands queued for execution (remote_ra_cmd_t *) */
  66     GList *recurring_cmds; /* recurring commands waiting for their next interval */
  67 
  68     enum remote_migration_status migrate_status;
  69 
  70     gboolean active; /* TRUE after a successful connect, FALSE once the connection is stopped */
  71 
  72     /* NOTE(review): not written in this part of the file; presumably maintained by remote_ra_maintenance() -- confirm */
  73 
  74 
  75     gboolean is_maintenance;
  76 
  77     /* set to TRUE by handle_remote_ra_start() when the start parameters carry the container meta-attribute */
  78 
  79 
  80 
  81 
  82 
  83     gboolean controlling_guest;
  84 } remote_ra_data_t;
  85 
  86 static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
  87 static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
  88 static GList *fail_all_monitor_cmds(GList * list);
  89 
  90 static void
  91 free_cmd(gpointer user_data)
     
  92 {
  93     remote_ra_cmd_t *cmd = user_data;
  94 
  95     if (!cmd) {
  96         return;
  97     }
  98     if (cmd->delay_id) {
  99         g_source_remove(cmd->delay_id);
 100     }
 101     if (cmd->interval_id) {
 102         g_source_remove(cmd->interval_id);
 103     }
 104     if (cmd->monitor_timeout_id) {
 105         g_source_remove(cmd->monitor_timeout_id);
 106     }
 107     if (cmd->takeover_timeout_id) {
 108         g_source_remove(cmd->takeover_timeout_id);
 109     }
 110     free(cmd->owner);
 111     free(cmd->rsc_id);
 112     free(cmd->action);
 113     free(cmd->userdata);
 114     pcmk__reset_result(&(cmd->result));
 115     lrmd_key_value_freeall(cmd->params);
 116     free(cmd);
 117 }
 118 
 /*!
  * \internal
  * \brief Produce the next call ID for a locally generated operation event
  *
  * IDs are handed out sequentially starting at 1. After the largest positive
  * int has been returned, numbering restarts at 1, so the result is never
  * zero or negative.
  *
  * \return Positive call ID
  */
 static int
 generate_callid(void)
 {
     /* The counter is unsigned on purpose: incrementing a signed int past its
      * maximum is undefined behaviour, so the wrap-around is done explicitly.
      */
     static unsigned int last_callid = 0;

     /* Largest value that still fits in a non-negative int (assumes unsigned
      * int has exactly one more value bit than int, as on mainstream ABIs)
      */
     const unsigned int max_callid = ~0u >> 1;

     if (last_callid >= max_callid) {
         last_callid = 1u;
     } else {
         last_callid++;
     }
     return (int) last_callid;
 }
 131 
 132 static gboolean
 133 recurring_helper(gpointer data)
     
 134 {
 135     remote_ra_cmd_t *cmd = data;
 136     lrm_state_t *connection_rsc = NULL;
 137 
 138     cmd->interval_id = 0;
 139     connection_rsc = lrm_state_find(cmd->rsc_id);
 140     if (connection_rsc && connection_rsc->remote_ra_data) {
 141         remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
 142 
 143         ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
 144 
 145         ra_data->cmds = g_list_append(ra_data->cmds, cmd);
 146         mainloop_set_trigger(ra_data->work);
 147     }
 148     return FALSE;
 149 }
 150 
 151 static gboolean
 152 start_delay_helper(gpointer data)
     
 153 {
 154     remote_ra_cmd_t *cmd = data;
 155     lrm_state_t *connection_rsc = NULL;
 156 
 157     cmd->delay_id = 0;
 158     connection_rsc = lrm_state_find(cmd->rsc_id);
 159     if (connection_rsc && connection_rsc->remote_ra_data) {
 160         remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
 161 
 162         mainloop_set_trigger(ra_data->work);
 163     }
 164     return FALSE;
 165 }
 166 
 167 
 168 
 169 
 170 
 171 
 172 
 173 static void
 174 remote_node_up(const char *node_name)
     
 175 {
 176     int call_opt, call_id = 0;
 177     xmlNode *update, *state;
 178     crm_node_t *node;
 179     enum controld_section_e section = controld_section_all;
 180 
 181     CRM_CHECK(node_name != NULL, return);
 182     crm_info("Announcing Pacemaker Remote node %s", node_name);
 183 
 184     
 185 
 186 
 187 
 188 
 189     call_opt = crmd_cib_smart_opt();
 190     if (controld_shutdown_lock_enabled) {
 191         section = controld_section_all_unlocked;
 192     }
 193     
 194     update_attrd_remote_node_removed(node_name, NULL);
 195 
 196     controld_delete_node_state(node_name, section, call_opt);
 197 
 198     
 199 
 200 
 201 
 202 
 203 
 204     update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
 205 
 206     
 207     node = crm_remote_peer_get(node_name);
 208     CRM_CHECK(node != NULL, return);
 209     pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
 210 
 211     
 212 
 213 
 214 
 215 
 216 
 217     send_remote_state_message(node_name, TRUE);
 218 
 219     update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
 220     state = create_node_state_update(node, node_update_cluster, update,
 221                                      __func__);
 222 
 223     
 224 
 225 
 226 
 227     crm_xml_add(state, XML_NODE_IS_FENCED, "0");
 228 
 229     
 230 
 231 
 232 
 233 
 234 
 235 
 236     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
 237     if (call_id < 0) {
 238         crm_perror(LOG_WARNING, "%s CIB node state setup", node_name);
 239     }
 240     free_xml(update);
 241 }
 242 
 243 enum down_opts { /* selects how much CIB node state remote_node_down() deletes */
 244     DOWN_KEEP_LRM, /* delete only the controld_section_attrs section */
 245     DOWN_ERASE_LRM /* delete controld_section_all */
 246 };
 247 
 248 
 249 
 250 
 251 
 252 
 253 
 254 
 255 static void
 256 remote_node_down(const char *node_name, const enum down_opts opts)
     
 257 {
 258     xmlNode *update;
 259     int call_id = 0;
 260     int call_opt = crmd_cib_smart_opt();
 261     crm_node_t *node;
 262 
 263     
 264     update_attrd_remote_node_removed(node_name, NULL);
 265 
 266     
 267 
 268 
 269 
 270 
 271     if (opts == DOWN_ERASE_LRM) {
 272         controld_delete_node_state(node_name, controld_section_all, call_opt);
 273     } else {
 274         controld_delete_node_state(node_name, controld_section_attrs, call_opt);
 275     }
 276 
 277     
 278     node = crm_remote_peer_get(node_name);
 279     CRM_CHECK(node != NULL, return);
 280     pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
 281 
 282     
 283     send_remote_state_message(node_name, FALSE);
 284 
 285     
 286     update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
 287     create_node_state_update(node, node_update_cluster, update, __func__);
 288     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
 289     if (call_id < 0) {
 290         crm_perror(LOG_ERR, "%s CIB node state update", node_name);
 291     }
 292     free_xml(update);
 293 }
 294 
 295 
 296 
 297 
 298 
 299 
 300 
 301 static void
 302 check_remote_node_state(const remote_ra_cmd_t *cmd)
     
 303 {
 304     
 305     if (!pcmk__result_ok(&(cmd->result))) {
 306         return;
 307     }
 308 
 309     if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) {
 310         remote_node_up(cmd->rsc_id);
 311 
 312     } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
 313         
 314 
 315 
 316 
 317 
 318 
 319 
 320         crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
 321 
 322         CRM_CHECK(node != NULL, return);
 323         pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
 324 
 325     } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 326         lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
 327         remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
 328 
 329         if (ra_data) {
 330             if (ra_data->migrate_status != takeover_complete) {
 331                 
 332                 remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
 333             } else if (AM_I_DC == FALSE) {
 334                 
 335 
 336 
 337 
 338                 crm_remote_peer_cache_remove(cmd->rsc_id);
 339             }
 340         }
 341     }
 342 
 343     
 344 
 345 
 346 
 347 
 348 
 349 
 350 
 351 
 352 }
 353 
 354 static void
 355 report_remote_ra_result(remote_ra_cmd_t * cmd)
     
 356 {
 357     lrmd_event_data_t op = { 0, };
 358 
 359     check_remote_node_state(cmd);
 360 
 361     op.type = lrmd_event_exec_complete;
 362     op.rsc_id = cmd->rsc_id;
 363     op.op_type = cmd->action;
 364     op.user_data = cmd->userdata;
 365     op.timeout = cmd->timeout;
 366     op.interval_ms = cmd->interval_ms;
 367     op.t_run = (unsigned int) cmd->start_time;
 368     op.t_rcchange = (unsigned int) cmd->start_time;
 369 
 370     lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
 371                      cmd->result.exit_reason);
 372 
 373     if (cmd->reported_success && !pcmk__result_ok(&(cmd->result))) {
 374         op.t_rcchange = (unsigned int) time(NULL);
 375         
 376 
 377 
 378 
 379 
 380 
 381 
 382 
 383 
 384         if (op.t_rcchange == op.t_run) {
 385             op.t_rcchange++;
 386         }
 387     }
 388 
 389     if (cmd->params) {
 390         lrmd_key_value_t *tmp;
 391 
 392         op.params = pcmk__strkey_table(free, free);
 393         for (tmp = cmd->params; tmp; tmp = tmp->next) {
 394             g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
 395         }
 396 
 397     }
 398     op.call_id = cmd->call_id;
 399     op.remote_nodename = cmd->owner;
 400 
 401     lrm_op_callback(&op);
 402 
 403     if (op.params) {
 404         g_hash_table_destroy(op.params);
 405     }
 406     lrmd__reset_result(&op);
 407 }
 408 
 409 static void
 410 update_remaining_timeout(remote_ra_cmd_t * cmd)
     
 411 {
 412     cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
 413 }
 414 
 415 static gboolean
 416 retry_start_cmd_cb(gpointer data)
     
 417 {
 418     lrm_state_t *lrm_state = data;
 419     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 420     remote_ra_cmd_t *cmd = NULL;
 421     int rc = ETIME;
 422 
 423     if (!ra_data || !ra_data->cur_cmd) {
 424         return FALSE;
 425     }
 426     cmd = ra_data->cur_cmd;
 427     if (!pcmk__strcase_any_of(cmd->action, "start", "migrate_from", NULL)) {
 428         return FALSE;
 429     }
 430     update_remaining_timeout(cmd);
 431 
 432     if (cmd->remaining_timeout > 0) {
 433         rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
 434     } else {
 435         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 436                          PCMK_EXEC_TIMEOUT,
 437                          "Not enough time remains to retry remote connection");
 438     }
 439 
 440     if (rc != pcmk_rc_ok) {
 441         report_remote_ra_result(cmd);
 442 
 443         if (ra_data->cmds) {
 444             mainloop_set_trigger(ra_data->work);
 445         }
 446         ra_data->cur_cmd = NULL;
 447         free_cmd(cmd);
 448     } else {
 449         
 450     }
 451 
 452     return FALSE;
 453 }
 454 
 455 
 456 static gboolean
 457 connection_takeover_timeout_cb(gpointer data)
     
 458 {
 459     lrm_state_t *lrm_state = NULL;
 460     remote_ra_cmd_t *cmd = data;
 461 
 462     crm_info("takeover event timed out for node %s", cmd->rsc_id);
 463     cmd->takeover_timeout_id = 0;
 464 
 465     lrm_state = lrm_state_find(cmd->rsc_id);
 466 
 467     handle_remote_ra_stop(lrm_state, cmd);
 468     free_cmd(cmd);
 469 
 470     return FALSE;
 471 }
 472 
 473 static gboolean
 474 monitor_timeout_cb(gpointer data)
     
 475 {
 476     lrm_state_t *lrm_state = NULL;
 477     remote_ra_cmd_t *cmd = data;
 478 
 479     lrm_state = lrm_state_find(cmd->rsc_id);
 480 
 481     crm_info("Timed out waiting for remote poke response from %s%s",
 482              cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
 483     cmd->monitor_timeout_id = 0;
 484     pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
 485                      "Remote executor did not respond");
 486 
 487     if (lrm_state && lrm_state->remote_ra_data) {
 488         remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 489 
 490         if (ra_data->cur_cmd == cmd) {
 491             ra_data->cur_cmd = NULL;
 492         }
 493         if (ra_data->cmds) {
 494             mainloop_set_trigger(ra_data->work);
 495         }
 496     }
 497 
 498     report_remote_ra_result(cmd);
 499     free_cmd(cmd);
 500 
 501     if(lrm_state) {
 502         lrm_state_disconnect(lrm_state);
 503     }
 504     return FALSE;
 505 }
 506 
 507 static void
 508 synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
     
 509 {
 510     lrmd_event_data_t op = { 0, };
 511 
 512     if (lrm_state == NULL) {
 513         
 514         lrm_state = lrm_state_find(fsa_our_uname);
 515     }
 516     CRM_ASSERT(lrm_state != NULL);
 517 
 518     op.type = lrmd_event_exec_complete;
 519     op.rsc_id = rsc_id;
 520     op.op_type = op_type;
 521     op.t_run = (unsigned int) time(NULL);
 522     op.t_rcchange = op.t_run;
 523     op.call_id = generate_callid();
 524     lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 525     process_lrm_event(lrm_state, &op, NULL, NULL);
 526 }
 527 
 528 void
 529 remote_lrm_op_callback(lrmd_event_data_t * op)
     
 530 {
 531     gboolean cmd_handled = FALSE;
 532     lrm_state_t *lrm_state = NULL;
 533     remote_ra_data_t *ra_data = NULL;
 534     remote_ra_cmd_t *cmd = NULL;
 535 
 536     crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
 537               "(%d) status=%s (%d)",
 538               (op->op_type? op->op_type : ""), (op->op_type? " " : ""),
 539               lrmd_event_type2str(op->type), op->remote_nodename,
 540               services_ocf_exitcode_str(op->rc), op->rc,
 541               pcmk_exec_status_str(op->op_status), op->op_status);
 542 
 543     lrm_state = lrm_state_find(op->remote_nodename);
 544     if (!lrm_state || !lrm_state->remote_ra_data) {
 545         crm_debug("No state information found for remote connection event");
 546         return;
 547     }
 548     ra_data = lrm_state->remote_ra_data;
 549 
 550     if (op->type == lrmd_event_new_client) {
 551         
 552 
 553         if (ra_data->migrate_status == expect_takeover) {
 554             
 555             ra_data->migrate_status = takeover_complete;
 556 
 557         } else {
 558             crm_err("Disconnecting from Pacemaker Remote node %s due to "
 559                     "unexpected client takeover", op->remote_nodename);
 560             
 561             
 562             
 563             lrm_state_disconnect_only(lrm_state);
 564         }
 565         return;
 566     }
 567 
 568     
 569     if (op->type == lrmd_event_exec_complete) {
 570         if (ra_data->migrate_status == takeover_complete) {
 571             crm_debug("ignoring event, this connection is taken over by another node");
 572         } else {
 573             lrm_op_callback(op);
 574         }
 575         return;
 576     }
 577 
 578     if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
 579 
 580         if (ra_data->active == FALSE) {
 581             crm_debug("Disconnection from Pacemaker Remote node %s complete",
 582                       lrm_state->node_name);
 583 
 584         } else if (!remote_ra_is_in_maintenance(lrm_state)) {
 585             crm_err("Lost connection to Pacemaker Remote node %s",
 586                     lrm_state->node_name);
 587             ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
 588             ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
 589 
 590         } else {
 591             crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
 592                        lrm_state->node_name);
 593             
 594             handle_remote_ra_stop(lrm_state, NULL);
 595             remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
 596             
 597             synthesize_lrmd_success(NULL, lrm_state->node_name, "stop");
 598         }
 599         return;
 600     }
 601 
 602     if (!ra_data->cur_cmd) {
 603         crm_debug("no event to match");
 604         return;
 605     }
 606 
 607     cmd = ra_data->cur_cmd;
 608 
 609     
 610 
 611     if (op->type == lrmd_event_connect && pcmk__strcase_any_of(cmd->action, "start",
 612                                                                "migrate_from", NULL)) {
 613         if (op->connection_rc < 0) {
 614             update_remaining_timeout(cmd);
 615 
 616             if ((op->connection_rc == -ENOKEY)
 617                 || (op->connection_rc == -EKEYREJECTED)) {
 618                 
 619                 pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
 620                                  PCMK_EXEC_ERROR,
 621                                  pcmk_strerror(op->connection_rc));
 622 
 623             } else if (cmd->remaining_timeout > 3000) {
 624                 crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
 625                 g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
 626                 return;
 627 
 628             } else {
 629                 crm_trace("can't reschedule start, remaining timeout too small %d",
 630                           cmd->remaining_timeout);
 631                 pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 632                                     PCMK_EXEC_TIMEOUT,
 633                                     "%s without enough time to retry",
 634                                     pcmk_strerror(op->connection_rc));
 635             }
 636 
 637         } else {
 638             lrm_state_reset_tables(lrm_state, TRUE);
 639             pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 640             ra_data->active = TRUE;
 641         }
 642 
 643         crm_debug("Remote connection event matched %s action", cmd->action);
 644         report_remote_ra_result(cmd);
 645         cmd_handled = TRUE;
 646 
 647     } else if (op->type == lrmd_event_poke && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 648 
 649         if (cmd->monitor_timeout_id) {
 650             g_source_remove(cmd->monitor_timeout_id);
 651             cmd->monitor_timeout_id = 0;
 652         }
 653 
 654         
 655 
 656 
 657         if (!cmd->reported_success) {
 658             pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 659             report_remote_ra_result(cmd);
 660             cmd->reported_success = 1;
 661         }
 662 
 663         crm_debug("Remote poke event matched %s action", cmd->action);
 664 
 665         
 666         if (cmd->interval_ms && (cmd->cancel == FALSE)) {
 667             ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
 668             cmd->interval_id = g_timeout_add(cmd->interval_ms,
 669                                              recurring_helper, cmd);
 670             cmd = NULL;         
 671         }
 672         cmd_handled = TRUE;
 673 
 674     } else if (op->type == lrmd_event_disconnect && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 675         if (ra_data->active == TRUE && (cmd->cancel == FALSE)) {
 676             pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 677                              PCMK_EXEC_ERROR,
 678                              "Remote connection unexpectedly dropped "
 679                              "during monitor");
 680             report_remote_ra_result(cmd);
 681             crm_err("Remote connection to %s unexpectedly dropped during monitor",
 682                     lrm_state->node_name);
 683         }
 684         cmd_handled = TRUE;
 685 
 686     } else if (op->type == lrmd_event_new_client && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) {
 687 
 688         handle_remote_ra_stop(lrm_state, cmd);
 689         cmd_handled = TRUE;
 690 
 691     } else {
 692         crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
 693     }
 694 
 695     if (cmd_handled) {
 696         ra_data->cur_cmd = NULL;
 697         if (ra_data->cmds) {
 698             mainloop_set_trigger(ra_data->work);
 699         }
 700         free_cmd(cmd);
 701     }
 702 }
 703 
 704 static void
 705 handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
     
 706 {
 707     remote_ra_data_t *ra_data = NULL;
 708 
 709     CRM_ASSERT(lrm_state);
 710     ra_data = lrm_state->remote_ra_data;
 711 
 712     if (ra_data->migrate_status != takeover_complete) {
 713         
 714         g_hash_table_remove_all(lrm_state->pending_ops);
 715     } else {
 716         
 717 
 718         lrm_state_reset_tables(lrm_state, FALSE);
 719     }
 720 
 721     ra_data->active = FALSE;
 722     lrm_state_disconnect(lrm_state);
 723 
 724     if (ra_data->cmds) {
 725         g_list_free_full(ra_data->cmds, free_cmd);
 726     }
 727     if (ra_data->recurring_cmds) {
 728         g_list_free_full(ra_data->recurring_cmds, free_cmd);
 729     }
 730     ra_data->cmds = NULL;
 731     ra_data->recurring_cmds = NULL;
 732     ra_data->cur_cmd = NULL;
 733 
 734     if (cmd) {
 735         pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 736         report_remote_ra_result(cmd);
 737     }
 738 }
 739 
 740 
 741 static int
 742 handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
     
 743 {
 744     const char *server = NULL;
 745     lrmd_key_value_t *tmp = NULL;
 746     int port = 0;
 747     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 748     int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
 749     int rc = pcmk_rc_ok;
 750 
 751     for (tmp = cmd->params; tmp; tmp = tmp->next) {
 752         if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
 753                                  XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
 754             server = tmp->value;
 755         } else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
 756             port = atoi(tmp->value);
 757         } else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
 758             ra_data->controlling_guest = TRUE;
 759         }
 760     }
 761 
 762     rc = controld_connect_remote_executor(lrm_state, server, port,
 763                                           timeout_used);
 764     if (rc != pcmk_rc_ok) {
 765         pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 766                             PCMK_EXEC_ERROR,
 767                             "Could not connect to Pacemaker Remote node %s: %s",
 768                             lrm_state->node_name, pcmk_rc_str(rc));
 769     }
 770     return rc;
 771 }
 772 
 773 static gboolean
 774 handle_remote_ra_exec(gpointer user_data)
     
 775 {
 776     int rc = 0;
 777     lrm_state_t *lrm_state = user_data;
 778     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 779     remote_ra_cmd_t *cmd;
 780     GList *first = NULL;
 781 
 782     if (ra_data->cur_cmd) {
 783         
 784         return TRUE;
 785     }
 786 
 787     while (ra_data->cmds) {
 788         first = ra_data->cmds;
 789         cmd = first->data;
 790         if (cmd->delay_id) {
 791             
 792             return TRUE;
 793         }
 794 
 795         ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
 796         g_list_free_1(first);
 797 
 798         if (!strcmp(cmd->action, "start") || !strcmp(cmd->action, "migrate_from")) {
 799             ra_data->migrate_status = 0;
 800             if (handle_remote_ra_start(lrm_state, cmd,
 801                                        cmd->timeout) == pcmk_rc_ok) {
 802                 
 803                 crm_debug("Initiated async remote connection, %s action will complete after connect event",
 804                           cmd->action);
 805                 ra_data->cur_cmd = cmd;
 806                 return TRUE;
 807             }
 808             report_remote_ra_result(cmd);
 809 
 810         } else if (!strcmp(cmd->action, "monitor")) {
 811 
 812             if (lrm_state_is_connected(lrm_state) == TRUE) {
 813                 rc = lrm_state_poke_connection(lrm_state);
 814                 if (rc < 0) {
 815                     pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 816                                      PCMK_EXEC_ERROR, pcmk_strerror(rc));
 817                 }
 818             } else {
 819                 rc = -1;
 820                 pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
 821                                  PCMK_EXEC_DONE, "Remote connection inactive");
 822             }
 823 
 824             if (rc == 0) {
 825                 crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
 826                           cmd->rsc_id);
 827                 ra_data->cur_cmd = cmd;
 828                 cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
 829                 return TRUE;
 830             }
 831             report_remote_ra_result(cmd);
 832 
 833         } else if (!strcmp(cmd->action, "stop")) {
 834 
 835             if (ra_data->migrate_status == expect_takeover) {
 836                 
 837 
 838 
 839 
 840 
 841 
 842                 cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
 843                 ra_data->cur_cmd = cmd;
 844                 return TRUE;
 845             }
 846 
 847             handle_remote_ra_stop(lrm_state, cmd);
 848 
 849         } else if (!strcmp(cmd->action, "migrate_to")) {
 850             ra_data->migrate_status = expect_takeover;
 851             pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 852             report_remote_ra_result(cmd);
 853         } else if (pcmk__str_any_of(cmd->action, CRMD_ACTION_RELOAD,
 854                                     CRMD_ACTION_RELOAD_AGENT, NULL))  {
 855             
 856 
 857 
 858 
 859 
 860 
 861 
 862 
 863 
 864             pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 865             report_remote_ra_result(cmd);
 866         }
 867 
 868         free_cmd(cmd);
 869     }
 870 
 871     return TRUE;
 872 }
 873 
 874 static void
 875 remote_ra_data_init(lrm_state_t * lrm_state)
     
 876 {
 877     remote_ra_data_t *ra_data = NULL;
 878 
 879     if (lrm_state->remote_ra_data) {
 880         return;
 881     }
 882 
 883     ra_data = calloc(1, sizeof(remote_ra_data_t));
 884     ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
 885     lrm_state->remote_ra_data = ra_data;
 886 }
 887 
 888 void
 889 remote_ra_cleanup(lrm_state_t * lrm_state)
     
 890 {
 891     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
 892 
 893     if (!ra_data) {
 894         return;
 895     }
 896 
 897     if (ra_data->cmds) {
 898         g_list_free_full(ra_data->cmds, free_cmd);
 899     }
 900 
 901     if (ra_data->recurring_cmds) {
 902         g_list_free_full(ra_data->recurring_cmds, free_cmd);
 903     }
 904     mainloop_destroy_trigger(ra_data->work);
 905     free(ra_data);
 906     lrm_state->remote_ra_data = NULL;
 907 }
 908 
 909 gboolean
 910 is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
     
 911 {
 912     if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
 913         return TRUE;
 914     }
 915     if (id && lrm_state_find(id) && !pcmk__str_eq(id, fsa_our_uname, pcmk__str_casei)) {
 916         return TRUE;
 917     }
 918 
 919     return FALSE;
 920 }
 921 
 922 lrmd_rsc_info_t *
 923 remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
     
 924 {
 925     lrmd_rsc_info_t *info = NULL;
 926 
 927     if ((lrm_state_find(rsc_id))) {
 928         info = calloc(1, sizeof(lrmd_rsc_info_t));
 929 
 930         info->id = strdup(rsc_id);
 931         info->type = strdup(REMOTE_LRMD_RA);
 932         info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
 933         info->provider = strdup("pacemaker");
 934     }
 935 
 936     return info;
 937 }
 938 
 939 static gboolean
 940 is_remote_ra_supported_action(const char *action)
     
 941 {
 942     return pcmk__str_any_of(action,
 943                             CRMD_ACTION_START,
 944                             CRMD_ACTION_STOP,
 945                             CRMD_ACTION_STATUS,
 946                             CRMD_ACTION_MIGRATE,
 947                             CRMD_ACTION_MIGRATED,
 948                             CRMD_ACTION_RELOAD_AGENT,
 949                             CRMD_ACTION_RELOAD,
 950                             NULL);
 951 }
 952 
 953 static GList *
 954 fail_all_monitor_cmds(GList * list)
     
 955 {
 956     GList *rm_list = NULL;
 957     remote_ra_cmd_t *cmd = NULL;
 958     GList *gIter = NULL;
 959 
 960     for (gIter = list; gIter != NULL; gIter = gIter->next) {
 961         cmd = gIter->data;
 962         if ((cmd->interval_ms > 0) && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
 963             rm_list = g_list_append(rm_list, cmd);
 964         }
 965     }
 966 
 967     for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
 968         cmd = gIter->data;
 969 
 970         pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
 971                          PCMK_EXEC_ERROR, "Lost connection to remote executor");
 972         crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
 973                   cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
 974         report_remote_ra_result(cmd);
 975 
 976         list = g_list_remove(list, cmd);
 977         free_cmd(cmd);
 978     }
 979 
 980     
 981     g_list_free(rm_list);
 982     return list;
 983 }
 984 
 985 static GList *
 986 remove_cmd(GList * list, const char *action, guint interval_ms)
     
 987 {
 988     remote_ra_cmd_t *cmd = NULL;
 989     GList *gIter = NULL;
 990 
 991     for (gIter = list; gIter != NULL; gIter = gIter->next) {
 992         cmd = gIter->data;
 993         if ((cmd->interval_ms == interval_ms)
 994             && pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
 995             break;
 996         }
 997         cmd = NULL;
 998     }
 999     if (cmd) {
1000         list = g_list_remove(list, cmd);
1001         free_cmd(cmd);
1002     }
1003     return list;
1004 }
1005 
1006 int
1007 remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
     
1008                  const char *action, guint interval_ms)
1009 {
1010     lrm_state_t *connection_rsc = NULL;
1011     remote_ra_data_t *ra_data = NULL;
1012 
1013     connection_rsc = lrm_state_find(rsc_id);
1014     if (!connection_rsc || !connection_rsc->remote_ra_data) {
1015         return -EINVAL;
1016     }
1017 
1018     ra_data = connection_rsc->remote_ra_data;
1019     ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
1020     ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
1021                                          interval_ms);
1022     if (ra_data->cur_cmd &&
1023         (ra_data->cur_cmd->interval_ms == interval_ms) &&
1024         (pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
1025 
1026         ra_data->cur_cmd->cancel = TRUE;
1027     }
1028 
1029     return 0;
1030 }
1031 
1032 static remote_ra_cmd_t *
1033 handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
     
1034                    const char *userdata)
1035 {
1036     GList *gIter = NULL;
1037     remote_ra_cmd_t *cmd = NULL;
1038 
1039     
1040 
1041 
1042 
1043 
1044 
1045     if (interval_ms == 0) {
1046         return NULL;
1047     }
1048 
1049     if (ra_data->cur_cmd &&
1050         ra_data->cur_cmd->cancel == FALSE &&
1051         (ra_data->cur_cmd->interval_ms == interval_ms) &&
1052         pcmk__str_eq(ra_data->cur_cmd->action, "monitor", pcmk__str_casei)) {
1053 
1054         cmd = ra_data->cur_cmd;
1055         goto handle_dup;
1056     }
1057 
1058     for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
1059         cmd = gIter->data;
1060         if ((cmd->interval_ms == interval_ms)
1061             && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1062             goto handle_dup;
1063         }
1064     }
1065 
1066     for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
1067         cmd = gIter->data;
1068         if ((cmd->interval_ms == interval_ms)
1069             && pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) {
1070             goto handle_dup;
1071         }
1072     }
1073 
1074     return NULL;
1075 
1076 handle_dup:
1077 
1078     crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
1079               cmd->rsc_id, "monitor", interval_ms);
1080 
1081     
1082     if (userdata) {
1083        free(cmd->userdata);
1084        cmd->userdata = strdup(userdata);
1085     }
1086 
1087     
1088     if (cmd->reported_success) {
1089         cmd->start_time = time(NULL);
1090         cmd->call_id = generate_callid();
1091         cmd->reported_success = 0;
1092     }
1093 
1094     
1095 
1096 
1097     if (cmd->interval_id) {
1098         g_source_remove(cmd->interval_id);
1099         cmd->interval_id = 0;
1100         recurring_helper(cmd);
1101     }
1102 
1103     return cmd;
1104 }
1105 
1106 
1107 
1108 
1109 
1110 
1111 
1112 
1113 
1114 
1115 
1116 
1117 
1118 
1119 
1120 
1121 
1122 
1123 
1124 int
1125 controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
     
1126                               const char *action, const char *userdata,
1127                               guint interval_ms, int timeout_ms,
1128                               int start_delay_ms, lrmd_key_value_t *params,
1129                               int *call_id)
1130 {
1131     lrm_state_t *connection_rsc = NULL;
1132     remote_ra_cmd_t *cmd = NULL;
1133     remote_ra_data_t *ra_data = NULL;
1134 
1135     *call_id = 0;
1136 
1137     CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
1138               && (userdata != NULL) && (call_id != NULL),
1139               lrmd_key_value_freeall(params); return EINVAL);
1140 
1141     if (!is_remote_ra_supported_action(action)) {
1142         lrmd_key_value_freeall(params);
1143         return EOPNOTSUPP;
1144     }
1145 
1146     connection_rsc = lrm_state_find(rsc_id);
1147     if (connection_rsc == NULL) {
1148         lrmd_key_value_freeall(params);
1149         return ENOTCONN;
1150     }
1151 
1152     remote_ra_data_init(connection_rsc);
1153     ra_data = connection_rsc->remote_ra_data;
1154 
1155     cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
1156     if (cmd) {
1157         *call_id = cmd->call_id;
1158         lrmd_key_value_freeall(params);
1159         return pcmk_rc_ok;
1160     }
1161 
1162     cmd = calloc(1, sizeof(remote_ra_cmd_t));
1163     if (cmd == NULL) {
1164         lrmd_key_value_freeall(params);
1165         return ENOMEM;
1166     }
1167 
1168     cmd->owner = strdup(lrm_state->node_name);
1169     cmd->rsc_id = strdup(rsc_id);
1170     cmd->action = strdup(action);
1171     cmd->userdata = strdup(userdata);
1172     if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
1173         || (cmd->userdata == NULL)) {
1174         free_cmd(cmd);
1175         lrmd_key_value_freeall(params);
1176         return ENOMEM;
1177     }
1178 
1179     cmd->interval_ms = interval_ms;
1180     cmd->timeout = timeout_ms;
1181     cmd->start_delay = start_delay_ms;
1182     cmd->params = params;
1183     cmd->start_time = time(NULL);
1184 
1185     cmd->call_id = generate_callid();
1186 
1187     if (cmd->start_delay) {
1188         cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
1189     }
1190 
1191     ra_data->cmds = g_list_append(ra_data->cmds, cmd);
1192     mainloop_set_trigger(ra_data->work);
1193 
1194     *call_id = cmd->call_id;
1195     return pcmk_rc_ok;
1196 }
1197 
1198 
1199 
1200 
1201 
1202 
1203 
1204 void
1205 remote_ra_fail(const char *node_name)
     
1206 {
1207     lrm_state_t *lrm_state = lrm_state_find(node_name);
1208 
1209     if (lrm_state && lrm_state_is_connected(lrm_state)) {
1210         remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1211 
1212         crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
1213         ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
1214         ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
1215     }
1216 }
1217 
1218 
1219 
1220 
1221 
1222 
1223 
1224 
1225 
1226 
1227 
1228 
/* XPath expression matching the XML_CIB_TAG_NODE children of the
 * XML_GRAPH_TAG_DOWNED element of any XML_GRAPH_TAG_PSEUDO_EVENT element
 * whose XML_LRM_ATTR_TASK attribute is 'stonith'
 */
#define XPATH_PSEUDO_FENCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
    "[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
    "/" XML_CIB_TAG_NODE
1232 
1233 
1234 
1235 
1236 
1237 
1238 
1239 void
1240 remote_ra_process_pseudo(xmlNode *xml)
     
1241 {
1242     xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
1243 
1244     if (numXpathResults(search) == 1) {
1245         xmlNode *result = getXpathResult(search, 0);
1246 
1247         
1248 
1249 
1250 
1251 
1252 
1253 
1254 
1255 
1256 
1257 
1258 
1259 
1260 
1261         if (result) {
1262             const char *remote = ID(result);
1263 
1264             if (remote) {
1265                 remote_node_down(remote, DOWN_ERASE_LRM);
1266             }
1267         }
1268     }
1269     freeXpathObject(search);
1270 }
1271 
1272 static void
1273 remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
     
1274 {
1275     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1276     xmlNode *update, *state;
1277     int call_opt, call_id = 0;
1278     crm_node_t *node;
1279 
1280     call_opt = crmd_cib_smart_opt();
1281     node = crm_remote_peer_get(lrm_state->node_name);
1282     CRM_CHECK(node != NULL, return);
1283     update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
1284     state = create_node_state_update(node, node_update_none, update,
1285                                      __func__);
1286     crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
1287     fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, call_id, NULL);
1288     if (call_id < 0) {
1289         crm_perror(LOG_WARNING, "%s CIB node state update failed", lrm_state->node_name);
1290     } else {
1291         
1292         ra_data->is_maintenance = maintenance;
1293     }
1294     free_xml(update);
1295 }
1296 
/* XPath expression matching the XML_GRAPH_TAG_MAINTENANCE child of any
 * XML_GRAPH_TAG_PSEUDO_EVENT element whose XML_LRM_ATTR_TASK attribute is
 * CRM_OP_MAINTENANCE_NODES
 */
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
    "[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
    XML_GRAPH_TAG_MAINTENANCE
1300 
1301 
1302 
1303 
1304 
1305 
1306 
1307 void
1308 remote_ra_process_maintenance_nodes(xmlNode *xml)
     
1309 {
1310     xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
1311 
1312     if (numXpathResults(search) == 1) {
1313         xmlNode *node;
1314         int cnt = 0, cnt_remote = 0;
1315 
1316         for (node =
1317                 first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
1318             node != NULL; node = pcmk__xml_next(node)) {
1319             lrm_state_t *lrm_state = lrm_state_find(ID(node));
1320 
1321             cnt++;
1322             if (lrm_state && lrm_state->remote_ra_data &&
1323                 ((remote_ra_data_t *) lrm_state->remote_ra_data)->active) {
1324                 int is_maint;
1325 
1326                 cnt_remote++;
1327                 pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
1328                                    &is_maint, 0);
1329                 remote_ra_maintenance(lrm_state, is_maint);
1330             }
1331         }
1332         crm_trace("Action holds %d nodes (%d remotes found) "
1333                     "adjusting maintenance-mode", cnt, cnt_remote);
1334     }
1335     freeXpathObject(search);
1336 }
1337 
1338 gboolean
1339 remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
     
1340 {
1341     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1342 
1343     return ra_data->is_maintenance;
1344 }
1345 
1346 gboolean
1347 remote_ra_controlling_guest(lrm_state_t * lrm_state)
     
1348 {
1349     remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
1350 
1351     return ra_data->controlling_guest;
1352 }