root/daemons/controld/controld_schedulerd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. controld_shutdown_schedulerd_ipc
  2. save_cib_contents
  3. handle_disconnect
  4. handle_reply
  5. scheduler_event_callback
  6. new_schedulerd_ipc_connection
  7. do_pe_control
  8. controld_sched_timeout
  9. controld_stop_sched_timer
  10. controld_expect_sched_reply
  11. controld_free_sched_timer
  12. do_pe_invoke
  13. force_local_option
  14. do_pe_invoke_callback

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <unistd.h>  /* pid_t, sleep, ssize_t */
  13 
  14 #include <crm/cib.h>
  15 #include <crm/cluster.h>
  16 #include <crm/common/xml.h>
  17 #include <crm/crm.h>
  18 #include <crm/common/xml_internal.h>
  19 #include <crm/common/ipc.h>
  20 #include <crm/common/ipc_schedulerd.h>
  21 
  22 #include <pacemaker-controld.h>
  23 
  24 static void handle_disconnect(void);
  25 
  26 static pcmk_ipc_api_t *schedulerd_api = NULL;
  27 
  28 /*!
  29  * \internal
  30  * \brief Close any scheduler connection and free associated memory
  31  */
  32 void
  33 controld_shutdown_schedulerd_ipc(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  34 {
  35     controld_clear_fsa_input_flags(R_PE_REQUIRED);
  36     pcmk_disconnect_ipc(schedulerd_api);
  37     handle_disconnect();
  38 
  39     pcmk_free_ipc_api(schedulerd_api);
  40     schedulerd_api = NULL;
  41 }
  42 
  43 /*!
  44  * \internal
  45  * \brief Save CIB query result to file, raising FSA error
  46  *
  47  * \param[in] msg        Ignored
  48  * \param[in] call_id    Call ID of CIB query
  49  * \param[in] rc         Return code of CIB query
  50  * \param[in] output     Result of CIB query
  51  * \param[in] user_data  Unique identifier for filename
  52  *
  53  * \note This is intended to be called after a scheduler connection fails.
  54  */
  55 static void
  56 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
     /* [previous][next][first][last][top][bottom][index][help] */
  57                   void *user_data)
  58 {
  59     const char *id = user_data;
  60 
  61     register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
  62     CRM_CHECK(id != NULL, return);
  63 
  64     if (rc == pcmk_ok) {
  65         char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id);
  66 
  67         if (pcmk__xml_write_file(output, filename, true, NULL) != pcmk_rc_ok) {
  68             crm_err("Could not save Cluster Information Base to %s after scheduler crash",
  69                     filename);
  70         } else {
  71             crm_notice("Saved Cluster Information Base to %s after scheduler crash",
  72                        filename);
  73         }
  74         free(filename);
  75     }
  76 }
  77 
  78 /*!
  79  * \internal
  80  * \brief Respond to scheduler connection failure
  81  */
  82 static void
  83 handle_disconnect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  84 {
  85     // If we aren't connected to the scheduler, we can't expect a reply
  86     controld_expect_sched_reply(NULL);
  87 
  88     if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
  89         int rc = pcmk_ok;
  90         char *uuid_str = crm_generate_uuid();
  91 
  92         crm_crit("Lost connection to the scheduler "
  93                  CRM_XS " CIB will be saved to " PE_STATE_DIR "/pe-core-%s.bz2",
  94                  uuid_str);
  95 
  96         /*
  97          * The scheduler died...
  98          *
  99          * Save the current CIB so that we have a chance of
 100          * figuring out what killed it.
 101          *
 102          * Delay raising the I_ERROR until the query below completes or
 103          * 5s is up, whichever comes first.
 104          *
 105          */
 106         rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
 107                                                     NULL, NULL,
 108                                                     cib_scope_local);
 109         fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
 110     }
 111 
 112     controld_clear_fsa_input_flags(R_PE_CONNECTED);
 113     controld_trigger_fsa();
 114     return;
 115 }
 116 
 117 static void
 118 handle_reply(pcmk_schedulerd_api_reply_t *reply)
     /* [previous][next][first][last][top][bottom][index][help] */
 119 {
 120     const char *msg_ref = NULL;
 121 
 122     if (!AM_I_DC) {
 123         return;
 124     }
 125 
 126     msg_ref = reply->data.graph.reference;
 127 
 128     if (msg_ref == NULL) {
 129         crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
 130 
 131     } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
 132                             pcmk__str_none)) {
 133         ha_msg_input_t fsa_input;
 134         xmlNode *crm_data_node;
 135 
 136         controld_stop_sched_timer();
 137 
 138         /* do_te_invoke (which will eventually process the fsa_input we are constructing
 139          * here) requires that fsa_input.xml be non-NULL.  That will only happen if
 140          * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
 141          * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
 142          * gave us the values we need, we just need to put them into XML.
 143          *
 144          * The name of the top level element here is irrelevant.  Nothing checks it.
 145          */
 146         fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
 147         crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
 148         crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
 149                     reply->data.graph.input);
 150 
 151         crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
 152         pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
 153         register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
 154 
 155         free_xml(fsa_input.msg);
 156 
 157     } else {
 158         crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
 159     }
 160 }
 161 
 162 static void
 163 scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
     /* [previous][next][first][last][top][bottom][index][help] */
 164                          crm_exit_t status, void *event_data, void *user_data)
 165 {
 166     pcmk_schedulerd_api_reply_t *reply = event_data;
 167 
 168     switch (event_type) {
 169         case pcmk_ipc_event_disconnect:
 170             handle_disconnect();
 171             break;
 172 
 173         case pcmk_ipc_event_reply:
 174             handle_reply(reply);
 175             break;
 176 
 177         default:
 178             break;
 179     }
 180 }
 181 
 182 static bool
 183 new_schedulerd_ipc_connection(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 184 {
 185     int rc;
 186 
 187     controld_set_fsa_input_flags(R_PE_REQUIRED);
 188 
 189     if (schedulerd_api == NULL) {
 190         rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
 191 
 192         if (rc != pcmk_rc_ok) {
 193             crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
 194             return false;
 195         }
 196     }
 197 
 198     pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
 199 
 200     rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3);
 201     if (rc != pcmk_rc_ok) {
 202         crm_err("Error connecting to %s: %s",
 203                 pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
 204         return false;
 205     }
 206 
 207     controld_set_fsa_input_flags(R_PE_CONNECTED);
 208     return true;
 209 }
 210 
 211 static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
 212                                   xmlNode *output, void *user_data);
 213 
 214 /*       A_PE_START, A_PE_STOP, O_PE_RESTART    */
 215 void
 216 do_pe_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 217               enum crmd_fsa_cause cause,
 218               enum crmd_fsa_state cur_state,
 219               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 220 {
 221     if (pcmk_is_set(action, A_PE_STOP)) {
 222         controld_clear_fsa_input_flags(R_PE_REQUIRED);
 223         pcmk_disconnect_ipc(schedulerd_api);
 224         handle_disconnect();
 225     }
 226     if (pcmk_is_set(action, A_PE_START)
 227         && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
 228 
 229         if (cur_state == S_STOPPING) {
 230             crm_info("Ignoring request to connect to scheduler while shutting down");
 231 
 232         } else if (!new_schedulerd_ipc_connection()) {
 233             crm_warn("Could not connect to scheduler");
 234             register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 235         }
 236     }
 237 }
 238 
 239 static int fsa_pe_query = 0;
 240 static mainloop_timer_t *controld_sched_timer = NULL;
 241 
 242 // @TODO Make this a configurable cluster option if there's demand for it
 243 #define SCHED_TIMEOUT_MS (120000)
 244 
 245 /*!
 246  * \internal
 247  * \brief Handle a timeout waiting for scheduler reply
 248  *
 249  * \param[in] user_data  Ignored
 250  *
 251  * \return FALSE (indicating that timer should not be restarted)
 252  */
 253 static gboolean
 254 controld_sched_timeout(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 255 {
 256     if (AM_I_DC) {
 257         /* If this node is the DC but can't communicate with the scheduler, just
 258          * exit (and likely get fenced) so this node doesn't interfere with any
 259          * further DC elections.
 260          *
 261          * @TODO We could try something less drastic first, like disconnecting
 262          * and reconnecting to the scheduler, but something is likely going
 263          * seriously wrong, so perhaps it's better to just fail as quickly as
 264          * possible.
 265          */
 266         crmd_exit(CRM_EX_FATAL);
 267     }
 268     return FALSE;
 269 }
 270 
 271 void
 272 controld_stop_sched_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 273 {
 274     if ((controld_sched_timer != NULL)
 275         && (controld_globals.fsa_pe_ref != NULL)) {
 276         crm_trace("Stopping timer for scheduler reply %s",
 277                   controld_globals.fsa_pe_ref);
 278     }
 279     mainloop_timer_stop(controld_sched_timer);
 280 }
 281 
 282 /*!
 283  * \internal
 284  * \brief Set the scheduler request currently being waited on
 285  *
 286  * \param[in] ref  Request to expect reply to (or NULL for none)
 287  *
 288  * \note This function takes ownership of \p ref.
 289  */
 290 void
 291 controld_expect_sched_reply(char *ref)
     /* [previous][next][first][last][top][bottom][index][help] */
 292 {
 293     if (ref) {
 294         if (controld_sched_timer == NULL) {
 295             controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
 296                                                       SCHED_TIMEOUT_MS, FALSE,
 297                                                       controld_sched_timeout,
 298                                                       NULL);
 299         }
 300         mainloop_timer_start(controld_sched_timer);
 301     } else {
 302         controld_stop_sched_timer();
 303     }
 304     free(controld_globals.fsa_pe_ref);
 305     controld_globals.fsa_pe_ref = ref;
 306 }
 307 
 308 /*!
 309  * \internal
 310  * \brief Free the scheduler reply timer
 311  */
 312 void
 313 controld_free_sched_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 314 {
 315     if (controld_sched_timer != NULL) {
 316         mainloop_timer_del(controld_sched_timer);
 317         controld_sched_timer = NULL;
 318     }
 319 }
 320 
 321 /*       A_PE_INVOKE    */
 322 void
 323 do_pe_invoke(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 324              enum crmd_fsa_cause cause,
 325              enum crmd_fsa_state cur_state,
 326              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 327 {
 328     cib_t *cib_conn = controld_globals.cib_conn;
 329 
 330     if (AM_I_DC == FALSE) {
 331         crm_err("Not invoking scheduler because not DC: %s",
 332                 fsa_action2string(action));
 333         return;
 334     }
 335 
 336     if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
 337         if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 338             crm_err("Cannot shut down gracefully without the scheduler");
 339             register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
 340 
 341         } else {
 342             crm_info("Waiting for the scheduler to connect");
 343             crmd_fsa_stall(FALSE);
 344             controld_set_fsa_action_flags(A_PE_START);
 345             controld_trigger_fsa();
 346         }
 347         return;
 348     }
 349 
 350     if (cur_state != S_POLICY_ENGINE) {
 351         crm_notice("Not invoking scheduler because in state %s",
 352                    fsa_state2string(cur_state));
 353         return;
 354     }
 355     if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
 356         crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
 357 
 358         /* start the join from scratch */
 359         register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
 360         return;
 361     }
 362 
 363     fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local);
 364 
 365     crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
 366               fsa_state2string(controld_globals.fsa_state));
 367 
 368     controld_expect_sched_reply(NULL);
 369     fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
 370 }
 371 
 372 static void
 373 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
     /* [previous][next][first][last][top][bottom][index][help] */
 374 {
 375     int max = 0;
 376     int lpc = 0;
 377     const char *xpath_base = NULL;
 378     char *xpath_string = NULL;
 379     xmlXPathObjectPtr xpathObj = NULL;
 380 
 381     xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
 382     if (xpath_base == NULL) {
 383         crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
 384         return;
 385     }
 386 
 387     xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
 388                                      xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET,
 389                                      attr_name);
 390     xpathObj = xpath_search(xml, xpath_string);
 391     max = numXpathResults(xpathObj);
 392     free(xpath_string);
 393 
 394     for (lpc = 0; lpc < max; lpc++) {
 395         xmlNode *match = getXpathResult(xpathObj, lpc);
 396         crm_trace("Forcing %s/%s = %s",
 397                   pcmk__xe_id(match), attr_name, attr_value);
 398         crm_xml_add(match, PCMK_XA_VALUE, attr_value);
 399     }
 400 
 401     if(max == 0) {
 402         xmlNode *configuration = NULL;
 403         xmlNode *crm_config = NULL;
 404         xmlNode *cluster_property_set = NULL;
 405 
 406         crm_trace("Creating %s-%s for %s=%s",
 407                   PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name,
 408                   attr_value);
 409 
 410         configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
 411                                              NULL);
 412         if (configuration == NULL) {
 413             configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
 414         }
 415 
 416         crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
 417                                           NULL, NULL);
 418         if (crm_config == NULL) {
 419             crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
 420         }
 421 
 422         cluster_property_set =
 423             pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
 424                                  NULL);
 425         if (cluster_property_set == NULL) {
 426             cluster_property_set =
 427                 pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
 428             crm_xml_add(cluster_property_set, PCMK_XA_ID,
 429                         PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
 430         }
 431 
 432         xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);
 433 
 434         crm_xml_set_id(xml, "%s-%s",
 435                        PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name);
 436         crm_xml_add(xml, PCMK_XA_NAME, attr_name);
 437         crm_xml_add(xml, PCMK_XA_VALUE, attr_value);
 438     }
 439     freeXpathObject(xpathObj);
 440 }
 441 
 442 static void
 443 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 444 {
 445     char *ref = NULL;
 446     pid_t watchdog = pcmk__locate_sbd();
 447 
 448     if (rc != pcmk_ok) {
 449         crm_err("Could not retrieve the Cluster Information Base: %s "
 450                 CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
 451         register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
 452         return;
 453 
 454     } else if (call_id != fsa_pe_query) {
 455         crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
 456         return;
 457 
 458     } else if (!AM_I_DC
 459                || !pcmk_is_set(controld_globals.fsa_input_register,
 460                                R_PE_CONNECTED)) {
 461         crm_debug("No need to invoke the scheduler anymore");
 462         return;
 463 
 464     } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
 465         crm_debug("Discarding scheduler request in state: %s",
 466                   fsa_state2string(controld_globals.fsa_state));
 467         return;
 468 
 469     /* this callback counts as 1 */
 470     } else if (num_cib_op_callbacks() > 1) {
 471         crm_debug("Re-asking for the CIB: %d other peer updates still pending",
 472                   (num_cib_op_callbacks() - 1));
 473         sleep(1);
 474         controld_set_fsa_action_flags(A_PE_INVOKE);
 475         controld_trigger_fsa();
 476         return;
 477     }
 478 
 479     CRM_LOG_ASSERT(output != NULL);
 480 
 481     /* Refresh the remote node cache and the known node cache when the
 482      * scheduler is invoked */
 483     pcmk__refresh_node_caches_from_cib(output);
 484 
 485     crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
 486     pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM,
 487                            pcmk_is_set(controld_globals.flags,
 488                                        controld_has_quorum));
 489 
 490     force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
 491 
 492     if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
 493         && !crm_have_quorum) {
 494         crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
 495     }
 496 
 497     rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref);
 498     if (rc != pcmk_rc_ok) {
 499         free(ref);
 500         crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d",
 501                 pcmk_rc_str(rc), rc);
 502         register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
 503     } else {
 504         pcmk__assert(ref != NULL);
 505         controld_expect_sched_reply(ref);
 506         crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
 507                   "quorate=%s",
 508                   fsa_pe_query, controld_globals.fsa_pe_ref, crm_peer_seq,
 509                   pcmk__flag_text(controld_globals.flags, controld_has_quorum));
 510     }
 511 }

/* [previous][next][first][last][top][bottom][index][help] */