root/daemons/controld/controld_schedulerd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions:
  1. controld_shutdown_schedulerd_ipc
  2. save_cib_contents
  3. handle_disconnect
  4. handle_reply
  5. scheduler_event_callback
  6. new_schedulerd_ipc_connection
  7. do_pe_control
  8. controld_sched_timeout
  9. controld_stop_sched_timer
  10. controld_expect_sched_reply
  11. controld_free_sched_timer
  12. do_pe_invoke
  13. force_local_option
  14. do_pe_invoke_callback

   1 /*
   2  * Copyright 2004-2025 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <unistd.h>  /* pid_t, sleep, ssize_t */
  13 
  14 #include <crm/cib.h>
  15 #include <crm/cluster.h>
  16 #include <crm/common/xml.h>
  17 #include <crm/crm.h>
  18 #include <crm/common/xml_internal.h>
  19 #include <crm/common/ipc.h>
  20 #include <crm/common/ipc_schedulerd.h>
  21 
  22 #include <libxml/xpath.h>               // xmlXPathObject, etc.
  23 
  24 #include <pacemaker-controld.h>
  25 
  26 static void handle_disconnect(void);
  27 
  28 static pcmk_ipc_api_t *schedulerd_api = NULL;
  29 
  30 /*!
  31  * \internal
  32  * \brief Close any scheduler connection and free associated memory
  33  */
  34 void
  35 controld_shutdown_schedulerd_ipc(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  36 {
  37     controld_clear_fsa_input_flags(R_PE_REQUIRED);
  38     pcmk_disconnect_ipc(schedulerd_api);
  39     handle_disconnect();
  40 
  41     pcmk_free_ipc_api(schedulerd_api);
  42     schedulerd_api = NULL;
  43 }
  44 
  45 /*!
  46  * \internal
  47  * \brief Save CIB query result to file, raising FSA error
  48  *
  49  * \param[in] msg        Ignored
  50  * \param[in] call_id    Call ID of CIB query
  51  * \param[in] rc         Return code of CIB query
  52  * \param[in] output     Result of CIB query
  53  * \param[in] user_data  Unique identifier for filename
  54  *
  55  * \note This is intended to be called after a scheduler connection fails.
  56  */
  57 static void
  58 save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output,
     /* [previous][next][first][last][top][bottom][index][help] */
  59                   void *user_data)
  60 {
  61     const char *id = user_data;
  62 
  63     register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
  64     CRM_CHECK(id != NULL, return);
  65 
  66     if (rc == pcmk_ok) {
  67         char *filename = crm_strdup_printf(PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2", id);
  68 
  69         if (pcmk__xml_write_file(output, filename, true) != pcmk_rc_ok) {
  70             crm_err("Could not save Cluster Information Base to %s after scheduler crash",
  71                     filename);
  72         } else {
  73             crm_notice("Saved Cluster Information Base to %s after scheduler crash",
  74                        filename);
  75         }
  76         free(filename);
  77     }
  78 }
  79 
  80 /*!
  81  * \internal
  82  * \brief Respond to scheduler connection failure
  83  */
  84 static void
  85 handle_disconnect(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  86 {
  87     // If we aren't connected to the scheduler, we can't expect a reply
  88     controld_expect_sched_reply(NULL);
  89 
  90     if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) {
  91         int rc = pcmk_ok;
  92         char *uuid_str = crm_generate_uuid();
  93 
  94         crm_crit("Lost connection to the scheduler "
  95                  QB_XS " CIB will be saved to " PCMK_SCHEDULER_INPUT_DIR "/pe-core-%s.bz2",
  96                  uuid_str);
  97 
  98         /*
  99          * The scheduler died...
 100          *
 101          * Save the current CIB so that we have a chance of
 102          * figuring out what killed it.
 103          *
 104          * Delay raising the I_ERROR until the query below completes or
 105          * 5s is up, whichever comes first.
 106          *
 107          */
 108         rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn,
 109                                                     NULL, NULL, cib_none);
 110         fsa_register_cib_callback(rc, uuid_str, save_cib_contents);
 111     }
 112 
 113     controld_clear_fsa_input_flags(R_PE_CONNECTED);
 114     controld_trigger_fsa();
 115     return;
 116 }
 117 
 118 static void
 119 handle_reply(pcmk_schedulerd_api_reply_t *reply)
     /* [previous][next][first][last][top][bottom][index][help] */
 120 {
 121     const char *msg_ref = NULL;
 122 
 123     if (!AM_I_DC) {
 124         return;
 125     }
 126 
 127     msg_ref = reply->data.graph.reference;
 128 
 129     if (msg_ref == NULL) {
 130         crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC);
 131 
 132     } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
 133                             pcmk__str_none)) {
 134         ha_msg_input_t fsa_input;
 135         xmlNode *crm_data_node;
 136 
 137         controld_stop_sched_timer();
 138 
 139         /* do_te_invoke (which will eventually process the fsa_input we are constructing
 140          * here) requires that fsa_input.xml be non-NULL.  That will only happen if
 141          * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the
 142          * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function
 143          * gave us the values we need, we just need to put them into XML.
 144          *
 145          * The name of the top level element here is irrelevant.  Nothing checks it.
 146          */
 147         fsa_input.msg = pcmk__xe_create(NULL, "dummy-reply");
 148         crm_xml_add(fsa_input.msg, PCMK_XA_REFERENCE, msg_ref);
 149         crm_xml_add(fsa_input.msg, PCMK__XA_CRM_TGRAPH_IN,
 150                     reply->data.graph.input);
 151 
 152         crm_data_node = pcmk__xe_create(fsa_input.msg, PCMK__XE_CRM_XML);
 153         pcmk__xml_copy(crm_data_node, reply->data.graph.tgraph);
 154         register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
 155 
 156         pcmk__xml_free(fsa_input.msg);
 157 
 158     } else {
 159         crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref);
 160     }
 161 }
 162 
 163 static void
 164 scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
     /* [previous][next][first][last][top][bottom][index][help] */
 165                          crm_exit_t status, void *event_data, void *user_data)
 166 {
 167     pcmk_schedulerd_api_reply_t *reply = event_data;
 168 
 169     switch (event_type) {
 170         case pcmk_ipc_event_disconnect:
 171             handle_disconnect();
 172             break;
 173 
 174         case pcmk_ipc_event_reply:
 175             handle_reply(reply);
 176             break;
 177 
 178         default:
 179             break;
 180     }
 181 }
 182 
 183 static bool
 184 new_schedulerd_ipc_connection(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 185 {
 186     int rc;
 187 
 188     controld_set_fsa_input_flags(R_PE_REQUIRED);
 189 
 190     if (schedulerd_api == NULL) {
 191         rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd);
 192 
 193         if (rc != pcmk_rc_ok) {
 194             crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc));
 195             return false;
 196         }
 197     }
 198 
 199     pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
 200 
 201     rc = pcmk__connect_ipc_retry_conrefused(schedulerd_api, pcmk_ipc_dispatch_main, 3);
 202     if (rc != pcmk_rc_ok) {
 203         crm_err("Error connecting to %s: %s",
 204                 pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
 205         return false;
 206     }
 207 
 208     controld_set_fsa_input_flags(R_PE_CONNECTED);
 209     return true;
 210 }
 211 
 212 static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc,
 213                                   xmlNode *output, void *user_data);
 214 
 215 /*       A_PE_START, A_PE_STOP, O_PE_RESTART    */
 216 void
 217 do_pe_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 218               enum crmd_fsa_cause cause,
 219               enum crmd_fsa_state cur_state,
 220               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 221 {
 222     if (pcmk_is_set(action, A_PE_STOP)) {
 223         controld_clear_fsa_input_flags(R_PE_REQUIRED);
 224         pcmk_disconnect_ipc(schedulerd_api);
 225         handle_disconnect();
 226     }
 227     if (pcmk_is_set(action, A_PE_START)
 228         && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
 229 
 230         if (cur_state == S_STOPPING) {
 231             crm_info("Ignoring request to connect to scheduler while shutting down");
 232 
 233         } else if (!new_schedulerd_ipc_connection()) {
 234             crm_warn("Could not connect to scheduler");
 235             register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 236         }
 237     }
 238 }
 239 
 240 static int fsa_pe_query = 0;
 241 static mainloop_timer_t *controld_sched_timer = NULL;
 242 
 243 // @TODO Make this a configurable cluster option if there's demand for it
 244 #define SCHED_TIMEOUT_MS (120000)
 245 
 246 /*!
 247  * \internal
 248  * \brief Handle a timeout waiting for scheduler reply
 249  *
 250  * \param[in] user_data  Ignored
 251  *
 252  * \return FALSE (indicating that timer should not be restarted)
 253  */
 254 static gboolean
 255 controld_sched_timeout(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 256 {
 257     if (AM_I_DC) {
 258         /* If this node is the DC but can't communicate with the scheduler, just
 259          * exit (and likely get fenced) so this node doesn't interfere with any
 260          * further DC elections.
 261          *
 262          * @TODO We could try something less drastic first, like disconnecting
 263          * and reconnecting to the scheduler, but something is likely going
 264          * seriously wrong, so perhaps it's better to just fail as quickly as
 265          * possible.
 266          */
 267         crmd_exit(CRM_EX_FATAL);
 268     }
 269     return FALSE;
 270 }
 271 
 272 void
 273 controld_stop_sched_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 274 {
 275     if ((controld_sched_timer != NULL)
 276         && (controld_globals.fsa_pe_ref != NULL)) {
 277         crm_trace("Stopping timer for scheduler reply %s",
 278                   controld_globals.fsa_pe_ref);
 279     }
 280     mainloop_timer_stop(controld_sched_timer);
 281 }
 282 
 283 /*!
 284  * \internal
 285  * \brief Set the scheduler request currently being waited on
 286  *
 287  * \param[in] ref  Request to expect reply to (or NULL for none)
 288  *
 289  * \note This function takes ownership of \p ref.
 290  */
 291 void
 292 controld_expect_sched_reply(char *ref)
     /* [previous][next][first][last][top][bottom][index][help] */
 293 {
 294     if (ref) {
 295         if (controld_sched_timer == NULL) {
 296             controld_sched_timer = mainloop_timer_add("scheduler_reply_timer",
 297                                                       SCHED_TIMEOUT_MS, FALSE,
 298                                                       controld_sched_timeout,
 299                                                       NULL);
 300         }
 301         mainloop_timer_start(controld_sched_timer);
 302     } else {
 303         controld_stop_sched_timer();
 304     }
 305     free(controld_globals.fsa_pe_ref);
 306     controld_globals.fsa_pe_ref = ref;
 307 }
 308 
 309 /*!
 310  * \internal
 311  * \brief Free the scheduler reply timer
 312  */
 313 void
 314 controld_free_sched_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 315 {
 316     if (controld_sched_timer != NULL) {
 317         mainloop_timer_del(controld_sched_timer);
 318         controld_sched_timer = NULL;
 319     }
 320 }
 321 
 322 /*       A_PE_INVOKE    */
 323 void
 324 do_pe_invoke(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 325              enum crmd_fsa_cause cause,
 326              enum crmd_fsa_state cur_state,
 327              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 328 {
 329     cib_t *cib_conn = controld_globals.cib_conn;
 330 
 331     if (AM_I_DC == FALSE) {
 332         crm_err("Not invoking scheduler because not DC: %s",
 333                 fsa_action2string(action));
 334         return;
 335     }
 336 
 337     if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) {
 338         if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 339             crm_err("Cannot shut down gracefully without the scheduler");
 340             register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL);
 341 
 342         } else {
 343             crm_info("Waiting for the scheduler to connect");
 344             crmd_fsa_stall(FALSE);
 345             controld_set_fsa_action_flags(A_PE_START);
 346             controld_trigger_fsa();
 347         }
 348         return;
 349     }
 350 
 351     if (cur_state != S_POLICY_ENGINE) {
 352         crm_notice("Not invoking scheduler because in state %s",
 353                    fsa_state2string(cur_state));
 354         return;
 355     }
 356     if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
 357         crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!");
 358 
 359         /* start the join from scratch */
 360         register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL);
 361         return;
 362     }
 363 
 364     fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_none);
 365 
 366     crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query,
 367               fsa_state2string(controld_globals.fsa_state));
 368 
 369     controld_expect_sched_reply(NULL);
 370     fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback);
 371 }
 372 
 373 static void
 374 force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value)
     /* [previous][next][first][last][top][bottom][index][help] */
 375 {
 376     int max = 0;
 377     int lpc = 0;
 378     const char *xpath_base = NULL;
 379     char *xpath_string = NULL;
 380     xmlXPathObject *xpathObj = NULL;
 381 
 382     xpath_base = pcmk_cib_xpath_for(PCMK_XE_CRM_CONFIG);
 383     if (xpath_base == NULL) {
 384         crm_err(PCMK_XE_CRM_CONFIG " CIB element not known (bug?)");
 385         return;
 386     }
 387 
 388     xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']",
 389                                      xpath_base, PCMK_XE_CLUSTER_PROPERTY_SET,
 390                                      attr_name);
 391     xpathObj = pcmk__xpath_search(xml->doc, xpath_string);
 392     max = pcmk__xpath_num_results(xpathObj);
 393     free(xpath_string);
 394 
 395     for (lpc = 0; lpc < max; lpc++) {
 396         xmlNode *match = pcmk__xpath_result(xpathObj, lpc);
 397 
 398         if (match == NULL) {
 399             continue;
 400         }
 401         crm_trace("Forcing %s/%s = %s",
 402                   pcmk__xe_id(match), attr_name, attr_value);
 403         crm_xml_add(match, PCMK_XA_VALUE, attr_value);
 404     }
 405 
 406     if(max == 0) {
 407         xmlNode *configuration = NULL;
 408         xmlNode *crm_config = NULL;
 409         xmlNode *cluster_property_set = NULL;
 410 
 411         crm_trace("Creating %s-%s for %s=%s",
 412                   PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name, attr_name,
 413                   attr_value);
 414 
 415         configuration = pcmk__xe_first_child(xml, PCMK_XE_CONFIGURATION, NULL,
 416                                              NULL);
 417         if (configuration == NULL) {
 418             configuration = pcmk__xe_create(xml, PCMK_XE_CONFIGURATION);
 419         }
 420 
 421         crm_config = pcmk__xe_first_child(configuration, PCMK_XE_CRM_CONFIG,
 422                                           NULL, NULL);
 423         if (crm_config == NULL) {
 424             crm_config = pcmk__xe_create(configuration, PCMK_XE_CRM_CONFIG);
 425         }
 426 
 427         cluster_property_set =
 428             pcmk__xe_first_child(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
 429                                  NULL);
 430         if (cluster_property_set == NULL) {
 431             cluster_property_set =
 432                 pcmk__xe_create(crm_config, PCMK_XE_CLUSTER_PROPERTY_SET);
 433             crm_xml_add(cluster_property_set, PCMK_XA_ID,
 434                         PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS);
 435         }
 436 
 437         xml = pcmk__xe_create(cluster_property_set, PCMK_XE_NVPAIR);
 438 
 439         pcmk__xe_set_id(xml, "%s-%s",
 440                         PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, attr_name);
 441         crm_xml_add(xml, PCMK_XA_NAME, attr_name);
 442         crm_xml_add(xml, PCMK_XA_VALUE, attr_value);
 443     }
 444     xmlXPathFreeObject(xpathObj);
 445 }
 446 
 447 static void
 448 do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 449 {
 450     char *ref = NULL;
 451     pid_t watchdog = pcmk__locate_sbd();
 452 
 453     if (rc != pcmk_ok) {
 454         crm_err("Could not retrieve the Cluster Information Base: %s "
 455                 QB_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id);
 456         register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
 457         return;
 458 
 459     } else if (call_id != fsa_pe_query) {
 460         crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query);
 461         return;
 462 
 463     } else if (!AM_I_DC
 464                || !pcmk_is_set(controld_globals.fsa_input_register,
 465                                R_PE_CONNECTED)) {
 466         crm_debug("No need to invoke the scheduler anymore");
 467         return;
 468 
 469     } else if (controld_globals.fsa_state != S_POLICY_ENGINE) {
 470         crm_debug("Discarding scheduler request in state: %s",
 471                   fsa_state2string(controld_globals.fsa_state));
 472         return;
 473 
 474     /* this callback counts as 1 */
 475     } else if (num_cib_op_callbacks() > 1) {
 476         crm_debug("Re-asking for the CIB: %d other peer updates still pending",
 477                   (num_cib_op_callbacks() - 1));
 478         sleep(1);
 479         controld_set_fsa_action_flags(A_PE_INVOKE);
 480         controld_trigger_fsa();
 481         return;
 482     }
 483 
 484     CRM_LOG_ASSERT(output != NULL);
 485 
 486     /* Refresh the remote node cache and the known node cache when the
 487      * scheduler is invoked */
 488     pcmk__refresh_node_caches_from_cib(output);
 489 
 490     crm_xml_add(output, PCMK_XA_DC_UUID, controld_globals.our_uuid);
 491     pcmk__xe_set_bool_attr(output, PCMK_XA_HAVE_QUORUM,
 492                            pcmk_is_set(controld_globals.flags,
 493                                        controld_has_quorum));
 494 
 495     force_local_option(output, PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog));
 496 
 497     if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum)
 498         && !pcmk__cluster_has_quorum()) {
 499 
 500         crm_xml_add_int(output, PCMK_XA_NO_QUORUM_PANIC, 1);
 501     }
 502 
 503     rc = pcmk_schedulerd_api_graph(schedulerd_api, output, &ref);
 504     if (rc != pcmk_rc_ok) {
 505         free(ref);
 506         crm_err("Could not contact the scheduler: %s " QB_XS " rc=%d",
 507                 pcmk_rc_str(rc), rc);
 508         register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
 509     } else {
 510         pcmk__assert(ref != NULL);
 511         controld_expect_sched_reply(ref);
 512         crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, "
 513                   "quorate=%s",
 514                   fsa_pe_query, controld_globals.fsa_pe_ref,
 515                   controld_globals.peer_seq,
 516                   pcmk__flag_text(controld_globals.flags, controld_has_quorum));
 517     }
 518 }

/* [previous][next][first][last][top][bottom][index][help] */