root/lib/pengine/common.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. check_placement_strategy
  2. pe_metadata
  3. verify_pe_options
  4. pe_pref
  5. fail2text
  6. text2task
  7. task2text
  8. role2text
  9. text2role
  10. add_hash_param
  11. pe_node_attribute_calculated
  12. pe_node_attribute_raw

   1 /*
   2  * Copyright 2004-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/crm.h>
  12 #include <crm/msg_xml.h>
  13 #include <crm/common/xml.h>
  14 #include <crm/common/util.h>
  15 
  16 #include <glib.h>
  17 
  18 #include <crm/pengine/internal.h>
  19 
/* Global flags recording whether any error/warning was encountered while
 * processing scheduler input; presumably set by logging macros elsewhere and
 * read by callers to adjust exit status -- TODO confirm against users */
gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
  22 
  23 static bool
  24 check_placement_strategy(const char *value)
     /* [previous][next][first][last][top][bottom][index][help] */
  25 {
  26     return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
  27                            "balanced", NULL);
  28 }
  29 
  30 static pcmk__cluster_option_t pe_opts[] = {
  31     /* name, old name, type, allowed values,
  32      * default value, validator,
  33      * short description,
  34      * long description
  35      */
  36     {
  37         "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide",
  38         "stop", pcmk__valid_quorum,
  39         N_("What to do when the cluster does not have quorum"),
  40         NULL
  41     },
  42     {
  43         "symmetric-cluster", NULL, "boolean", NULL,
  44         "true", pcmk__valid_boolean,
  45         N_("Whether resources can run on any node by default"),
  46         NULL
  47     },
  48     {
  49         "maintenance-mode", NULL, "boolean", NULL,
  50         "false", pcmk__valid_boolean,
  51         N_("Whether the cluster should refrain from monitoring, starting, "
  52             "and stopping resources"),
  53         NULL
  54     },
  55     {
  56         "start-failure-is-fatal", NULL, "boolean", NULL,
  57         "true", pcmk__valid_boolean,
  58         N_("Whether a start failure should prevent a resource from being "
  59             "recovered on the same node"),
  60         N_("When true, the cluster will immediately ban a resource from a node "
  61             "if it fails to start there. When false, the cluster will instead "
  62             "check the resource's fail count against its migration-threshold.")
  63     },
  64     {
  65         "enable-startup-probes", NULL, "boolean", NULL,
  66         "true", pcmk__valid_boolean,
  67         N_("Whether the cluster should check for active resources during start-up"),
  68         NULL
  69     },
  70     {
  71         XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
  72         "false", pcmk__valid_boolean,
  73         N_("Whether to lock resources to a cleanly shut down node"),
  74         N_("When true, resources active on a node when it is cleanly shut down "
  75             "are kept \"locked\" to that node (not allowed to run elsewhere) "
  76             "until they start again on that node after it rejoins (or for at "
  77             "most shutdown-lock-limit, if set). Stonith resources and "
  78             "Pacemaker Remote connections are never locked. Clone and bundle "
  79             "instances and the promoted role of promotable clones are "
  80             "currently never locked, though support could be added in a future "
  81             "release.")
  82     },
  83     {
  84         XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
  85         "0", pcmk__valid_interval_spec,
  86         N_("Do not lock resources to a cleanly shut down node longer than "
  87            "this"),
  88         N_("If shutdown-lock is true and this is set to a nonzero time "
  89             "duration, shutdown locks will expire after this much time has "
  90             "passed since the shutdown was initiated, even if the node has not "
  91             "rejoined.")
  92     },
  93 
  94     // Fencing-related options
  95     {
  96         "stonith-enabled", NULL, "boolean", NULL,
  97         "true", pcmk__valid_boolean,
  98         N_("*** Advanced Use Only *** "
  99             "Whether nodes may be fenced as part of recovery"),
 100         N_("If false, unresponsive nodes are immediately assumed to be harmless, "
 101             "and resources that were active on them may be recovered "
 102             "elsewhere. This can result in a \"split-brain\" situation, "
 103             "potentially leading to data loss and/or service unavailability.")
 104     },
 105     {
 106         "stonith-action", NULL, "select", "reboot, off, poweroff",
 107         "reboot", pcmk__is_fencing_action,
 108         N_("Action to send to fence device when a node needs to be fenced "
 109             "(\"poweroff\" is a deprecated alias for \"off\")"),
 110         NULL
 111     },
 112     {
 113         "stonith-timeout", NULL, "time", NULL,
 114         "60s", pcmk__valid_interval_spec,
 115         N_("*** Advanced Use Only *** Unused by Pacemaker"),
 116         N_("This value is not used by Pacemaker, but is kept for backward "
 117             "compatibility, and certain legacy fence agents might use it.")
 118     },
 119     {
 120         XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
 121         "false", pcmk__valid_boolean,
 122         N_("Whether watchdog integration is enabled"),
 123         N_("This is set automatically by the cluster according to whether SBD "
 124             "is detected to be in use. User-configured values are ignored. "
 125             "The value `true` is meaningful if diskless SBD is used and "
 126             "`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
 127             "is required, watchdog-based self-fencing will be performed via "
 128             "SBD without requiring a fencing resource explicitly configured.")
 129     },
 130     {
 131         "concurrent-fencing", NULL, "boolean", NULL,
 132         PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean,
 133         N_("Allow performing fencing operations in parallel"),
 134         NULL
 135     },
 136     {
 137         "startup-fencing", NULL, "boolean", NULL,
 138         "true", pcmk__valid_boolean,
 139         N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"),
 140         N_("Setting this to false may lead to a \"split-brain\" situation,"
 141             "potentially leading to data loss and/or service unavailability.")
 142     },
 143     {
 144         XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
 145         "0", pcmk__valid_interval_spec,
 146         N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"),
 147         N_("Apply specified delay for the fencings that are targeting the lost "
 148             "nodes with the highest total resource priority in case we don't "
 149             "have the majority of the nodes in our cluster partition, so that "
 150             "the more significant nodes potentially win any fencing match, "
 151             "which is especially meaningful under split-brain of 2-node "
 152             "cluster. A promoted resource instance takes the base priority + 1 "
 153             "on calculation if the base priority is not 0. Any static/random "
 154             "delays that are introduced by `pcmk_delay_base/max` configured "
 155             "for the corresponding fencing resources will be added to this "
 156             "delay. This delay should be significantly greater than, safely "
 157             "twice, the maximum `pcmk_delay_base/max`. By default, priority "
 158             "fencing delay is disabled.")
 159     },
 160 
 161     {
 162         "cluster-delay", NULL, "time", NULL,
 163         "60s", pcmk__valid_interval_spec,
 164         N_("Maximum time for node-to-node communication"),
 165         N_("The node elected Designated Controller (DC) will consider an action "
 166             "failed if it does not get a response from the node executing the "
 167             "action within this time (after considering the action's own "
 168             "timeout). The \"correct\" value will depend on the speed and "
 169             "load of your network and cluster nodes.")
 170     },
 171     {
 172         "batch-limit", NULL, "integer", NULL,
 173         "0", pcmk__valid_number,
 174         N_("Maximum number of jobs that the cluster may execute in parallel "
 175             "across all nodes"),
 176         N_("The \"correct\" value will depend on the speed and load of your "
 177             "network and cluster nodes. If set to 0, the cluster will "
 178             "impose a dynamically calculated limit when any node has a "
 179             "high load.")
 180     },
 181     {
 182         "migration-limit", NULL, "integer", NULL,
 183         "-1", pcmk__valid_number,
 184         N_("The number of live migration actions that the cluster is allowed "
 185             "to execute in parallel on a node (-1 means no limit)")
 186     },
 187 
 188     /* Orphans and stopping */
 189     {
 190         "stop-all-resources", NULL, "boolean", NULL,
 191         "false", pcmk__valid_boolean,
 192         N_("Whether the cluster should stop all active resources"),
 193         NULL
 194     },
 195     {
 196         "stop-orphan-resources", NULL, "boolean", NULL,
 197         "true", pcmk__valid_boolean,
 198         N_("Whether to stop resources that were removed from the configuration"),
 199         NULL
 200     },
 201     {
 202         "stop-orphan-actions", NULL, "boolean", NULL,
 203         "true", pcmk__valid_boolean,
 204         N_("Whether to cancel recurring actions removed from the configuration"),
 205         NULL
 206     },
 207     {
 208         "remove-after-stop", NULL, "boolean", NULL,
 209         "false", pcmk__valid_boolean,
 210         N_("*** Deprecated *** Whether to remove stopped resources from "
 211             "the executor"),
 212         N_("Values other than default are poorly tested and potentially dangerous."
 213             " This option will be removed in a future release.")
 214     },
 215 
 216     /* Storing inputs */
 217     {
 218         "pe-error-series-max", NULL, "integer", NULL,
 219         "-1", pcmk__valid_number,
 220         N_("The number of scheduler inputs resulting in errors to save"),
 221         N_("Zero to disable, -1 to store unlimited.")
 222     },
 223     {
 224         "pe-warn-series-max",  NULL, "integer", NULL,
 225         "5000", pcmk__valid_number,
 226         N_("The number of scheduler inputs resulting in warnings to save"),
 227         N_("Zero to disable, -1 to store unlimited.")
 228     },
 229     {
 230         "pe-input-series-max", NULL, "integer", NULL,
 231         "4000", pcmk__valid_number,
 232         N_("The number of scheduler inputs without errors or warnings to save"),
 233         N_("Zero to disable, -1 to store unlimited.")
 234     },
 235 
 236     /* Node health */
 237     {
 238         PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select",
 239         PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", "
 240             PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", "
 241             PCMK__VALUE_CUSTOM,
 242         PCMK__VALUE_NONE, pcmk__validate_health_strategy,
 243         N_("How cluster should react to node health attributes"),
 244         N_("Requires external entities to create node attributes (named with "
 245             "the prefix \"#health\") with values \"red\", "
 246             "\"yellow\", or \"green\".")
 247     },
 248     {
 249         PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL,
 250         "0", pcmk__valid_number,
 251         N_("Base health score assigned to a node"),
 252         N_("Only used when \"node-health-strategy\" is set to \"progressive\".")
 253     },
 254     {
 255         PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL,
 256         "0", pcmk__valid_number,
 257         N_("The score to use for a node health attribute whose value is \"green\""),
 258         N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
 259     },
 260     {
 261         PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL,
 262         "0", pcmk__valid_number,
 263         N_("The score to use for a node health attribute whose value is \"yellow\""),
 264         N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
 265     },
 266     {
 267         PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL,
 268         "-INFINITY", pcmk__valid_number,
 269         N_("The score to use for a node health attribute whose value is \"red\""),
 270         N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
 271     },
 272 
 273     /*Placement Strategy*/
 274     {
 275         "placement-strategy", NULL, "select",
 276         "default, utilization, minimal, balanced",
 277         "default", check_placement_strategy,
 278         N_("How the cluster should allocate resources to nodes"),
 279         NULL
 280     },
 281 };
 282 
 283 void
 284 pe_metadata(pcmk__output_t *out)
     /* [previous][next][first][last][top][bottom][index][help] */
 285 {
 286     const char *desc_short = "Pacemaker scheduler options";
 287     const char *desc_long = "Cluster options used by Pacemaker's scheduler";
 288 
 289     gchar *s = pcmk__format_option_metadata("pacemaker-schedulerd", desc_short,
 290                                             desc_long, pe_opts,
 291                                             PCMK__NELEM(pe_opts));
 292     out->output_xml(out, "metadata", s);
 293     g_free(s);
 294 }
 295 
/*!
 * \brief Validate scheduler option values against the pe_opts table
 *
 * \param[in,out] options  Hash table of option name/value pairs to validate
 *                         (delegates entirely to pcmk__validate_cluster_options)
 */
void
verify_pe_options(GHashTable * options)
{
    pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
}
 301 
/*!
 * \brief Look up the value of a scheduler cluster option
 *
 * \param[in] options  Hash table of option name/value pairs
 * \param[in] name     Name of the option to look up
 *
 * \return Value from pcmk__cluster_option() (presumably the configured value,
 *         falling back to the pe_opts default -- see that function's contract)
 */
const char *
pe_pref(GHashTable * options, const char *name)
{
    return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
}
 307 
 308 const char *
 309 fail2text(enum action_fail_response fail)
     /* [previous][next][first][last][top][bottom][index][help] */
 310 {
 311     const char *result = "<unknown>";
 312 
 313     switch (fail) {
 314         case action_fail_ignore:
 315             result = "ignore";
 316             break;
 317         case action_fail_demote:
 318             result = "demote";
 319             break;
 320         case action_fail_block:
 321             result = "block";
 322             break;
 323         case action_fail_recover:
 324             result = "recover";
 325             break;
 326         case action_fail_migrate:
 327             result = "migrate";
 328             break;
 329         case action_fail_stop:
 330             result = "stop";
 331             break;
 332         case action_fail_fence:
 333             result = "fence";
 334             break;
 335         case action_fail_standby:
 336             result = "standby";
 337             break;
 338         case action_fail_restart_container:
 339             result = "restart-container";
 340             break;
 341         case action_fail_reset_remote:
 342             result = "reset-remote";
 343             break;
 344     }
 345     return result;
 346 }
 347 
 348 enum action_tasks
 349 text2task(const char *task)
     /* [previous][next][first][last][top][bottom][index][help] */
 350 {
 351     if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
 352         return stop_rsc;
 353     } else if (pcmk__str_eq(task, CRMD_ACTION_STOPPED, pcmk__str_casei)) {
 354         return stopped_rsc;
 355     } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
 356         return start_rsc;
 357     } else if (pcmk__str_eq(task, CRMD_ACTION_STARTED, pcmk__str_casei)) {
 358         return started_rsc;
 359     } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
 360         return shutdown_crm;
 361     } else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
 362         return stonith_node;
 363     } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
 364         return monitor_rsc;
 365     } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei)) {
 366         return action_notify;
 367     } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) {
 368         return action_notified;
 369     } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
 370         return action_promote;
 371     } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
 372         return action_demote;
 373     } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTED, pcmk__str_casei)) {
 374         return action_promoted;
 375     } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTED, pcmk__str_casei)) {
 376         return action_demoted;
 377     }
 378 #if SUPPORT_TRACING
 379     if (pcmk__str_eq(task, CRMD_ACTION_CANCEL, pcmk__str_casei)) {
 380         return no_action;
 381     } else if (pcmk__str_eq(task, CRMD_ACTION_DELETE, pcmk__str_casei)) {
 382         return no_action;
 383     } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
 384         return no_action;
 385     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
 386         return no_action;
 387     } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
 388         return no_action;
 389     }
 390     crm_trace("Unsupported action: %s", task);
 391 #endif
 392 
 393     return no_action;
 394 }
 395 
 396 const char *
 397 task2text(enum action_tasks task)
     /* [previous][next][first][last][top][bottom][index][help] */
 398 {
 399     const char *result = "<unknown>";
 400 
 401     switch (task) {
 402         case no_action:
 403             result = "no_action";
 404             break;
 405         case stop_rsc:
 406             result = CRMD_ACTION_STOP;
 407             break;
 408         case stopped_rsc:
 409             result = CRMD_ACTION_STOPPED;
 410             break;
 411         case start_rsc:
 412             result = CRMD_ACTION_START;
 413             break;
 414         case started_rsc:
 415             result = CRMD_ACTION_STARTED;
 416             break;
 417         case shutdown_crm:
 418             result = CRM_OP_SHUTDOWN;
 419             break;
 420         case stonith_node:
 421             result = CRM_OP_FENCE;
 422             break;
 423         case monitor_rsc:
 424             result = CRMD_ACTION_STATUS;
 425             break;
 426         case action_notify:
 427             result = CRMD_ACTION_NOTIFY;
 428             break;
 429         case action_notified:
 430             result = CRMD_ACTION_NOTIFIED;
 431             break;
 432         case action_promote:
 433             result = CRMD_ACTION_PROMOTE;
 434             break;
 435         case action_promoted:
 436             result = CRMD_ACTION_PROMOTED;
 437             break;
 438         case action_demote:
 439             result = CRMD_ACTION_DEMOTE;
 440             break;
 441         case action_demoted:
 442             result = CRMD_ACTION_DEMOTED;
 443             break;
 444     }
 445 
 446     return result;
 447 }
 448 
/*!
 * \brief Map a resource role enum value to its display string
 *
 * \param[in] role  Role to map
 *
 * \return Constant role name string (the "Unknown" string for out-of-range
 *         values, after logging via CRM_CHECK)
 */
const char *
role2text(enum rsc_role_e role)
{
    switch (role) {
        case RSC_ROLE_UNKNOWN:
            return RSC_ROLE_UNKNOWN_S;
        case RSC_ROLE_STOPPED:
            return RSC_ROLE_STOPPED_S;
        case RSC_ROLE_STARTED:
            return RSC_ROLE_STARTED_S;
        case RSC_ROLE_UNPROMOTED:
            // Legacy builds use the older role names (e.g. for 2.0-era tools)
#ifdef PCMK__COMPAT_2_0
            return RSC_ROLE_UNPROMOTED_LEGACY_S;
#else
            return RSC_ROLE_UNPROMOTED_S;
#endif
        case RSC_ROLE_PROMOTED:
#ifdef PCMK__COMPAT_2_0
            return RSC_ROLE_PROMOTED_LEGACY_S;
#else
            return RSC_ROLE_PROMOTED_S;
#endif
    }
    // Only reached if role is outside the enum's defined values
    CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
    CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);
    // coverity[dead_error_line]
    return RSC_ROLE_UNKNOWN_S;
}
 477 
 478 enum rsc_role_e
 479 text2role(const char *role)
     /* [previous][next][first][last][top][bottom][index][help] */
 480 {
 481     CRM_ASSERT(role != NULL);
 482     if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
 483         return RSC_ROLE_STOPPED;
 484     } else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
 485         return RSC_ROLE_STARTED;
 486     } else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S,
 487                                     RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
 488         return RSC_ROLE_UNPROMOTED;
 489     } else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
 490                                     RSC_ROLE_PROMOTED_LEGACY_S, NULL)) {
 491         return RSC_ROLE_PROMOTED;
 492     } else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) {
 493         return RSC_ROLE_UNKNOWN;
 494     }
 495     crm_err("Unknown role: %s", role);
 496     return RSC_ROLE_UNKNOWN;
 497 }
 498 
 499 void
 500 add_hash_param(GHashTable * hash, const char *name, const char *value)
     /* [previous][next][first][last][top][bottom][index][help] */
 501 {
 502     CRM_CHECK(hash != NULL, return);
 503 
 504     crm_trace("Adding name='%s' value='%s' to hash table",
 505               pcmk__s(name, "<null>"), pcmk__s(value, "<null>"));
 506     if (name == NULL || value == NULL) {
 507         return;
 508 
 509     } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
 510         return;
 511 
 512     } else if (g_hash_table_lookup(hash, name) == NULL) {
 513         g_hash_table_insert(hash, strdup(name), strdup(value));
 514     }
 515 }
 516 
/*!
 * \brief Look up a node attribute, possibly redirected to a container's host
 *
 * Normally returns the attribute from \p node itself. However, if \p rsc has
 * a meta-attribute XML_RSC_ATTR_TARGET equal to "host", the lookup is instead
 * performed against the node currently running the container of the guest
 * node's remote connection resource (useful when the container relies on the
 * host's local storage).
 *
 * \param[in] node  Node whose attribute to look up (NULL yields NULL)
 * \param[in] name  Attribute name
 * \param[in] rsc   Resource whose meta-attributes determine redirection
 *                  (NULL means a plain lookup on \p node)
 *
 * \return Attribute value, or NULL if unset, \p node is NULL, or the
 *         container host is not currently running
 */
const char *
pe_node_attribute_calculated(const pe_node_t *node, const char *name,
                             const pe_resource_t *rsc)
{
    const char *source;

    if(node == NULL) {
        return NULL;

    } else if(rsc == NULL) {
        // No resource context: plain lookup on the node itself
        return g_hash_table_lookup(node->details->attrs, name);
    }

    // Redirect only when the resource explicitly targets the "host"
    source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
    if(source == NULL || !pcmk__str_eq("host", source, pcmk__str_casei)) {
        return g_hash_table_lookup(node->details->attrs, name);
    }

    /* Use attributes set for the containers location
     * instead of for the container itself
     *
     * Useful when the container is using the host's local
     * storage
     */

    // "host" target implies a guest node, which must have a containerized
    // remote connection resource
    CRM_ASSERT(node->details->remote_rsc);
    CRM_ASSERT(node->details->remote_rsc->container);

    if(node->details->remote_rsc->container->running_on) {
        // running_on is a list; the first entry is used as the host
        pe_node_t *host = node->details->remote_rsc->container->running_on->data;
        pe_rsc_trace(rsc, "%s: Looking for %s on the container host %s",
                     rsc->id, name, pe__node_name(host));
        return g_hash_table_lookup(host->details->attrs, name);
    }

    pe_rsc_trace(rsc, "%s: Not looking for %s on the container host: %s is inactive",
                 rsc->id, name, node->details->remote_rsc->container->id);
    return NULL;
}
 556 
 557 const char *
 558 pe_node_attribute_raw(const pe_node_t *node, const char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
 559 {
 560     if(node == NULL) {
 561         return NULL;
 562     }
 563     return g_hash_table_lookup(node->details->attrs, name);
 564 }

/* [previous][next][first][last][top][bottom][index][help] */