This source file includes the following definitions.
- check_placement_strategy
- pe_metadata
- verify_pe_options
- pe_pref
- fail2text
- text2task
- task2text
- role2text
- text2role
- add_hash_param
- pe__node_attribute_calculated
- pe_node_attribute_raw
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>

#include <glib.h>

#include <crm/common/scheduler_internal.h>
#include <crm/pengine/internal.h>

gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;

static bool
check_placement_strategy(const char *value)
{
    return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
                                "balanced", NULL);
}

static pcmk__cluster_option_t pe_opts[] = {
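    /* name, old name, type, allowed values,
     * default value, validator,
     * short description,
     * long description
     */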
    {
        "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide",
        "stop", pcmk__valid_quorum,
        N_("What to do when the cluster does not have quorum"),
        NULL
    },
    {
        "symmetric-cluster", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        N_("Whether resources can run on any node by default"),
        NULL
    },
    {
        "maintenance-mode", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        N_("Whether the cluster should refrain from monitoring, starting, "
           "and stopping resources"),
        NULL
    },
    {
        "start-failure-is-fatal", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        N_("Whether a start failure should prevent a resource from being "
           "recovered on the same node"),
        N_("When true, the cluster will immediately ban a resource from a node "
           "if it fails to start there. When false, the cluster will instead "
           "check the resource's fail count against its migration-threshold.")
    },
    {
        "enable-startup-probes", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        N_("Whether the cluster should check for active resources during start-up"),
        NULL
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        N_("Whether to lock resources to a cleanly shut down node"),
        N_("When true, resources active on a node when it is cleanly shut down "
           "are kept \"locked\" to that node (not allowed to run elsewhere) "
           "until they start again on that node after it rejoins (or for at "
           "most shutdown-lock-limit, if set). Stonith resources and "
           "Pacemaker Remote connections are never locked. Clone and bundle "
           "instances and the promoted role of promotable clones are "
           "currently never locked, though support could be added in a future "
           "release.")
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        N_("Do not lock resources to a cleanly shut down node longer than "
           "this"),
        N_("If shutdown-lock is true and this is set to a nonzero time "
           "duration, shutdown locks will expire after this much time has "
           "passed since the shutdown was initiated, even if the node has not "
           "rejoined.")
    },
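    // Fencing-related options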
    {
        "stonith-enabled", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        N_("*** Advanced Use Only *** "
           "Whether nodes may be fenced as part of recovery"),
        N_("If false, unresponsive nodes are immediately assumed to be harmless, "
           "and resources that were active on them may be recovered "
           "elsewhere. This can result in a \"split-brain\" situation, "
           "potentially leading to data loss and/or service unavailability.")
    },
    {
        "stonith-action", NULL, "select", "reboot, off, poweroff",
        PCMK_ACTION_REBOOT, pcmk__is_fencing_action,
        N_("Action to send to fence device when a node needs to be fenced "
           "(\"poweroff\" is a deprecated alias for \"off\")"),
        NULL
    },
    {
        "stonith-timeout", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        N_("*** Advanced Use Only *** Unused by Pacemaker"),
        N_("This value is not used by Pacemaker, but is kept for backward "
           "compatibility, and certain legacy fence agents might use it.")
    },
    {
        XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        N_("Whether watchdog integration is enabled"),
        N_("This is set automatically by the cluster according to whether SBD "
           "is detected to be in use. User-configured values are ignored. "
           "The value `true` is meaningful if diskless SBD is used and "
           "`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
           "is required, watchdog-based self-fencing will be performed via "
           "SBD without requiring a fencing resource explicitly configured.")
    },
    {
        "concurrent-fencing", NULL, "boolean", NULL,
        PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean,
        N_("Allow performing fencing operations in parallel"),
        NULL
    },
    {
        "startup-fencing", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"),
141 N_("Setting this to false may lead to a \"split-brain\" situation,"
142 "potentially leading to data loss and/or service unavailability.")
    },
    {
        XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"),
        N_("Apply specified delay for the fencings that are targeting the lost "
           "nodes with the highest total resource priority in case we don't "
           "have the majority of the nodes in our cluster partition, so that "
           "the more significant nodes potentially win any fencing match, "
           "which is especially meaningful under split-brain of 2-node "
           "cluster. A promoted resource instance takes the base priority + 1 "
           "on calculation if the base priority is not 0. Any static/random "
           "delays that are introduced by `pcmk_delay_base/max` configured "
           "for the corresponding fencing resources will be added to this "
           "delay. This delay should be significantly greater than, safely "
           "twice, the maximum `pcmk_delay_base/max`. By default, priority "
           "fencing delay is disabled.")
    },
    {
        XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        N_("How long to wait for a node that has joined the cluster to join "
           "the controller process group"),
        N_("Fence nodes that do not join the controller process group within "
           "this much time after joining the cluster, to allow the cluster "
           "to continue managing resources. A value of 0 means never fence "
           "pending nodes. Setting the value to 2h means fence nodes after "
           "2 hours.")
    },
    {
        "cluster-delay", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        N_("Maximum time for node-to-node communication"),
        N_("The node elected Designated Controller (DC) will consider an action "
           "failed if it does not get a response from the node executing the "
           "action within this time (after considering the action's own "
           "timeout). The \"correct\" value will depend on the speed and "
           "load of your network and cluster nodes.")
    },
    {
        "batch-limit", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        N_("Maximum number of jobs that the cluster may execute in parallel "
           "across all nodes"),
        N_("The \"correct\" value will depend on the speed and load of your "
           "network and cluster nodes. If set to 0, the cluster will "
           "impose a dynamically calculated limit when any node has a "
           "high load.")
    },
    {
        "migration-limit", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        N_("The number of live migration actions that the cluster is allowed "
           "to execute in parallel on a node (-1 means no limit)")
    },
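    // Orphans and stopping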
    {
        "stop-all-resources", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        N_("Whether the cluster should stop all active resources"),
        NULL
    },
    {
        "stop-orphan-resources", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        N_("Whether to stop resources that were removed from the configuration"),
        NULL
    },
    {
        "stop-orphan-actions", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        N_("Whether to cancel recurring actions removed from the configuration"),
        NULL
    },
    {
        "remove-after-stop", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        N_("*** Deprecated *** Whether to remove stopped resources from "
           "the executor"),
        N_("Values other than default are poorly tested and potentially dangerous."
           " This option will be removed in a future release.")
    },
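    // Storing inputs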
    {
        "pe-error-series-max", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        N_("The number of scheduler inputs resulting in errors to save"),
        N_("Zero to disable, -1 to store unlimited.")
    },
    {
        "pe-warn-series-max", NULL, "integer", NULL,
        "5000", pcmk__valid_number,
        N_("The number of scheduler inputs resulting in warnings to save"),
        N_("Zero to disable, -1 to store unlimited.")
    },
    {
        "pe-input-series-max", NULL, "integer", NULL,
        "4000", pcmk__valid_number,
        N_("The number of scheduler inputs without errors or warnings to save"),
        N_("Zero to disable, -1 to store unlimited.")
    },
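    // Node health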
    {
        PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select",
        PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", "
            PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", "
            PCMK__VALUE_CUSTOM,
        PCMK__VALUE_NONE, pcmk__validate_health_strategy,
        N_("How cluster should react to node health attributes"),
        N_("Requires external entities to create node attributes (named with "
           "the prefix \"#health\") with values \"red\", "
           "\"yellow\", or \"green\".")
    },
    {
        PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL,
        "0", pcmk__valid_number,
        N_("Base health score assigned to a node"),
        N_("Only used when \"node-health-strategy\" is set to \"progressive\".")
    },
    {
        PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL,
        "0", pcmk__valid_number,
        N_("The score to use for a node health attribute whose value is \"green\""),
        N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
    },
    {
        PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL,
        "0", pcmk__valid_number,
        N_("The score to use for a node health attribute whose value is \"yellow\""),
        N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
    },
    {
        PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL,
        "-INFINITY", pcmk__valid_number,
        N_("The score to use for a node health attribute whose value is \"red\""),
        N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
    },
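    // Placement strategy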
    {
        "placement-strategy", NULL, "select",
        "default, utilization, minimal, balanced",
        "default", check_placement_strategy,
        N_("How the cluster should allocate resources to nodes"),
        NULL
    },
};

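/*!
 * \brief Output metadata for the scheduler's cluster options
 *
 * \param[in,out] out  Output object to use
 */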
void
pe_metadata(pcmk__output_t *out)
{
    const char *desc_short = "Pacemaker scheduler options";
    const char *desc_long = "Cluster options used by Pacemaker's scheduler";

    gchar *s = pcmk__format_option_metadata("pacemaker-schedulerd", desc_short,
                                            desc_long, pe_opts,
                                            PCMK__NELEM(pe_opts));
    out->output_xml(out, "metadata", s);
    g_free(s);
}

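/*!
 * \brief Validate a table of scheduler cluster options
 *
 * \param[in,out] options  Hash table of option name/value pairs
 */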
void
verify_pe_options(GHashTable * options)
{
    pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
}

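/*!
 * \brief Get the value of a scheduler cluster option
 *
 * \param[in,out] options  Hash table of option name/value pairs
 * \param[in]     name     Name of option to look up
 *
 * \return Value of the named option
 */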
const char *
pe_pref(GHashTable * options, const char *name)
{
    return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
}

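/*!
 * \brief Map an enum action_fail_response value to a displayable string
 *
 * \param[in] fail  Failure response value to map
 *
 * \return String representation of \p fail
 */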
const char *
fail2text(enum action_fail_response fail)
{
    const char *result = "<unknown>";

    switch (fail) {
        case pcmk_on_fail_ignore:
            result = "ignore";
            break;
        case pcmk_on_fail_demote:
            result = "demote";
            break;
        case pcmk_on_fail_block:
            result = "block";
            break;
        case pcmk_on_fail_restart:
            result = "recover";
            break;
        case pcmk_on_fail_ban:
            result = "migrate";
            break;
        case pcmk_on_fail_stop:
            result = "stop";
            break;
        case pcmk_on_fail_fence_node:
            result = "fence";
            break;
        case pcmk_on_fail_standby_node:
            result = "standby";
            break;
        case pcmk_on_fail_restart_container:
            result = "restart-container";
            break;
        case pcmk_on_fail_reset_remote:
            result = "reset-remote";
            break;
    }
    return result;
}

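/*!
 * \brief Parse an action name into an enum action_tasks value
 *
 * \param[in] task  Action name to parse (case-insensitively)
 *
 * \return Parsed action value, or pcmk_action_unspecified if unrecognized
 */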
enum action_tasks
text2task(const char *task)
{
    if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)) {
        return pcmk_action_stop;

    } else if (pcmk__str_eq(task, PCMK_ACTION_STOPPED, pcmk__str_casei)) {
        return pcmk_action_stopped;

    } else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_casei)) {
        return pcmk_action_start;

    } else if (pcmk__str_eq(task, PCMK_ACTION_RUNNING, pcmk__str_casei)) {
        return pcmk_action_started;

    } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_casei)) {
        return pcmk_action_shutdown;

    } else if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
        return pcmk_action_fence;

    } else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
        return pcmk_action_monitor;

    } else if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
        return pcmk_action_notify;

    } else if (pcmk__str_eq(task, PCMK_ACTION_NOTIFIED, pcmk__str_casei)) {
        return pcmk_action_notified;

    } else if (pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_casei)) {
        return pcmk_action_promote;

    } else if (pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_casei)) {
        return pcmk_action_demote;

    } else if (pcmk__str_eq(task, PCMK_ACTION_PROMOTED, pcmk__str_casei)) {
        return pcmk_action_promoted;

    } else if (pcmk__str_eq(task, PCMK_ACTION_DEMOTED, pcmk__str_casei)) {
        return pcmk_action_demoted;
    }
    return pcmk_action_unspecified;
}

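/*!
 * \brief Map an enum action_tasks value to an action name
 *
 * \param[in] task  Action value to map
 *
 * \return Action name corresponding to \p task
 */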
const char *
task2text(enum action_tasks task)
{
    const char *result = "<unknown>";

    switch (task) {
        case pcmk_action_unspecified:
            result = "no_action";
            break;
        case pcmk_action_stop:
            result = PCMK_ACTION_STOP;
            break;
        case pcmk_action_stopped:
            result = PCMK_ACTION_STOPPED;
            break;
        case pcmk_action_start:
            result = PCMK_ACTION_START;
            break;
        case pcmk_action_started:
            result = PCMK_ACTION_RUNNING;
            break;
        case pcmk_action_shutdown:
            result = PCMK_ACTION_DO_SHUTDOWN;
            break;
        case pcmk_action_fence:
            result = PCMK_ACTION_STONITH;
            break;
        case pcmk_action_monitor:
            result = PCMK_ACTION_MONITOR;
            break;
        case pcmk_action_notify:
            result = PCMK_ACTION_NOTIFY;
            break;
        case pcmk_action_notified:
            result = PCMK_ACTION_NOTIFIED;
            break;
        case pcmk_action_promote:
            result = PCMK_ACTION_PROMOTE;
            break;
        case pcmk_action_promoted:
            result = PCMK_ACTION_PROMOTED;
            break;
        case pcmk_action_demote:
            result = PCMK_ACTION_DEMOTE;
            break;
        case pcmk_action_demoted:
            result = PCMK_ACTION_DEMOTED;
            break;
    }

    return result;
}

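/*!
 * \brief Map an enum rsc_role_e value to a role name
 *
 * \param[in] role  Role value to map
 *
 * \return Role name corresponding to \p role
 */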
const char *
role2text(enum rsc_role_e role)
{
    switch (role) {
        case pcmk_role_stopped:
            return PCMK__ROLE_STOPPED;

        case pcmk_role_started:
            return PCMK__ROLE_STARTED;

        case pcmk_role_unpromoted:
#ifdef PCMK__COMPAT_2_0
            return PCMK__ROLE_UNPROMOTED_LEGACY;
#else
            return PCMK__ROLE_UNPROMOTED;
#endif

        case pcmk_role_promoted:
#ifdef PCMK__COMPAT_2_0
            return PCMK__ROLE_PROMOTED_LEGACY;
#else
            return PCMK__ROLE_PROMOTED;
#endif

        default:
            return PCMK__ROLE_UNKNOWN;
    }
}

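/*!
 * \brief Parse a role name into an enum rsc_role_e value
 *
 * \param[in] role  Role name to parse (case-insensitively)
 *
 * \return Parsed role value, or pcmk_role_unknown if unrecognized
 */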
enum rsc_role_e
text2role(const char *role)
{
    CRM_ASSERT(role != NULL);
    if (pcmk__str_eq(role, PCMK__ROLE_STOPPED, pcmk__str_casei)) {
        return pcmk_role_stopped;
    } else if (pcmk__str_eq(role, PCMK__ROLE_STARTED, pcmk__str_casei)) {
        return pcmk_role_started;
    } else if (pcmk__strcase_any_of(role, PCMK__ROLE_UNPROMOTED,
                                    PCMK__ROLE_UNPROMOTED_LEGACY, NULL)) {
        return pcmk_role_unpromoted;
    } else if (pcmk__strcase_any_of(role, PCMK__ROLE_PROMOTED,
                                    PCMK__ROLE_PROMOTED_LEGACY, NULL)) {
        return pcmk_role_promoted;
    } else if (pcmk__str_eq(role, PCMK__ROLE_UNKNOWN, pcmk__str_casei)) {
        return pcmk_role_unknown;
    }
    crm_err("Unknown role: %s", role);
    return pcmk_role_unknown;
}

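/*!
 * \brief Insert a name/value pair into a hash table if not already present
 *
 * NULL names or values and the special value "#default" are ignored.
 *
 * \param[in,out] hash   Hash table to insert into
 * \param[in]     name   Key to insert
 * \param[in]     value  Value to insert
 */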
void
add_hash_param(GHashTable * hash, const char *name, const char *value)
{
    CRM_CHECK(hash != NULL, return);

    crm_trace("Adding name='%s' value='%s' to hash table",
              pcmk__s(name, "<null>"), pcmk__s(value, "<null>"));
    if (name == NULL || value == NULL) {
        return;

    } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
        return;

    } else if (g_hash_table_lookup(hash, name) == NULL) {
        g_hash_table_insert(hash, strdup(name), strdup(value));
    }
}

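/*!
 * \internal
 * \brief Look up a node attribute on the appropriate node
 *
 * If \p node is a guest node and either \p force_host is true or the
 * resource's XML_RSC_ATTR_TARGET meta-attribute is set to "host", look up the
 * attribute on the guest's host node (the node that the guest's container is
 * assigned to or currently active on, depending on \p node_type). Otherwise,
 * look up the attribute on \p node itself.
 *
 * \param[in] node        Node to check
 * \param[in] name        Name of attribute to look up
 * \param[in] rsc         Resource on whose behalf to look up the attribute
 * \param[in] node_type   Whether to use the assigned or current host
 * \param[in] force_host  Force lookup on the guest's host, regardless of the
 *                        resource's target meta-attribute
 *
 * \return Value of requested attribute, or NULL if not found
 */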
const char *
pe__node_attribute_calculated(const pcmk_node_t *node, const char *name,
                              const pcmk_resource_t *rsc,
                              enum pcmk__rsc_node node_type,
                              bool force_host)
{
    // Check whether this is a guest node (a remote node inside a container)
    bool is_guest = (node != NULL)
                    && (node->details->type == pcmk_node_variant_remote)
                    && (node->details->remote_rsc != NULL)
                    && (node->details->remote_rsc->container != NULL);
    const char *source = NULL;
    const char *node_type_s = NULL;
    const char *reason = NULL;

    const pcmk_resource_t *container = NULL;
    const pcmk_node_t *host = NULL;

    CRM_ASSERT((node != NULL) && (name != NULL) && (rsc != NULL)
               && (!force_host || is_guest));

    /* Unless the lookup is forced to the host, only redirect it there when the
     * node is a guest and the resource targets attributes at the host
     */
    source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
    if (!force_host
        && (!is_guest || !pcmk__str_eq(source, "host", pcmk__str_casei))) {
        // Look up the attribute on the node itself
        return g_hash_table_lookup(node->details->attrs, name);
    }

    container = node->details->remote_rsc->container;

    switch (node_type) {
        case pcmk__rsc_node_assigned:
            node_type_s = "assigned";
            host = container->allocated_to;
            if (host == NULL) {
                reason = "not assigned";
            }
            break;

        case pcmk__rsc_node_current:
            node_type_s = "current";

            if (container->running_on != NULL) {
                host = container->running_on->data;
            }
            if (host == NULL) {
                reason = "inactive";
            }
            break;

        default:
            // Only assigned and current node lookups are supported
            CRM_ASSERT(false);
            break;
    }

    if (host != NULL) {
        const char *value = g_hash_table_lookup(host->details->attrs, name);

        pe_rsc_trace(rsc,
                     "%s: Value lookup for %s on %s container host %s %s%s",
                     rsc->id, name, node_type_s, pe__node_name(host),
                     ((value != NULL)? "succeeded: " : "failed"),
                     pcmk__s(value, ""));
        return value;
    }
    pe_rsc_trace(rsc,
                 "%s: Not looking for %s on %s container host: %s is %s",
                 rsc->id, name, node_type_s, container->id, reason);
    return NULL;
}

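/*!
 * \brief Look up a node attribute directly on a node
 *
 * \param[in] node  Node to check (can be NULL)
 * \param[in] name  Name of attribute to look up
 *
 * \return Value of requested attribute, or NULL if not found
 */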
const char *
pe_node_attribute_raw(const pcmk_node_t *node, const char *name)
{
    if (node == NULL) {
        return NULL;
    }
    return g_hash_table_lookup(node->details->attrs, name);
}