This source file includes the following definitions.
- check_health
- check_stonith_action
- check_placement_strategy
- pe_metadata
- verify_pe_options
- pe_pref
- fail2text
- text2task
- task2text
- role2text
- text2role
- pe__add_scores
- add_hash_param
- pe_node_attribute_calculated
- pe_node_attribute_raw
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/msg_xml.h>
13 #include <crm/common/xml.h>
14 #include <crm/common/util.h>
15
16 #include <glib.h>
17
18 #include <crm/pengine/internal.h>
19
/* Flags recording whether any error/warning was logged while processing;
 * defined here, but maintained by code elsewhere (presumably the scheduler's
 * logging macros -- NOTE(review): confirm the writers).
 */
gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
22
23 static bool
24 check_health(const char *value)
25 {
26 return pcmk__strcase_any_of(value, "none", "custom", "only-green", "progressive",
27 "migrate-on-red", NULL);
28 }
29
30 static bool
31 check_stonith_action(const char *value)
32 {
33 return pcmk__strcase_any_of(value, "reboot", "poweroff", "off", NULL);
34 }
35
36 static bool
37 check_placement_strategy(const char *value)
38 {
39 return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
40 "balanced", NULL);
41 }
42
// Scheduler cluster options, used for validation and metadata output
static pcmk__cluster_option_t pe_opts[] = {
    /* Each entry:
     * name, alternate name, type, allowed values,
     * default value, validator,
     * short description,
     * long description (NULL if none)
     */
    {
        "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, demote, suicide",
        "stop", pcmk__valid_quorum,
        "What to do when the cluster does not have quorum",
        NULL
    },
    {
        "symmetric-cluster", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether resources can run on any node by default",
        NULL
    },
    {
        "maintenance-mode", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether the cluster should refrain from monitoring, starting, "
        "and stopping resources",
        NULL
    },
    {
        "start-failure-is-fatal", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether a start failure should prevent a resource from being "
        "recovered on the same node",
        "When true, the cluster will immediately ban a resource from a node "
        "if it fails to start there. When false, the cluster will instead "
        "check the resource's fail count against its migration-threshold."
    },
    {
        "enable-startup-probes", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether the cluster should check for active resources during start-up",
        NULL
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether to lock resources to a cleanly shut down node",
        "When true, resources active on a node when it is cleanly shut down "
        "are kept \"locked\" to that node (not allowed to run elsewhere) "
        "until they start again on that node after it rejoins (or for at "
        "most shutdown-lock-limit, if set). Stonith resources and "
        "Pacemaker Remote connections are never locked. Clone and bundle "
        "instances and the promoted role of promotable clones are currently"
        " never locked, though support could be added in a future release."
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        "Do not lock resources to a cleanly shut down node longer than this",
        "If shutdown-lock is true and this is set to a nonzero time duration, "
        "shutdown locks will expire after this much time has passed since "
        "the shutdown was initiated, even if the node has not rejoined."
    },

    // Fencing-related options
    {
        "stonith-enabled", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "*** Advanced Use Only *** "
        "Whether nodes may be fenced as part of recovery",
        "If false, unresponsive nodes are immediately assumed to be harmless, "
        "and resources that were active on them may be recovered "
        "elsewhere. This can result in a \"split-brain\" situation, "
        "potentially leading to data loss and/or service unavailability."
    },
    {
        "stonith-action", NULL, "enum", "reboot, off, poweroff",
        "reboot", check_stonith_action,
        "Action to send to fence device when a node needs to be fenced "
        "(\"poweroff\" is a deprecated alias for \"off\")",
        NULL
    },
    {
        "stonith-timeout", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        "*** Advanced Use Only *** Unused by Pacemaker",
        "This value is not used by Pacemaker, but is kept for backward "
        "compatibility, and certain legacy fence agents might use it."
    },
    {
        XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether watchdog integration is enabled",
        "This is set automatically by the cluster according to whether SBD "
        "is detected to be in use. User-configured values are ignored. "
        "The value `true` is meaningful if diskless SBD is used and "
        "`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
        "is required, watchdog-based self-fencing will be performed via "
        "SBD without requiring a fencing resource explicitly configured."
    },
    {
        "concurrent-fencing", NULL, "boolean", NULL,
        PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean,
        "Allow performing fencing operations in parallel",
        NULL
    },
    {
        "startup-fencing", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "*** Advanced Use Only *** Whether to fence unseen nodes at start-up",
        "Setting this to false may lead to a \"split-brain\" situation,"
        "potentially leading to data loss and/or service unavailability."
    },
    {
        XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        "Apply fencing delay targeting the lost nodes with the highest total resource priority",
        "Apply specified delay for the fencings that are targeting the lost "
        "nodes with the highest total resource priority in case we don't "
        "have the majority of the nodes in our cluster partition, so that "
        "the more significant nodes potentially win any fencing match, "
        "which is especially meaningful under split-brain of 2-node "
        "cluster. A promoted resource instance takes the base priority + 1 "
        "on calculation if the base priority is not 0. Any static/random "
        "delays that are introduced by `pcmk_delay_base/max` configured "
        "for the corresponding fencing resources will be added to this "
        "delay. This delay should be significantly greater than, safely "
        "twice, the maximum `pcmk_delay_base/max`. By default, priority "
        "fencing delay is disabled."
    },

    // Timing and concurrency limits
    {
        "cluster-delay", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        "Maximum time for node-to-node communication",
        "The node elected Designated Controller (DC) will consider an action "
        "failed if it does not get a response from the node executing the "
        "action within this time (after considering the action's own "
        "timeout). The \"correct\" value will depend on the speed and "
        "load of your network and cluster nodes."
    },
    {
        "batch-limit", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "Maximum number of jobs that the cluster may execute in parallel "
        "across all nodes",
        "The \"correct\" value will depend on the speed and load of your "
        "network and cluster nodes. If set to 0, the cluster will "
        "impose a dynamically calculated limit when any node has a "
        "high load."
    },
    {
        "migration-limit", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        "The number of live migration actions that the cluster is allowed "
        "to execute in parallel on a node (-1 means no limit)"
        // (no long description; trailing fields are zero-initialized)
    },

    // Orphans and stopping
    {
        "stop-all-resources", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether the cluster should stop all active resources",
        NULL
    },
    {
        "stop-orphan-resources", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether to stop resources that were removed from the configuration",
        NULL
    },
    {
        "stop-orphan-actions", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether to cancel recurring actions removed from the configuration",
        NULL
    },
    {
        "remove-after-stop", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "*** Deprecated *** Whether to remove stopped resources from "
        "the executor",
        "Values other than default are poorly tested and potentially dangerous."
        " This option will be removed in a future release."
    },

    // Storing scheduler inputs
    {
        "pe-error-series-max", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        "The number of scheduler inputs resulting in errors to save",
        "Zero to disable, -1 to store unlimited."
    },
    {
        "pe-warn-series-max", NULL, "integer", NULL,
        "5000", pcmk__valid_number,
        "The number of scheduler inputs resulting in warnings to save",
        "Zero to disable, -1 to store unlimited."
    },
    {
        "pe-input-series-max", NULL, "integer", NULL,
        "4000", pcmk__valid_number,
        "The number of scheduler inputs without errors or warnings to save",
        "Zero to disable, -1 to store unlimited."
    },

    // Node health options
    {
        "node-health-strategy", NULL, "enum",
        "none, migrate-on-red, only-green, progressive, custom",
        "none", check_health,
        "How cluster should react to node health attributes",
        "Requires external entities to create node attributes (named with "
        "the prefix \"#health\") with values \"red\", \"yellow\" or "
        "\"green\"."
    },
    {
        "node-health-base", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "Base health score assigned to a node",
        "Only used when node-health-strategy is set to progressive."
    },
    {
        "node-health-green", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \"green\"",
        "Only used when node-health-strategy is set to custom or progressive."
    },
    {
        "node-health-yellow", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \"yellow\"",
        "Only used when node-health-strategy is set to custom or progressive."
    },
    {
        "node-health-red", NULL, "integer", NULL,
        "-INFINITY", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \"red\"",
        "Only used when node-health-strategy is set to custom or progressive."
    },

    // Placement strategy
    {
        "placement-strategy", NULL, "enum",
        "default, utilization, minimal, balanced",
        "default", check_placement_strategy,
        "How the cluster should allocate resources to nodes",
        NULL
    },
};
290
/*!
 * \brief Output the scheduler's cluster option metadata (OCF-style XML)
 */
void
pe_metadata(void)
{
    pcmk__print_option_metadata("pacemaker-schedulerd", "1.0",
                                "Pacemaker scheduler options",
                                "Cluster options used by Pacemaker's scheduler"
                                " (formerly called pengine)",
                                pe_opts, PCMK__NELEM(pe_opts));
}
300
/*!
 * \brief Validate a table of configured values against the scheduler options
 *
 * \param[in,out] options  Hash table of option name/value pairs to validate
 */
void
verify_pe_options(GHashTable * options)
{
    pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
}
306
/*!
 * \brief Get the effective value of a scheduler option
 *
 * \param[in,out] options  Hash table of configured option name/value pairs
 * \param[in]     name     Name of option to look up
 *
 * \return Option value (from \p options or the option's default)
 */
const char *
pe_pref(GHashTable * options, const char *name)
{
    return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
}
312
313 const char *
314 fail2text(enum action_fail_response fail)
315 {
316 const char *result = "<unknown>";
317
318 switch (fail) {
319 case action_fail_ignore:
320 result = "ignore";
321 break;
322 case action_fail_demote:
323 result = "demote";
324 break;
325 case action_fail_block:
326 result = "block";
327 break;
328 case action_fail_recover:
329 result = "recover";
330 break;
331 case action_fail_migrate:
332 result = "migrate";
333 break;
334 case action_fail_stop:
335 result = "stop";
336 break;
337 case action_fail_fence:
338 result = "fence";
339 break;
340 case action_fail_standby:
341 result = "standby";
342 break;
343 case action_fail_restart_container:
344 result = "restart-container";
345 break;
346 case action_fail_reset_remote:
347 result = "reset-remote";
348 break;
349 }
350 return result;
351 }
352
/*!
 * \brief Parse an action or cluster operation name into an action_tasks value
 *
 * \param[in] task  Action/operation name to parse
 *
 * \return Parsed enum action_tasks value, or no_action if not recognized
 */
enum action_tasks
text2task(const char *task)
{
    if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
        return stop_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STOPPED, pcmk__str_casei)) {
        return stopped_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
        return start_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STARTED, pcmk__str_casei)) {
        return started_rsc;
    } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
        return shutdown_crm;
    } else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
        return stonith_node;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
        return monitor_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei)) {
        return action_notify;
    } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) {
        return action_notified;
    } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
        return action_promote;
    } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
        return action_demote;
    } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTED, pcmk__str_casei)) {
        return action_promoted;
    } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTED, pcmk__str_casei)) {
        return action_demoted;
    }
#if SUPPORT_TRACING
    /* Only compiled with tracing support: these task names are known but
     * deliberately mapped to no_action without logging; only a task matching
     * none of the above is traced as unsupported.
     */
    if (pcmk__str_eq(task, CRMD_ACTION_CANCEL, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_DELETE, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRM_OP_PROBED, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
        return no_action;
    }
    crm_trace("Unsupported action: %s", task);
#endif

    return no_action;
}
404
405 const char *
406 task2text(enum action_tasks task)
407 {
408 const char *result = "<unknown>";
409
410 switch (task) {
411 case no_action:
412 result = "no_action";
413 break;
414 case stop_rsc:
415 result = CRMD_ACTION_STOP;
416 break;
417 case stopped_rsc:
418 result = CRMD_ACTION_STOPPED;
419 break;
420 case start_rsc:
421 result = CRMD_ACTION_START;
422 break;
423 case started_rsc:
424 result = CRMD_ACTION_STARTED;
425 break;
426 case shutdown_crm:
427 result = CRM_OP_SHUTDOWN;
428 break;
429 case stonith_node:
430 result = CRM_OP_FENCE;
431 break;
432 case monitor_rsc:
433 result = CRMD_ACTION_STATUS;
434 break;
435 case action_notify:
436 result = CRMD_ACTION_NOTIFY;
437 break;
438 case action_notified:
439 result = CRMD_ACTION_NOTIFIED;
440 break;
441 case action_promote:
442 result = CRMD_ACTION_PROMOTE;
443 break;
444 case action_promoted:
445 result = CRMD_ACTION_PROMOTED;
446 break;
447 case action_demote:
448 result = CRMD_ACTION_DEMOTE;
449 break;
450 case action_demoted:
451 result = CRMD_ACTION_DEMOTED;
452 break;
453 }
454
455 return result;
456 }
457
/*!
 * \brief Get a user-facing string for a resource role
 *
 * \param[in] role  Role to map
 *
 * \return String equivalent of \p role, or RSC_ROLE_UNKNOWN_S if out of range
 */
const char *
role2text(enum rsc_role_e role)
{
    switch (role) {
        case RSC_ROLE_UNKNOWN:
            return RSC_ROLE_UNKNOWN_S;
        case RSC_ROLE_STOPPED:
            return RSC_ROLE_STOPPED_S;
        case RSC_ROLE_STARTED:
            return RSC_ROLE_STARTED_S;
        case RSC_ROLE_UNPROMOTED:
#ifdef PCMK__COMPAT_2_0
            // Use the legacy (pre-2.1) role name when built for compatibility
            return RSC_ROLE_UNPROMOTED_LEGACY_S;
#else
            return RSC_ROLE_UNPROMOTED_S;
#endif
        case RSC_ROLE_PROMOTED:
#ifdef PCMK__COMPAT_2_0
            // Use the legacy (pre-2.1) role name when built for compatibility
            return RSC_ROLE_PROMOTED_LEGACY_S;
#else
            return RSC_ROLE_PROMOTED_S;
#endif
    }
    /* Defensive: only reachable if role lies outside the enum's defined
     * values; log via CRM_CHECK and fall back to the "unknown" string.
     */
    CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
    CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);

    return RSC_ROLE_UNKNOWN_S;
}
486
/*!
 * \brief Parse a resource role name into an enum rsc_role_e value
 *
 * \param[in] role  Role name to parse (must not be NULL)
 *
 * \return Parsed role, or RSC_ROLE_UNKNOWN (with an error logged) if \p role
 *         is not a recognized role name
 */
enum rsc_role_e
text2role(const char *role)
{
    CRM_ASSERT(role != NULL);
    if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
        return RSC_ROLE_STOPPED;
    } else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
        return RSC_ROLE_STARTED;
    } else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S,
                                    RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
        // Accept both the current and legacy names for the unpromoted role
        return RSC_ROLE_UNPROMOTED;
    } else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
                                    RSC_ROLE_PROMOTED_LEGACY_S, NULL)) {
        // Accept both the current and legacy names for the promoted role
        return RSC_ROLE_PROMOTED;
    } else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) {
        return RSC_ROLE_UNKNOWN;
    }
    crm_err("Unknown role: %s", role);
    return RSC_ROLE_UNKNOWN;
}
507
508
509
510
511
512
513
514
515 int
516 pe__add_scores(int score1, int score2)
517 {
518 int result = score1 + score2;
519
520
521
522 if (score1 <= -CRM_SCORE_INFINITY) {
523
524 if (score2 <= -CRM_SCORE_INFINITY) {
525 crm_trace("-INFINITY + -INFINITY = -INFINITY");
526 } else if (score2 >= CRM_SCORE_INFINITY) {
527 crm_trace("-INFINITY + +INFINITY = -INFINITY");
528 } else {
529 crm_trace("-INFINITY + %d = -INFINITY", score2);
530 }
531
532 return -CRM_SCORE_INFINITY;
533
534 } else if (score2 <= -CRM_SCORE_INFINITY) {
535
536 if (score1 >= CRM_SCORE_INFINITY) {
537 crm_trace("+INFINITY + -INFINITY = -INFINITY");
538 } else {
539 crm_trace("%d + -INFINITY = -INFINITY", score1);
540 }
541
542 return -CRM_SCORE_INFINITY;
543
544 } else if (score1 >= CRM_SCORE_INFINITY) {
545
546 if (score2 >= CRM_SCORE_INFINITY) {
547 crm_trace("+INFINITY + +INFINITY = +INFINITY");
548 } else {
549 crm_trace("+INFINITY + %d = +INFINITY", score2);
550 }
551
552 return CRM_SCORE_INFINITY;
553
554 } else if (score2 >= CRM_SCORE_INFINITY) {
555 crm_trace("%d + +INFINITY = +INFINITY", score1);
556 return CRM_SCORE_INFINITY;
557 }
558
559
560
561
562
563
564
565 if (result >= CRM_SCORE_INFINITY) {
566 crm_trace("%d + %d = +INFINITY", score1, score2);
567 return CRM_SCORE_INFINITY;
568
569 } else if (result <= -CRM_SCORE_INFINITY) {
570 crm_trace("%d + %d = -INFINITY", score1, score2);
571 return -CRM_SCORE_INFINITY;
572 }
573
574 crm_trace("%d + %d = %d", score1, score2, result);
575 return result;
576 }
577
578 void
579 add_hash_param(GHashTable * hash, const char *name, const char *value)
580 {
581 CRM_CHECK(hash != NULL, return);
582
583 crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value));
584 if (name == NULL || value == NULL) {
585 return;
586
587 } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
588 return;
589
590 } else if (g_hash_table_lookup(hash, name) == NULL) {
591 g_hash_table_insert(hash, strdup(name), strdup(value));
592 }
593 }
594
/*!
 * \brief Look up a node attribute, redirecting to the container host if needed
 *
 * \param[in] node  Node whose attribute is wanted (may be NULL)
 * \param[in] name  Name of attribute to look up
 * \param[in] rsc   Resource on whose behalf the lookup is done (may be NULL);
 *                  its container-attribute-target meta-attribute determines
 *                  whether the container host's attributes are used instead
 *
 * \return Attribute value, or NULL if \p node is NULL, the attribute is
 *         unset, or the relevant container is inactive
 */
const char *
pe_node_attribute_calculated(const pe_node_t *node, const char *name,
                             const pe_resource_t *rsc)
{
    const char *source;

    if(node == NULL) {
        return NULL;

    } else if(rsc == NULL) {
        // No resource context: use the node's own attributes
        return g_hash_table_lookup(node->details->attrs, name);
    }

    // Redirect only if the resource's container-attribute-target is "host"
    source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
    if(source == NULL || !pcmk__str_eq("host", source, pcmk__str_casei)) {
        return g_hash_table_lookup(node->details->attrs, name);
    }

    /* The target is "host", so read attributes from the node hosting the
     * container rather than from the (guest) node itself. The assertions
     * require the node to be a remote node backed by a container resource --
     * NOTE(review): presumably only such nodes can reach this point with a
     * "host" target; confirm against callers.
     */
    CRM_ASSERT(node->details->remote_rsc);
    CRM_ASSERT(node->details->remote_rsc->container);

    if(node->details->remote_rsc->container->running_on) {
        // Use the first node the container is currently running on
        pe_node_t *host = node->details->remote_rsc->container->running_on->data;
        pe_rsc_trace(rsc, "%s: Looking for %s on the container host %s", rsc->id, name, host->details->uname);
        return g_hash_table_lookup(host->details->attrs, name);
    }

    // Container is inactive, so there is no host whose attributes apply
    pe_rsc_trace(rsc, "%s: Not looking for %s on the container host: %s is inactive",
                 rsc->id, name, node->details->remote_rsc->container->id);
    return NULL;
}
633
634 const char *
635 pe_node_attribute_raw(pe_node_t *node, const char *name)
636 {
637 if(node == NULL) {
638 return NULL;
639 }
640 return g_hash_table_lookup(node->details->attrs, name);
641 }