This source file includes the following definitions:
- check_health
- check_stonith_action
- check_placement_strategy
- pe_metadata
- verify_pe_options
- pe_pref
- fail2text
- text2task
- task2text
- role2text
- text2role
- pe__add_scores
- add_hash_param
- pe_node_attribute_calculated
- pe_node_attribute_raw

#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>

#include <glib.h>

#include <crm/pengine/internal.h>

gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;

static bool
check_health(const char *value)
{
    return pcmk__strcase_any_of(value, "none", "custom", "only-green",
                                "progressive", "migrate-on-red", NULL);
}

static bool
check_stonith_action(const char *value)
{
    return pcmk__strcase_any_of(value, "reboot", "poweroff", "off", NULL);
}

static bool
check_placement_strategy(const char *value)
{
    return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
                                "balanced", NULL);
}

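/* Illustrative sketch (not part of the original file): each validator above
 * checks a user-supplied option value against the allowed strings,
 * case-insensitively, and is referenced by the pe_opts[] table below:
 *
 *     check_stonith_action("Poweroff");   // true (deprecated alias for "off")
 *     check_stonith_action("shutdown");   // false -> the default is used
 */
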
static pcmk__cluster_option_t pe_opts[] = {
    /* name, old name, type, allowed values, default value, validator,
     * short description,
     * long description
     */
    {
        "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, demote, suicide",
        "stop", pcmk__valid_quorum,
        "What to do when the cluster does not have quorum",
        NULL
    },
    {
        "symmetric-cluster", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether resources can run on any node by default",
        NULL
    },
    {
        "maintenance-mode", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether the cluster should refrain from monitoring, starting, "
            "and stopping resources",
        NULL
    },
    {
        "start-failure-is-fatal", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether a start failure should prevent a resource from being "
            "recovered on the same node",
        "When true, the cluster will immediately ban a resource from a node "
            "if it fails to start there. When false, the cluster will instead "
            "check the resource's fail count against its migration-threshold."
    },
    {
        "enable-startup-probes", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether the cluster should check for active resources during start-up",
        NULL
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether to lock resources to a cleanly shut down node",
        "When true, resources active on a node when it is cleanly shut down "
            "are kept \"locked\" to that node (not allowed to run elsewhere) "
            "until they start again on that node after it rejoins (or for at "
            "most shutdown-lock-limit, if set). Stonith resources and "
            "Pacemaker Remote connections are never locked. Clone and bundle "
            "instances and the master role of promotable clones are currently "
            "never locked, though support could be added in a future release."
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        "Do not lock resources to a cleanly shut down node longer than this",
        "If shutdown-lock is true and this is set to a nonzero time duration, "
            "shutdown locks will expire after this much time has passed since "
            "the shutdown was initiated, even if the node has not rejoined."
    },

    // Fencing-related options
    {
        "stonith-enabled", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "*** Advanced Use Only *** "
            "Whether nodes may be fenced as part of recovery",
        "If false, unresponsive nodes are immediately assumed to be harmless, "
            "and resources that were active on them may be recovered "
            "elsewhere. This can result in a \"split-brain\" situation, "
            "potentially leading to data loss and/or service unavailability."
    },
    {
        "stonith-action", NULL, "enum", "reboot, off, poweroff",
        "reboot", check_stonith_action,
        "Action to send to fence device when a node needs to be fenced "
            "(\"poweroff\" is a deprecated alias for \"off\")",
        NULL
    },
    {
        "stonith-timeout", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        "*** Advanced Use Only *** Unused by Pacemaker",
        "This value is not used by Pacemaker, but is kept for backward "
            "compatibility, and certain legacy fence agents might use it."
    },
    {
        XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether watchdog integration is enabled",
        "This is set automatically by the cluster according to whether SBD "
            "is detected to be in use. User-configured values are ignored. "
            "The value `true` is meaningful if diskless SBD is used and "
            "`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
            "is required, watchdog-based self-fencing will be performed via "
            "SBD without requiring a fencing resource explicitly configured."
    },
    {
        "concurrent-fencing", NULL, "boolean", NULL,
#ifdef DEFAULT_CONCURRENT_FENCING_TRUE
        "true",
#else
        "false",
#endif
        pcmk__valid_boolean,
        "Allow performing fencing operations in parallel",
        NULL
    },
    {
        "startup-fencing", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "*** Advanced Use Only *** Whether to fence unseen nodes at start-up",
        "Setting this to false may lead to a \"split-brain\" situation, "
            "potentially leading to data loss and/or service unavailability."
    },
    {
        XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        "Apply fencing delay targeting the lost nodes with the highest total resource priority",
        "Apply the specified delay to any fencing targeting the lost nodes "
            "with the highest total resource priority, in case our cluster "
            "partition does not hold the majority of nodes. This gives the "
            "more significant nodes a better chance to win any fencing match, "
            "which is especially meaningful in a split-brain of a two-node "
            "cluster. A promoted resource instance counts as the base "
            "priority + 1 if its base priority is not 0. Any static/random "
            "delays introduced by `pcmk_delay_base/max` configured for the "
            "corresponding fencing resources will be added to this delay. "
            "This delay should be significantly greater than (safely twice) "
            "the maximum `pcmk_delay_base/max`. By default, priority fencing "
            "delay is disabled."
    },

    {
        "cluster-delay", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        "Maximum time for node-to-node communication",
        "The node elected Designated Controller (DC) will consider an action "
            "failed if it does not get a response from the node executing the "
            "action within this time (after considering the action's own "
            "timeout). The \"correct\" value will depend on the speed and "
            "load of your network and cluster nodes."
    },
    {
        "batch-limit", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "Maximum number of jobs that the cluster may execute in parallel "
            "across all nodes",
        "The \"correct\" value will depend on the speed and load of your "
            "network and cluster nodes. If set to 0, the cluster will "
            "impose a dynamically calculated limit when any node has a "
            "high load."
    },
    {
        "migration-limit", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        "The number of live migration actions that the cluster is allowed "
            "to execute in parallel on a node (-1 means no limit)"
    },

    // Orphans and stopping
    {
        "stop-all-resources", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether the cluster should stop all active resources",
        NULL
    },
    {
        "stop-orphan-resources", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether to stop resources that were removed from the configuration",
        NULL
    },
    {
        "stop-orphan-actions", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether to cancel recurring actions removed from the configuration",
        NULL
    },
    {
        "remove-after-stop", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "*** Advanced Use Only *** Whether to remove stopped resources from "
            "the executor",
        "Values other than default are poorly tested and potentially dangerous."
    },

    // Storing scheduler inputs
    {
        "pe-error-series-max", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        "The number of scheduler inputs resulting in errors to save",
        "Zero to disable, -1 to store unlimited."
    },
    {
        "pe-warn-series-max", NULL, "integer", NULL,
        "5000", pcmk__valid_number,
        "The number of scheduler inputs resulting in warnings to save",
        "Zero to disable, -1 to store unlimited."
    },
    {
        "pe-input-series-max", NULL, "integer", NULL,
        "4000", pcmk__valid_number,
        "The number of scheduler inputs without errors or warnings to save",
        "Zero to disable, -1 to store unlimited."
    },

    // Node health
    {
        "node-health-strategy", NULL, "enum",
        "none, migrate-on-red, only-green, progressive, custom",
        "none", check_health,
        "How the cluster should react to node health attributes",
        "Requires external entities to create node attributes (named with "
            "the prefix \"#health\") with values \"red\", \"yellow\" or "
            "\"green\"."
    },
    {
        "node-health-base", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "Base health score assigned to a node",
        "Only used when node-health-strategy is set to progressive."
    },
    {
        "node-health-green", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \"green\"",
        "Only used when node-health-strategy is set to custom or progressive."
    },
    {
        "node-health-yellow", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \"yellow\"",
        "Only used when node-health-strategy is set to custom or progressive."
    },
    {
        "node-health-red", NULL, "integer", NULL,
        "-INFINITY", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \"red\"",
        "Only used when node-health-strategy is set to custom or progressive."
    },

    // Placement strategy
    {
        "placement-strategy", NULL, "enum",
        "default, utilization, minimal, balanced",
        "default", check_placement_strategy,
        "How the cluster should allocate resources to nodes",
        NULL
    },
};
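
/* For orientation (a sketch, not from the original source): each entry above
 * corresponds to a cluster property that users set in the CIB, e.g.:
 *
 *     <crm_config>
 *       <cluster_property_set id="cib-bootstrap-options">
 *         <nvpair id="opt-no-quorum" name="no-quorum-policy" value="freeze"/>
 *       </cluster_property_set>
 *     </crm_config>
 *
 * A value that fails the entry's validator falls back to the listed default.
 */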

void
pe_metadata(void)
{
    pcmk__print_option_metadata("pacemaker-schedulerd", "1.0",
                                "Pacemaker scheduler options",
                                "Cluster options used by Pacemaker's scheduler"
                                " (formerly called pengine)",
                                pe_opts, DIMOF(pe_opts));
}
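
/* For context (not in the original file): this metadata output is what a
 * command such as `pacemaker-schedulerd metadata` prints, letting tools
 * discover the scheduler's supported options, types, and defaults.
 */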

void
verify_pe_options(GHashTable * options)
{
    pcmk__validate_cluster_options(options, pe_opts, DIMOF(pe_opts));
}

const char *
pe_pref(GHashTable * options, const char *name)
{
    return pcmk__cluster_option(options, pe_opts, DIMOF(pe_opts), name);
}
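
/* A minimal usage sketch (assumed caller, not from this file): callers look
 * up an option by name and get the table's default when the CIB leaves it
 * unset, e.g.:
 *
 *     const char *policy = pe_pref(data_set->config_hash, "no-quorum-policy");
 *     // "stop" unless the configuration overrides it
 */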

const char *
fail2text(enum action_fail_response fail)
{
    const char *result = "<unknown>";

    switch (fail) {
        case action_fail_ignore:
            result = "ignore";
            break;
        case action_fail_demote:
            result = "demote";
            break;
        case action_fail_block:
            result = "block";
            break;
        case action_fail_recover:
            result = "recover";
            break;
        case action_fail_migrate:
            result = "migrate";
            break;
        case action_fail_stop:
            result = "stop";
            break;
        case action_fail_fence:
            result = "fence";
            break;
        case action_fail_standby:
            result = "standby";
            break;
        case action_fail_restart_container:
            result = "restart-container";
            break;
        case action_fail_reset_remote:
            result = "reset-remote";
            break;
    }
    return result;
}

enum action_tasks
text2task(const char *task)
{
    if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
        return stop_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STOPPED, pcmk__str_casei)) {
        return stopped_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
        return start_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STARTED, pcmk__str_casei)) {
        return started_rsc;
    } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
        return shutdown_crm;
    } else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
        return stonith_node;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
        return monitor_rsc;
    } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei)) {
        return action_notify;
    } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) {
        return action_notified;
    } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
        return action_promote;
    } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
        return action_demote;
    } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTED, pcmk__str_casei)) {
        return action_promoted;
    } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTED, pcmk__str_casei)) {
        return action_demoted;
    }
#if SUPPORT_TRACING
    if (pcmk__str_eq(task, CRMD_ACTION_CANCEL, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_DELETE, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRM_OP_PROBED, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
        return no_action;
    } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
        return no_action;
    }
    crm_trace("Unsupported action: %s", task);
#endif

    return no_action;
}
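
/* Sketch (not in the original file): text2task() is the inverse of
 * task2text() below for the supported actions, e.g.:
 *
 *     text2task("stop")    == stop_rsc
 *     text2task("monitor") == monitor_rsc   // CRMD_ACTION_STATUS
 *     text2task("bogus")   == no_action     // unrecognized input
 */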

const char *
task2text(enum action_tasks task)
{
    const char *result = "<unknown>";

    switch (task) {
        case no_action:
            result = "no_action";
            break;
        case stop_rsc:
            result = CRMD_ACTION_STOP;
            break;
        case stopped_rsc:
            result = CRMD_ACTION_STOPPED;
            break;
        case start_rsc:
            result = CRMD_ACTION_START;
            break;
        case started_rsc:
            result = CRMD_ACTION_STARTED;
            break;
        case shutdown_crm:
            result = CRM_OP_SHUTDOWN;
            break;
        case stonith_node:
            result = CRM_OP_FENCE;
            break;
        case monitor_rsc:
            result = CRMD_ACTION_STATUS;
            break;
        case action_notify:
            result = CRMD_ACTION_NOTIFY;
            break;
        case action_notified:
            result = CRMD_ACTION_NOTIFIED;
            break;
        case action_promote:
            result = CRMD_ACTION_PROMOTE;
            break;
        case action_promoted:
            result = CRMD_ACTION_PROMOTED;
            break;
        case action_demote:
            result = CRMD_ACTION_DEMOTE;
            break;
        case action_demoted:
            result = CRMD_ACTION_DEMOTED;
            break;
    }

    return result;
}

const char *
role2text(enum rsc_role_e role)
{
    switch (role) {
        case RSC_ROLE_UNKNOWN:
            return RSC_ROLE_UNKNOWN_S;
        case RSC_ROLE_STOPPED:
            return RSC_ROLE_STOPPED_S;
        case RSC_ROLE_STARTED:
            return RSC_ROLE_STARTED_S;
        case RSC_ROLE_SLAVE:
            return RSC_ROLE_SLAVE_S;
        case RSC_ROLE_MASTER:
            return RSC_ROLE_MASTER_S;
    }
    CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
    CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);

    return RSC_ROLE_UNKNOWN_S;
}
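
/* Sketch (not in the original file): role2text() and text2role() below
 * round-trip for every defined role (text2role() matches
 * case-insensitively), e.g.:
 *
 *     text2role(role2text(RSC_ROLE_MASTER)) == RSC_ROLE_MASTER
 */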

enum rsc_role_e
text2role(const char *role)
{
    CRM_ASSERT(role != NULL);
    if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
        return RSC_ROLE_STOPPED;
    } else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
        return RSC_ROLE_STARTED;
    } else if (pcmk__str_eq(role, RSC_ROLE_SLAVE_S, pcmk__str_casei)) {
        return RSC_ROLE_SLAVE;
    } else if (pcmk__str_eq(role, RSC_ROLE_MASTER_S, pcmk__str_casei)) {
        return RSC_ROLE_MASTER;
    } else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) {
        return RSC_ROLE_UNKNOWN;
    }
    crm_err("Unknown role: %s", role);
    return RSC_ROLE_UNKNOWN;
}

/*!
 * \internal
 * \brief Add two scores, bounding the result to +/- CRM_SCORE_INFINITY
 *
 * \param[in] score1  First score to add
 * \param[in] score2  Second score to add
 */
int
pe__add_scores(int score1, int score2)
{
    int result = score1 + score2;

    // First handle the cases where one or both scores are infinite
    if (score1 <= -CRM_SCORE_INFINITY) {

        if (score2 <= -CRM_SCORE_INFINITY) {
            crm_trace("-INFINITY + -INFINITY = -INFINITY");
        } else if (score2 >= CRM_SCORE_INFINITY) {
            crm_trace("-INFINITY + +INFINITY = -INFINITY");
        } else {
            crm_trace("-INFINITY + %d = -INFINITY", score2);
        }

        return -CRM_SCORE_INFINITY;

    } else if (score2 <= -CRM_SCORE_INFINITY) {

        if (score1 >= CRM_SCORE_INFINITY) {
            crm_trace("+INFINITY + -INFINITY = -INFINITY");
        } else {
            crm_trace("%d + -INFINITY = -INFINITY", score1);
        }

        return -CRM_SCORE_INFINITY;

    } else if (score1 >= CRM_SCORE_INFINITY) {

        if (score2 >= CRM_SCORE_INFINITY) {
            crm_trace("+INFINITY + +INFINITY = +INFINITY");
        } else {
            crm_trace("+INFINITY + %d = +INFINITY", score2);
        }

        return CRM_SCORE_INFINITY;

    } else if (score2 >= CRM_SCORE_INFINITY) {
        crm_trace("%d + +INFINITY = +INFINITY", score1);
        return CRM_SCORE_INFINITY;
    }

    /* Both scores are finite. As long as CRM_SCORE_INFINITY is no more than
     * half of INT_MAX, the sum of two bounded scores cannot overflow, so it
     * is safe to compare the raw result against the infinity bounds.
     */
    if (result >= CRM_SCORE_INFINITY) {
        crm_trace("%d + %d = +INFINITY", score1, score2);
        return CRM_SCORE_INFINITY;

    } else if (result <= -CRM_SCORE_INFINITY) {
        crm_trace("%d + %d = -INFINITY", score1, score2);
        return -CRM_SCORE_INFINITY;
    }

    crm_trace("%d + %d = %d", score1, score2, result);
    return result;
}
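
/* Illustrative check of the saturating behavior above (not in the original
 * file; the PE_COMMON_EXAMPLES guard is purely hypothetical). Note that
 * -INFINITY dominates +INFINITY.
 */
#ifdef PE_COMMON_EXAMPLES
static void
example_add_scores(void)
{
    CRM_ASSERT(pe__add_scores(2, 3) == 5);
    CRM_ASSERT(pe__add_scores(CRM_SCORE_INFINITY, -5) == CRM_SCORE_INFINITY);
    CRM_ASSERT(pe__add_scores(CRM_SCORE_INFINITY,
                              -CRM_SCORE_INFINITY) == -CRM_SCORE_INFINITY);
}
#endif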

void
add_hash_param(GHashTable * hash, const char *name, const char *value)
{
    CRM_CHECK(hash != NULL, return);

    crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value));
    if (name == NULL || value == NULL) {
        return;

    } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
        return;

    } else if (g_hash_table_lookup(hash, name) == NULL) {
        g_hash_table_insert(hash, strdup(name), strdup(value));
    }
}
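
/* A minimal sketch of add_hash_param() semantics (not part of the original
 * file; the PE_COMMON_EXAMPLES guard is purely hypothetical): keys and values
 * are duplicated, "#default" is skipped, and an existing key is never
 * overwritten. Assumes a table created with free() destructors.
 */
#ifdef PE_COMMON_EXAMPLES
static void
example_add_hash_param(void)
{
    GHashTable *h = g_hash_table_new_full(g_str_hash, g_str_equal, free, free);

    add_hash_param(h, "timeout", "30s");       // inserted
    add_hash_param(h, "timeout", "60s");       // ignored: key already present
    add_hash_param(h, "on-fail", "#default");  // ignored: sentinel value

    // h now maps "timeout" -> "30s" only
    g_hash_table_destroy(h);
}
#endif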

const char *
pe_node_attribute_calculated(const pe_node_t *node, const char *name,
                             const pe_resource_t *rsc)
{
    const char *source;

    if (node == NULL) {
        return NULL;

    } else if (rsc == NULL) {
        return g_hash_table_lookup(node->details->attrs, name);
    }

    source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
    if (source == NULL || !pcmk__str_eq("host", source, pcmk__str_casei)) {
        return g_hash_table_lookup(node->details->attrs, name);
    }

    /* The resource wants the attribute's value on its container's host
     * rather than on the guest node itself, e.g. so a bundled resource can
     * use the host's local storage.
     */
    CRM_ASSERT(node->details->remote_rsc);
    CRM_ASSERT(node->details->remote_rsc->container);

    if (node->details->remote_rsc->container->running_on) {
        pe_node_t *host = node->details->remote_rsc->container->running_on->data;

        pe_rsc_trace(rsc, "%s: Looking for %s on the container host %s",
                     rsc->id, name, host->details->uname);
        return g_hash_table_lookup(host->details->attrs, name);
    }

    pe_rsc_trace(rsc, "%s: Not looking for %s on the container host: %s is inactive",
                 rsc->id, name, node->details->remote_rsc->container->id);
    return NULL;
}
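
/* Sketch (not in the original file): for a guest node whose resource has the
 * XML_RSC_ATTR_TARGET meta-attribute set to "host", the lookup above
 * resolves against the node currently hosting the container:
 *
 *     pe_node_attribute_calculated(guest_node, "#uname", bundled_rsc);
 *     // -> the host's value, not the guest's
 *
 * pe_node_attribute_raw() below always reads the node's own attribute table.
 */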

const char *
pe_node_attribute_raw(pe_node_t *node, const char *name)
{
    if (node == NULL) {
        return NULL;
    }
    return g_hash_table_lookup(node->details->attrs, name);
}