This source file includes following definitions.
- check_placement_strategy
- pe_metadata
- verify_pe_options
- pe_pref
- fail2text
- text2task
- task2text
- role2text
- text2role
- add_hash_param
- pe_node_attribute_calculated
- pe_node_attribute_raw
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11 #include <crm/crm.h>
12 #include <crm/msg_xml.h>
13 #include <crm/common/xml.h>
14 #include <crm/common/util.h>
15
16 #include <glib.h>
17
18 #include <crm/pengine/internal.h>
19
20 gboolean was_processing_error = FALSE;
21 gboolean was_processing_warning = FALSE;
22
23 static bool
24 check_placement_strategy(const char *value)
25 {
26 return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
27 "balanced", NULL);
28 }
29
/* Scheduler cluster options, in pcmk__cluster_option_t order:
 *
 *   { name, legacy alias, type, allowed values,
 *     default value, validator,
 *     short description,
 *     long description }
 *
 * Descriptions are shown in the metadata emitted by pe_metadata().
 */
static pcmk__cluster_option_t pe_opts[] = {
    /* --- Quorum and basic resource placement --- */
    {
        "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide",
        "stop", pcmk__valid_quorum,
        "What to do when the cluster does not have quorum",
        NULL
    },
    {
        "symmetric-cluster", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether resources can run on any node by default",
        NULL
    },
    {
        "maintenance-mode", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether the cluster should refrain from monitoring, starting, "
            "and stopping resources",
        NULL
    },
    {
        "start-failure-is-fatal", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether a start failure should prevent a resource from being "
            "recovered on the same node",
        "When true, the cluster will immediately ban a resource from a node "
            "if it fails to start there. When false, the cluster will instead "
            "check the resource's fail count against its migration-threshold."
    },
    {
        "enable-startup-probes", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether the cluster should check for active resources during start-up",
        NULL
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether to lock resources to a cleanly shut down node",
        "When true, resources active on a node when it is cleanly shut down "
            "are kept \"locked\" to that node (not allowed to run elsewhere) "
            "until they start again on that node after it rejoins (or for at "
            "most shutdown-lock-limit, if set). Stonith resources and "
            "Pacemaker Remote connections are never locked. Clone and bundle "
            "instances and the promoted role of promotable clones are currently"
            " never locked, though support could be added in a future release."
    },
    {
        XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        "Do not lock resources to a cleanly shut down node longer than this",
        "If shutdown-lock is true and this is set to a nonzero time duration, "
            "shutdown locks will expire after this much time has passed since "
            "the shutdown was initiated, even if the node has not rejoined."
    },

    /* --- Fencing --- */
    {
        "stonith-enabled", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "*** Advanced Use Only *** "
            "Whether nodes may be fenced as part of recovery",
        "If false, unresponsive nodes are immediately assumed to be harmless, "
            "and resources that were active on them may be recovered "
            "elsewhere. This can result in a \"split-brain\" situation, "
            "potentially leading to data loss and/or service unavailability."
    },
    {
        "stonith-action", NULL, "select", "reboot, off, poweroff",
        "reboot", pcmk__is_fencing_action,
        "Action to send to fence device when a node needs to be fenced "
            "(\"poweroff\" is a deprecated alias for \"off\")",
        NULL
    },
    {
        "stonith-timeout", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        "*** Advanced Use Only *** Unused by Pacemaker",
        "This value is not used by Pacemaker, but is kept for backward "
            "compatibility, and certain legacy fence agents might use it."
    },
    {
        XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        N_("Whether watchdog integration is enabled"),
        "This is set automatically by the cluster according to whether SBD "
            "is detected to be in use. User-configured values are ignored. "
            "The value `true` is meaningful if diskless SBD is used and "
            "`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
            "is required, watchdog-based self-fencing will be performed via "
            "SBD without requiring a fencing resource explicitly configured."
    },
    {
        "concurrent-fencing", NULL, "boolean", NULL,
        PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean,
        "Allow performing fencing operations in parallel",
        NULL
    },
    {
        "startup-fencing", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "*** Advanced Use Only *** Whether to fence unseen nodes at start-up",
        "Setting this to false may lead to a \"split-brain\" situation,"
            "potentially leading to data loss and/or service unavailability."
    },
    {
        XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
        "0", pcmk__valid_interval_spec,
        "Apply fencing delay targeting the lost nodes with the highest total resource priority",
        "Apply specified delay for the fencings that are targeting the lost "
            "nodes with the highest total resource priority in case we don't "
            "have the majority of the nodes in our cluster partition, so that "
            "the more significant nodes potentially win any fencing match, "
            "which is especially meaningful under split-brain of 2-node "
            "cluster. A promoted resource instance takes the base priority + 1 "
            "on calculation if the base priority is not 0. Any static/random "
            "delays that are introduced by `pcmk_delay_base/max` configured "
            "for the corresponding fencing resources will be added to this "
            "delay. This delay should be significantly greater than, safely "
            "twice, the maximum `pcmk_delay_base/max`. By default, priority "
            "fencing delay is disabled."
    },

    /* --- Timing and concurrency limits --- */
    {
        "cluster-delay", NULL, "time", NULL,
        "60s", pcmk__valid_interval_spec,
        "Maximum time for node-to-node communication",
        "The node elected Designated Controller (DC) will consider an action "
            "failed if it does not get a response from the node executing the "
            "action within this time (after considering the action's own "
            "timeout). The \"correct\" value will depend on the speed and "
            "load of your network and cluster nodes."
    },
    {
        "batch-limit", NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "Maximum number of jobs that the cluster may execute in parallel "
            "across all nodes",
        "The \"correct\" value will depend on the speed and load of your "
            "network and cluster nodes. If set to 0, the cluster will "
            "impose a dynamically calculated limit when any node has a "
            "high load."
    },
    {
        "migration-limit", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        "The number of live migration actions that the cluster is allowed "
            "to execute in parallel on a node (-1 means no limit)"
    },

    /* --- Orphans and stopping --- */
    {
        "stop-all-resources", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "Whether the cluster should stop all active resources",
        NULL
    },
    {
        "stop-orphan-resources", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether to stop resources that were removed from the configuration",
        NULL
    },
    {
        "stop-orphan-actions", NULL, "boolean", NULL,
        "true", pcmk__valid_boolean,
        "Whether to cancel recurring actions removed from the configuration",
        NULL
    },
    {
        "remove-after-stop", NULL, "boolean", NULL,
        "false", pcmk__valid_boolean,
        "*** Deprecated *** Whether to remove stopped resources from "
            "the executor",
        "Values other than default are poorly tested and potentially dangerous."
            " This option will be removed in a future release."
    },

    /* --- Storing scheduler inputs --- */
    {
        "pe-error-series-max", NULL, "integer", NULL,
        "-1", pcmk__valid_number,
        "The number of scheduler inputs resulting in errors to save",
        "Zero to disable, -1 to store unlimited."
    },
    {
        "pe-warn-series-max", NULL, "integer", NULL,
        "5000", pcmk__valid_number,
        "The number of scheduler inputs resulting in warnings to save",
        "Zero to disable, -1 to store unlimited."
    },
    {
        "pe-input-series-max", NULL, "integer", NULL,
        "4000", pcmk__valid_number,
        "The number of scheduler inputs without errors or warnings to save",
        "Zero to disable, -1 to store unlimited."
    },

    /* --- Node health --- */
    {
        PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select",
        PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", "
            PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", "
            PCMK__VALUE_CUSTOM,
        PCMK__VALUE_NONE, pcmk__validate_health_strategy,
        "How cluster should react to node health attributes",
        "Requires external entities to create node attributes (named with "
            "the prefix \"#health\") with values \"" PCMK__VALUE_RED "\", "
            "\"" PCMK__VALUE_YELLOW "\", or \"" PCMK__VALUE_GREEN "\"."
    },
    {
        PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "Base health score assigned to a node",
        "Only used when " PCMK__OPT_NODE_HEALTH_STRATEGY " is set to "
            PCMK__VALUE_PROGRESSIVE "."
    },
    {
        PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \""
            PCMK__VALUE_GREEN "\"",
        "Only used when " PCMK__OPT_NODE_HEALTH_STRATEGY " is set to "
            PCMK__VALUE_CUSTOM " or " PCMK__VALUE_PROGRESSIVE "."
    },
    {
        PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL,
        "0", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \""
            PCMK__VALUE_YELLOW "\"",
        "Only used when " PCMK__OPT_NODE_HEALTH_STRATEGY " is set to "
            PCMK__VALUE_CUSTOM " or " PCMK__VALUE_PROGRESSIVE "."
    },
    {
        PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL,
        "-INFINITY", pcmk__valid_number,
        "The score to use for a node health attribute whose value is \""
            PCMK__VALUE_RED "\"",
        "Only used when " PCMK__OPT_NODE_HEALTH_STRATEGY " is set to "
            PCMK__VALUE_CUSTOM " or " PCMK__VALUE_PROGRESSIVE "."
    },

    /* --- Placement strategy --- */
    {
        "placement-strategy", NULL, "select",
        "default, utilization, minimal, balanced",
        "default", check_placement_strategy,
        "How the cluster should allocate resources to nodes",
        NULL
    },
};
286
287 void
288 pe_metadata(pcmk__output_t *out)
289 {
290 char *s = pcmk__format_option_metadata("pacemaker-schedulerd",
291 "Pacemaker scheduler options",
292 "Cluster options used by Pacemaker's scheduler",
293 pe_opts, PCMK__NELEM(pe_opts));
294 out->output_xml(out, "metadata", s);
295 free(s);
296 }
297
/*!
 * \brief Validate scheduler options, logging any that are invalid
 *
 * \param[in,out] options  Table of option name/value pairs to check
 */
void
verify_pe_options(GHashTable * options)
{
    pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
}
303
/*!
 * \brief Get the effective value of a scheduler option
 *
 * \param[in,out] options  Table of option name/value pairs (may be updated
 *                         with defaults by the lookup)
 * \param[in]     name     Option name to look up
 *
 * \return Configured value of \p name, or its default if unset
 */
const char *
pe_pref(GHashTable * options, const char *name)
{
    return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
}
309
310 const char *
311 fail2text(enum action_fail_response fail)
312 {
313 const char *result = "<unknown>";
314
315 switch (fail) {
316 case action_fail_ignore:
317 result = "ignore";
318 break;
319 case action_fail_demote:
320 result = "demote";
321 break;
322 case action_fail_block:
323 result = "block";
324 break;
325 case action_fail_recover:
326 result = "recover";
327 break;
328 case action_fail_migrate:
329 result = "migrate";
330 break;
331 case action_fail_stop:
332 result = "stop";
333 break;
334 case action_fail_fence:
335 result = "fence";
336 break;
337 case action_fail_standby:
338 result = "standby";
339 break;
340 case action_fail_restart_container:
341 result = "restart-container";
342 break;
343 case action_fail_reset_remote:
344 result = "reset-remote";
345 break;
346 }
347 return result;
348 }
349
350 enum action_tasks
351 text2task(const char *task)
352 {
353 if (pcmk__str_eq(task, CRMD_ACTION_STOP, pcmk__str_casei)) {
354 return stop_rsc;
355 } else if (pcmk__str_eq(task, CRMD_ACTION_STOPPED, pcmk__str_casei)) {
356 return stopped_rsc;
357 } else if (pcmk__str_eq(task, CRMD_ACTION_START, pcmk__str_casei)) {
358 return start_rsc;
359 } else if (pcmk__str_eq(task, CRMD_ACTION_STARTED, pcmk__str_casei)) {
360 return started_rsc;
361 } else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
362 return shutdown_crm;
363 } else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
364 return stonith_node;
365 } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
366 return monitor_rsc;
367 } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei)) {
368 return action_notify;
369 } else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) {
370 return action_notified;
371 } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) {
372 return action_promote;
373 } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTE, pcmk__str_casei)) {
374 return action_demote;
375 } else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTED, pcmk__str_casei)) {
376 return action_promoted;
377 } else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTED, pcmk__str_casei)) {
378 return action_demoted;
379 }
380 #if SUPPORT_TRACING
381 if (pcmk__str_eq(task, CRMD_ACTION_CANCEL, pcmk__str_casei)) {
382 return no_action;
383 } else if (pcmk__str_eq(task, CRMD_ACTION_DELETE, pcmk__str_casei)) {
384 return no_action;
385 } else if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei)) {
386 return no_action;
387 } else if (pcmk__str_eq(task, CRM_OP_LRM_REFRESH, pcmk__str_casei)) {
388 return no_action;
389 } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
390 return no_action;
391 } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
392 return no_action;
393 }
394 crm_trace("Unsupported action: %s", task);
395 #endif
396
397 return no_action;
398 }
399
400 const char *
401 task2text(enum action_tasks task)
402 {
403 const char *result = "<unknown>";
404
405 switch (task) {
406 case no_action:
407 result = "no_action";
408 break;
409 case stop_rsc:
410 result = CRMD_ACTION_STOP;
411 break;
412 case stopped_rsc:
413 result = CRMD_ACTION_STOPPED;
414 break;
415 case start_rsc:
416 result = CRMD_ACTION_START;
417 break;
418 case started_rsc:
419 result = CRMD_ACTION_STARTED;
420 break;
421 case shutdown_crm:
422 result = CRM_OP_SHUTDOWN;
423 break;
424 case stonith_node:
425 result = CRM_OP_FENCE;
426 break;
427 case monitor_rsc:
428 result = CRMD_ACTION_STATUS;
429 break;
430 case action_notify:
431 result = CRMD_ACTION_NOTIFY;
432 break;
433 case action_notified:
434 result = CRMD_ACTION_NOTIFIED;
435 break;
436 case action_promote:
437 result = CRMD_ACTION_PROMOTE;
438 break;
439 case action_promoted:
440 result = CRMD_ACTION_PROMOTED;
441 break;
442 case action_demote:
443 result = CRMD_ACTION_DEMOTE;
444 break;
445 case action_demoted:
446 result = CRMD_ACTION_DEMOTED;
447 break;
448 }
449
450 return result;
451 }
452
/*!
 * \brief Map a resource role to its display string
 *
 * \param[in] role  Role to map
 *
 * \return Static string naming \p role (legacy "master"/"slave" names when
 *         built with PCMK__COMPAT_2_0), or RSC_ROLE_UNKNOWN_S for
 *         out-of-range values
 */
const char *
role2text(enum rsc_role_e role)
{
    switch (role) {
        case RSC_ROLE_UNKNOWN:
            return RSC_ROLE_UNKNOWN_S;
        case RSC_ROLE_STOPPED:
            return RSC_ROLE_STOPPED_S;
        case RSC_ROLE_STARTED:
            return RSC_ROLE_STARTED_S;
        case RSC_ROLE_UNPROMOTED:
#ifdef PCMK__COMPAT_2_0
            // Compatibility builds keep the pre-2.1 role name
            return RSC_ROLE_UNPROMOTED_LEGACY_S;
#else
            return RSC_ROLE_UNPROMOTED_S;
#endif
        case RSC_ROLE_PROMOTED:
#ifdef PCMK__COMPAT_2_0
            // Compatibility builds keep the pre-2.1 role name
            return RSC_ROLE_PROMOTED_LEGACY_S;
#else
            return RSC_ROLE_PROMOTED_S;
#endif
    }
    // Reached only for values outside the enum; log and fall back to unknown
    CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
    CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);

    return RSC_ROLE_UNKNOWN_S;
}
481
/*!
 * \brief Parse a role name into an rsc_role_e value
 *
 * \param[in] role  Role name (must not be NULL; matched case-insensitively,
 *                  accepting both current and legacy promoted/unpromoted names)
 *
 * \return Corresponding enum rsc_role_e value, or RSC_ROLE_UNKNOWN (with an
 *         error logged) if \p role is not recognized
 */
enum rsc_role_e
text2role(const char *role)
{
    CRM_ASSERT(role != NULL);
    if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
        return RSC_ROLE_STOPPED;
    } else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
        return RSC_ROLE_STARTED;
    } else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S,
                                    RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
        return RSC_ROLE_UNPROMOTED;
    } else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
                                    RSC_ROLE_PROMOTED_LEGACY_S, NULL)) {
        return RSC_ROLE_PROMOTED;
    } else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) {
        return RSC_ROLE_UNKNOWN;
    }
    crm_err("Unknown role: %s", role);
    return RSC_ROLE_UNKNOWN;
}
502
503 void
504 add_hash_param(GHashTable * hash, const char *name, const char *value)
505 {
506 CRM_CHECK(hash != NULL, return);
507
508 crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value));
509 if (name == NULL || value == NULL) {
510 return;
511
512 } else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
513 return;
514
515 } else if (g_hash_table_lookup(hash, name) == NULL) {
516 g_hash_table_insert(hash, strdup(name), strdup(value));
517 }
518 }
519
/*!
 * \brief Look up a node attribute, considering a resource's container host
 *
 * For most lookups this is a plain attribute-table lookup on \p node. But if
 * \p rsc has its container meta-attribute set to "host", and \p node is a
 * guest (remote) node, the attribute is instead looked up on the node where
 * the guest's container is currently running.
 *
 * \param[in] node  Node to check (NULL is allowed and yields NULL)
 * \param[in] name  Attribute name to look up
 * \param[in] rsc   Resource whose perspective to use (may be NULL)
 *
 * \return Attribute value, or NULL if unset (or the container is inactive)
 */
const char *
pe_node_attribute_calculated(const pe_node_t *node, const char *name,
                             const pe_resource_t *rsc)
{
    const char *source;

    if(node == NULL) {
        return NULL;

    } else if(rsc == NULL) {
        // No resource context: ordinary lookup on the node itself
        return g_hash_table_lookup(node->details->attrs, name);
    }

    // Only a container target of "host" redirects the lookup
    source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
    if(source == NULL || !pcmk__str_eq("host", source, pcmk__str_casei)) {
        return g_hash_table_lookup(node->details->attrs, name);
    }

    /* Use attributes set for the container's host instead of the guest node
     * itself. A container target of "host" is only valid for guest nodes, so
     * node must be a guest here (hence remote_rsc and its container are
     * expected to exist).
     */
    CRM_ASSERT(node->details->remote_rsc);
    CRM_ASSERT(node->details->remote_rsc->container);

    if(node->details->remote_rsc->container->running_on) {
        // Use the first node the container is running on
        pe_node_t *host = node->details->remote_rsc->container->running_on->data;
        pe_rsc_trace(rsc, "%s: Looking for %s on the container host %s", rsc->id, name, host->details->uname);
        return g_hash_table_lookup(host->details->attrs, name);
    }

    pe_rsc_trace(rsc, "%s: Not looking for %s on the container host: %s is inactive",
                 rsc->id, name, node->details->remote_rsc->container->id);
    return NULL;
}
558
559 const char *
560 pe_node_attribute_raw(pe_node_t *node, const char *name)
561 {
562 if(node == NULL) {
563 return NULL;
564 }
565 return g_hash_table_lookup(node->details->attrs, name);
566 }