This source file includes the following definitions.
- is_dangling_guest_node
- pe_fence_node
- set_if_xpath
- unpack_config
- pe_create_node
- expand_remote_rsc_meta
- handle_startup_fencing
- unpack_nodes
- setup_container
- unpack_remote_nodes
- link_rsc2remotenode
- destroy_tag
- unpack_resources
- unpack_tags
- unpack_ticket_state
- unpack_tickets_state
- unpack_handle_remote_attrs
- unpack_transient_attributes
- unpack_node_state
- unpack_node_history
- unpack_status
- unpack_node_member
- unpack_node_online
- unpack_node_terminate
- determine_online_status_no_fencing
- pending_too_long
- determine_online_status_fencing
- determine_remote_online_status
- determine_online_status
- pe_base_name_end
- clone_strip
- clone_zero
- create_fake_resource
- create_anonymous_orphan
- find_anonymous_clone
- unpack_find_resource
- process_orphan_resource
- process_rsc_state
- process_recurring
- calculate_active_ops
- unpack_shutdown_lock
- unpack_lrm_resource
- handle_orphaned_container_fillers
- unpack_node_lrm
- set_active
- set_node_score
- find_lrm_op
- find_lrm_resource
- unknown_on_node
- monitor_not_running_after
- non_monitor_after
- newer_state_after_migrate
- get_migration_node_names
- add_dangling_migration
- unpack_migrate_to_success
- unpack_migrate_to_failure
- unpack_migrate_from_failure
- record_failed_op
- last_change_str
- cmp_on_fail
- ban_from_all_nodes
- unpack_failure_handling
- unpack_rsc_op_failure
- block_if_unrecoverable
- remap_because
- remap_operation
- should_clear_for_param_change
- order_after_remote_fencing
- should_ignore_failure_timeout
- check_operation_expiry
- pe__target_rc_from_xml
- update_resource_state
- can_affect_state
- unpack_action_result
- process_expired_result
- mask_probe_failure
- failure_is_newer
- process_pending_action
- unpack_rsc_op
- add_node_attrs
- extract_operations
- find_operations
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <stdio.h>
13 #include <string.h>
14 #include <glib.h>
15 #include <time.h>
16
17 #include <crm/crm.h>
18 #include <crm/services.h>
19 #include <crm/msg_xml.h>
20 #include <crm/common/xml.h>
21 #include <crm/common/xml_internal.h>
22
23 #include <crm/common/util.h>
24 #include <crm/pengine/rules.h>
25 #include <crm/pengine/internal.h>
26 #include <pe_status_private.h>
27
28 CRM_TRACE_INIT_DATA(pe_status);
29
30
/*
 * Bundle of values describing a single action history entry.
 *
 * NOTE(review): nothing in this part of the file fills in or reads this
 * structure, so the per-field notes below are inferred from the member names
 * and types only -- confirm against the code that populates it.
 */
struct action_history {
    pcmk_resource_t *rsc;       // presumably the resource the entry belongs to
    pcmk_node_t *node;          // presumably the node the entry was recorded on
    xmlNode *xml;               // presumably the XML of the history entry

    // The remaining members look like values extracted from that XML (TODO confirm)
    const char *id;
    const char *key;
    const char *task;
    const char *exit_reason;
    guint interval_ms;          // the name suggests milliseconds
    int call_id;
    int expected_exit_status;
    int exit_status;
    int execution_status;
};
47
48
49
50
51
/*
 * Set or clear a scheduler flag according to a boolean cluster option.
 *
 * The option's value is looked up with pe_pref() in (scheduler)->config_hash.
 * When a value is found, (flag) is set in (scheduler)->flags if crm_is_true()
 * accepts the value and cleared otherwise. When no value is found, the flags
 * are left untouched.
 *
 * The flag argument is also stringified (#flag) for the trace message, so
 * callers should pass the flag's symbolic name.
 */
#define set_config_flag(scheduler, option, flag) do {                         \
        const char *scf_setting = pe_pref((scheduler)->config_hash,           \
                                          (option));                          \
        if (scf_setting != NULL) {                                            \
            (scheduler)->flags = crm_is_true(scf_setting)                     \
                ? pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE,           \
                                     "Scheduler", crm_system_name,            \
                                     (scheduler)->flags, (flag), #flag)       \
                : pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,         \
                                       "Scheduler", crm_system_name,          \
                                       (scheduler)->flags, (flag), #flag);    \
        }                                                                     \
    } while (0)
68
/*
 * Forward declarations of file-local helpers, so that they can be called
 * before the point in the file where they are defined.
 */
static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node,
                          xmlNode *xml_op, xmlNode **last_failure,
                          enum action_fail_response *failed);
static void determine_remote_online_status(pcmk_scheduler_t *scheduler,
                                           pcmk_node_t *this_node);
static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node,
                           bool overwrite, pcmk_scheduler_t *scheduler);
static void determine_online_status(const xmlNode *node_state,
                                    pcmk_node_t *this_node,
                                    pcmk_scheduler_t *scheduler);

static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
                            pcmk_scheduler_t *scheduler);
82
83
84 static gboolean
85 is_dangling_guest_node(pcmk_node_t *node)
86 {
87
88
89
90 if (pe__is_guest_or_remote_node(node) &&
91 node->details->remote_rsc &&
92 node->details->remote_rsc->container == NULL &&
93 pcmk_is_set(node->details->remote_rsc->flags,
94 pcmk_rsc_removed_filler)) {
95 return TRUE;
96 }
97
98 return FALSE;
99 }
100
101
102
103
104
105
106
107
108
109 void
110 pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node,
111 const char *reason, bool priority_delay)
112 {
113 CRM_CHECK(node, return);
114
115
116 if (pe__is_guest_node(node)) {
117 pcmk_resource_t *rsc = node->details->remote_rsc->container;
118
119 if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
120 if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
121 crm_notice("Not fencing guest node %s "
122 "(otherwise would because %s): "
123 "its guest resource %s is unmanaged",
124 pe__node_name(node), reason, rsc->id);
125 } else {
126 crm_warn("Guest node %s will be fenced "
127 "(by recovering its guest resource %s): %s",
128 pe__node_name(node), rsc->id, reason);
129
130
131
132
133
134 node->details->remote_requires_reset = TRUE;
135 pe__set_resource_flags(rsc,
136 pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
137 }
138 }
139
140 } else if (is_dangling_guest_node(node)) {
141 crm_info("Cleaning up dangling connection for guest node %s: "
142 "fencing was already done because %s, "
143 "and guest resource no longer exists",
144 pe__node_name(node), reason);
145 pe__set_resource_flags(node->details->remote_rsc,
146 pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
147
148 } else if (pe__is_remote_node(node)) {
149 pcmk_resource_t *rsc = node->details->remote_rsc;
150
151 if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
152 crm_notice("Not fencing remote node %s "
153 "(otherwise would because %s): connection is unmanaged",
154 pe__node_name(node), reason);
155 } else if(node->details->remote_requires_reset == FALSE) {
156 node->details->remote_requires_reset = TRUE;
157 crm_warn("Remote node %s %s: %s",
158 pe__node_name(node),
159 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
160 reason);
161 }
162 node->details->unclean = TRUE;
163
164 pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler);
165
166 } else if (node->details->unclean) {
167 crm_trace("Cluster node %s %s because %s",
168 pe__node_name(node),
169 pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean",
170 reason);
171
172 } else {
173 crm_warn("Cluster node %s %s: %s",
174 pe__node_name(node),
175 pe_can_fence(scheduler, node)? "will be fenced" : "is unclean",
176 reason);
177 node->details->unclean = TRUE;
178 pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler);
179 }
180 }
181
182
183
184
/*
 * XPath step matching an nvpair element whose name is either
 * PCMK_STONITH_PROVIDES or XML_RSC_ATTR_REQUIRES and whose value is
 * PCMK__VALUE_UNFENCING
 */
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
    "[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'" \
    "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
    "and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']"

/*
 * XPath expression (a union of two paths) matching such an nvpair inside a
 * meta-attributes set, either at any depth below the configuration's resources
 * section or directly below its XML_CIB_TAG_RSCCONFIG section. unpack_config()
 * uses it to decide whether to set pcmk_sched_enable_unfencing.
 */
#define XPATH_ENABLE_UNFENCING \
    "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
    "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
    "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
    "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
196
197 static void
198 set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler)
199 {
200 xmlXPathObjectPtr result = NULL;
201
202 if (!pcmk_is_set(scheduler->flags, flag)) {
203 result = xpath_search(scheduler->input, xpath);
204 if (result && (numXpathResults(result) > 0)) {
205 pe__set_working_set_flags(scheduler, flag);
206 }
207 freeXpathObject(result);
208 }
209 }
210
211 gboolean
212 unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
213 {
214 const char *value = NULL;
215 GHashTable *config_hash = pcmk__strkey_table(free, free);
216
217 pe_rule_eval_data_t rule_data = {
218 .node_hash = NULL,
219 .role = pcmk_role_unknown,
220 .now = scheduler->now,
221 .match_data = NULL,
222 .rsc_data = NULL,
223 .op_data = NULL
224 };
225
226 scheduler->config_hash = config_hash;
227
228 pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash,
229 CIB_OPTIONS_FIRST, FALSE, scheduler);
230
231 verify_pe_options(scheduler->config_hash);
232
233 set_config_flag(scheduler, "enable-startup-probes",
234 pcmk_sched_probe_resources);
235 if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) {
236 crm_info("Startup probes: disabled (dangerous)");
237 }
238
239 value = pe_pref(scheduler->config_hash, XML_ATTR_HAVE_WATCHDOG);
240 if (value && crm_is_true(value)) {
241 crm_info("Watchdog-based self-fencing will be performed via SBD if "
242 "fencing is required and stonith-watchdog-timeout is nonzero");
243 pe__set_working_set_flags(scheduler, pcmk_sched_have_fencing);
244 }
245
246
247
248
249 set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING,
250 scheduler);
251
252 value = pe_pref(scheduler->config_hash, "stonith-timeout");
253 scheduler->stonith_timeout = (int) crm_parse_interval_spec(value);
254 crm_debug("STONITH timeout: %d", scheduler->stonith_timeout);
255
256 set_config_flag(scheduler, "stonith-enabled", pcmk_sched_fencing_enabled);
257 if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
258 crm_debug("STONITH of failed nodes is enabled");
259 } else {
260 crm_debug("STONITH of failed nodes is disabled");
261 }
262
263 scheduler->stonith_action = pe_pref(scheduler->config_hash,
264 "stonith-action");
265 if (!strcmp(scheduler->stonith_action, "poweroff")) {
266 pe_warn_once(pcmk__wo_poweroff,
267 "Support for stonith-action of 'poweroff' is deprecated "
268 "and will be removed in a future release (use 'off' instead)");
269 scheduler->stonith_action = PCMK_ACTION_OFF;
270 }
271 crm_trace("STONITH will %s nodes", scheduler->stonith_action);
272
273 set_config_flag(scheduler, "concurrent-fencing",
274 pcmk_sched_concurrent_fencing);
275 if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) {
276 crm_debug("Concurrent fencing is enabled");
277 } else {
278 crm_debug("Concurrent fencing is disabled");
279 }
280
281 value = pe_pref(scheduler->config_hash,
282 XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
283 if (value) {
284 scheduler->priority_fencing_delay = crm_parse_interval_spec(value)
285 / 1000;
286 crm_trace("Priority fencing delay is %ds",
287 scheduler->priority_fencing_delay);
288 }
289
290 set_config_flag(scheduler, "stop-all-resources", pcmk_sched_stop_all);
291 crm_debug("Stop all active resources: %s",
292 pcmk__btoa(pcmk_is_set(scheduler->flags, pcmk_sched_stop_all)));
293
294 set_config_flag(scheduler, "symmetric-cluster",
295 pcmk_sched_symmetric_cluster);
296 if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) {
297 crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
298 }
299
300 value = pe_pref(scheduler->config_hash, "no-quorum-policy");
301
302 if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) {
303 scheduler->no_quorum_policy = pcmk_no_quorum_ignore;
304
305 } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) {
306 scheduler->no_quorum_policy = pcmk_no_quorum_freeze;
307
308 } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
309 scheduler->no_quorum_policy = pcmk_no_quorum_demote;
310
311 } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) {
312 if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
313 int do_panic = 0;
314
315 crm_element_value_int(scheduler->input, XML_ATTR_QUORUM_PANIC,
316 &do_panic);
317 if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) {
318 scheduler->no_quorum_policy = pcmk_no_quorum_fence;
319 } else {
320 crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
321 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
322 }
323 } else {
324 pcmk__config_err("Resetting no-quorum-policy to 'stop' because "
325 "fencing is disabled");
326 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
327 }
328
329 } else {
330 scheduler->no_quorum_policy = pcmk_no_quorum_stop;
331 }
332
333 switch (scheduler->no_quorum_policy) {
334 case pcmk_no_quorum_freeze:
335 crm_debug("On loss of quorum: Freeze resources");
336 break;
337 case pcmk_no_quorum_stop:
338 crm_debug("On loss of quorum: Stop ALL resources");
339 break;
340 case pcmk_no_quorum_demote:
341 crm_debug("On loss of quorum: "
342 "Demote promotable resources and stop other resources");
343 break;
344 case pcmk_no_quorum_fence:
345 crm_notice("On loss of quorum: Fence all remaining nodes");
346 break;
347 case pcmk_no_quorum_ignore:
348 crm_notice("On loss of quorum: Ignore");
349 break;
350 }
351
352 set_config_flag(scheduler, "stop-orphan-resources",
353 pcmk_sched_stop_removed_resources);
354 if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
355 crm_trace("Orphan resources are stopped");
356 } else {
357 crm_trace("Orphan resources are ignored");
358 }
359
360 set_config_flag(scheduler, "stop-orphan-actions",
361 pcmk_sched_cancel_removed_actions);
362 if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) {
363 crm_trace("Orphan resource actions are stopped");
364 } else {
365 crm_trace("Orphan resource actions are ignored");
366 }
367
368 value = pe_pref(scheduler->config_hash, "remove-after-stop");
369 if (value != NULL) {
370 if (crm_is_true(value)) {
371 pe__set_working_set_flags(scheduler, pcmk_sched_remove_after_stop);
372 #ifndef PCMK__COMPAT_2_0
373 pe_warn_once(pcmk__wo_remove_after,
374 "Support for the remove-after-stop cluster property is"
375 " deprecated and will be removed in a future release");
376 #endif
377 } else {
378 pe__clear_working_set_flags(scheduler,
379 pcmk_sched_remove_after_stop);
380 }
381 }
382
383 set_config_flag(scheduler, "maintenance-mode", pcmk_sched_in_maintenance);
384 crm_trace("Maintenance mode: %s",
385 pcmk__btoa(pcmk_is_set(scheduler->flags,
386 pcmk_sched_in_maintenance)));
387
388 set_config_flag(scheduler, "start-failure-is-fatal",
389 pcmk_sched_start_failure_fatal);
390 if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
391 crm_trace("Start failures are always fatal");
392 } else {
393 crm_trace("Start failures are handled by failcount");
394 }
395
396 if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
397 set_config_flag(scheduler, "startup-fencing",
398 pcmk_sched_startup_fencing);
399 }
400 if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
401 crm_trace("Unseen nodes will be fenced");
402 } else {
403 pe_warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes");
404 }
405
406 pe__unpack_node_health_scores(scheduler);
407
408 scheduler->placement_strategy = pe_pref(scheduler->config_hash,
409 "placement-strategy");
410 crm_trace("Placement strategy: %s", scheduler->placement_strategy);
411
412 set_config_flag(scheduler, "shutdown-lock", pcmk_sched_shutdown_lock);
413 if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
414 value = pe_pref(scheduler->config_hash,
415 XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
416 scheduler->shutdown_lock = crm_parse_interval_spec(value) / 1000;
417 crm_trace("Resources will be locked to nodes that were cleanly "
418 "shut down (locks expire after %s)",
419 pcmk__readable_interval(scheduler->shutdown_lock));
420 } else {
421 crm_trace("Resources will not be locked to nodes that were cleanly "
422 "shut down");
423 }
424
425 value = pe_pref(scheduler->config_hash,
426 XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT);
427 scheduler->node_pending_timeout = crm_parse_interval_spec(value) / 1000;
428 if (scheduler->node_pending_timeout == 0) {
429 crm_trace("Do not fence pending nodes");
430 } else {
431 crm_trace("Fence pending nodes after %s",
432 pcmk__readable_interval(scheduler->node_pending_timeout
433 * 1000));
434 }
435
436 return TRUE;
437 }
438
439 pcmk_node_t *
440 pe_create_node(const char *id, const char *uname, const char *type,
441 const char *score, pcmk_scheduler_t *scheduler)
442 {
443 pcmk_node_t *new_node = NULL;
444
445 if (pe_find_node(scheduler->nodes, uname) != NULL) {
446 pcmk__config_warn("More than one node entry has name '%s'", uname);
447 }
448
449 new_node = calloc(1, sizeof(pcmk_node_t));
450 if (new_node == NULL) {
451 return NULL;
452 }
453
454 new_node->weight = char2score(score);
455 new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
456
457 if (new_node->details == NULL) {
458 free(new_node);
459 return NULL;
460 }
461
462 crm_trace("Creating node for entry %s/%s", uname, id);
463 new_node->details->id = id;
464 new_node->details->uname = uname;
465 new_node->details->online = FALSE;
466 new_node->details->shutdown = FALSE;
467 new_node->details->rsc_discovery_enabled = TRUE;
468 new_node->details->running_rsc = NULL;
469 new_node->details->data_set = scheduler;
470
471 if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) {
472 new_node->details->type = pcmk_node_variant_cluster;
473
474 } else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) {
475 new_node->details->type = pcmk_node_variant_remote;
476 pe__set_working_set_flags(scheduler, pcmk_sched_have_remote_nodes);
477
478 } else {
479
480
481
482 if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) {
483 pcmk__config_warn("Node %s has unrecognized type '%s', "
484 "assuming 'ping'", pcmk__s(uname, "without name"),
485 type);
486 }
487 pe_warn_once(pcmk__wo_ping_node,
488 "Support for nodes of type 'ping' (such as %s) is "
489 "deprecated and will be removed in a future release",
490 pcmk__s(uname, "unnamed node"));
491 new_node->details->type = node_ping;
492 }
493
494 new_node->details->attrs = pcmk__strkey_table(free, free);
495
496 if (pe__is_guest_or_remote_node(new_node)) {
497 g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
498 strdup("remote"));
499 } else {
500 g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
501 strdup("cluster"));
502 }
503
504 new_node->details->utilization = pcmk__strkey_table(free, free);
505 new_node->details->digest_cache = pcmk__strkey_table(free,
506 pe__free_digests);
507
508 scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node,
509 pe__cmp_node_name);
510 return new_node;
511 }
512
513 static const char *
514 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data)
515 {
516 xmlNode *attr_set = NULL;
517 xmlNode *attr = NULL;
518
519 const char *container_id = ID(xml_obj);
520 const char *remote_name = NULL;
521 const char *remote_server = NULL;
522 const char *remote_port = NULL;
523 const char *connect_timeout = "60s";
524 const char *remote_allow_migrate=NULL;
525 const char *is_managed = NULL;
526
527 for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL;
528 attr_set = pcmk__xe_next(attr_set)) {
529
530 if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS,
531 pcmk__str_casei)) {
532 continue;
533 }
534
535 for (attr = pcmk__xe_first_child(attr_set); attr != NULL;
536 attr = pcmk__xe_next(attr)) {
537 const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
538 const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
539
540 if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) {
541 remote_name = value;
542 } else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) {
543 remote_server = value;
544 } else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) {
545 remote_port = value;
546 } else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) {
547 connect_timeout = value;
548 } else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) {
549 remote_allow_migrate=value;
550 } else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) {
551 is_managed = value;
552 }
553 }
554 }
555
556 if (remote_name == NULL) {
557 return NULL;
558 }
559
560 if (pe_find_resource(data->resources, remote_name) != NULL) {
561 return NULL;
562 }
563
564 pe_create_remote_xml(parent, remote_name, container_id,
565 remote_allow_migrate, is_managed,
566 connect_timeout, remote_server, remote_port);
567 return remote_name;
568 }
569
570 static void
571 handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node)
572 {
573 if ((new_node->details->type == pcmk_node_variant_remote)
574 && (new_node->details->remote_rsc == NULL)) {
575
576
577
578
579 return;
580 }
581
582 if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) {
583
584 new_node->details->unclean = TRUE;
585
586 } else {
587
588 new_node->details->unclean = FALSE;
589 }
590
591
592
593 new_node->details->unseen = TRUE;
594 }
595
596 gboolean
597 unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler)
598 {
599 xmlNode *xml_obj = NULL;
600 pcmk_node_t *new_node = NULL;
601 const char *id = NULL;
602 const char *uname = NULL;
603 const char *type = NULL;
604 const char *score = NULL;
605
606 for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL;
607 xml_obj = pcmk__xe_next(xml_obj)) {
608
609 if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) {
610 new_node = NULL;
611
612 id = crm_element_value(xml_obj, XML_ATTR_ID);
613 uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
614 type = crm_element_value(xml_obj, XML_ATTR_TYPE);
615 score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
616 crm_trace("Processing node %s/%s", uname, id);
617
618 if (id == NULL) {
619 pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE
620 "> entry in configuration without id");
621 continue;
622 }
623 new_node = pe_create_node(id, uname, type, score, scheduler);
624
625 if (new_node == NULL) {
626 return FALSE;
627 }
628
629 handle_startup_fencing(scheduler, new_node);
630
631 add_node_attrs(xml_obj, new_node, FALSE, scheduler);
632
633 crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
634 }
635 }
636
637 if (scheduler->localhost
638 && (pe_find_node(scheduler->nodes, scheduler->localhost) == NULL)) {
639 crm_info("Creating a fake local node");
640 pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0,
641 scheduler);
642 }
643
644 return TRUE;
645 }
646
647 static void
648 setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
649 {
650 const char *container_id = NULL;
651
652 if (rsc->children) {
653 g_list_foreach(rsc->children, (GFunc) setup_container, scheduler);
654 return;
655 }
656
657 container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
658 if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
659 pcmk_resource_t *container = pe_find_resource(scheduler->resources,
660 container_id);
661
662 if (container) {
663 rsc->container = container;
664 pe__set_resource_flags(container, pcmk_rsc_has_filler);
665 container->fillers = g_list_append(container->fillers, rsc);
666 pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
667 } else {
668 pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
669 }
670 }
671 }
672
673 gboolean
674 unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
675 {
676 xmlNode *xml_obj = NULL;
677
678
679
680
681 for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
682 xml_obj = pcmk__xe_next(xml_obj)) {
683
684 const char *new_node_id = NULL;
685
686
687
688
689 if (xml_contains_remote_node(xml_obj)) {
690 new_node_id = ID(xml_obj);
691
692
693 if (new_node_id
694 && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
695 crm_trace("Found remote node %s defined by resource %s",
696 new_node_id, ID(xml_obj));
697 pe_create_node(new_node_id, new_node_id, "remote", NULL,
698 scheduler);
699 }
700 continue;
701 }
702
703
704
705
706 if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) {
707
708
709
710
711 new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources,
712 scheduler);
713 if (new_node_id
714 && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
715 crm_trace("Found guest node %s in resource %s",
716 new_node_id, ID(xml_obj));
717 pe_create_node(new_node_id, new_node_id, "remote", NULL,
718 scheduler);
719 }
720 continue;
721 }
722
723
724
725
726 if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) {
727 xmlNode *xml_obj2 = NULL;
728 for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL;
729 xml_obj2 = pcmk__xe_next(xml_obj2)) {
730
731 new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources,
732 scheduler);
733
734 if (new_node_id
735 && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) {
736 crm_trace("Found guest node %s in resource %s inside group %s",
737 new_node_id, ID(xml_obj2), ID(xml_obj));
738 pe_create_node(new_node_id, new_node_id, "remote", NULL,
739 scheduler);
740 }
741 }
742 }
743 }
744 return TRUE;
745 }
746
747
748
749
750
751
752
753
754
755 static void
756 link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc)
757 {
758 pcmk_node_t *remote_node = NULL;
759
760 if (new_rsc->is_remote_node == FALSE) {
761 return;
762 }
763
764 if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
765
766 return;
767 }
768
769 remote_node = pe_find_node(scheduler->nodes, new_rsc->id);
770 CRM_CHECK(remote_node != NULL, return);
771
772 pe_rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
773 new_rsc->id, pe__node_name(remote_node));
774 remote_node->details->remote_rsc = new_rsc;
775
776 if (new_rsc->container == NULL) {
777
778
779
780 handle_startup_fencing(scheduler, remote_node);
781
782 } else {
783
784
785
786 g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
787 strdup("container"));
788 }
789 }
790
791 static void
792 destroy_tag(gpointer data)
793 {
794 pcmk_tag_t *tag = data;
795
796 if (tag) {
797 free(tag->id);
798 g_list_free_full(tag->refs, free);
799 free(tag);
800 }
801 }
802
803
804
805
806
807
808
809
810
811
812
813
814
815 gboolean
816 unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler)
817 {
818 xmlNode *xml_obj = NULL;
819 GList *gIter = NULL;
820
821 scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
822
823 for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
824 xml_obj = pcmk__xe_next(xml_obj)) {
825
826 pcmk_resource_t *new_rsc = NULL;
827 const char *id = ID(xml_obj);
828
829 if (pcmk__str_empty(id)) {
830 pcmk__config_err("Ignoring <%s> resource without ID",
831 xml_obj->name);
832 continue;
833 }
834
835 if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE,
836 pcmk__str_none)) {
837 if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id,
838 NULL, NULL) == FALSE) {
839
840 g_hash_table_insert(scheduler->template_rsc_sets, strdup(id),
841 NULL);
842 }
843 continue;
844 }
845
846 crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>",
847 xml_obj->name, id);
848 if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
849 scheduler) == pcmk_rc_ok) {
850 scheduler->resources = g_list_append(scheduler->resources, new_rsc);
851 pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
852
853 } else {
854 pcmk__config_err("Ignoring <%s> resource '%s' "
855 "because configuration is invalid",
856 xml_obj->name, id);
857 }
858 }
859
860 for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) {
861 pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data;
862
863 setup_container(rsc, scheduler);
864 link_rsc2remotenode(scheduler, rsc);
865 }
866
867 scheduler->resources = g_list_sort(scheduler->resources,
868 pe__cmp_rsc_priority);
869 if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) {
870
871
872 } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)
873 && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) {
874
875 pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
876 pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option");
877 pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
878 }
879
880 return TRUE;
881 }
882
883 gboolean
884 unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler)
885 {
886 xmlNode *xml_tag = NULL;
887
888 scheduler->tags = pcmk__strkey_table(free, destroy_tag);
889
890 for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL;
891 xml_tag = pcmk__xe_next(xml_tag)) {
892
893 xmlNode *xml_obj_ref = NULL;
894 const char *tag_id = ID(xml_tag);
895
896 if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) {
897 continue;
898 }
899
900 if (tag_id == NULL) {
901 pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID,
902 (const char *) xml_tag->name);
903 continue;
904 }
905
906 for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL;
907 xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
908
909 const char *obj_ref = ID(xml_obj_ref);
910
911 if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) {
912 continue;
913 }
914
915 if (obj_ref == NULL) {
916 pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID,
917 xml_obj_ref->name, tag_id);
918 continue;
919 }
920
921 if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) {
922 return FALSE;
923 }
924 }
925 }
926
927 return TRUE;
928 }
929
930
931
932 static gboolean
933 unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler)
934 {
935 const char *ticket_id = NULL;
936 const char *granted = NULL;
937 const char *last_granted = NULL;
938 const char *standby = NULL;
939 xmlAttrPtr xIter = NULL;
940
941 pcmk_ticket_t *ticket = NULL;
942
943 ticket_id = ID(xml_ticket);
944 if (pcmk__str_empty(ticket_id)) {
945 return FALSE;
946 }
947
948 crm_trace("Processing ticket state for %s", ticket_id);
949
950 ticket = g_hash_table_lookup(scheduler->tickets, ticket_id);
951 if (ticket == NULL) {
952 ticket = ticket_new(ticket_id, scheduler);
953 if (ticket == NULL) {
954 return FALSE;
955 }
956 }
957
958 for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
959 const char *prop_name = (const char *)xIter->name;
960 const char *prop_value = pcmk__xml_attr_value(xIter);
961
962 if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) {
963 continue;
964 }
965 g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
966 }
967
968 granted = g_hash_table_lookup(ticket->state, "granted");
969 if (granted && crm_is_true(granted)) {
970 ticket->granted = TRUE;
971 crm_info("We have ticket '%s'", ticket->id);
972 } else {
973 ticket->granted = FALSE;
974 crm_info("We do not have ticket '%s'", ticket->id);
975 }
976
977 last_granted = g_hash_table_lookup(ticket->state, "last-granted");
978 if (last_granted) {
979 long long last_granted_ll;
980
981 pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
982 ticket->last_granted = (time_t) last_granted_ll;
983 }
984
985 standby = g_hash_table_lookup(ticket->state, "standby");
986 if (standby && crm_is_true(standby)) {
987 ticket->standby = TRUE;
988 if (ticket->granted) {
989 crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
990 }
991 } else {
992 ticket->standby = FALSE;
993 }
994
995 crm_trace("Done with ticket state for %s", ticket_id);
996
997 return TRUE;
998 }
999
1000 static gboolean
1001 unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler)
1002 {
1003 xmlNode *xml_obj = NULL;
1004
1005 for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL;
1006 xml_obj = pcmk__xe_next(xml_obj)) {
1007
1008 if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) {
1009 continue;
1010 }
1011 unpack_ticket_state(xml_obj, scheduler);
1012 }
1013
1014 return TRUE;
1015 }
1016
1017 static void
1018 unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state,
1019 pcmk_scheduler_t *scheduler)
1020 {
1021 const char *resource_discovery_enabled = NULL;
1022 const xmlNode *attrs = NULL;
1023 pcmk_resource_t *rsc = NULL;
1024
1025 if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
1026 return;
1027 }
1028
1029 if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
1030 return;
1031 }
1032 crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node));
1033
1034 pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE),
1035 &(this_node->details->remote_maintenance), 0);
1036
1037 rsc = this_node->details->remote_rsc;
1038 if (this_node->details->remote_requires_reset == FALSE) {
1039 this_node->details->unclean = FALSE;
1040 this_node->details->unseen = FALSE;
1041 }
1042 attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
1043 add_node_attrs(attrs, this_node, TRUE, scheduler);
1044
1045 if (pe__shutdown_requested(this_node)) {
1046 crm_info("%s is shutting down", pe__node_name(this_node));
1047 this_node->details->shutdown = TRUE;
1048 }
1049
1050 if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
1051 crm_info("%s is in standby mode", pe__node_name(this_node));
1052 this_node->details->standby = TRUE;
1053 }
1054
1055 if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
1056 ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) {
1057 crm_info("%s is in maintenance mode", pe__node_name(this_node));
1058 this_node->details->maintenance = TRUE;
1059 }
1060
1061 resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
1062 if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
1063 if (pe__is_remote_node(this_node)
1064 && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1065 crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
1066 " attribute on Pacemaker Remote node %s"
1067 " because fencing is disabled",
1068 pe__node_name(this_node));
1069 } else {
1070
1071
1072
1073
1074
1075 crm_info("%s has resource discovery disabled",
1076 pe__node_name(this_node));
1077 this_node->details->rsc_discovery_enabled = FALSE;
1078 }
1079 }
1080 }
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090 static void
1091 unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node,
1092 pcmk_scheduler_t *scheduler)
1093 {
1094 const char *discovery = NULL;
1095 const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS,
1096 FALSE);
1097
1098 add_node_attrs(attrs, node, TRUE, scheduler);
1099
1100 if (crm_is_true(pe_node_attribute_raw(node, "standby"))) {
1101 crm_info("%s is in standby mode", pe__node_name(node));
1102 node->details->standby = TRUE;
1103 }
1104
1105 if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) {
1106 crm_info("%s is in maintenance mode", pe__node_name(node));
1107 node->details->maintenance = TRUE;
1108 }
1109
1110 discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY);
1111 if ((discovery != NULL) && !crm_is_true(discovery)) {
1112 crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
1113 " attribute for %s because disabling resource discovery "
1114 "is not allowed for cluster nodes", pe__node_name(node));
1115 }
1116 }
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130 static void
1131 unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler)
1132 {
1133 const char *id = NULL;
1134 const char *uname = NULL;
1135 pcmk_node_t *this_node = NULL;
1136
1137 id = crm_element_value(state, XML_ATTR_ID);
1138 if (id == NULL) {
1139 crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without "
1140 XML_ATTR_ID);
1141 return;
1142 }
1143
1144 uname = crm_element_value(state, XML_ATTR_UNAME);
1145 if (uname == NULL) {
1146
1147
1148
1149
1150
1151
1152 crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without "
1153 XML_ATTR_UNAME, id);
1154 }
1155
1156 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1157 if (this_node == NULL) {
1158 pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) "
1159 "because it is no longer in the configuration",
1160 id, pcmk__s(uname, "uname unknown"));
1161 return;
1162 }
1163
1164 if (pe__is_guest_or_remote_node(this_node)) {
1165
1166
1167
1168
1169
1170 pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED),
1171 &(this_node->details->remote_was_fenced), 0);
1172 return;
1173 }
1174
1175 unpack_transient_attributes(state, this_node, scheduler);
1176
1177
1178
1179
1180 this_node->details->unclean = FALSE;
1181 this_node->details->unseen = FALSE;
1182
1183 crm_trace("Determining online status of cluster node %s (id %s)",
1184 pe__node_name(this_node), id);
1185 determine_online_status(state, this_node, scheduler);
1186
1187 if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
1188 && this_node->details->online
1189 && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) {
1190
1191
1192
1193
1194 pe_fence_node(scheduler, this_node, "cluster does not have quorum",
1195 FALSE);
1196 }
1197 }
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216 static int
1217 unpack_node_history(const xmlNode *status, bool fence,
1218 pcmk_scheduler_t *scheduler)
1219 {
1220 int rc = pcmk_rc_ok;
1221
1222
1223 for (const xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE);
1224 state != NULL; state = crm_next_same_xml(state)) {
1225
1226 const char *id = ID(state);
1227 const char *uname = crm_element_value(state, XML_ATTR_UNAME);
1228 pcmk_node_t *this_node = NULL;
1229
1230 if ((id == NULL) || (uname == NULL)) {
1231
1232 crm_trace("Not unpacking resource history from malformed "
1233 XML_CIB_TAG_STATE " without id and/or uname");
1234 continue;
1235 }
1236
1237 this_node = pe_find_node_any(scheduler->nodes, id, uname);
1238 if (this_node == NULL) {
1239
1240 crm_trace("Not unpacking resource history for node %s because "
1241 "no longer in configuration", id);
1242 continue;
1243 }
1244
1245 if (this_node->details->unpacked) {
1246 crm_trace("Not unpacking resource history for node %s because "
1247 "already unpacked", id);
1248 continue;
1249 }
1250
1251 if (fence) {
1252
1253
1254 } else if (pe__is_guest_node(this_node)) {
1255
1256
1257
1258
1259 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1260
1261 if ((rsc == NULL) || (rsc->role != pcmk_role_started)
1262 || (rsc->container->role != pcmk_role_started)) {
1263 crm_trace("Not unpacking resource history for guest node %s "
1264 "because container and connection are not known to "
1265 "be up", id);
1266 continue;
1267 }
1268
1269 } else if (pe__is_remote_node(this_node)) {
1270
1271
1272
1273
1274
1275 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1276
1277 if ((rsc == NULL)
1278 || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)
1279 && (rsc->role != pcmk_role_started))) {
1280 crm_trace("Not unpacking resource history for remote node %s "
1281 "because connection is not known to be up", id);
1282 continue;
1283 }
1284
1285
1286
1287
1288
1289
1290 } else if (!pcmk_any_flags_set(scheduler->flags,
1291 pcmk_sched_fencing_enabled
1292 |pcmk_sched_shutdown_lock)
1293 && !this_node->details->online) {
1294 crm_trace("Not unpacking resource history for offline "
1295 "cluster node %s", id);
1296 continue;
1297 }
1298
1299 if (pe__is_guest_or_remote_node(this_node)) {
1300 determine_remote_online_status(scheduler, this_node);
1301 unpack_handle_remote_attrs(this_node, state, scheduler);
1302 }
1303
1304 crm_trace("Unpacking resource history for %snode %s",
1305 (fence? "unseen " : ""), id);
1306
1307 this_node->details->unpacked = TRUE;
1308 unpack_node_lrm(this_node, state, scheduler);
1309
1310 rc = EAGAIN;
1311 }
1312 return rc;
1313 }
1314
1315
1316
1317
1318 gboolean
1319 unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler)
1320 {
1321 xmlNode *state = NULL;
1322
1323 crm_trace("Beginning unpack");
1324
1325 if (scheduler->tickets == NULL) {
1326 scheduler->tickets = pcmk__strkey_table(free, destroy_ticket);
1327 }
1328
1329 for (state = pcmk__xe_first_child(status); state != NULL;
1330 state = pcmk__xe_next(state)) {
1331
1332 if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
1333 unpack_tickets_state((xmlNode *) state, scheduler);
1334
1335 } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
1336 unpack_node_state(state, scheduler);
1337 }
1338 }
1339
1340 while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) {
1341 crm_trace("Another pass through node resource histories is needed");
1342 }
1343
1344
1345 unpack_node_history(status,
1346 pcmk_is_set(scheduler->flags,
1347 pcmk_sched_fencing_enabled),
1348 scheduler);
1349
1350
1351
1352
1353 if (scheduler->stop_needed != NULL) {
1354 for (GList *item = scheduler->stop_needed; item; item = item->next) {
1355 pcmk_resource_t *container = item->data;
1356 pcmk_node_t *node = pe__current_node(container);
1357
1358 if (node) {
1359 stop_action(container, node, FALSE);
1360 }
1361 }
1362 g_list_free(scheduler->stop_needed);
1363 scheduler->stop_needed = NULL;
1364 }
1365
1366
1367
1368
1369
1370 for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) {
1371 pcmk_node_t *this_node = gIter->data;
1372
1373 if (!pe__is_guest_or_remote_node(this_node)) {
1374 continue;
1375 }
1376 if (this_node->details->shutdown
1377 && (this_node->details->remote_rsc != NULL)) {
1378 pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped,
1379 "remote shutdown");
1380 }
1381 if (!this_node->details->unpacked) {
1382 determine_remote_online_status(scheduler, this_node);
1383 }
1384 }
1385
1386 return TRUE;
1387 }
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400 static long long
1401 unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler)
1402 {
1403 const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM);
1404 int member = 0;
1405
1406 if (member_time == NULL) {
1407 return -1LL;
1408
1409 } else if (crm_str_to_boolean(member_time, &member) == 1) {
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419 return member? (long long) get_effective_time(scheduler) : 0LL;
1420
1421 } else {
1422 long long when_member = 0LL;
1423
1424 if ((pcmk__scan_ll(member_time, &when_member,
1425 0LL) != pcmk_rc_ok) || (when_member < 0LL)) {
1426 crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM
1427 " in " XML_CIB_TAG_STATE " entry", member_time);
1428 return -1LL;
1429 }
1430 return when_member;
1431 }
1432 }
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443 static long long
1444 unpack_node_online(const xmlNode *node_state)
1445 {
1446 const char *peer_time = crm_element_value(node_state, PCMK__XA_CRMD);
1447
1448
1449 if (pcmk__str_eq(peer_time, OFFLINESTATUS,
1450 pcmk__str_casei|pcmk__str_null_matches)) {
1451 return 0LL;
1452
1453 } else if (pcmk__str_eq(peer_time, ONLINESTATUS, pcmk__str_casei)) {
1454 return 1LL;
1455
1456 } else {
1457 long long when_online = 0LL;
1458
1459 if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok)
1460 || (when_online < 0)) {
1461 crm_warn("Unrecognized value '%s' for " PCMK__XA_CRMD " in "
1462 XML_CIB_TAG_STATE " entry, assuming offline", peer_time);
1463 return 0LL;
1464 }
1465 return when_online;
1466 }
1467 }
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478 static bool
1479 unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state)
1480 {
1481 long long value = 0LL;
1482 int value_i = 0;
1483 const char *value_s = pe_node_attribute_raw(node, PCMK_NODE_ATTR_TERMINATE);
1484
1485
1486 if (crm_str_to_boolean(value_s, &value_i) == 1) {
1487 return (value_i != 0);
1488 }
1489 if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) {
1490 return (value > 0);
1491 }
1492 crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE
1493 "node attribute for %s", value_s, pe__node_name(node));
1494 return false;
1495 }
1496
1497 static gboolean
1498 determine_online_status_no_fencing(pcmk_scheduler_t *scheduler,
1499 const xmlNode *node_state,
1500 pcmk_node_t *this_node)
1501 {
1502 gboolean online = FALSE;
1503 const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1504 const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
1505 long long when_member = unpack_node_member(node_state, scheduler);
1506 long long when_online = unpack_node_online(node_state);
1507
1508 if (when_member <= 0) {
1509 crm_trace("Node %s is %sdown", pe__node_name(this_node),
1510 ((when_member < 0)? "presumed " : ""));
1511
1512 } else if (when_online > 0) {
1513 if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1514 online = TRUE;
1515 } else {
1516 crm_debug("Node %s is not ready to run resources: %s",
1517 pe__node_name(this_node), join);
1518 }
1519
1520 } else if (this_node->details->expected_up == FALSE) {
1521 crm_trace("Node %s controller is down: "
1522 "member@%lld online@%lld join=%s expected=%s",
1523 pe__node_name(this_node), when_member, when_online,
1524 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1525
1526 } else {
1527
1528 pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE);
1529 crm_info("Node %s member@%lld online@%lld join=%s expected=%s",
1530 pe__node_name(this_node), when_member, when_online,
1531 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
1532 }
1533 return online;
1534 }
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549 static inline bool
1550 pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1551 long long when_member, long long when_online)
1552 {
1553 if ((scheduler->node_pending_timeout > 0)
1554 && (when_member > 0) && (when_online <= 0)) {
1555
1556
1557 time_t timeout = when_member + scheduler->node_pending_timeout;
1558
1559 if (get_effective_time(node->details->data_set) >= timeout) {
1560 return true;
1561 }
1562
1563
1564 pe__update_recheck_time(timeout, scheduler, "pending node timeout");
1565 }
1566 return false;
1567 }
1568
1569 static bool
1570 determine_online_status_fencing(pcmk_scheduler_t *scheduler,
1571 const xmlNode *node_state,
1572 pcmk_node_t *this_node)
1573 {
1574 bool termination_requested = unpack_node_terminate(this_node, node_state);
1575 const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
1576 const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
1577 long long when_member = unpack_node_member(node_state, scheduler);
1578 long long when_online = unpack_node_online(node_state);
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598 crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
1599 pe__node_name(this_node), when_member, when_online,
1600 pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"),
1601 (termination_requested? " (termination requested)" : ""));
1602
1603 if (this_node->details->shutdown) {
1604 crm_debug("%s is shutting down", pe__node_name(this_node));
1605
1606
1607 return (when_online > 0);
1608 }
1609
1610 if (when_member < 0) {
1611 pe_fence_node(scheduler, this_node,
1612 "peer has not been seen by the cluster", FALSE);
1613 return false;
1614 }
1615
1616 if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
1617 pe_fence_node(scheduler, this_node,
1618 "peer failed Pacemaker membership criteria", FALSE);
1619
1620 } else if (termination_requested) {
1621 if ((when_member <= 0) && (when_online <= 0)
1622 && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
1623 crm_info("%s was fenced as requested", pe__node_name(this_node));
1624 return false;
1625 }
1626 pe_fence_node(scheduler, this_node, "fencing was requested", false);
1627
1628 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
1629 pcmk__str_null_matches)) {
1630
1631 if (pending_too_long(scheduler, this_node, when_member, when_online)) {
1632 pe_fence_node(scheduler, this_node,
1633 "peer pending timed out on joining the process group",
1634 FALSE);
1635
1636 } else if ((when_member > 0) || (when_online > 0)) {
1637 crm_info("- %s is not ready to run resources",
1638 pe__node_name(this_node));
1639 this_node->details->standby = TRUE;
1640 this_node->details->pending = TRUE;
1641
1642 } else {
1643 crm_trace("%s is down or still coming up",
1644 pe__node_name(this_node));
1645 }
1646
1647 } else if (when_member <= 0) {
1648
1649 pe_fence_node(scheduler, this_node,
1650 "peer is no longer part of the cluster", TRUE);
1651
1652 } else if (when_online <= 0) {
1653 pe_fence_node(scheduler, this_node,
1654 "peer process is no longer available", FALSE);
1655
1656
1657
1658 } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
1659 crm_info("%s is active", pe__node_name(this_node));
1660
1661 } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
1662 CRMD_JOINSTATE_DOWN, NULL)) {
1663 crm_info("%s is not ready to run resources", pe__node_name(this_node));
1664 this_node->details->standby = TRUE;
1665 this_node->details->pending = TRUE;
1666
1667 } else {
1668 pe_fence_node(scheduler, this_node, "peer was in an unknown state",
1669 FALSE);
1670 }
1671
1672 return (when_member > 0);
1673 }
1674
1675 static void
1676 determine_remote_online_status(pcmk_scheduler_t *scheduler,
1677 pcmk_node_t *this_node)
1678 {
1679 pcmk_resource_t *rsc = this_node->details->remote_rsc;
1680 pcmk_resource_t *container = NULL;
1681 pcmk_node_t *host = NULL;
1682
1683
1684
1685
1686
1687 if (rsc == NULL) {
1688 this_node->details->online = FALSE;
1689 goto remote_online_done;
1690 }
1691
1692 container = rsc->container;
1693
1694 if (container && pcmk__list_of_1(rsc->running_on)) {
1695 host = rsc->running_on->data;
1696 }
1697
1698
1699 if (rsc->role == pcmk_role_started) {
1700 crm_trace("%s node %s presumed ONLINE because connection resource is started",
1701 (container? "Guest" : "Remote"), this_node->details->id);
1702 this_node->details->online = TRUE;
1703 }
1704
1705
1706 if ((rsc->role == pcmk_role_started)
1707 && (rsc->next_role == pcmk_role_stopped)) {
1708
1709 crm_trace("%s node %s shutting down because connection resource is stopping",
1710 (container? "Guest" : "Remote"), this_node->details->id);
1711 this_node->details->shutdown = TRUE;
1712 }
1713
1714
1715 if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) {
1716 crm_trace("Guest node %s UNCLEAN because guest resource failed",
1717 this_node->details->id);
1718 this_node->details->online = FALSE;
1719 this_node->details->remote_requires_reset = TRUE;
1720
1721 } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
1722 crm_trace("%s node %s OFFLINE because connection resource failed",
1723 (container? "Guest" : "Remote"), this_node->details->id);
1724 this_node->details->online = FALSE;
1725
1726 } else if ((rsc->role == pcmk_role_stopped)
1727 || ((container != NULL)
1728 && (container->role == pcmk_role_stopped))) {
1729
1730 crm_trace("%s node %s OFFLINE because its resource is stopped",
1731 (container? "Guest" : "Remote"), this_node->details->id);
1732 this_node->details->online = FALSE;
1733 this_node->details->remote_requires_reset = FALSE;
1734
1735 } else if (host && (host->details->online == FALSE)
1736 && host->details->unclean) {
1737 crm_trace("Guest node %s UNCLEAN because host is unclean",
1738 this_node->details->id);
1739 this_node->details->online = FALSE;
1740 this_node->details->remote_requires_reset = TRUE;
1741 }
1742
1743 remote_online_done:
1744 crm_trace("Remote node %s online=%s",
1745 this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1746 }
1747
1748 static void
1749 determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
1750 pcmk_scheduler_t *scheduler)
1751 {
1752 gboolean online = FALSE;
1753 const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED);
1754
1755 CRM_CHECK(this_node != NULL, return);
1756
1757 this_node->details->shutdown = FALSE;
1758 this_node->details->expected_up = FALSE;
1759
1760 if (pe__shutdown_requested(this_node)) {
1761 this_node->details->shutdown = TRUE;
1762
1763 } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
1764 this_node->details->expected_up = TRUE;
1765 }
1766
1767 if (this_node->details->type == node_ping) {
1768 this_node->details->unclean = FALSE;
1769 online = FALSE;
1770
1771
1772
1773
1774 } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
1775 online = determine_online_status_no_fencing(scheduler, node_state,
1776 this_node);
1777
1778 } else {
1779 online = determine_online_status_fencing(scheduler, node_state,
1780 this_node);
1781 }
1782
1783 if (online) {
1784 this_node->details->online = TRUE;
1785
1786 } else {
1787
1788 this_node->fixed = TRUE;
1789 this_node->weight = -INFINITY;
1790 }
1791
1792 if (online && this_node->details->shutdown) {
1793
1794 this_node->fixed = TRUE;
1795 this_node->weight = -INFINITY;
1796 }
1797
1798 if (this_node->details->type == node_ping) {
1799 crm_info("%s is not a Pacemaker node", pe__node_name(this_node));
1800
1801 } else if (this_node->details->unclean) {
1802 pe_proc_warn("%s is unclean", pe__node_name(this_node));
1803
1804 } else if (this_node->details->online) {
1805 crm_info("%s is %s", pe__node_name(this_node),
1806 this_node->details->shutdown ? "shutting down" :
1807 this_node->details->pending ? "pending" :
1808 this_node->details->standby ? "standby" :
1809 this_node->details->maintenance ? "maintenance" : "online");
1810
1811 } else {
1812 crm_trace("%s is offline", pe__node_name(this_node));
1813 }
1814 }
1815
1816
1817
1818
1819
1820
1821
1822
1823
/*!
 * \brief Find the last character of a resource ID's base name
 *
 * Scans backward over trailing decimal digits. If they are preceded by a
 * colon (as in "name:0"), the base name ends just before the colon. If the
 * colon is the final character, or no colon-plus-digits suffix is found, the
 * final character of the ID is returned.
 *
 * \param[in] id  Resource ID to examine
 *
 * \return Pointer into \p id at the last character of the base name, or NULL
 *         if \p id is NULL or empty
 */
const char *
pe_base_name_end(const char *id)
{
    const char *last = NULL;

    if (pcmk__str_empty(id)) {
        return NULL;
    }

    last = id + strlen(id) - 1;

    // The first character of the ID is never examined
    for (const char *cur = last; cur > id; --cur) {
        if ((*cur >= '0') && (*cur <= '9')) {
            continue;   // Still within a possible numeric suffix
        }
        if (*cur == ':') {
            return (cur == last)? cur : (cur - 1);
        }
        return last;    // Any other character means there is no suffix
    }
    return last;
}
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
/*!
 * \brief Get a copy of a resource ID's base name
 *
 * Copies \p last_rsc_id up to and including the character reported by
 * pe_base_name_end(). Asserts if no base name end is found or if the copy
 * cannot be allocated.
 *
 * \param[in] last_rsc_id  Resource ID to copy the base name from
 *
 * \return Newly allocated base name
 * \note The caller is responsible for freeing the result with free().
 */
char *
clone_strip(const char *last_rsc_id)
{
    char *base = NULL;
    const char *last = pe_base_name_end(last_rsc_id);

    CRM_ASSERT(last);
    base = strndup(last_rsc_id, (last - last_rsc_id) + 1);
    CRM_ASSERT(base);
    return base;
}
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
/*!
 * \brief Get the ID of instance 0 for a resource ID's base name
 *
 * Builds the string made of the base name of \p last_rsc_id (as delimited by
 * pe_base_name_end()) followed by ":0". Asserts if no base name end is found
 * or if memory cannot be allocated.
 *
 * \param[in] last_rsc_id  Resource ID to derive the base name from
 *
 * \return Newly allocated string with the base name plus ":0"
 * \note The caller is responsible for freeing the result with free().
 */
char *
clone_zero(const char *last_rsc_id)
{
    const char *end = pe_base_name_end(last_rsc_id);
    size_t base_name_len = 0;
    char *zero = NULL;

    // Validate before any pointer arithmetic: subtracting from NULL is UB
    CRM_ASSERT(end);
    base_name_len = end - last_rsc_id + 1;

    // Room for the base name, ':', '0' and the terminator (calloc zero-fills)
    zero = calloc(base_name_len + 3, sizeof(char));
    CRM_ASSERT(zero);
    memcpy(zero, last_rsc_id, base_name_len);
    zero[base_name_len] = ':';
    zero[base_name_len + 1] = '0';
    return zero;
}
1901
1902 static pcmk_resource_t *
1903 create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
1904 pcmk_scheduler_t *scheduler)
1905 {
1906 pcmk_resource_t *rsc = NULL;
1907 xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
1908
1909 copy_in_properties(xml_rsc, rsc_entry);
1910 crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
1911 crm_log_xml_debug(xml_rsc, "Orphan resource");
1912
1913 if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) {
1914 return NULL;
1915 }
1916
1917 if (xml_contains_remote_node(xml_rsc)) {
1918 pcmk_node_t *node;
1919
1920 crm_debug("Detected orphaned remote node %s", rsc_id);
1921 node = pe_find_node(scheduler->nodes, rsc_id);
1922 if (node == NULL) {
1923 node = pe_create_node(rsc_id, rsc_id, "remote", NULL, scheduler);
1924 }
1925 link_rsc2remotenode(scheduler, rsc);
1926
1927 if (node) {
1928 crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1929 node->details->shutdown = TRUE;
1930 }
1931 }
1932
1933 if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
1934
1935 crm_trace("Detected orphaned container filler %s", rsc_id);
1936 pe__set_resource_flags(rsc, pcmk_rsc_removed_filler);
1937 }
1938 pe__set_resource_flags(rsc, pcmk_rsc_removed);
1939 scheduler->resources = g_list_append(scheduler->resources, rsc);
1940 return rsc;
1941 }
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954 static pcmk_resource_t *
1955 create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id,
1956 const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
1957 {
1958 pcmk_resource_t *top = pe__create_clone_child(parent, scheduler);
1959
1960
1961 pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL,
1962 pcmk_rsc_match_clone_only);
1963
1964 pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1965 top->id, parent->id, rsc_id, pe__node_name(node));
1966 return orphan;
1967 }
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983 static pcmk_resource_t *
1984 find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
1985 pcmk_resource_t *parent, const char *rsc_id)
1986 {
1987 GList *rIter = NULL;
1988 pcmk_resource_t *rsc = NULL;
1989 pcmk_resource_t *inactive_instance = NULL;
1990 gboolean skip_inactive = FALSE;
1991
1992 CRM_ASSERT(parent != NULL);
1993 CRM_ASSERT(pe_rsc_is_clone(parent));
1994 CRM_ASSERT(!pcmk_is_set(parent->flags, pcmk_rsc_unique));
1995
1996
1997 pe_rsc_trace(parent, "Looking for %s on %s in %s",
1998 rsc_id, pe__node_name(node), parent->id);
1999 for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
2000 GList *locations = NULL;
2001 pcmk_resource_t *child = rIter->data;
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018 child->fns->location(child, &locations, 2);
2019 if (locations) {
2020
2021
2022
2023
2024 CRM_LOG_ASSERT(locations->next == NULL);
2025
2026 if (((pcmk_node_t *) locations->data)->details == node->details) {
2027
2028
2029
2030
2031
2032
2033
2034 rsc = parent->fns->find_rsc(child, rsc_id, NULL,
2035 pcmk_rsc_match_clone_only);
2036 if (rsc) {
2037
2038
2039
2040
2041
2042
2043
2044 if (rsc->running_on) {
2045 crm_notice("Active (now-)anonymous clone %s has "
2046 "multiple (orphan) instance histories on %s",
2047 parent->id, pe__node_name(node));
2048 skip_inactive = TRUE;
2049 rsc = NULL;
2050 } else {
2051 pe_rsc_trace(parent, "Resource %s, active", rsc->id);
2052 }
2053 }
2054 }
2055 g_list_free(locations);
2056
2057 } else {
2058 pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
2059 if (!skip_inactive && !inactive_instance
2060 && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) {
2061
2062 inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
2063 pcmk_rsc_match_clone_only);
2064
2065
2066
2067
2068 if (inactive_instance && inactive_instance->pending_node
2069 && (inactive_instance->pending_node->details != node->details)) {
2070 inactive_instance = NULL;
2071 }
2072 }
2073 }
2074 }
2075
2076 if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
2077 pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
2078 rsc = inactive_instance;
2079 }
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093 if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing)
2094 && (!node->details->online || node->details->unclean)
2095 && !pe__is_guest_node(node)
2096 && !pe__is_universal_clone(parent, scheduler)) {
2097
2098 rsc = NULL;
2099 }
2100
2101 if (rsc == NULL) {
2102 rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
2103 pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
2104 }
2105 return rsc;
2106 }
2107
2108 static pcmk_resource_t *
2109 unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
2110 const char *rsc_id)
2111 {
2112 pcmk_resource_t *rsc = NULL;
2113 pcmk_resource_t *parent = NULL;
2114
2115 crm_trace("looking for %s", rsc_id);
2116 rsc = pe_find_resource(scheduler->resources, rsc_id);
2117
2118 if (rsc == NULL) {
2119
2120
2121
2122
2123 char *clone0_id = clone_zero(rsc_id);
2124 pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources,
2125 clone0_id);
2126
2127 if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) {
2128 rsc = clone0;
2129 parent = uber_parent(clone0);
2130 crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
2131 } else {
2132 crm_trace("%s is not known as %s either (orphan)",
2133 rsc_id, clone0_id);
2134 }
2135 free(clone0_id);
2136
2137 } else if (rsc->variant > pcmk_rsc_variant_primitive) {
2138 crm_trace("Resource history for %s is orphaned because it is no longer primitive",
2139 rsc_id);
2140 return NULL;
2141
2142 } else {
2143 parent = uber_parent(rsc);
2144 }
2145
2146 if (pe_rsc_is_anon_clone(parent)) {
2147
2148 if (pe_rsc_is_bundled(parent)) {
2149 rsc = pe__find_bundle_replica(parent->parent, node);
2150 } else {
2151 char *base = clone_strip(rsc_id);
2152
2153 rsc = find_anonymous_clone(scheduler, node, parent, base);
2154 free(base);
2155 CRM_ASSERT(rsc != NULL);
2156 }
2157 }
2158
2159 if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei)
2160 && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) {
2161
2162 pcmk__str_update(&rsc->clone_name, rsc_id);
2163 pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
2164 rsc_id, pe__node_name(node), rsc->id,
2165 (pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : ""));
2166 }
2167 return rsc;
2168 }
2169
2170 static pcmk_resource_t *
2171 process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
2172 pcmk_scheduler_t *scheduler)
2173 {
2174 pcmk_resource_t *rsc = NULL;
2175 const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2176
2177 crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node));
2178 rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
2179 if (rsc == NULL) {
2180 return NULL;
2181 }
2182
2183 if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) {
2184 pe__clear_resource_flags(rsc, pcmk_rsc_managed);
2185
2186 } else {
2187 CRM_CHECK(rsc != NULL, return NULL);
2188 pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
2189 resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__",
2190 scheduler);
2191 }
2192 return rsc;
2193 }
2194
/*!
 * \internal
 * \brief Apply a resource's unpacked state on a node to the scheduler data
 *
 * In order: records the node in the known_on table of the resource and its
 * ancestors, requests fencing if a managed resource is active on an offline
 * node, applies the given on-fail policy, and finally either records the
 * resource as running on the node or tidies up state for a stopped resource.
 *
 * \param[in,out] rsc      Resource whose state on \p node is being processed
 * \param[in,out] node     Node the state applies to
 * \param[in]     on_fail  Failure response to apply
 */
static void
process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
                  enum action_fail_response on_fail)
{
    pcmk_node_t *tmpnode = NULL;
    char *reason = NULL;
    enum action_fail_response save_on_fail = pcmk_on_fail_ignore;

    CRM_ASSERT(rsc);
    pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
                 rsc->id, role2text(rsc->role), pe__node_name(node),
                 fail2text(on_fail));

    // If the role is known, note the node in known_on for rsc and its ancestors
    if (rsc->role != pcmk_role_unknown) {
        pcmk_resource_t *iter = rsc;

        while (iter) {
            if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
                // The table gets its own copy of the node, keyed by node ID
                pcmk_node_t *n = pe__copy_node(node);

                pe_rsc_trace(rsc, "%s%s%s known on %s",
                             rsc->id,
                             ((rsc->clone_name == NULL)? "" : " also known as "),
                             ((rsc->clone_name == NULL)? "" : rsc->clone_name),
                             pe__node_name(n));
                g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
            }
            if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) {
                // Do not climb past a resource flagged as unique
                break;
            }
            iter = iter->parent;
        }
    }

    // A managed resource is active on an offline node that isn't in maintenance
    if ((rsc->role > pcmk_role_stopped)
        && node->details->online == FALSE
        && node->details->maintenance == FALSE
        && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {

        gboolean should_fence = FALSE;

        if (pe__is_guest_node(node)) {
            // Guest nodes are fenced regardless of the fencing-enabled flag
            pe__set_resource_flags(rsc,
                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
            should_fence = TRUE;

        } else if (pcmk_is_set(rsc->cluster->flags,
                               pcmk_sched_fencing_enabled)) {
            if (pe__is_remote_node(node) && node->details->remote_rsc
                && !pcmk_is_set(node->details->remote_rsc->flags,
                                pcmk_rsc_failed)) {

                /* Remote node whose connection resource is not failed: mark
                 * the node unseen and use a reason saying fencing may be
                 * revoked (NOTE(review): how "unseen" leads to revocation is
                 * decided elsewhere -- confirm against pe_fence_node()).
                 */
                node->details->unseen = TRUE;
                reason = crm_strdup_printf("%s is active there (fencing will be"
                                           " revoked if remote connection can "
                                           "be re-established elsewhere)",
                                           rsc->id);
            }
            should_fence = TRUE;
        }

        if (should_fence) {
            if (reason == NULL) {
                reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
            }
            pe_fence_node(rsc->cluster, node, reason, FALSE);
        }
        free(reason);
    }

    // Remember the requested policy; on_fail itself may be overridden below
    save_on_fail = on_fail;

    if (node->details->unclean) {
        // For an unclean node, skip the per-policy handling in the switch
        on_fail = pcmk_on_fail_ignore;
    }

    switch (on_fail) {
        case pcmk_on_fail_ignore:
            // Nothing to do
            break;

        case pcmk_on_fail_demote:
            pe__set_resource_flags(rsc, pcmk_rsc_failed);
            demote_action(rsc, node, FALSE);
            break;

        case pcmk_on_fail_fence_node:
            // reason was freed above (if set), so it is safe to reuse here
            reason = crm_strdup_printf("%s failed there", rsc->id);
            pe_fence_node(rsc->cluster, node, reason, FALSE);
            free(reason);
            break;

        case pcmk_on_fail_standby_node:
            node->details->standby = TRUE;
            node->details->standby_onfail = TRUE;
            break;

        case pcmk_on_fail_block:
            // Make the resource unmanaged and flag it as blocked
            pe__clear_resource_flags(rsc, pcmk_rsc_managed);
            pe__set_resource_flags(rsc, pcmk_rsc_blocked);
            break;

        case pcmk_on_fail_ban:
            // Add a -INFINITY location preference for this node
            resource_location(rsc, node, -INFINITY, "__action_migration_auto__",
                              rsc->cluster);
            break;

        case pcmk_on_fail_stop:
            pe__set_next_role(rsc, pcmk_role_stopped, "on-fail=stop");
            break;

        case pcmk_on_fail_restart:
            // Only an active resource (known, not stopped) needs a stop
            if ((rsc->role != pcmk_role_stopped)
                && (rsc->role != pcmk_role_unknown)) {
                pe__set_resource_flags(rsc,
                                       pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
                stop_action(rsc, node, FALSE);
            }
            break;

        case pcmk_on_fail_restart_container:
            pe__set_resource_flags(rsc,
                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
            if (rsc->container && pe_rsc_is_bundled(rsc)) {
                /* For a bundled resource, the container is only queued on the
                 * scheduler's stop_needed list here rather than stopped
                 * directly (the list is consumed outside this function).
                 */
                rsc->cluster->stop_needed =
                    g_list_prepend(rsc->cluster->stop_needed, rsc->container);
            } else if (rsc->container) {
                stop_action(rsc->container, node, FALSE);
            } else if ((rsc->role != pcmk_role_stopped)
                       && (rsc->role != pcmk_role_unknown)) {
                // No container: fall back to stopping the resource itself
                stop_action(rsc, node, FALSE);
            }
            break;

        case pcmk_on_fail_reset_remote:
            pe__set_resource_flags(rsc,
                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
            if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) {
                tmpnode = NULL;
                if (rsc->is_remote_node) {
                    // The remote node is looked up by the resource's ID
                    tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
                }
                if (tmpnode &&
                    pe__is_remote_node(tmpnode) &&
                    tmpnode->details->remote_was_fenced == 0) {

                    // Fence the remote node unless it was already fenced
                    pe_fence_node(rsc->cluster, tmpnode,
                                  "remote connection is unrecoverable", FALSE);
                }
            }

            // Stop the connection resource if it is in any active role
            if (rsc->role > pcmk_role_stopped) {
                stop_action(rsc, node, FALSE);
            }

            // With a reconnect interval set, the next role becomes stopped
            if (rsc->remote_reconnect_ms) {
                pe__set_next_role(rsc, pcmk_role_stopped, "remote reset");
            }
            break;
    }

    /* If this is a failed remote connection resource whose node is unclean,
     * clear the node's unseen flag (which may have been set by the
     * offline-node handling above for some other resource).
     */
    if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) {
        tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
        if (tmpnode && tmpnode->details->unclean) {
            tmpnode->details->unseen = FALSE;
        }
    }

    if ((rsc->role != pcmk_role_stopped)
        && (rsc->role != pcmk_role_unknown)) {
        // Resource is active: warn about orphans, then record it as running
        if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) {
            if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
                pcmk__config_warn("Detected active orphan %s running on %s",
                                  rsc->id, pe__node_name(node));
            } else {
                pcmk__config_warn("Resource '%s' must be stopped manually on "
                                  "%s because cluster is configured not to "
                                  "stop active orphans",
                                  rsc->id, pe__node_name(node));
            }
        }

        // The last argument uses the originally requested policy, not on_fail
        native_add_running(rsc, node, rsc->cluster,
                           (save_on_fail != pcmk_on_fail_ignore));
        switch (on_fail) {
            case pcmk_on_fail_ignore:
                break;
            case pcmk_on_fail_demote:
            case pcmk_on_fail_block:
                pe__set_resource_flags(rsc, pcmk_rsc_failed);
                break;
            default:
                pe__set_resource_flags(rsc,
                                       pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
                break;
        }

    } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
        // Inactive resource: drop a clone_name that contains a ':'
        pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
        free(rsc->clone_name);
        rsc->clone_name = NULL;

    } else {
        // Inactive resource: mark any existing stop actions on node optional
        GList *possible_matches = pe__resource_actions(rsc, node,
                                                       PCMK_ACTION_STOP, FALSE);
        GList *gIter = possible_matches;

        for (; gIter != NULL; gIter = gIter->next) {
            pcmk_action_t *stop = (pcmk_action_t *) gIter->data;

            pe__set_action_flags(stop, pcmk_action_optional);
        }

        g_list_free(possible_matches);
    }

    /* A resource that looks stopped on the source of a partial migration, but
     * has a migration target and a running_on entry, is treated as started.
     */
    if ((rsc->role == pcmk_role_stopped)
        && rsc->partial_migration_source
        && rsc->partial_migration_source->details == node->details
        && rsc->partial_migration_target
        && rsc->running_on) {

        rsc->role = pcmk_role_started;
    }
}
2469
2470
2471 static void
2472 process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc,
2473 int start_index, int stop_index,
2474 GList *sorted_op_list, pcmk_scheduler_t *scheduler)
2475 {
2476 int counter = -1;
2477 const char *task = NULL;
2478 const char *status = NULL;
2479 GList *gIter = sorted_op_list;
2480
2481 CRM_ASSERT(rsc);
2482 pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2483
2484 for (; gIter != NULL; gIter = gIter->next) {
2485 xmlNode *rsc_op = (xmlNode *) gIter->data;
2486
2487 guint interval_ms = 0;
2488 char *key = NULL;
2489 const char *id = ID(rsc_op);
2490
2491 counter++;
2492
2493 if (node->details->online == FALSE) {
2494 pe_rsc_trace(rsc, "Skipping %s on %s: node is offline",
2495 rsc->id, pe__node_name(node));
2496 break;
2497
2498
2499 } else if (start_index < stop_index && counter <= stop_index) {
2500 pe_rsc_trace(rsc, "Skipping %s on %s: resource is not active",
2501 id, pe__node_name(node));
2502 continue;
2503
2504 } else if (counter < start_index) {
2505 pe_rsc_trace(rsc, "Skipping %s on %s: old %d",
2506 id, pe__node_name(node), counter);
2507 continue;
2508 }
2509
2510 crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2511 if (interval_ms == 0) {
2512 pe_rsc_trace(rsc, "Skipping %s on %s: non-recurring",
2513 id, pe__node_name(node));
2514 continue;
2515 }
2516
2517 status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2518 if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
2519 pe_rsc_trace(rsc, "Skipping %s on %s: status",
2520 id, pe__node_name(node));
2521 continue;
2522 }
2523 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2524
2525 key = pcmk__op_key(rsc->id, task, interval_ms);
2526 pe_rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node));
2527 custom_action(rsc, key, task, node, TRUE, scheduler);
2528 }
2529 }
2530
2531 void
2532 calculate_active_ops(const GList *sorted_op_list, int *start_index,
2533 int *stop_index)
2534 {
2535 int counter = -1;
2536 int implied_monitor_start = -1;
2537 int implied_clone_start = -1;
2538 const char *task = NULL;
2539 const char *status = NULL;
2540
2541 *stop_index = -1;
2542 *start_index = -1;
2543
2544 for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
2545 const xmlNode *rsc_op = (const xmlNode *) iter->data;
2546
2547 counter++;
2548
2549 task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2550 status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2551
2552 if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
2553 && pcmk__str_eq(status, "0", pcmk__str_casei)) {
2554 *stop_index = counter;
2555
2556 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
2557 PCMK_ACTION_MIGRATE_FROM, NULL)) {
2558 *start_index = counter;
2559
2560 } else if ((implied_monitor_start <= *stop_index)
2561 && pcmk__str_eq(task, PCMK_ACTION_MONITOR,
2562 pcmk__str_casei)) {
2563 const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2564
2565 if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
2566 implied_monitor_start = counter;
2567 }
2568 } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
2569 PCMK_ACTION_DEMOTE, NULL)) {
2570 implied_clone_start = counter;
2571 }
2572 }
2573
2574 if (*start_index == -1) {
2575 if (implied_clone_start != -1) {
2576 *start_index = implied_clone_start;
2577 } else if (implied_monitor_start != -1) {
2578 *start_index = implied_monitor_start;
2579 }
2580 }
2581 }
2582
2583
2584 static void
2585 unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc,
2586 const pcmk_node_t *node, pcmk_scheduler_t *scheduler)
2587 {
2588 time_t lock_time = 0;
2589
2590 if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
2591 &lock_time) == pcmk_ok) && (lock_time != 0)) {
2592
2593 if ((scheduler->shutdown_lock > 0)
2594 && (get_effective_time(scheduler)
2595 > (lock_time + scheduler->shutdown_lock))) {
2596 pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
2597 rsc->id, pe__node_name(node));
2598 pe__clear_resource_history(rsc, node);
2599 } else {
2600
2601
2602
2603
2604 rsc->lock_node = (pcmk_node_t *) node;
2605 rsc->lock_time = lock_time;
2606 }
2607 }
2608 }
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620 static pcmk_resource_t *
2621 unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource,
2622 pcmk_scheduler_t *scheduler)
2623 {
2624 GList *gIter = NULL;
2625 int stop_index = -1;
2626 int start_index = -1;
2627 enum rsc_role_e req_role = pcmk_role_unknown;
2628
2629 const char *rsc_id = ID(lrm_resource);
2630
2631 pcmk_resource_t *rsc = NULL;
2632 GList *op_list = NULL;
2633 GList *sorted_op_list = NULL;
2634
2635 xmlNode *rsc_op = NULL;
2636 xmlNode *last_failure = NULL;
2637
2638 enum action_fail_response on_fail = pcmk_on_fail_ignore;
2639 enum rsc_role_e saved_role = pcmk_role_unknown;
2640
2641 if (rsc_id == NULL) {
2642 crm_warn("Ignoring malformed " XML_LRM_TAG_RESOURCE
2643 " entry without id");
2644 return NULL;
2645 }
2646 crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s",
2647 rsc_id, pe__node_name(node));
2648
2649
2650 for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
2651 rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
2652
2653 op_list = g_list_prepend(op_list, rsc_op);
2654 }
2655
2656 if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2657 if (op_list == NULL) {
2658
2659 return NULL;
2660 }
2661 }
2662
2663
2664 rsc = unpack_find_resource(scheduler, node, rsc_id);
2665 if (rsc == NULL) {
2666 if (op_list == NULL) {
2667
2668 return NULL;
2669 } else {
2670 rsc = process_orphan_resource(lrm_resource, node, scheduler);
2671 }
2672 }
2673 CRM_ASSERT(rsc != NULL);
2674
2675
2676 if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) {
2677 unpack_shutdown_lock(lrm_resource, rsc, node, scheduler);
2678 }
2679
2680
2681 saved_role = rsc->role;
2682 rsc->role = pcmk_role_unknown;
2683 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2684
2685 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2686 xmlNode *rsc_op = (xmlNode *) gIter->data;
2687
2688 unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
2689 }
2690
2691
2692 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2693 process_recurring(node, rsc, start_index, stop_index, sorted_op_list,
2694 scheduler);
2695
2696
2697 g_list_free(sorted_op_list);
2698
2699 process_rsc_state(rsc, node, on_fail);
2700
2701 if (get_target_role(rsc, &req_role)) {
2702 if ((rsc->next_role == pcmk_role_unknown)
2703 || (req_role < rsc->next_role)) {
2704
2705 pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE);
2706
2707 } else if (req_role > rsc->next_role) {
2708 pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2709 " with requested next role %s",
2710 rsc->id, role2text(rsc->next_role), role2text(req_role));
2711 }
2712 }
2713
2714 if (saved_role > rsc->role) {
2715 rsc->role = saved_role;
2716 }
2717
2718 return rsc;
2719 }
2720
2721 static void
2722 handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
2723 pcmk_scheduler_t *scheduler)
2724 {
2725 for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list);
2726 rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
2727
2728 pcmk_resource_t *rsc;
2729 pcmk_resource_t *container;
2730 const char *rsc_id;
2731 const char *container_id;
2732
2733 if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) {
2734 continue;
2735 }
2736
2737 container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2738 rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2739 if (container_id == NULL || rsc_id == NULL) {
2740 continue;
2741 }
2742
2743 container = pe_find_resource(scheduler->resources, container_id);
2744 if (container == NULL) {
2745 continue;
2746 }
2747
2748 rsc = pe_find_resource(scheduler->resources, rsc_id);
2749 if ((rsc == NULL) || (rsc->container != NULL)
2750 || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2751 continue;
2752 }
2753
2754 pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2755 rsc->id, container_id);
2756 rsc->container = container;
2757 container->fillers = g_list_append(container->fillers, rsc);
2758 }
2759 }
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769 static void
2770 unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml,
2771 pcmk_scheduler_t *scheduler)
2772 {
2773 bool found_orphaned_container_filler = false;
2774
2775
2776 xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE);
2777 if (xml == NULL) {
2778 return;
2779 }
2780 xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE);
2781 if (xml == NULL) {
2782 return;
2783 }
2784
2785
2786 for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE);
2787 rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
2788
2789 pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler);
2790
2791 if ((rsc != NULL)
2792 && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) {
2793 found_orphaned_container_filler = true;
2794 }
2795 }
2796
2797
2798
2799
2800 if (found_orphaned_container_filler) {
2801 handle_orphaned_container_fillers(xml, scheduler);
2802 }
2803 }
2804
2805 static void
2806 set_active(pcmk_resource_t *rsc)
2807 {
2808 const pcmk_resource_t *top = pe__const_top_resource(rsc, false);
2809
2810 if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) {
2811 rsc->role = pcmk_role_unpromoted;
2812 } else {
2813 rsc->role = pcmk_role_started;
2814 }
2815 }
2816
2817 static void
2818 set_node_score(gpointer key, gpointer value, gpointer user_data)
2819 {
2820 pcmk_node_t *node = value;
2821 int *score = user_data;
2822
2823 node->weight = *score;
2824 }
2825
/* XPath fragments (string literals, joined by literal concatenation) used by
 * the history search helpers below
 */

// Absolute path to a node state element within the CIB status section
#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
                         "/" XML_CIB_TAG_STATE

// Relative path from a node state element to a resource history entry
#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \
                               "/" XML_LRM_TAG_RESOURCES \
                               "/" XML_LRM_TAG_RESOURCE

// Relative path from a resource history entry to an operation entry
#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP
2832
2833 static xmlNode *
2834 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2835 int target_rc, pcmk_scheduler_t *scheduler)
2836 {
2837 GString *xpath = NULL;
2838 xmlNode *xml = NULL;
2839
2840 CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
2841 return NULL);
2842
2843 xpath = g_string_sized_new(256);
2844 pcmk__g_strcat(xpath,
2845 XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']"
2846 SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']"
2847 SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'",
2848 NULL);
2849
2850
2851 if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
2852 pcmk__g_strcat(xpath,
2853 " and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']",
2854 NULL);
2855
2856 } else if ((source != NULL)
2857 && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
2858 pcmk__g_strcat(xpath,
2859 " and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']",
2860 NULL);
2861 } else {
2862 g_string_append_c(xpath, ']');
2863 }
2864
2865 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2866 LOG_DEBUG);
2867 g_string_free(xpath, TRUE);
2868
2869 if (xml && target_rc >= 0) {
2870 int rc = PCMK_OCF_UNKNOWN_ERROR;
2871 int status = PCMK_EXEC_ERROR;
2872
2873 crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
2874 crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
2875 if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
2876 return NULL;
2877 }
2878 }
2879 return xml;
2880 }
2881
2882 static xmlNode *
2883 find_lrm_resource(const char *rsc_id, const char *node_name,
2884 pcmk_scheduler_t *scheduler)
2885 {
2886 GString *xpath = NULL;
2887 xmlNode *xml = NULL;
2888
2889 CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
2890
2891 xpath = g_string_sized_new(256);
2892 pcmk__g_strcat(xpath,
2893 XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
2894 SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']",
2895 NULL);
2896
2897 xml = get_xpath_object((const char *) xpath->str, scheduler->input,
2898 LOG_DEBUG);
2899
2900 g_string_free(xpath, TRUE);
2901 return xml;
2902 }
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913 static bool
2914 unknown_on_node(pcmk_resource_t *rsc, const char *node_name)
2915 {
2916 bool result = false;
2917 xmlXPathObjectPtr search;
2918 GString *xpath = g_string_sized_new(256);
2919
2920 pcmk__g_strcat(xpath,
2921 XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
2922 SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']"
2923 SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']",
2924 NULL);
2925 search = xpath_search(rsc->cluster->input, (const char *) xpath->str);
2926 result = (numXpathResults(search) == 0);
2927 freeXpathObject(search);
2928 g_string_free(xpath, TRUE);
2929 return result;
2930 }
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944 static bool
2945 monitor_not_running_after(const char *rsc_id, const char *node_name,
2946 const xmlNode *xml_op, bool same_node,
2947 pcmk_scheduler_t *scheduler)
2948 {
2949
2950
2951
2952 xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
2953 NULL, PCMK_OCF_NOT_RUNNING, scheduler);
2954
2955 return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
2956 }
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970 static bool
2971 non_monitor_after(const char *rsc_id, const char *node_name,
2972 const xmlNode *xml_op, bool same_node,
2973 pcmk_scheduler_t *scheduler)
2974 {
2975 xmlNode *lrm_resource = NULL;
2976
2977 lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler);
2978 if (lrm_resource == NULL) {
2979 return false;
2980 }
2981
2982 for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
2983 op != NULL; op = crm_next_same_xml(op)) {
2984 const char * task = NULL;
2985
2986 if (op == xml_op) {
2987 continue;
2988 }
2989
2990 task = crm_element_value(op, XML_LRM_ATTR_TASK);
2991
2992 if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
2993 PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
2994 NULL)
2995 && pe__is_newer_op(op, xml_op, same_node) > 0) {
2996 return true;
2997 }
2998 }
2999
3000 return false;
3001 }
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015 static bool
3016 newer_state_after_migrate(const char *rsc_id, const char *node_name,
3017 const xmlNode *migrate_to,
3018 const xmlNode *migrate_from,
3019 pcmk_scheduler_t *scheduler)
3020 {
3021 const xmlNode *xml_op = migrate_to;
3022 const char *source = NULL;
3023 const char *target = NULL;
3024 bool same_node = false;
3025
3026 if (migrate_from) {
3027 xml_op = migrate_from;
3028 }
3029
3030 source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
3031 target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3032
3033
3034
3035
3036 if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
3037 if (migrate_from) {
3038 xml_op = migrate_from;
3039 same_node = true;
3040
3041 } else {
3042 xml_op = migrate_to;
3043 }
3044
3045 } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
3046 if (migrate_to) {
3047 xml_op = migrate_to;
3048 same_node = true;
3049
3050 } else {
3051 xml_op = migrate_from;
3052 }
3053 }
3054
3055
3056
3057
3058
3059 return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler)
3060 || monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
3061 scheduler);
3062 }
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076 static int
3077 get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node,
3078 const pcmk_node_t *target_node,
3079 const char **source_name, const char **target_name)
3080 {
3081 *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE);
3082 *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET);
3083 if ((*source_name == NULL) || (*target_name == NULL)) {
3084 crm_err("Ignoring resource history entry %s without "
3085 XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET,
3086 ID(entry));
3087 return pcmk_rc_unpack_error;
3088 }
3089
3090 if ((source_node != NULL)
3091 && !pcmk__str_eq(*source_name, source_node->details->uname,
3092 pcmk__str_casei|pcmk__str_null_matches)) {
3093 crm_err("Ignoring resource history entry %s because "
3094 XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s",
3095 ID(entry), *source_name, pe__node_name(source_node));
3096 return pcmk_rc_unpack_error;
3097 }
3098
3099 if ((target_node != NULL)
3100 && !pcmk__str_eq(*target_name, target_node->details->uname,
3101 pcmk__str_casei|pcmk__str_null_matches)) {
3102 crm_err("Ignoring resource history entry %s because "
3103 XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s",
3104 ID(entry), *target_name, pe__node_name(target_node));
3105 return pcmk_rc_unpack_error;
3106 }
3107
3108 return pcmk_rc_ok;
3109 }
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123 static void
3124 add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node)
3125 {
3126 pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
3127 rsc->id, pe__node_name(node));
3128 rsc->role = pcmk_role_stopped;
3129 rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
3130 (gpointer) node);
3131 }
3132
3133
3134
3135
3136
3137
3138
/*!
 * \internal
 * \brief Update resource state based on a successful migrate_to entry
 *
 * Combines the migrate_to entry on the source with any matching migrate_from
 * entry on the target, and with newer history on either node, to decide
 * whether the migration completed (dangling migration on the source), is
 * still in progress (partial migration), or failed.
 *
 * \param[in,out] history  Parsed history entry for the migrate_to action
 */
static void
unpack_migrate_to_success(struct action_history *history)
{
    /* Defaults describe a migrate_from that has not been recorded: these
     * values are overwritten only if a migrate_from entry is found below.
     */
    int from_rc = PCMK_OCF_OK;
    int from_status = PCMK_EXEC_PENDING;
    pcmk_node_t *target_node = NULL;
    xmlNode *migrate_from = NULL;
    const char *source = NULL;
    const char *target = NULL;
    bool source_newer_op = false;
    bool target_newer_state = false;
    bool active_on_target = false;

    // Get source and target names; the source must match this entry's node
    if (get_migration_node_names(history->xml, history->node, NULL, &source,
                                 &target) != pcmk_rc_ok) {
        return;
    }

    // Check for a newer start, stop, or migration on the source
    source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
                                        true, history->rsc->cluster);

    // Look for a migrate_from on the target from this source (any result)
    migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
                               target, source, -1, history->rsc->cluster);
    if (migrate_from != NULL) {
        if (source_newer_op) {
            /* Newer non-monitor history on the source plus a migrate_from on
             * the target: this migrate_to makes no state change here.
             */
            return;
        }
        crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
        crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS,
                              &from_status);
    }

    // Check whether the target has state newer than the migration
    target_newer_state = newer_state_after_migrate(history->rsc->id, target,
                                                   history->xml, migrate_from,
                                                   history->rsc->cluster);
    if (source_newer_op && target_newer_state) {
        // Both nodes have newer history, so nothing is changed here
        return;
    }

    // migrate_from was found and completed successfully
    if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
        add_dangling_migration(history->rsc, history->node);
        return;
    }

    // Otherwise the resource is treated as started for this (source) node
    history->rsc->role = pcmk_role_started;

    target_node = pe_find_node(history->rsc->cluster->nodes, target);
    active_on_target = !target_newer_state && (target_node != NULL)
                       && target_node->details->online;

    /* migrate_from has a recorded result that is not pending, and (given the
     * return above) is not a completed success
     */
    if (from_status != PCMK_EXEC_PENDING) {
        if (active_on_target) {
            native_add_running(history->rsc, target_node, history->rsc->cluster,
                               TRUE);
        } else {
            // Flag the resource failed and not migratable
            pe__set_resource_flags(history->rsc,
                                   pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
            pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
        }
        return;
    }

    // From here on, migrate_from is absent or still pending

    /* If the target is online but has no meaningful history for the resource,
     * make no assumption about the resource being active there.
     */
    if ((target_node != NULL) && target_node->details->online
        && unknown_on_node(history->rsc, target)) {
        return;
    }

    if (active_on_target) {
        pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
                                                source);

        native_add_running(history->rsc, target_node, history->rsc->cluster,
                           FALSE);
        if ((source_node != NULL) && source_node->details->online) {
            // Both ends are online: record this as a partial migration
            history->rsc->partial_migration_target = target_node;
            history->rsc->partial_migration_source = source_node;
        }

    } else if (!source_newer_op) {
        // Not active on target and nothing newer on source: flag as failed
        pe__set_resource_flags(history->rsc,
                               pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
        pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable);
    }
}
3285
3286
3287
3288
3289
3290
3291
3292 static void
3293 unpack_migrate_to_failure(struct action_history *history)
3294 {
3295 xmlNode *target_migrate_from = NULL;
3296 const char *source = NULL;
3297 const char *target = NULL;
3298
3299
3300 if (get_migration_node_names(history->xml, history->node, NULL, &source,
3301 &target) != pcmk_rc_ok) {
3302 return;
3303 }
3304
3305
3306
3307
3308 history->rsc->role = pcmk_role_started;
3309
3310
3311 target_migrate_from = find_lrm_op(history->rsc->id,
3312 PCMK_ACTION_MIGRATE_FROM, target, source,
3313 PCMK_OCF_OK, history->rsc->cluster);
3314
3315 if (
3316
3317
3318
3319
3320 !unknown_on_node(history->rsc, target)
3321
3322
3323
3324 && !newer_state_after_migrate(history->rsc->id, target, history->xml,
3325 target_migrate_from,
3326 history->rsc->cluster)) {
3327
3328
3329
3330
3331 pcmk_node_t *target_node = pe_find_node(history->rsc->cluster->nodes,
3332 target);
3333
3334 if (target_node && target_node->details->online) {
3335 native_add_running(history->rsc, target_node, history->rsc->cluster,
3336 FALSE);
3337 }
3338
3339 } else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
3340 history->rsc->cluster)) {
3341
3342
3343
3344
3345
3346
3347 history->rsc->dangling_migrations =
3348 g_list_prepend(history->rsc->dangling_migrations,
3349 (gpointer) history->node);
3350 }
3351 }
3352
3353
3354
3355
3356
3357
3358
3359 static void
3360 unpack_migrate_from_failure(struct action_history *history)
3361 {
3362 xmlNode *source_migrate_to = NULL;
3363 const char *source = NULL;
3364 const char *target = NULL;
3365
3366
3367 if (get_migration_node_names(history->xml, NULL, history->node, &source,
3368 &target) != pcmk_rc_ok) {
3369 return;
3370 }
3371
3372
3373
3374
3375 history->rsc->role = pcmk_role_started;
3376
3377
3378 source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
3379 source, target, PCMK_OCF_OK,
3380 history->rsc->cluster);
3381
3382 if (
3383
3384
3385
3386
3387 !unknown_on_node(history->rsc, source)
3388
3389
3390
3391 && !newer_state_after_migrate(history->rsc->id, source,
3392 source_migrate_to, history->xml,
3393 history->rsc->cluster)) {
3394
3395
3396
3397 pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
3398 source);
3399
3400 if (source_node && source_node->details->online) {
3401 native_add_running(history->rsc, source_node, history->rsc->cluster,
3402 TRUE);
3403 }
3404 }
3405 }
3406
3407
3408
3409
3410
3411
3412
3413 static void
3414 record_failed_op(struct action_history *history)
3415 {
3416 if (!(history->node->details->online)) {
3417 return;
3418 }
3419
3420 for (const xmlNode *xIter = history->rsc->cluster->failed->children;
3421 xIter != NULL; xIter = xIter->next) {
3422
3423 const char *key = pe__xe_history_key(xIter);
3424 const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
3425
3426 if (pcmk__str_eq(history->key, key, pcmk__str_none)
3427 && pcmk__str_eq(uname, history->node->details->uname,
3428 pcmk__str_casei)) {
3429 crm_trace("Skipping duplicate entry %s on %s",
3430 history->key, pe__node_name(history->node));
3431 return;
3432 }
3433 }
3434
3435 crm_trace("Adding entry for %s on %s to failed action list",
3436 history->key, pe__node_name(history->node));
3437 crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
3438 crm_xml_add(history->xml, XML_LRM_ATTR_RSCID, history->rsc->id);
3439 add_node_copy(history->rsc->cluster->failed, history->xml);
3440 }
3441
3442 static char *
3443 last_change_str(const xmlNode *xml_op)
3444 {
3445 time_t when;
3446 char *result = NULL;
3447
3448 if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
3449 &when) == pcmk_ok) {
3450 char *when_s = pcmk__epoch2str(&when, 0);
3451 const char *p = strchr(when_s, ' ');
3452
3453
3454 if ((p != NULL) && (*(++p) != '\0')) {
3455 result = strdup(p);
3456 CRM_ASSERT(result != NULL);
3457 }
3458 free(when_s);
3459 }
3460
3461 if (result == NULL) {
3462 result = strdup("unknown time");
3463 CRM_ASSERT(result != NULL);
3464 }
3465
3466 return result;
3467 }
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481 static int
3482 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
3483 {
3484 switch (first) {
3485 case pcmk_on_fail_demote:
3486 switch (second) {
3487 case pcmk_on_fail_ignore:
3488 return 1;
3489 case pcmk_on_fail_demote:
3490 return 0;
3491 default:
3492 return -1;
3493 }
3494 break;
3495
3496 case pcmk_on_fail_reset_remote:
3497 switch (second) {
3498 case pcmk_on_fail_ignore:
3499 case pcmk_on_fail_demote:
3500 case pcmk_on_fail_restart:
3501 return 1;
3502 case pcmk_on_fail_reset_remote:
3503 return 0;
3504 default:
3505 return -1;
3506 }
3507 break;
3508
3509 case pcmk_on_fail_restart_container:
3510 switch (second) {
3511 case pcmk_on_fail_ignore:
3512 case pcmk_on_fail_demote:
3513 case pcmk_on_fail_restart:
3514 case pcmk_on_fail_reset_remote:
3515 return 1;
3516 case pcmk_on_fail_restart_container:
3517 return 0;
3518 default:
3519 return -1;
3520 }
3521 break;
3522
3523 default:
3524 break;
3525 }
3526 switch (second) {
3527 case pcmk_on_fail_demote:
3528 return (first == pcmk_on_fail_ignore)? -1 : 1;
3529
3530 case pcmk_on_fail_reset_remote:
3531 switch (first) {
3532 case pcmk_on_fail_ignore:
3533 case pcmk_on_fail_demote:
3534 case pcmk_on_fail_restart:
3535 return -1;
3536 default:
3537 return 1;
3538 }
3539 break;
3540
3541 case pcmk_on_fail_restart_container:
3542 switch (first) {
3543 case pcmk_on_fail_ignore:
3544 case pcmk_on_fail_demote:
3545 case pcmk_on_fail_restart:
3546 case pcmk_on_fail_reset_remote:
3547 return -1;
3548 default:
3549 return 1;
3550 }
3551 break;
3552
3553 default:
3554 break;
3555 }
3556 return first - second;
3557 }
3558
3559
3560
3561
3562
3563
3564
3565 static void
3566 ban_from_all_nodes(pcmk_resource_t *rsc)
3567 {
3568 int score = -INFINITY;
3569 pcmk_resource_t *fail_rsc = rsc;
3570
3571 if (fail_rsc->parent != NULL) {
3572 pcmk_resource_t *parent = uber_parent(fail_rsc);
3573
3574 if (pe_rsc_is_anon_clone(parent)) {
3575
3576
3577
3578 fail_rsc = parent;
3579 }
3580 }
3581
3582
3583 crm_notice("%s will not be started under current conditions", fail_rsc->id);
3584 if (fail_rsc->allowed_nodes != NULL) {
3585 g_hash_table_destroy(fail_rsc->allowed_nodes);
3586 }
3587 fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
3588 g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3589 }
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599 static void
3600 unpack_failure_handling(struct action_history *history,
3601 enum action_fail_response *on_fail,
3602 enum rsc_role_e *fail_role)
3603 {
3604 xmlNode *config = pcmk__find_action_config(history->rsc, history->task,
3605 history->interval_ms, true);
3606
3607 GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node,
3608 history->task,
3609 history->interval_ms, config);
3610
3611 const char *on_fail_str = g_hash_table_lookup(meta, XML_OP_ATTR_ON_FAIL);
3612
3613 *on_fail = pcmk__parse_on_fail(history->rsc, history->task,
3614 history->interval_ms, on_fail_str);
3615 *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail,
3616 meta);
3617 g_hash_table_destroy(meta);
3618 }
3619
3620
3621
3622
3623
3624
3625
3626
3627
3628
3629
/*!
 * \internal
 * \brief Update resource state for a failed action history entry
 *
 * Logs the unexpected result, records the failed operation (except in the
 * trace-only case below), raises the caller's failure response to the
 * configured one when cmp_on_fail() ranks the current value lower, and then
 * adjusts the resource's role, location, and next role depending on which
 * action failed.
 *
 * \param[in,out] history         Parsed action history entry
 * \param[in]     config_on_fail  Failure response configured for the action
 * \param[in]     fail_role       Role the resource should have after failure
 * \param[out]    last_failure    Set to the XML of this history entry
 * \param[in,out] on_fail         Failure response accumulated so far
 */
static void
unpack_rsc_op_failure(struct action_history *history,
                      enum action_fail_response config_on_fail,
                      enum rsc_role_e fail_role, xmlNode **last_failure,
                      enum action_fail_response *on_fail)
{
    bool is_probe = false;
    char *last_change_s = NULL;

    // Remember this entry as the caller's most recent failure
    *last_failure = history->xml;

    is_probe = pcmk_xe_is_probe(history->xml);
    last_change_s = last_change_str(history->xml);

    /* When the symmetric-cluster flag is unset and the agent reported "not
     * installed", log only at trace level and do not record a failed operation
     */
    if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster)
        && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
        crm_trace("Unexpected result (%s%s%s) was recorded for "
                  "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
                  services_ocf_exitcode_str(history->exit_status),
                  (pcmk__str_empty(history->exit_reason)? "" : ": "),
                  pcmk__s(history->exit_reason, ""),
                  (is_probe? "probe" : history->task), history->rsc->id,
                  pe__node_name(history->node), last_change_s,
                  history->exit_status, history->id);
    } else {
        crm_warn("Unexpected result (%s%s%s) was recorded for "
                 "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
                 services_ocf_exitcode_str(history->exit_status),
                 (pcmk__str_empty(history->exit_reason)? "" : ": "),
                 pcmk__s(history->exit_reason, ""),
                 (is_probe? "probe" : history->task), history->rsc->id,
                 pe__node_name(history->node), last_change_s,
                 history->exit_status, history->id);

        if (is_probe && (history->exit_status != PCMK_OCF_OK)
            && (history->exit_status != PCMK_OCF_NOT_RUNNING)
            && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {

            /* The probe returned something other than OK, not running, or
             * running promoted; point the user at resource-discovery
             */
            crm_notice("If it is not possible for %s to run on %s, see "
                       "the resource-discovery option for location constraints",
                       history->rsc->id, pe__node_name(history->node));
        }

        record_failed_op(history);
    }

    free(last_change_s);

    // Adopt the configured response if it outranks the current one
    if (cmp_on_fail(*on_fail, config_on_fail) < 0) {
        pe_rsc_trace(history->rsc, "on-fail %s -> %s for %s",
                     fail2text(*on_fail), fail2text(config_on_fail),
                     history->key);
        *on_fail = config_on_fail;
    }

    if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
        // Failed stop: ban the resource from this node
        resource_location(history->rsc, history->node, -INFINITY,
                          "__stop_fail__", history->rsc->cluster);

    } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
        unpack_migrate_to_failure(history);

    } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
        unpack_migrate_from_failure(history);

    } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
        // Failed promote: the resource is treated as promoted
        history->rsc->role = pcmk_role_promoted;

    } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
        if (config_on_fail == pcmk_on_fail_block) {
            // Blocked demote: still promoted, with next role set to stopped
            history->rsc->role = pcmk_role_promoted;
            pe__set_next_role(history->rsc, pcmk_role_stopped,
                              "demote with on-fail=block");

        } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
            history->rsc->role = pcmk_role_stopped;

        } else {
            // Any other failed demote: treat the resource as unpromoted
            history->rsc->role = pcmk_role_unpromoted;
        }
    }

    if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
        // A probe reporting "not installed" leaves the resource stopped
        pe_rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
        history->rsc->role = pcmk_role_stopped;

    } else if (history->rsc->role < pcmk_role_started) {
        // Otherwise make sure the resource is at least considered active
        pe_rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
        set_active(history->rsc);
    }

    pe_rsc_trace(history->rsc,
                 "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
                 history->rsc->id, role2text(history->rsc->role),
                 pcmk__btoa(history->node->details->unclean),
                 fail2text(config_on_fail), role2text(fail_role));

    // Only ever raise the next role toward the post-failure role
    if ((fail_role != pcmk_role_started)
        && (history->rsc->next_role < fail_role)) {
        pe__set_next_role(history->rsc, fail_role, "failure");
    }

    // A post-failure role of stopped also bans the resource everywhere
    if (fail_role == pcmk_role_stopped) {
        ban_from_all_nodes(history->rsc);
    }
}
3745
3746
3747
3748
3749
3750
3751
3752
3753
3754
3755 static void
3756 block_if_unrecoverable(struct action_history *history)
3757 {
3758 char *last_change_s = NULL;
3759
3760 if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
3761 return;
3762 }
3763 if (pe_can_fence(history->node->details->data_set, history->node)) {
3764 return;
3765 }
3766
3767 last_change_s = last_change_str(history->xml);
3768 pe_proc_err("No further recovery can be attempted for %s "
3769 "because %s on %s failed (%s%s%s) at %s "
3770 CRM_XS " rc=%d id=%s",
3771 history->rsc->id, history->task, pe__node_name(history->node),
3772 services_ocf_exitcode_str(history->exit_status),
3773 (pcmk__str_empty(history->exit_reason)? "" : ": "),
3774 pcmk__s(history->exit_reason, ""),
3775 last_change_s, history->exit_status, history->id);
3776
3777 free(last_change_s);
3778
3779 pe__clear_resource_flags(history->rsc, pcmk_rsc_managed);
3780 pe__set_resource_flags(history->rsc, pcmk_rsc_blocked);
3781 }
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792 static inline void
3793 remap_because(struct action_history *history, const char **why, int value,
3794 const char *reason)
3795 {
3796 if (history->execution_status != value) {
3797 history->execution_status = value;
3798 *why = reason;
3799 }
3800 }
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
/*!
 * \internal
 * \brief Remap an action result to the status the scheduler should act on
 *
 * May modify the entry's execution status and exit status, the resource's
 * role and next role, and \p *on_fail. If a reason for remapping was
 * recorded, a trace message showing the original and final results is logged.
 *
 * \param[in,out] history  Parsed action history entry
 * \param[in,out] on_fail  Failure response; set to "ignore" when a
 *                         "not running" result is accepted as completed
 * \param[in]     expired  Whether the result has expired
 */
static void
remap_operation(struct action_history *history,
                enum action_fail_response *on_fail, bool expired)
{
    bool is_probe = false;
    int orig_exit_status = history->exit_status;
    int orig_exec_status = history->execution_status;
    const char *why = NULL;
    const char *task = history->task;

    // Replace the exit status with its effective value
    history->exit_status = pcmk__effective_rc(history->exit_status);
    if (history->exit_status != orig_exit_status) {
        why = "degraded result";

        /* Record the original result as a failed op unless it has expired, or
         * the node is both shutting down and offline
         */
        if (!expired && (!history->node->details->shutdown
                         || history->node->details->online)) {
            record_failed_op(history);
        }
    }

    /* For non-bundled resources, a probe failure that should be masked is
     * rewritten as a completed "not running" result
     */
    if (!pe_rsc_is_bundled(history->rsc)
        && pcmk_xe_mask_probe_failure(history->xml)
        && ((history->execution_status != PCMK_EXEC_DONE)
            || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
        history->execution_status = PCMK_EXEC_DONE;
        history->exit_status = PCMK_OCF_NOT_RUNNING;
        why = "equivalent probe result";
    }

    /* Only "done" and "error" execution statuses are remapped by exit status
     * below; everything else skips straight to the summary
     */
    switch (history->execution_status) {
        case PCMK_EXEC_DONE:
        case PCMK_EXEC_ERROR:
            break;

        // These two are converted to hard errors
        case PCMK_EXEC_NO_FENCE_DEVICE:
        case PCMK_EXEC_NO_SECRETS:
            remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
                          "node-fatal error");
            goto remap_done;

        default:
            goto remap_done;
    }

    is_probe = pcmk_xe_is_probe(history->xml);
    if (is_probe) {
        task = "probe";
    }

    if (history->expected_exit_status < 0) {
        /* No expected exit status is available (the warning below suggests a
         * corrupt or obsolete CIB), so treat the result as an error
         */
        remap_because(history, &why, PCMK_EXEC_ERROR,
                      "obsolete history format");
        crm_warn("Expected result not found for %s on %s "
                 "(corrupt or obsolete CIB?)",
                 history->key, pe__node_name(history->node));

    } else if (history->exit_status == history->expected_exit_status) {
        remap_because(history, &why, PCMK_EXEC_DONE, "expected result");

    } else {
        remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
        pe_rsc_debug(history->rsc,
                     "%s on %s: expected %d (%s), got %d (%s%s%s)",
                     history->key, pe__node_name(history->node),
                     history->expected_exit_status,
                     services_ocf_exitcode_str(history->expected_exit_status),
                     history->exit_status,
                     services_ocf_exitcode_str(history->exit_status),
                     (pcmk__str_empty(history->exit_reason)? "" : ": "),
                     pcmk__s(history->exit_reason, ""));
    }

    // Refine the execution status according to the specific exit status
    switch (history->exit_status) {
        case PCMK_OCF_OK:
            // A probe that expected "not running" but found the resource active
            if (is_probe
                && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
                char *last_change_s = last_change_str(history->xml);

                remap_because(history, &why, PCMK_EXEC_DONE, "probe");
                pe_rsc_info(history->rsc, "Probe found %s active on %s at %s",
                            history->rsc->id, pe__node_name(history->node),
                            last_change_s);
                free(last_change_s);
            }
            break;

        case PCMK_OCF_NOT_RUNNING:
            if (is_probe
                || (history->expected_exit_status == history->exit_status)
                || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) {

                /* For probes, results matching the expected status, and
                 * unmanaged resources, "not running" is accepted as a
                 * completed result rather than a failure
                 */
                remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
                history->rsc->role = pcmk_role_stopped;
                *on_fail = pcmk_on_fail_ignore;
                pe__set_next_role(history->rsc, pcmk_role_unknown,
                                  "not running");
            }
            break;

        case PCMK_OCF_RUNNING_PROMOTED:
            // A probe that unexpectedly found the resource promoted
            if (is_probe
                && (history->exit_status != history->expected_exit_status)) {
                char *last_change_s = last_change_str(history->xml);

                remap_because(history, &why, PCMK_EXEC_DONE, "probe");
                pe_rsc_info(history->rsc,
                            "Probe found %s active and promoted on %s at %s",
                            history->rsc->id, pe__node_name(history->node),
                            last_change_s);
                free(last_change_s);
            }

            // An expired result sets the role only if it was the expected one
            if (!expired
                || (history->exit_status == history->expected_exit_status)) {
                history->rsc->role = pcmk_role_promoted;
            }
            break;

        case PCMK_OCF_FAILED_PROMOTED:
            if (!expired) {
                history->rsc->role = pcmk_role_promoted;
            }
            remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
            break;

        case PCMK_OCF_NOT_CONFIGURED:
            remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
            break;

        case PCMK_OCF_UNIMPLEMENT_FEATURE:
            {
                guint interval_ms = 0;
                crm_element_value_ms(history->xml, XML_LRM_ATTR_INTERVAL_MS,
                                     &interval_ms);

                // Zero interval is a hard error; otherwise "not supported"
                if (interval_ms == 0) {
                    if (!expired) {
                        block_if_unrecoverable(history);
                    }
                    remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
                                  "exit status");
                } else {
                    remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
                                  "exit status");
                }
            }
            break;

        case PCMK_OCF_NOT_INSTALLED:
        case PCMK_OCF_INVALID_PARAM:
        case PCMK_OCF_INSUFFICIENT_PRIV:
            if (!expired) {
                block_if_unrecoverable(history);
            }
            remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
            break;

        default:
            // Any other exit status that still counts as "done" is a failure
            if (history->execution_status == PCMK_EXEC_DONE) {
                char *last_change_s = last_change_str(history->xml);

                crm_info("Treating unknown exit status %d from %s of %s "
                         "on %s at %s as failure",
                         history->exit_status, task, history->rsc->id,
                         pe__node_name(history->node), last_change_s);
                remap_because(history, &why, PCMK_EXEC_ERROR,
                              "unknown exit status");
                free(last_change_s);
            }
            break;
    }

remap_done:
    // Summarize the remapping only if a reason was recorded
    if (why != NULL) {
        pe_rsc_trace(history->rsc,
                     "Remapped %s result from [%s: %s] to [%s: %s] "
                     "because of %s",
                     history->key, pcmk_exec_status_str(orig_exec_status),
                     crm_exit_str(orig_exit_status),
                     pcmk_exec_status_str(history->execution_status),
                     crm_exit_str(history->exit_status), why);
    }
}
4024
4025
4026 static bool
4027 should_clear_for_param_change(const xmlNode *xml_op, const char *task,
4028 pcmk_resource_t *rsc, pcmk_node_t *node)
4029 {
4030 if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
4031 if (pe__bundle_needs_remote_name(rsc)) {
4032
4033
4034
4035
4036 pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
4037 rsc->cluster);
4038
4039 } else {
4040 op_digest_cache_t *digest_data = NULL;
4041
4042 digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
4043 rsc->cluster);
4044 switch (digest_data->rc) {
4045 case pcmk__digest_unknown:
4046 crm_trace("Resource %s history entry %s on %s"
4047 " has no digest to compare",
4048 rsc->id, pe__xe_history_key(xml_op),
4049 node->details->id);
4050 break;
4051 case pcmk__digest_match:
4052 break;
4053 default:
4054 return TRUE;
4055 }
4056 }
4057 }
4058 return FALSE;
4059 }
4060
4061
4062 static void
4063 order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
4064 pcmk_scheduler_t *scheduler)
4065 {
4066 pcmk_node_t *remote_node = pe_find_node(scheduler->nodes, remote_conn->id);
4067
4068 if (remote_node) {
4069 pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
4070 FALSE, scheduler);
4071
4072 order_actions(fence, action, pcmk__ar_first_implies_then);
4073 }
4074 }
4075
4076 static bool
4077 should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
4078 guint interval_ms, bool is_last_failure)
4079 {
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100 if (rsc->remote_reconnect_ms
4101 && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)
4102 && (interval_ms != 0)
4103 && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
4104
4105 pcmk_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id);
4106
4107 if (remote_node && !remote_node->details->remote_was_fenced) {
4108 if (is_last_failure) {
4109 crm_info("Waiting to clear monitor failure for remote node %s"
4110 " until fencing has occurred", rsc->id);
4111 }
4112 return TRUE;
4113 }
4114 }
4115 return FALSE;
4116 }
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
/*!
 * \internal
 * \brief Check whether an action history entry's result has expired
 *
 * Besides computing the return value, this may update the scheduler's recheck
 * time for fail count expiration and may schedule a fail count clearing
 * (ordered after remote fencing when applicable).
 *
 * \param[in,out] history  Parsed action history entry
 *
 * \return true if the result is considered expired, otherwise false
 */
static bool
check_operation_expiry(struct action_history *history)
{
    bool expired = false;
    bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
    time_t last_run = 0;
    int unexpired_fail_count = 0;
    const char *clear_reason = NULL;

    // A "not installed" execution status never expires
    if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
        pe_rsc_trace(history->rsc,
                     "Resource history entry %s on %s is not expired: "
                     "Not Installed does not expire",
                     history->id, pe__node_name(history->node));
        return false;
    }

    if ((history->rsc->failure_timeout > 0)
        && (crm_element_value_epoch(history->xml, XML_RSC_OP_LAST_CHANGE,
                                    &last_run) == 0)) {

        // Resource has a failure timeout and the entry has a last-change time

        time_t now = get_effective_time(history->rsc->cluster);
        time_t last_failure = 0;

        // Is this particular entry older than the failure timeout?
        if ((now >= (last_run + history->rsc->failure_timeout))
            && !should_ignore_failure_timeout(history->rsc, history->task,
                                              history->interval_ms,
                                              is_last_failure)) {
            expired = true;
        }

        // Effective fail count; also retrieves the last failure time
        unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
                                                &last_failure,
                                                pcmk__fc_effective,
                                                history->xml);

        // Ask for a recheck once the last failure's timeout has passed
        crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
                  " last-failure@%lld",
                  history->id, (long long) last_run, (expired? "" : "not "),
                  (long long) now, unexpired_fail_count,
                  history->rsc->failure_timeout, (long long) last_failure);
        last_failure += history->rsc->failure_timeout + 1;
        if (unexpired_fail_count && (now < last_failure)) {
            pe__update_recheck_time(last_failure, history->rsc->cluster,
                                    "fail count expiration");
        }
    }

    if (expired) {
        if (pe_get_failcount(history->node, history->rsc, NULL,
                             pcmk__fc_default, history->xml)) {

            // There is a fail count when expiration is not taken into account

            if (unexpired_fail_count == 0) {
                // The effective fail count is zero, so the count can be cleared
                clear_reason = "it expired";

            } else {
                /* An effective fail count remains, so this entry is not
                 * treated as expired after all
                 */
                pe_rsc_trace(history->rsc,
                             "Resource history entry %s on %s is not expired: "
                             "Unexpired fail count",
                             history->id, pe__node_name(history->node));
                expired = false;
            }

        } else if (is_last_failure
                   && (history->rsc->remote_reconnect_ms != 0)) {
            /* No fail count, but this is a last-failure record for a resource
             * with a reconnect interval: clear it anyway
             */
            clear_reason = "reconnect interval is set";
        }
    }

    // An unexpired last failure can still be cleared by a parameter change
    if (!expired && is_last_failure
        && should_clear_for_param_change(history->xml, history->task,
                                         history->rsc, history->node)) {
        clear_reason = "resource parameters have changed";
    }

    if (clear_reason != NULL) {
        pcmk_action_t *clear_op = NULL;

        // Schedule clearing of the fail count
        clear_op = pe__clear_failcount(history->rsc, history->node,
                                       clear_reason, history->rsc->cluster);

        if (pcmk_is_set(history->rsc->cluster->flags,
                        pcmk_sched_fencing_enabled)
            && (history->rsc->remote_reconnect_ms != 0)) {
            /* With fencing enabled and a reconnect interval set, order the
             * clearing after any fencing of the corresponding remote node
             */
            crm_info("Clearing %s failure will wait until any scheduled "
                     "fencing of %s completes",
                     history->task, history->rsc->id);
            order_after_remote_fencing(clear_op, history->rsc,
                                       history->rsc->cluster);
        }
    }

    // Zero-interval monitors with these exit statuses are never expired
    if (expired && (history->interval_ms == 0)
        && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
        switch (history->exit_status) {
            case PCMK_OCF_OK:
            case PCMK_OCF_NOT_RUNNING:
            case PCMK_OCF_RUNNING_PROMOTED:
            case PCMK_OCF_DEGRADED:
            case PCMK_OCF_DEGRADED_PROMOTED:
                // Keep the probe result so the resource state stays known
                pe_rsc_trace(history->rsc,
                             "Resource history entry %s on %s is not expired: "
                             "Probe result",
                             history->id, pe__node_name(history->node));
                expired = false;
                break;
        }
    }

    return expired;
}
4273
4274 int
4275 pe__target_rc_from_xml(const xmlNode *xml_op)
4276 {
4277 int target_rc = 0;
4278 const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
4279
4280 if (key == NULL) {
4281 return -1;
4282 }
4283 decode_transition_key(key, NULL, NULL, NULL, &target_rc);
4284 return target_rc;
4285 }
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
/*!
 * \internal
 * \brief Update a resource's state for an action result treated as successful
 *
 * Sets the resource's role according to the action that completed and, for
 * most actions, clears the effect of past failures by resetting \p *on_fail
 * and the resource's next role (subject to the current failure response).
 *
 * \param[in,out] history       Parsed action history entry
 * \param[in]     exit_status   Exit status to use for this update
 * \param[in]     last_failure  Most recent failure entry for the resource on
 *                              this node, if any
 * \param[in,out] on_fail       Failure response accumulated so far
 */
static void
update_resource_state(struct action_history *history, int exit_status,
                      const xmlNode *last_failure,
                      enum action_fail_response *on_fail)
{
    bool clear_past_failure = false;

    if ((exit_status == PCMK_OCF_NOT_INSTALLED)
        || (!pe_rsc_is_bundled(history->rsc)
            && pcmk_xe_mask_probe_failure(history->xml))) {
        // Not installed, or a masked probe failure: resource is stopped
        history->rsc->role = pcmk_role_stopped;

    } else if (exit_status == PCMK_OCF_NOT_RUNNING) {
        clear_past_failure = true;

    } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
                            pcmk__str_none)) {
        // A monitor clears a past failure only if it has the same history key
        if ((last_failure != NULL)
            && pcmk__str_eq(history->key, pe__xe_history_key(last_failure),
                            pcmk__str_none)) {
            clear_past_failure = true;
        }
        if (history->rsc->role < pcmk_role_started) {
            set_active(history->rsc);
        }

    } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
        history->rsc->role = pcmk_role_started;
        clear_past_failure = true;

    } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
        history->rsc->role = pcmk_role_stopped;
        clear_past_failure = true;

    } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
                            pcmk__str_none)) {
        history->rsc->role = pcmk_role_promoted;
        clear_past_failure = true;

    } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
                            pcmk__str_none)) {
        if (*on_fail == pcmk_on_fail_demote) {
            // The demote itself satisfies an on-fail response of "demote"
            clear_past_failure = true;
        }
        history->rsc->role = pcmk_role_unpromoted;

    } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
                            pcmk__str_none)) {
        history->rsc->role = pcmk_role_started;
        clear_past_failure = true;

    } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
                            pcmk__str_none)) {
        unpack_migrate_to_success(history);

    } else if (history->rsc->role < pcmk_role_started) {
        // Any other action: make sure the resource is considered active
        pe_rsc_trace(history->rsc, "%s active on %s",
                     history->rsc->id, pe__node_name(history->node));
        set_active(history->rsc);
    }

    if (!clear_past_failure) {
        return;
    }

    // Decide whether the current failure response may be reset
    switch (*on_fail) {
        case pcmk_on_fail_stop:
        case pcmk_on_fail_ban:
        case pcmk_on_fail_standby_node:
        case pcmk_on_fail_fence_node:
            // These responses are left in place
            pe_rsc_trace(history->rsc,
                         "%s (%s) is not cleared by a completed %s",
                         history->rsc->id, fail2text(*on_fail), history->task);
            break;

        case pcmk_on_fail_block:
        case pcmk_on_fail_ignore:
        case pcmk_on_fail_demote:
        case pcmk_on_fail_restart:
        case pcmk_on_fail_restart_container:
            *on_fail = pcmk_on_fail_ignore;
            pe__set_next_role(history->rsc, pcmk_role_unknown,
                              "clear past failures");
            break;

        case pcmk_on_fail_reset_remote:
            // Reset only when the resource has no reconnect interval
            if (history->rsc->remote_reconnect_ms == 0) {
                *on_fail = pcmk_on_fail_ignore;
                pe__set_next_role(history->rsc, pcmk_role_unknown,
                                  "clear past failures and reset remote");
            }
            break;
    }
}
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406 static inline bool
4407 can_affect_state(struct action_history *history)
4408 {
4409 #if 0
4410
4411
4412
4413
4414
4415
4416 return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
4417 PCMK_ACTION_START, PCMK_ACTION_STOP,
4418 PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
4419 PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
4420 "asyncmon", NULL);
4421 #else
4422 return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
4423 PCMK_ACTION_META_DATA, NULL);
4424 #endif
4425 }
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435 static int
4436 unpack_action_result(struct action_history *history)
4437 {
4438 if ((crm_element_value_int(history->xml, XML_LRM_ATTR_OPSTATUS,
4439 &(history->execution_status)) < 0)
4440 || (history->execution_status < PCMK_EXEC_PENDING)
4441 || (history->execution_status > PCMK_EXEC_MAX)
4442 || (history->execution_status == PCMK_EXEC_CANCELLED)) {
4443 crm_err("Ignoring resource history entry %s for %s on %s "
4444 "with invalid " XML_LRM_ATTR_OPSTATUS " '%s'",
4445 history->id, history->rsc->id, pe__node_name(history->node),
4446 pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_OPSTATUS),
4447 ""));
4448 return pcmk_rc_unpack_error;
4449 }
4450 if ((crm_element_value_int(history->xml, XML_LRM_ATTR_RC,
4451 &(history->exit_status)) < 0)
4452 || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
4453 #if 0
4454
4455
4456
4457
4458 crm_err("Ignoring resource history entry %s for %s on %s "
4459 "with invalid " XML_LRM_ATTR_RC " '%s'",
4460 history->id, history->rsc->id, pe__node_name(history->node),
4461 pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_RC),
4462 ""));
4463 return pcmk_rc_unpack_error;
4464 #else
4465 history->exit_status = CRM_EX_ERROR;
4466 #endif
4467 }
4468 history->exit_reason = crm_element_value(history->xml,
4469 XML_LRM_ATTR_EXIT_REASON);
4470 return pcmk_rc_ok;
4471 }
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
4482
4483 static int
4484 process_expired_result(struct action_history *history, int orig_exit_status)
4485 {
4486 if (!pe_rsc_is_bundled(history->rsc)
4487 && pcmk_xe_mask_probe_failure(history->xml)
4488 && (orig_exit_status != history->expected_exit_status)) {
4489
4490 if (history->rsc->role <= pcmk_role_stopped) {
4491 history->rsc->role = pcmk_role_unknown;
4492 }
4493 crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
4494 "Masked failure expired",
4495 history->id, history->rsc->id,
4496 pe__node_name(history->node));
4497 return pcmk_rc_ok;
4498 }
4499
4500 if (history->exit_status == history->expected_exit_status) {
4501 return pcmk_rc_undetermined;
4502 }
4503
4504 if (history->interval_ms == 0) {
4505 crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
4506 "Expired failure",
4507 history->id, history->task, history->rsc->id,
4508 pe__node_name(history->node));
4509 return pcmk_rc_ok;
4510 }
4511
4512 if (history->node->details->online && !history->node->details->unclean) {
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523 crm_notice("Rescheduling %s-interval %s of %s on %s "
4524 "after failure expired",
4525 pcmk__readable_interval(history->interval_ms), history->task,
4526 history->rsc->id, pe__node_name(history->node));
4527 crm_xml_add(history->xml, XML_LRM_ATTR_RESTART_DIGEST,
4528 "calculated-failure-timeout");
4529 return pcmk_rc_ok;
4530 }
4531
4532 return pcmk_rc_undetermined;
4533 }
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544 static void
4545 mask_probe_failure(struct action_history *history, int orig_exit_status,
4546 const xmlNode *last_failure,
4547 enum action_fail_response *on_fail)
4548 {
4549 pcmk_resource_t *ban_rsc = history->rsc;
4550
4551 if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) {
4552 ban_rsc = uber_parent(history->rsc);
4553 }
4554
4555 crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
4556 services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
4557 pe__node_name(history->node));
4558 update_resource_state(history, history->expected_exit_status, last_failure,
4559 on_fail);
4560 crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
4561
4562 record_failed_op(history);
4563 resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure",
4564 history->rsc->cluster);
4565 }
4566
4567
4568
4569
4570
4571
4572
4573
4574
4575
4576
4577
4578 static bool
4579 failure_is_newer(const struct action_history *history,
4580 const xmlNode *last_failure)
4581 {
4582 guint failure_interval_ms = 0U;
4583 long long failure_change = 0LL;
4584 long long this_change = 0LL;
4585
4586 if (last_failure == NULL) {
4587 return false;
4588 }
4589
4590 if (!pcmk__str_eq(history->task,
4591 crm_element_value(last_failure, XML_LRM_ATTR_TASK),
4592 pcmk__str_none)) {
4593 return false;
4594 }
4595
4596 if ((crm_element_value_ms(last_failure, XML_LRM_ATTR_INTERVAL_MS,
4597 &failure_interval_ms) != pcmk_ok)
4598 || (history->interval_ms != failure_interval_ms)) {
4599 return false;
4600 }
4601
4602 if ((pcmk__scan_ll(crm_element_value(history->xml, XML_RSC_OP_LAST_CHANGE),
4603 &this_change, 0LL) != pcmk_rc_ok)
4604 || (pcmk__scan_ll(crm_element_value(last_failure,
4605 XML_RSC_OP_LAST_CHANGE),
4606 &failure_change, 0LL) != pcmk_rc_ok)
4607 || (failure_change < this_change)) {
4608 return false;
4609 }
4610
4611 return true;
4612 }
4613
4614
4615
4616
4617
4618
4619
4620
4621 static void
4622 process_pending_action(struct action_history *history,
4623 const xmlNode *last_failure)
4624 {
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634 if (failure_is_newer(history, last_failure)) {
4635 return;
4636 }
4637
4638 if (strcmp(history->task, PCMK_ACTION_START) == 0) {
4639 pe__set_resource_flags(history->rsc, pcmk_rsc_start_pending);
4640 set_active(history->rsc);
4641
4642 } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
4643 history->rsc->role = pcmk_role_promoted;
4644
4645 } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
4646 && history->node->details->unclean) {
4647
4648
4649
4650 const char *migrate_target = NULL;
4651 pcmk_node_t *target = NULL;
4652
4653 migrate_target = crm_element_value(history->xml,
4654 XML_LRM_ATTR_MIGRATE_TARGET);
4655 target = pe_find_node(history->rsc->cluster->nodes, migrate_target);
4656 if (target != NULL) {
4657 stop_action(history->rsc, target, FALSE);
4658 }
4659 }
4660
4661 if (history->rsc->pending_task != NULL) {
4662
4663
4664
4665 return;
4666 }
4667
4668 if (pcmk_is_probe(history->task, history->interval_ms)) {
4669
4670
4671
4672
4673
4674 #if 0
4675 history->rsc->pending_task = strdup("probe");
4676 history->rsc->pending_node = history->node;
4677 #endif
4678 } else {
4679 history->rsc->pending_task = strdup(history->task);
4680 history->rsc->pending_node = history->node;
4681 }
4682 }
4683
4684 static void
4685 unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op,
4686 xmlNode **last_failure, enum action_fail_response *on_fail)
4687 {
4688 int old_rc = 0;
4689 bool expired = false;
4690 pcmk_resource_t *parent = rsc;
4691 enum rsc_role_e fail_role = pcmk_role_unknown;
4692 enum action_fail_response failure_strategy = pcmk_on_fail_restart;
4693
4694 struct action_history history = {
4695 .rsc = rsc,
4696 .node = node,
4697 .xml = xml_op,
4698 .execution_status = PCMK_EXEC_UNKNOWN,
4699 };
4700
4701 CRM_CHECK(rsc && node && xml_op, return);
4702
4703 history.id = ID(xml_op);
4704 if (history.id == NULL) {
4705 crm_err("Ignoring resource history entry for %s on %s without ID",
4706 rsc->id, pe__node_name(node));
4707 return;
4708 }
4709
4710
4711 history.task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
4712 if (history.task == NULL) {
4713 crm_err("Ignoring resource history entry %s for %s on %s without "
4714 XML_LRM_ATTR_TASK, history.id, rsc->id, pe__node_name(node));
4715 return;
4716 }
4717 crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS,
4718 &(history.interval_ms));
4719 if (!can_affect_state(&history)) {
4720 pe_rsc_trace(rsc,
4721 "Ignoring resource history entry %s for %s on %s "
4722 "with irrelevant action '%s'",
4723 history.id, rsc->id, pe__node_name(node), history.task);
4724 return;
4725 }
4726
4727 if (unpack_action_result(&history) != pcmk_rc_ok) {
4728 return;
4729 }
4730
4731 history.expected_exit_status = pe__target_rc_from_xml(xml_op);
4732 history.key = pe__xe_history_key(xml_op);
4733 crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &(history.call_id));
4734
4735 pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
4736 history.id, history.task, history.call_id, pe__node_name(node),
4737 pcmk_exec_status_str(history.execution_status),
4738 crm_exit_str(history.exit_status));
4739
4740 if (node->details->unclean) {
4741 pe_rsc_trace(rsc,
4742 "%s is running on %s, which is unclean (further action "
4743 "depends on value of stop's on-fail attribute)",
4744 rsc->id, pe__node_name(node));
4745 }
4746
4747 expired = check_operation_expiry(&history);
4748 old_rc = history.exit_status;
4749
4750 remap_operation(&history, on_fail, expired);
4751
4752 if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
4753 goto done;
4754 }
4755
4756 if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
4757 mask_probe_failure(&history, old_rc, *last_failure, on_fail);
4758 goto done;
4759 }
4760
4761 if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) {
4762 parent = uber_parent(rsc);
4763 }
4764
4765 switch (history.execution_status) {
4766 case PCMK_EXEC_PENDING:
4767 process_pending_action(&history, *last_failure);
4768 goto done;
4769
4770 case PCMK_EXEC_DONE:
4771 update_resource_state(&history, history.exit_status, *last_failure,
4772 on_fail);
4773 goto done;
4774
4775 case PCMK_EXEC_NOT_INSTALLED:
4776 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4777 if (failure_strategy == pcmk_on_fail_ignore) {
4778 crm_warn("Cannot ignore failed %s of %s on %s: "
4779 "Resource agent doesn't exist "
4780 CRM_XS " status=%d rc=%d id=%s",
4781 history.task, rsc->id, pe__node_name(node),
4782 history.execution_status, history.exit_status,
4783 history.id);
4784
4785
4786
4787 *on_fail = pcmk_on_fail_ban;
4788 }
4789 resource_location(parent, node, -INFINITY, "hard-error",
4790 rsc->cluster);
4791 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4792 last_failure, on_fail);
4793 goto done;
4794
4795 case PCMK_EXEC_NOT_CONNECTED:
4796 if (pe__is_guest_or_remote_node(node)
4797 && pcmk_is_set(node->details->remote_rsc->flags,
4798 pcmk_rsc_managed)) {
4799
4800
4801
4802
4803
4804
4805 pe__set_resource_flags(node->details->remote_rsc,
4806 pcmk_rsc_failed|pcmk_rsc_stop_if_failed);
4807 }
4808 break;
4809
4810 case PCMK_EXEC_ERROR:
4811 case PCMK_EXEC_ERROR_HARD:
4812 case PCMK_EXEC_ERROR_FATAL:
4813 case PCMK_EXEC_TIMEOUT:
4814 case PCMK_EXEC_NOT_SUPPORTED:
4815 case PCMK_EXEC_INVALID:
4816 break;
4817
4818 default:
4819 break;
4820 }
4821
4822 unpack_failure_handling(&history, &failure_strategy, &fail_role);
4823 if ((failure_strategy == pcmk_on_fail_ignore)
4824 || ((failure_strategy == pcmk_on_fail_restart_container)
4825 && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
4826
4827 char *last_change_s = last_change_str(xml_op);
4828
4829 crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
4830 CRM_XS " %s",
4831 history.task, services_ocf_exitcode_str(history.exit_status),
4832 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4833 pcmk__s(history.exit_reason, ""), rsc->id, pe__node_name(node),
4834 last_change_s, history.id);
4835 free(last_change_s);
4836
4837 update_resource_state(&history, history.expected_exit_status,
4838 *last_failure, on_fail);
4839 crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
4840 pe__set_resource_flags(rsc, pcmk_rsc_ignore_failure);
4841
4842 record_failed_op(&history);
4843
4844 if ((failure_strategy == pcmk_on_fail_restart_container)
4845 && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) {
4846 *on_fail = failure_strategy;
4847 }
4848
4849 } else {
4850 unpack_rsc_op_failure(&history, failure_strategy, fail_role,
4851 last_failure, on_fail);
4852
4853 if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
4854 uint8_t log_level = LOG_ERR;
4855
4856 if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
4857 log_level = LOG_NOTICE;
4858 }
4859 do_crm_log(log_level,
4860 "Preventing %s from restarting on %s because "
4861 "of hard failure (%s%s%s) " CRM_XS " %s",
4862 parent->id, pe__node_name(node),
4863 services_ocf_exitcode_str(history.exit_status),
4864 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4865 pcmk__s(history.exit_reason, ""), history.id);
4866 resource_location(parent, node, -INFINITY, "hard-error",
4867 rsc->cluster);
4868
4869 } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
4870 crm_err("Preventing %s from restarting anywhere because "
4871 "of fatal failure (%s%s%s) " CRM_XS " %s",
4872 parent->id, services_ocf_exitcode_str(history.exit_status),
4873 (pcmk__str_empty(history.exit_reason)? "" : ": "),
4874 pcmk__s(history.exit_reason, ""), history.id);
4875 resource_location(parent, NULL, -INFINITY, "fatal-error",
4876 rsc->cluster);
4877 }
4878 }
4879
4880 done:
4881 pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
4882 rsc->id, pe__node_name(node), history.id,
4883 role2text(rsc->role), role2text(rsc->next_role));
4884 }
4885
4886 static void
4887 add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite,
4888 pcmk_scheduler_t *scheduler)
4889 {
4890 const char *cluster_name = NULL;
4891
4892 pe_rule_eval_data_t rule_data = {
4893 .node_hash = NULL,
4894 .role = pcmk_role_unknown,
4895 .now = scheduler->now,
4896 .match_data = NULL,
4897 .rsc_data = NULL,
4898 .op_data = NULL
4899 };
4900
4901 g_hash_table_insert(node->details->attrs,
4902 strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
4903
4904 g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
4905 strdup(node->details->id));
4906 if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) {
4907 scheduler->dc_node = node;
4908 node->details->is_dc = TRUE;
4909 g_hash_table_insert(node->details->attrs,
4910 strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
4911 } else {
4912 g_hash_table_insert(node->details->attrs,
4913 strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
4914 }
4915
4916 cluster_name = g_hash_table_lookup(scheduler->config_hash, "cluster-name");
4917 if (cluster_name) {
4918 g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
4919 strdup(cluster_name));
4920 }
4921
4922 pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data,
4923 node->details->attrs, NULL, overwrite,
4924 scheduler);
4925
4926 pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data,
4927 node->details->utilization, NULL,
4928 FALSE, scheduler);
4929
4930 if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
4931 const char *site_name = pe_node_attribute_raw(node, "site-name");
4932
4933 if (site_name) {
4934 g_hash_table_insert(node->details->attrs,
4935 strdup(CRM_ATTR_SITE_NAME),
4936 strdup(site_name));
4937
4938 } else if (cluster_name) {
4939
4940 g_hash_table_insert(node->details->attrs,
4941 strdup(CRM_ATTR_SITE_NAME),
4942 strdup(cluster_name));
4943 }
4944 }
4945 }
4946
4947 static GList *
4948 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
4949 {
4950 int counter = -1;
4951 int stop_index = -1;
4952 int start_index = -1;
4953
4954 xmlNode *rsc_op = NULL;
4955
4956 GList *gIter = NULL;
4957 GList *op_list = NULL;
4958 GList *sorted_op_list = NULL;
4959
4960
4961 op_list = NULL;
4962 sorted_op_list = NULL;
4963
4964 for (rsc_op = pcmk__xe_first_child(rsc_entry);
4965 rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
4966
4967 if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP,
4968 pcmk__str_none)) {
4969 crm_xml_add(rsc_op, "resource", rsc);
4970 crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
4971 op_list = g_list_prepend(op_list, rsc_op);
4972 }
4973 }
4974
4975 if (op_list == NULL) {
4976
4977 return NULL;
4978 }
4979
4980 sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
4981
4982
4983 if (active_filter == FALSE) {
4984 return sorted_op_list;
4985 }
4986
4987 op_list = NULL;
4988
4989 calculate_active_ops(sorted_op_list, &start_index, &stop_index);
4990
4991 for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
4992 xmlNode *rsc_op = (xmlNode *) gIter->data;
4993
4994 counter++;
4995
4996 if (start_index < stop_index) {
4997 crm_trace("Skipping %s: not active", ID(rsc_entry));
4998 break;
4999
5000 } else if (counter < start_index) {
5001 crm_trace("Skipping %s: old", ID(rsc_op));
5002 continue;
5003 }
5004 op_list = g_list_append(op_list, rsc_op);
5005 }
5006
5007 g_list_free(sorted_op_list);
5008 return op_list;
5009 }
5010
5011 GList *
5012 find_operations(const char *rsc, const char *node, gboolean active_filter,
5013 pcmk_scheduler_t *scheduler)
5014 {
5015 GList *output = NULL;
5016 GList *intermediate = NULL;
5017
5018 xmlNode *tmp = NULL;
5019 xmlNode *status = find_xml_node(scheduler->input, XML_CIB_TAG_STATUS, TRUE);
5020
5021 pcmk_node_t *this_node = NULL;
5022
5023 xmlNode *node_state = NULL;
5024
5025 for (node_state = pcmk__xe_first_child(status); node_state != NULL;
5026 node_state = pcmk__xe_next(node_state)) {
5027
5028 if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
5029 const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
5030
5031 if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
5032 continue;
5033 }
5034
5035 this_node = pe_find_node(scheduler->nodes, uname);
5036 if(this_node == NULL) {
5037 CRM_LOG_ASSERT(this_node != NULL);
5038 continue;
5039
5040 } else if (pe__is_guest_or_remote_node(this_node)) {
5041 determine_remote_online_status(scheduler, this_node);
5042
5043 } else {
5044 determine_online_status(node_state, this_node, scheduler);
5045 }
5046
5047 if (this_node->details->online
5048 || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) {
5049
5050
5051
5052
5053 xmlNode *lrm_rsc = NULL;
5054
5055 tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
5056 tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
5057
5058 for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL;
5059 lrm_rsc = pcmk__xe_next(lrm_rsc)) {
5060
5061 if (pcmk__str_eq((const char *)lrm_rsc->name,
5062 XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
5063
5064 const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
5065
5066 if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
5067 continue;
5068 }
5069
5070 intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
5071 output = g_list_concat(output, intermediate);
5072 }
5073 }
5074 }
5075 }
5076 }
5077
5078 return output;
5079 }