This source file includes the following definitions.
- st_ipc_accept
- st_ipc_dispatch
- st_ipc_closed
- st_ipc_destroy
- stonith_peer_callback
- stonith_peer_ais_callback
- stonith_peer_cs_destroy
- do_local_reply
- get_stonith_flag
- stonith_notify_client
- do_stonith_async_timeout_update
- fenced_send_notification
- send_config_notification
- fenced_send_device_notification
- fenced_send_level_notification
- node_does_watchdog_fencing
- stonith_shutdown
- stonith_cleanup
- stand_alone_cpg_cb
- st_peer_update_callback
- fencer_metadata
- build_arg_context
- main
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdio.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include <unistd.h>
17 #include <sys/utsname.h>
18
19 #include <stdlib.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <inttypes.h>
23
24 #include <crm/crm.h>
25 #include <crm/msg_xml.h>
26 #include <crm/common/cmdline_internal.h>
27 #include <crm/common/ipc.h>
28 #include <crm/common/ipc_internal.h>
29 #include <crm/common/output_internal.h>
30
31 #include <crm/stonith-ng.h>
32 #include <crm/fencing/internal.h>
33 #include <crm/common/xml.h>
34 #include <crm/common/xml_internal.h>
35
36 #include <crm/common/mainloop.h>
37
38 #include <crm/cib/internal.h>
39
40 #include <pacemaker-fenced.h>
41
42 #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
43
44 char *stonith_our_uname = NULL;
45 long stonith_watchdog_timeout_ms = 0;
46 GList *stonith_watchdog_targets = NULL;
47
48 static GMainLoop *mainloop = NULL;
49
50 gboolean stand_alone = FALSE;
51 gboolean stonith_shutdown_flag = FALSE;
52
53 static qb_ipcs_service_t *ipcs = NULL;
54 static pcmk__output_t *out = NULL;
55
56 pcmk__supported_format_t formats[] = {
57 PCMK__SUPPORTED_FORMAT_NONE,
58 PCMK__SUPPORTED_FORMAT_TEXT,
59 PCMK__SUPPORTED_FORMAT_XML,
60 { NULL, NULL, NULL }
61 };
62
63 static struct {
64 bool no_cib_connect;
65 gchar **log_files;
66 } options;
67
68 crm_exit_t exit_code = CRM_EX_OK;
69
70 static void stonith_cleanup(void);
71
72 static int32_t
73 st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
74 {
75 if (stonith_shutdown_flag) {
76 crm_info("Ignoring new client [%d] during shutdown",
77 pcmk__client_pid(c));
78 return -EPERM;
79 }
80
81 if (pcmk__new_client(c, uid, gid) == NULL) {
82 return -EIO;
83 }
84 return 0;
85 }
86
87
88 static int32_t
89 st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
90 {
91 uint32_t id = 0;
92 uint32_t flags = 0;
93 int call_options = 0;
94 xmlNode *request = NULL;
95 pcmk__client_t *c = pcmk__find_client(qbc);
96 const char *op = NULL;
97
98 if (c == NULL) {
99 crm_info("Invalid client: %p", qbc);
100 return 0;
101 }
102
103 request = pcmk__client_data2xml(c, data, &id, &flags);
104 if (request == NULL) {
105 pcmk__ipc_send_ack(c, id, flags, "nack", NULL, CRM_EX_PROTOCOL);
106 return 0;
107 }
108
109
110 op = crm_element_value(request, F_CRM_TASK);
111 if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
112 crm_xml_add(request, F_TYPE, T_STONITH_NG);
113 crm_xml_add(request, F_STONITH_OPERATION, op);
114 crm_xml_add(request, F_STONITH_CLIENTID, c->id);
115 crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
116 crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
117
118 send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE);
119 free_xml(request);
120 return 0;
121 }
122
123 if (c->name == NULL) {
124 const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
125
126 if (value == NULL) {
127 value = "unknown";
128 }
129 c->name = crm_strdup_printf("%s.%u", value, c->pid);
130 }
131
132 crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options);
133 crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
134 " from client %s", flags, call_options, id, pcmk__client_name(c));
135
136 if (pcmk_is_set(call_options, st_opt_sync_call)) {
137 CRM_ASSERT(flags & crm_ipc_client_response);
138 CRM_LOG_ASSERT(c->request_id == 0);
139 c->request_id = id;
140 }
141
142 crm_xml_add(request, F_STONITH_CLIENTID, c->id);
143 crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c));
144 crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname);
145
146 crm_log_xml_trace(request, "ipc-received");
147 stonith_command(c, id, flags, request, NULL);
148
149 free_xml(request);
150 return 0;
151 }
152
153
154 static int32_t
155 st_ipc_closed(qb_ipcs_connection_t * c)
156 {
157 pcmk__client_t *client = pcmk__find_client(c);
158
159 if (client == NULL) {
160 return 0;
161 }
162
163 crm_trace("Connection %p closed", c);
164 pcmk__free_client(client);
165
166
167 return 0;
168 }
169
170 static void
171 st_ipc_destroy(qb_ipcs_connection_t * c)
172 {
173 crm_trace("Connection %p destroyed", c);
174 st_ipc_closed(c);
175 }
176
177 static void
178 stonith_peer_callback(xmlNode * msg, void *private_data)
179 {
180 const char *remote_peer = crm_element_value(msg, F_ORIG);
181 const char *op = crm_element_value(msg, F_STONITH_OPERATION);
182
183 if (pcmk__str_eq(op, "poke", pcmk__str_none)) {
184 return;
185 }
186
187 crm_log_xml_trace(msg, "Peer[inbound]");
188 stonith_command(NULL, 0, 0, msg, remote_peer);
189 }
190
191 #if SUPPORT_COROSYNC
192 static void
193 stonith_peer_ais_callback(cpg_handle_t handle,
194 const struct cpg_name *groupName,
195 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
196 {
197 uint32_t kind = 0;
198 xmlNode *xml = NULL;
199 const char *from = NULL;
200 char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
201
202 if(data == NULL) {
203 return;
204 }
205 if (kind == crm_class_cluster) {
206 xml = string2xml(data);
207 if (xml == NULL) {
208 crm_err("Invalid XML: '%.120s'", data);
209 free(data);
210 return;
211 }
212 crm_xml_add(xml, F_ORIG, from);
213
214 stonith_peer_callback(xml, NULL);
215 }
216
217 free_xml(xml);
218 free(data);
219 return;
220 }
221
222 static void
223 stonith_peer_cs_destroy(gpointer user_data)
224 {
225 crm_crit("Lost connection to cluster layer, shutting down");
226 stonith_shutdown(0);
227 }
228 #endif
229
230 void
231 do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
232 int call_options)
233 {
234
235 int local_rc = pcmk_rc_ok;
236 int rid = 0;
237 uint32_t ipc_flags = crm_ipc_server_event;
238
239 if (pcmk_is_set(call_options, st_opt_sync_call)) {
240 CRM_LOG_ASSERT(client->request_id);
241 rid = client->request_id;
242 client->request_id = 0;
243 ipc_flags = crm_ipc_flags_none;
244 }
245
246 local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
247 if (local_rc == pcmk_rc_ok) {
248 crm_trace("Sent response %d to client %s",
249 rid, pcmk__client_name(client));
250 } else {
251 crm_warn("%synchronous reply to client %s failed: %s",
252 (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"),
253 pcmk__client_name(client), pcmk_rc_str(local_rc));
254 }
255 }
256
257 uint64_t
258 get_stonith_flag(const char *name)
259 {
260 if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
261 return st_callback_notify_fence;
262
263 } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) {
264 return st_callback_device_add;
265
266 } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) {
267 return st_callback_device_del;
268
269 } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) {
270 return st_callback_notify_history;
271
272 } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) {
273 return st_callback_notify_history_synced;
274
275 }
276 return st_callback_unknown;
277 }
278
279 static void
280 stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
281 {
282
283 const xmlNode *update_msg = user_data;
284 pcmk__client_t *client = value;
285 const char *type = NULL;
286
287 CRM_CHECK(client != NULL, return);
288 CRM_CHECK(update_msg != NULL, return);
289
290 type = crm_element_value(update_msg, F_SUBTYPE);
291 CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
292
293 if (client->ipcs == NULL) {
294 crm_trace("Skipping client with NULL channel");
295 return;
296 }
297
298 if (pcmk_is_set(client->flags, get_stonith_flag(type))) {
299 int rc = pcmk__ipc_send_xml(client, 0, update_msg,
300 crm_ipc_server_event);
301
302 if (rc != pcmk_rc_ok) {
303 crm_warn("%s notification of client %s failed: %s "
304 CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
305 pcmk_rc_str(rc), client->id, rc);
306 } else {
307 crm_trace("Sent %s notification to client %s",
308 type, pcmk__client_name(client));
309 }
310 }
311 }
312
313 void
314 do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
315 {
316 pcmk__client_t *client = NULL;
317 xmlNode *notify_data = NULL;
318
319 if (!timeout || !call_id || !client_id) {
320 return;
321 }
322
323 client = pcmk__find_client_by_id(client_id);
324 if (!client) {
325 return;
326 }
327
328 notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
329 crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE);
330 crm_xml_add(notify_data, F_STONITH_CALLID, call_id);
331 crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout);
332
333 crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
334
335 if (client) {
336 pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
337 }
338
339 free_xml(notify_data);
340 }
341
342
343
344
345
346
347
348
349
350 void
351 fenced_send_notification(const char *type, const pcmk__action_result_t *result,
352 xmlNode *data)
353 {
354
355 xmlNode *update_msg = create_xml_node(NULL, "notify");
356
357 CRM_LOG_ASSERT(type != NULL);
358
359 crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
360 crm_xml_add(update_msg, F_SUBTYPE, type);
361 crm_xml_add(update_msg, F_STONITH_OPERATION, type);
362 stonith__xe_set_result(update_msg, result);
363
364 if (data != NULL) {
365 add_message_xml(update_msg, F_STONITH_CALLDATA, data);
366 }
367
368 crm_trace("Notifying clients");
369 pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
370 free_xml(update_msg);
371 crm_trace("Notify complete");
372 }
373
374
375
376
377
378
379
380
381
382
383
384
385 static void
386 send_config_notification(const char *op, const pcmk__action_result_t *result,
387 const char *desc, int active)
388 {
389 xmlNode *notify_data = create_xml_node(NULL, op);
390
391 CRM_CHECK(notify_data != NULL, return);
392
393 crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
394 crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active);
395
396 fenced_send_notification(op, result, notify_data);
397 free_xml(notify_data);
398 }
399
400
401
402
403
404
405
406
407
408
409 void
410 fenced_send_device_notification(const char *op,
411 const pcmk__action_result_t *result,
412 const char *desc)
413 {
414 send_config_notification(op, result, desc, g_hash_table_size(device_list));
415 }
416
417
418
419
420
421
422
423
424
425
426 void
427 fenced_send_level_notification(const char *op,
428 const pcmk__action_result_t *result,
429 const char *desc)
430 {
431 send_config_notification(op, result, desc, g_hash_table_size(topology));
432 }
433
434
435
436
437
438
439
440
441
442
443
444 gboolean
445 node_does_watchdog_fencing(const char *node)
446 {
447 return ((stonith_watchdog_targets == NULL) ||
448 pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
449 }
450
451 void
452 stonith_shutdown(int nsig)
453 {
454 crm_info("Terminating with %d clients", pcmk__ipc_client_count());
455 stonith_shutdown_flag = TRUE;
456 if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
457 g_main_loop_quit(mainloop);
458 }
459 }
460
461 static void
462 stonith_cleanup(void)
463 {
464 fenced_cib_cleanup();
465 if (ipcs) {
466 qb_ipcs_destroy(ipcs);
467 }
468
469 crm_peer_destroy();
470 pcmk__client_cleanup();
471 free_stonith_remote_op_list();
472 free_topology_list();
473 free_device_list();
474 free_metadata_cache();
475 fenced_unregister_handlers();
476
477 free(stonith_our_uname);
478 stonith_our_uname = NULL;
479 }
480
481 static gboolean
482 stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data,
483 GError **error)
484 {
485 stand_alone = FALSE;
486 options.no_cib_connect = true;
487 return TRUE;
488 }
489
490 struct qb_ipcs_service_handlers ipc_callbacks = {
491 .connection_accept = st_ipc_accept,
492 .connection_created = NULL,
493 .msg_process = st_ipc_dispatch,
494 .connection_closed = st_ipc_closed,
495 .connection_destroyed = st_ipc_destroy
496 };
497
498
499
500
501
502
503
504
505
506 static void
507 st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
508 {
509 if ((type != crm_status_processes)
510 && !pcmk_is_set(node->flags, crm_remote_node)) {
511
512
513
514
515 xmlNode *query = create_xml_node(NULL, "stonith_command");
516
517 crm_xml_add(query, F_XML_TAGNAME, "stonith_command");
518 crm_xml_add(query, F_TYPE, T_STONITH_NG);
519 crm_xml_add(query, F_STONITH_OPERATION, "poke");
520
521 crm_debug("Broadcasting our uname because of node %u", node->id);
522 send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE);
523
524 free_xml(query);
525 }
526 }
527
528 static pcmk__cluster_option_t fencer_options[] = {
529
530
531
532
533
534 {
535 PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL,
536 N_("Advanced use only: An alternate parameter to supply instead of 'port'"),
537 N_("some devices do not support the "
538 "standard 'port' parameter or may provide additional ones. Use "
539 "this to specify an alternate, device-specific, parameter "
540 "that should indicate the machine to be fenced. A value of "
541 "none can be used to tell the cluster not to supply any "
542 "additional parameters.")
543 },
544 {
545 PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL,
546 N_("A mapping of host names to ports numbers for devices that do not support host names."),
547 N_("Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2")
548 },
549 {
550 PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL,
551 N_("Eg. node1,node2,node3"),
552 N_("A list of machines controlled by "
553 "this device (Optional unless pcmk_host_list=static-list)")
554 },
555 {
556 PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL,
557 N_("How to determine which machines are controlled by the device."),
558 N_("Allowed values: dynamic-list "
559 "(query the device via the 'list' command), static-list "
560 "(check the pcmk_host_list attribute), status "
561 "(query the device via the 'status' command), "
562 "none (assume every device can fence every "
563 "machine)")
564 },
565 {
566 PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL,
567 N_("Enable a base delay for fencing actions and specify base delay value."),
568 N_("Enable a delay of no more than the "
569 "time specified before executing fencing actions. Pacemaker "
570 "derives the overall delay by taking the value of "
571 "pcmk_delay_base and adding a random delay value such "
572 "that the sum is kept below this maximum.")
573 },
574 {
575 PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL,
576 N_("Enable a base delay for "
577 "fencing actions and specify base delay value."),
578 N_("This enables a static delay for "
579 "fencing actions, which can help avoid \"death matches\" where "
580 "two nodes try to fence each other at the same time. If "
581 "pcmk_delay_max is also used, a random delay will be "
582 "added such that the total delay is kept below that value."
583 "This can be set to a single time value to apply to any node "
584 "targeted by this device (useful if a separate device is "
585 "configured for each target), or to a node map (for example, "
586 "\"node1:1s;node2:5\") to set a different value per target.")
587 },
588 {
589 PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL,
590 N_("The maximum number of actions can be performed in parallel on this device"),
591 N_("Cluster property concurrent-fencing=true needs to be configured first."
592 "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.")
593 },
594 {
595 "pcmk_reboot_action", NULL, "string", NULL,
596 PCMK_ACTION_REBOOT, NULL,
597 N_("Advanced use only: An alternate command to run instead of 'reboot'"),
598 N_("Some devices do not support the standard commands or may provide additional ones.\n"
599 "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.")
600 },
601 {
602 "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL,
603 N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"),
604 N_("Some devices need much more/less time to complete than normal."
605 "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.")
606 },
607 {
608 "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL,
609 N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"),
610 N_("Some devices do not support multiple connections."
611 " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
612 " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.")
613 },
614 {
615 "pcmk_off_action", NULL, "string", NULL,
616 PCMK_ACTION_OFF, NULL,
617 N_("Advanced use only: An alternate command to run instead of \'off\'"),
618 N_("Some devices do not support the standard commands or may provide additional ones."
619 "Use this to specify an alternate, device-specific, command that implements the \'off\' action.")
620 },
621 {
622 "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL,
623 N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"),
624 N_("Some devices need much more/less time to complete than normal."
625 "Use this to specify an alternate, device-specific, timeout for \'off\' actions.")
626 },
627 {
628 "pcmk_off_retries",NULL, "integer", NULL, "2", NULL,
629 N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"),
630 N_("Some devices do not support multiple connections."
631 " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
632 " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.")
633 },
634 {
635 "pcmk_on_action", NULL, "string", NULL,
636 PCMK_ACTION_ON, NULL,
637 N_("Advanced use only: An alternate command to run instead of 'on'"),
638 N_("Some devices do not support the standard commands or may provide additional ones."
639 "Use this to specify an alternate, device-specific, command that implements the \'on\' action.")
640 },
641 {
642 "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL,
643 N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"),
644 N_("Some devices need much more/less time to complete than normal."
645 "Use this to specify an alternate, device-specific, timeout for \'on\' actions.")
646 },
647 {
648 "pcmk_on_retries",NULL, "integer", NULL, "2", NULL,
649 N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"),
650 N_("Some devices do not support multiple connections."
651 " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
652 " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.")
653 },
654 {
655 "pcmk_list_action",NULL, "string", NULL,
656 PCMK_ACTION_LIST, NULL,
657 N_("Advanced use only: An alternate command to run instead of \'list\'"),
658 N_("Some devices do not support the standard commands or may provide additional ones."
659 "Use this to specify an alternate, device-specific, command that implements the \'list\' action.")
660 },
661 {
662 "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL,
663 N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"),
664 N_("Some devices need much more/less time to complete than normal."
665 "Use this to specify an alternate, device-specific, timeout for \'list\' actions.")
666 },
667 {
668 "pcmk_list_retries",NULL, "integer", NULL, "2", NULL,
669 N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"),
670 N_("Some devices do not support multiple connections."
671 " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
672 " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.")
673 },
674 {
675 "pcmk_monitor_action", NULL, "string", NULL,
676 PCMK_ACTION_MONITOR, NULL,
677 N_("Advanced use only: An alternate command to run instead of \'monitor\'"),
678 N_("Some devices do not support the standard commands or may provide additional ones."
679 "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.")
680 },
681 {
682 "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL,
683 N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"),
684 N_("Some devices need much more/less time to complete than normal.\n"
685 "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.")
686 },
687 {
688 "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL,
689 N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"),
690 N_("Some devices do not support multiple connections."
691 " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
692 " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.")
693 },
694 {
695 "pcmk_status_action", NULL, "string", NULL,
696 PCMK_ACTION_STATUS, NULL,
697 N_("Advanced use only: An alternate command to run instead of \'status\'"),
698 N_("Some devices do not support the standard commands or may provide additional ones."
699 "Use this to specify an alternate, device-specific, command that implements the \'status\' action.")
700 },
701 {
702 "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL,
703 N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"),
704 N_("Some devices need much more/less time to complete than normal."
705 "Use this to specify an alternate, device-specific, timeout for \'status\' actions.")
706 },
707 {
708 "pcmk_status_retries",NULL, "integer", NULL, "2", NULL,
709 N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"),
710 N_("Some devices do not support multiple connections."
711 " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
712 " Use this option to alter the number of times Pacemaker retries \'status\' actions before giving up.")
713 },
714 };
715
716 void
717 fencer_metadata(void)
718 {
719 const char *desc_short = N_("Instance attributes available for all "
720 "\"stonith\"-class resources");
721 const char *desc_long = N_("Instance attributes available for all \"stonith\"-"
722 "class resources and used by Pacemaker's fence "
723 "daemon, formerly known as stonithd");
724
725 gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short,
726 desc_long, fencer_options,
727 PCMK__NELEM(fencer_options));
728 printf("%s", s);
729 g_free(s);
730 }
731
732 static GOptionEntry entries[] = {
733 { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone,
734 N_("Deprecated (will be removed in a future release)"), NULL },
735
736 { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK,
737 stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL },
738
739 { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
740 &options.log_files, N_("Send logs to the additional named logfile"), NULL },
741
742 { NULL }
743 };
744
745 static GOptionContext *
746 build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
747 {
748 GOptionContext *context = NULL;
749
750 context = pcmk__build_arg_context(args, "text (default), xml", group,
751 "[metadata]");
752 pcmk__add_main_args(context, entries);
753 return context;
754 }
755
756 int
757 main(int argc, char **argv)
758 {
759 int rc = pcmk_rc_ok;
760 crm_cluster_t *cluster = NULL;
761 crm_ipc_t *old_instance = NULL;
762
763 GError *error = NULL;
764
765 GOptionGroup *output_group = NULL;
766 pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
767 gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
768 GOptionContext *context = build_arg_context(args, &output_group);
769
770 crm_log_preinit(NULL, argc, argv);
771
772 pcmk__register_formats(output_group, formats);
773 if (!g_option_context_parse_strv(context, &processed_args, &error)) {
774 exit_code = CRM_EX_USAGE;
775 goto done;
776 }
777
778 rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
779 if (rc != pcmk_rc_ok) {
780 exit_code = CRM_EX_ERROR;
781 g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
782 "Error creating output format %s: %s",
783 args->output_ty, pcmk_rc_str(rc));
784 goto done;
785 }
786
787 if (args->version) {
788 out->version(out, false);
789 goto done;
790 }
791
792 if ((g_strv_length(processed_args) >= 2)
793 && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
794 fencer_metadata();
795 goto done;
796 }
797
798
799 pcmk__add_logfiles(options.log_files, out);
800
801 crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
802 (args->verbosity > 0), argc, argv, FALSE);
803
804 crm_notice("Starting Pacemaker fencer");
805
806 old_instance = crm_ipc_new("stonith-ng", 0);
807 if (old_instance == NULL) {
808
809
810
811 exit_code = CRM_EX_FATAL;
812 goto done;
813 }
814
815 if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
816
817 crm_ipc_close(old_instance);
818 crm_ipc_destroy(old_instance);
819 crm_err("pacemaker-fenced is already active, aborting startup");
820 goto done;
821 } else {
822
823 crm_ipc_destroy(old_instance);
824 old_instance = NULL;
825 }
826
827 mainloop_add_signal(SIGTERM, stonith_shutdown);
828
829 crm_peer_init();
830
831 rc = fenced_scheduler_init();
832 if (rc != pcmk_rc_ok) {
833 exit_code = CRM_EX_FATAL;
834 g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
835 "Error initializing scheduler data: %s", pcmk_rc_str(rc));
836 goto done;
837 }
838
839 cluster = pcmk_cluster_new();
840
841 if (!stand_alone) {
842 #if SUPPORT_COROSYNC
843 if (is_corosync_cluster()) {
844 cluster->destroy = stonith_peer_cs_destroy;
845 cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback;
846 cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
847 }
848 #endif
849
850 crm_set_status_callback(&st_peer_update_callback);
851
852 if (crm_cluster_connect(cluster) == FALSE) {
853 exit_code = CRM_EX_FATAL;
854 crm_crit("Cannot sign in to the cluster... terminating");
855 goto done;
856 }
857 pcmk__str_update(&stonith_our_uname, cluster->uname);
858
859 if (!options.no_cib_connect) {
860 setup_cib();
861 }
862
863 } else {
864 pcmk__str_update(&stonith_our_uname, "localhost");
865 crm_warn("Stand-alone mode is deprecated and will be removed "
866 "in a future release");
867 }
868
869 init_device_list();
870 init_topology_list();
871
872 pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
873
874
875 mainloop = g_main_loop_new(NULL, FALSE);
876 crm_notice("Pacemaker fencer successfully started and accepting connections");
877 g_main_loop_run(mainloop);
878
879 done:
880 g_strfreev(processed_args);
881 pcmk__free_arg_context(context);
882
883 g_strfreev(options.log_files);
884
885 stonith_cleanup();
886 pcmk_cluster_free(cluster);
887 fenced_scheduler_cleanup();
888
889 pcmk__output_and_clear_error(&error, out);
890
891 if (out != NULL) {
892 out->finish(out, exit_code, true, NULL);
893 pcmk__output_free(out);
894 }
895
896 pcmk__unregister_formats();
897 crm_exit(exit_code);
898 }