This source file includes the following definitions.
- st_ipc_accept
- st_ipc_dispatch
- st_ipc_closed
- st_ipc_destroy
- stonith_peer_callback
- handle_cpg_message
- stonith_peer_cs_destroy
- do_local_reply
- fenced_parse_notify_flag
- stonith_notify_client
- do_stonith_async_timeout_update
- fenced_send_notification
- fenced_send_config_notification
- node_does_watchdog_fencing
- stonith_shutdown
- stonith_cleanup
- st_peer_update_callback
- fencer_metadata
- build_arg_context
- main
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sys/param.h>
13 #include <stdio.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include <unistd.h>
17 #include <sys/utsname.h>
18
19 #include <stdlib.h>
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <inttypes.h>
23
24 #include <crm/crm.h>
25 #include <crm/common/cmdline_internal.h>
26 #include <crm/common/ipc.h>
27 #include <crm/common/ipc_internal.h>
28 #include <crm/common/output_internal.h>
29
30 #include <crm/stonith-ng.h>
31 #include <crm/fencing/internal.h>
32 #include <crm/common/xml.h>
33 #include <crm/common/xml_internal.h>
34
35 #include <crm/common/mainloop.h>
36
37 #include <crm/cib/internal.h>
38
39 #include <pacemaker-fenced.h>
40
41 #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster"
42
43
44 long long stonith_watchdog_timeout_ms = 0;
45
46 GList *stonith_watchdog_targets = NULL;
47
48 static GMainLoop *mainloop = NULL;
49
50 gboolean stonith_shutdown_flag = FALSE;
51
52 static qb_ipcs_service_t *ipcs = NULL;
53 static pcmk__output_t *out = NULL;
54
55 pcmk__supported_format_t formats[] = {
56 PCMK__SUPPORTED_FORMAT_NONE,
57 PCMK__SUPPORTED_FORMAT_TEXT,
58 PCMK__SUPPORTED_FORMAT_XML,
59 { NULL, NULL, NULL }
60 };
61
62 static struct {
63 gboolean stand_alone;
64 gchar **log_files;
65 } options;
66
67 crm_exit_t exit_code = CRM_EX_OK;
68
69 static void stonith_cleanup(void);
70
71 static int32_t
72 st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
73 {
74 if (stonith_shutdown_flag) {
75 crm_info("Ignoring new client [%d] during shutdown",
76 pcmk__client_pid(c));
77 return -ECONNREFUSED;
78 }
79
80 if (pcmk__new_client(c, uid, gid) == NULL) {
81 return -ENOMEM;
82 }
83 return 0;
84 }
85
86
87 static int32_t
88 st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
89 {
90 uint32_t id = 0;
91 uint32_t flags = 0;
92 uint32_t call_options = st_opt_none;
93 xmlNode *request = NULL;
94 pcmk__client_t *c = pcmk__find_client(qbc);
95 const char *op = NULL;
96 int rc = pcmk_rc_ok;
97
98 if (c == NULL) {
99 crm_info("Invalid client: %p", qbc);
100 return 0;
101 }
102
103 rc = pcmk__ipc_msg_append(&c->buffer, data);
104
105 if (rc == pcmk_rc_ipc_more) {
106
107 return 0;
108
109 } else if (rc == pcmk_rc_ok) {
110
111
112
113 request = pcmk__client_data2xml(c, &id, &flags);
114 g_byte_array_free(c->buffer, TRUE);
115 c->buffer = NULL;
116
117 } else {
118
119
120
121 crm_err("Error when reading IPC message: %s", pcmk_rc_str(rc));
122
123 if (c->buffer != NULL) {
124 g_byte_array_free(c->buffer, TRUE);
125 c->buffer = NULL;
126 }
127
128 return 0;
129 }
130
131 if (request == NULL) {
132 pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL);
133 return 0;
134 }
135
136 op = crm_element_value(request, PCMK__XA_CRM_TASK);
137 if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) {
138 crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
139 crm_xml_add(request, PCMK__XA_ST_OP, op);
140 crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
141 crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
142 crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node());
143
144 pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, request);
145 pcmk__xml_free(request);
146 return 0;
147 }
148
149 if (c->name == NULL) {
150 const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME);
151
152 c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid);
153 }
154
155 rc = pcmk__xe_get_flags(request, PCMK__XA_ST_CALLOPT, &call_options,
156 st_opt_none);
157 if (rc != pcmk_rc_ok) {
158 crm_warn("Couldn't parse options from IPC request: %s",
159 pcmk_rc_str(rc));
160 }
161
162 crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32
163 " from client %s", flags, call_options, id, pcmk__client_name(c));
164
165 if (pcmk_is_set(call_options, st_opt_sync_call)) {
166 pcmk__assert(pcmk_is_set(flags, crm_ipc_client_response));
167 CRM_LOG_ASSERT(c->request_id == 0);
168 c->request_id = id;
169 }
170
171 crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id);
172 crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c));
173 crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node());
174
175 crm_log_xml_trace(request, "ipc-received");
176 stonith_command(c, id, flags, request, NULL);
177
178 pcmk__xml_free(request);
179 return 0;
180 }
181
182
183 static int32_t
184 st_ipc_closed(qb_ipcs_connection_t * c)
185 {
186 pcmk__client_t *client = pcmk__find_client(c);
187
188 if (client == NULL) {
189 return 0;
190 }
191
192 crm_trace("Connection %p closed", c);
193 pcmk__free_client(client);
194
195
196 return 0;
197 }
198
199 static void
200 st_ipc_destroy(qb_ipcs_connection_t * c)
201 {
202 crm_trace("Connection %p destroyed", c);
203 st_ipc_closed(c);
204 }
205
206 static void
207 stonith_peer_callback(xmlNode * msg, void *private_data)
208 {
209 const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC);
210 const char *op = crm_element_value(msg, PCMK__XA_ST_OP);
211
212 if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) {
213 return;
214 }
215
216 crm_log_xml_trace(msg, "Peer[inbound]");
217 stonith_command(NULL, 0, 0, msg, remote_peer);
218 }
219
220 #if SUPPORT_COROSYNC
221 static void
222 handle_cpg_message(cpg_handle_t handle, const struct cpg_name *groupName,
223 uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
224 {
225 xmlNode *xml = NULL;
226 const char *from = NULL;
227 char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from);
228
229 if(data == NULL) {
230 return;
231 }
232
233 xml = pcmk__xml_parse(data);
234 if (xml == NULL) {
235 crm_err("Invalid XML: '%.120s'", data);
236 free(data);
237 return;
238 }
239 crm_xml_add(xml, PCMK__XA_SRC, from);
240 stonith_peer_callback(xml, NULL);
241
242 pcmk__xml_free(xml);
243 free(data);
244 }
245
246 static void
247 stonith_peer_cs_destroy(gpointer user_data)
248 {
249 crm_crit("Lost connection to cluster layer, shutting down");
250 stonith_shutdown(0);
251 }
252 #endif
253
254 void
255 do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
256 int call_options)
257 {
258
259 int local_rc = pcmk_rc_ok;
260 int rid = 0;
261 uint32_t ipc_flags = crm_ipc_server_event;
262
263 if (pcmk_is_set(call_options, st_opt_sync_call)) {
264 CRM_LOG_ASSERT(client->request_id);
265 rid = client->request_id;
266 client->request_id = 0;
267 ipc_flags = crm_ipc_flags_none;
268 }
269
270 local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags);
271 if (local_rc == pcmk_rc_ok) {
272 crm_trace("Sent response %d to client %s",
273 rid, pcmk__client_name(client));
274 } else {
275 crm_warn("%synchronous reply to client %s failed: %s",
276 (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"),
277 pcmk__client_name(client), pcmk_rc_str(local_rc));
278 }
279 }
280
281
282
283
284
285
286
287
288
289 enum fenced_notify_flags
290 fenced_parse_notify_flag(const char *type)
291 {
292 if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) {
293 return fenced_nf_fence_result;
294 }
295 if (pcmk__str_eq(type, STONITH_OP_DEVICE_ADD, pcmk__str_none)) {
296 return fenced_nf_device_registered;
297 }
298 if (pcmk__str_eq(type, STONITH_OP_DEVICE_DEL, pcmk__str_none)) {
299 return fenced_nf_device_removed;
300 }
301 if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) {
302 return fenced_nf_history_changed;
303 }
304 if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED,
305 pcmk__str_none)) {
306 return fenced_nf_history_synced;
307 }
308 return fenced_nf_none;
309 }
310
311 static void
312 stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
313 {
314
315 const xmlNode *update_msg = user_data;
316 pcmk__client_t *client = value;
317 const char *type = NULL;
318
319 CRM_CHECK(client != NULL, return);
320 CRM_CHECK(update_msg != NULL, return);
321
322 type = crm_element_value(update_msg, PCMK__XA_SUBT);
323 CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
324
325 if (client->ipcs == NULL) {
326 crm_trace("Skipping client with NULL channel");
327 return;
328 }
329
330 if (pcmk_is_set(client->flags, fenced_parse_notify_flag(type))) {
331 int rc = pcmk__ipc_send_xml(client, 0, update_msg,
332 crm_ipc_server_event);
333
334 if (rc != pcmk_rc_ok) {
335 crm_warn("%s notification of client %s failed: %s "
336 QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client),
337 pcmk_rc_str(rc), client->id, rc);
338 } else {
339 crm_trace("Sent %s notification to client %s",
340 type, pcmk__client_name(client));
341 }
342 }
343 }
344
345 void
346 do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
347 {
348 pcmk__client_t *client = NULL;
349 xmlNode *notify_data = NULL;
350
351 if (!timeout || !call_id || !client_id) {
352 return;
353 }
354
355 client = pcmk__find_client_by_id(client_id);
356 if (!client) {
357 return;
358 }
359
360 notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE);
361 crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE);
362 crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id);
363 crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout);
364
365 crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);
366
367 if (client) {
368 pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event);
369 }
370
371 pcmk__xml_free(notify_data);
372 }
373
374
375
376
377
378
379
380
381
382 void
383 fenced_send_notification(const char *type, const pcmk__action_result_t *result,
384 xmlNode *data)
385 {
386
387 xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY);
388
389 CRM_LOG_ASSERT(type != NULL);
390
391 crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY);
392 crm_xml_add(update_msg, PCMK__XA_SUBT, type);
393 crm_xml_add(update_msg, PCMK__XA_ST_OP, type);
394 stonith__xe_set_result(update_msg, result);
395
396 if (data != NULL) {
397 xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA);
398
399 pcmk__xml_copy(wrapper, data);
400 }
401
402 crm_trace("Notifying clients");
403 pcmk__foreach_ipc_client(stonith_notify_client, update_msg);
404 pcmk__xml_free(update_msg);
405 crm_trace("Notify complete");
406 }
407
408
409
410
411
412
413
414
415
416
417
418
419 void
420 fenced_send_config_notification(const char *op,
421 const pcmk__action_result_t *result,
422 const char *desc)
423 {
424 xmlNode *notify_data = pcmk__xe_create(NULL, op);
425
426 crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc);
427
428 fenced_send_notification(op, result, notify_data);
429 pcmk__xml_free(notify_data);
430 }
431
432
433
434
435
436
437
438
439
440
441
442 gboolean
443 node_does_watchdog_fencing(const char *node)
444 {
445 return ((stonith_watchdog_targets == NULL) ||
446 pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei));
447 }
448
449 void
450 stonith_shutdown(int nsig)
451 {
452 crm_info("Terminating with %d clients", pcmk__ipc_client_count());
453 stonith_shutdown_flag = TRUE;
454 if (mainloop != NULL && g_main_loop_is_running(mainloop)) {
455 g_main_loop_quit(mainloop);
456 }
457 }
458
459 static void
460 stonith_cleanup(void)
461 {
462 fenced_cib_cleanup();
463 if (ipcs) {
464 qb_ipcs_destroy(ipcs);
465 }
466
467 pcmk__cluster_destroy_node_caches();
468 pcmk__client_cleanup();
469 free_stonith_remote_op_list();
470 free_topology_list();
471 fenced_free_device_table();
472 free_metadata_cache();
473 fenced_unregister_handlers();
474 }
475
476 struct qb_ipcs_service_handlers ipc_callbacks = {
477 .connection_accept = st_ipc_accept,
478 .connection_created = NULL,
479 .msg_process = st_ipc_dispatch,
480 .connection_closed = st_ipc_closed,
481 .connection_destroyed = st_ipc_destroy
482 };
483
484
485
486
487
488
489
490
491
492 static void
493 st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
494 const void *data)
495 {
496 if ((type != pcmk__node_update_processes)
497 && !pcmk_is_set(node->flags, pcmk__node_status_remote)) {
498
499
500
501
502 xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND);
503
504 crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG);
505 crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE);
506
507 crm_debug("Broadcasting our uname because of node %" PRIu32,
508 node->cluster_layer_id);
509 pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, query);
510
511 pcmk__xml_free(query);
512 }
513 }
514
515
516
517
518
519
520 static int
521 fencer_metadata(void)
522 {
523 const char *name = PCMK__SERVER_FENCED;
524 const char *desc_short = N_("Instance attributes available for all "
525 "\"stonith\"-class resources");
526 const char *desc_long = N_("Instance attributes available for all "
527 "\"stonith\"-class resources and used by "
528 "Pacemaker's fence daemon");
529
530 return pcmk__daemon_metadata(out, name, desc_short, desc_long,
531 pcmk__opt_fencing);
532 }
533
534 static GOptionEntry entries[] = {
535 { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE,
536 &options.stand_alone, N_("Intended for use in regression testing only"),
537 NULL },
538
539 { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY,
540 &options.log_files, N_("Send logs to the additional named logfile"), NULL },
541
542 { NULL }
543 };
544
545 static GOptionContext *
546 build_arg_context(pcmk__common_args_t *args, GOptionGroup **group)
547 {
548 GOptionContext *context = NULL;
549
550 context = pcmk__build_arg_context(args, "text (default), xml", group, NULL);
551 pcmk__add_main_args(context, entries);
552 return context;
553 }
554
555 int
556 main(int argc, char **argv)
557 {
558 int rc = pcmk_rc_ok;
559 pcmk_cluster_t *cluster = NULL;
560 crm_ipc_t *old_instance = NULL;
561
562 GError *error = NULL;
563
564 GOptionGroup *output_group = NULL;
565 pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
566 gchar **processed_args = pcmk__cmdline_preproc(argv, "l");
567 GOptionContext *context = build_arg_context(args, &output_group);
568
569 crm_log_preinit(NULL, argc, argv);
570
571 pcmk__register_formats(output_group, formats);
572 if (!g_option_context_parse_strv(context, &processed_args, &error)) {
573 exit_code = CRM_EX_USAGE;
574 goto done;
575 }
576
577 rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
578 if (rc != pcmk_rc_ok) {
579 exit_code = CRM_EX_ERROR;
580 g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
581 "Error creating output format %s: %s",
582 args->output_ty, pcmk_rc_str(rc));
583 goto done;
584 }
585
586 if (args->version) {
587 out->version(out, false);
588 goto done;
589 }
590
591 if ((g_strv_length(processed_args) >= 2)
592 && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) {
593
594 rc = fencer_metadata();
595 if (rc != pcmk_rc_ok) {
596 exit_code = CRM_EX_FATAL;
597 g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
598 "Unable to display metadata: %s", pcmk_rc_str(rc));
599 }
600 goto done;
601 }
602
603
604 pcmk__add_logfiles(options.log_files, out);
605
606 crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE,
607 (args->verbosity > 0), argc, argv, FALSE);
608
609 crm_notice("Starting Pacemaker fencer");
610
611 old_instance = crm_ipc_new("stonith-ng", 0);
612 if (old_instance == NULL) {
613
614
615
616 exit_code = CRM_EX_FATAL;
617 goto done;
618 }
619
620 if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) {
621
622 crm_ipc_close(old_instance);
623 crm_ipc_destroy(old_instance);
624 crm_crit("Aborting start-up because another fencer instance is "
625 "already active");
626 goto done;
627 } else {
628
629 crm_ipc_destroy(old_instance);
630 old_instance = NULL;
631 }
632
633 mainloop_add_signal(SIGTERM, stonith_shutdown);
634
635 pcmk__cluster_init_node_caches();
636
637 rc = fenced_scheduler_init();
638 if (rc != pcmk_rc_ok) {
639 exit_code = CRM_EX_FATAL;
640 g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
641 "Error initializing scheduler data: %s", pcmk_rc_str(rc));
642 goto done;
643 }
644
645 cluster = pcmk_cluster_new();
646
647 #if SUPPORT_COROSYNC
648 if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
649 pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy);
650 pcmk_cpg_set_deliver_fn(cluster, handle_cpg_message);
651 pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb);
652 }
653 #endif
654
655 pcmk__cluster_set_status_callback(&st_peer_update_callback);
656
657 if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) {
658 exit_code = CRM_EX_FATAL;
659 crm_crit("Cannot sign in to the cluster... terminating");
660 goto done;
661 }
662 fenced_set_local_node(cluster->priv->node_name);
663
664 if (!options.stand_alone) {
665 setup_cib();
666 }
667
668 fenced_init_device_table();
669 init_topology_list();
670
671 pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks);
672
673
674 mainloop = g_main_loop_new(NULL, FALSE);
675 crm_notice("Pacemaker fencer successfully started and accepting connections");
676 g_main_loop_run(mainloop);
677
678 done:
679 g_strfreev(processed_args);
680 pcmk__free_arg_context(context);
681
682 g_strfreev(options.log_files);
683
684 stonith_cleanup();
685 pcmk_cluster_free(cluster);
686 fenced_scheduler_cleanup();
687
688 pcmk__output_and_clear_error(&error, out);
689
690 if (out != NULL) {
691 out->finish(out, exit_code, true, NULL);
692 pcmk__output_free(out);
693 }
694
695 pcmk__unregister_formats();
696 crm_exit(exit_code);
697 }