pacemaker  2.0.4-2deceaa
Scalable High-Availability cluster resource manager
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
mainloop.c
Go to the documentation of this file.
1 /*
2  * Copyright 2004-2020 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #ifndef _GNU_SOURCE
13 # define _GNU_SOURCE
14 #endif
15 
16 #include <stdlib.h>
17 #include <string.h>
18 #include <signal.h>
19 #include <errno.h>
20 
21 #include <sys/wait.h>
22 
23 #include <crm/crm.h>
24 #include <crm/common/xml.h>
25 #include <crm/common/mainloop.h>
27 
28 #include <qb/qbarray.h>
29 
30 struct mainloop_child_s {
31  pid_t pid;
32  char *desc;
33  unsigned timerid;
34  gboolean timeout;
35  void *privatedata;
36 
38 
39  /* Called when a process dies */
40  void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode);
41 };
42 
43 struct trigger_s {
44  GSource source;
45  gboolean running;
46  gboolean trigger;
47  void *user_data;
48  guint id;
49 
50 };
51 
52 static gboolean
53 crm_trigger_prepare(GSource * source, gint * timeout)
54 {
55  crm_trigger_t *trig = (crm_trigger_t *) source;
56 
57  /* cluster-glue's FD and IPC related sources make use of
58  * g_source_add_poll() but do not set a timeout in their prepare
59  * functions
60  *
61  * This means mainloop's poll() will block until an event for one
62  * of these sources occurs - any /other/ type of source, such as
63  * this one or g_idle_*, that doesn't use g_source_add_poll() is
64  * S-O-L and won't be processed until there is something fd-based
65  * happens.
66  *
67  * Luckily the timeout we can set here affects all sources and
68  * puts an upper limit on how long poll() can take.
69  *
70  * So unconditionally set a small-ish timeout, not too small that
71  * we're in constant motion, which will act as an upper bound on
72  * how long the signal handling might be delayed for.
73  */
74  *timeout = 500; /* Timeout in ms */
75 
76  return trig->trigger;
77 }
78 
79 static gboolean
80 crm_trigger_check(GSource * source)
81 {
82  crm_trigger_t *trig = (crm_trigger_t *) source;
83 
84  return trig->trigger;
85 }
86 
87 static gboolean
88 crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
89 {
90  int rc = TRUE;
91  crm_trigger_t *trig = (crm_trigger_t *) source;
92 
93  if (trig->running) {
94  /* Wait until the existing job is complete before starting the next one */
95  return TRUE;
96  }
97  trig->trigger = FALSE;
98 
99  if (callback) {
100  rc = callback(trig->user_data);
101  if (rc < 0) {
102  crm_trace("Trigger handler %p not yet complete", trig);
103  trig->running = TRUE;
104  rc = TRUE;
105  }
106  }
107  return rc;
108 }
109 
110 static void
111 crm_trigger_finalize(GSource * source)
112 {
113  crm_trace("Trigger %p destroyed", source);
114 }
115 
116 static GSourceFuncs crm_trigger_funcs = {
117  crm_trigger_prepare,
118  crm_trigger_check,
119  crm_trigger_dispatch,
120  crm_trigger_finalize,
121 };
122 
123 static crm_trigger_t *
124 mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data),
125  gpointer userdata)
126 {
127  crm_trigger_t *trigger = NULL;
128 
129  trigger = (crm_trigger_t *) source;
130 
131  trigger->id = 0;
132  trigger->trigger = FALSE;
133  trigger->user_data = userdata;
134 
135  if (dispatch) {
136  g_source_set_callback(source, dispatch, trigger, NULL);
137  }
138 
139  g_source_set_priority(source, priority);
140  g_source_set_can_recurse(source, FALSE);
141 
142  trigger->id = g_source_attach(source, NULL);
143  return trigger;
144 }
145 
146 void
148 {
149  crm_trace("Trigger handler %p complete", trig);
150  trig->running = FALSE;
151 }
152 
153 /* If dispatch returns:
154  * -1: Job running but not complete
155  * 0: Remove the trigger from mainloop
156  * 1: Leave the trigger in mainloop
157  */
159 mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata)
160 {
161  GSource *source = NULL;
162 
163  CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource));
164  source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t));
165  CRM_ASSERT(source != NULL);
166 
167  return mainloop_setup_trigger(source, priority, dispatch, userdata);
168 }
169 
170 void
172 {
173  if(source) {
174  source->trigger = TRUE;
175  }
176 }
177 
178 gboolean
180 {
181  GSource *gs = NULL;
182 
183  if(source == NULL) {
184  return TRUE;
185  }
186 
187  gs = (GSource *)source;
188 
189  g_source_destroy(gs); /* Remove from mainloop, ref_count-- */
190  g_source_unref(gs); /* The caller no longer carries a reference to source
191  *
192  * At this point the source should be free'd,
193  * unless we're currently processing said
194  * source, in which case mainloop holds an
195  * additional reference and it will be free'd
196  * once our processing completes
197  */
198  return TRUE;
199 }
200 
201 // Define a custom glib source for signal handling
202 
203 // Data structure for custom glib source
204 typedef struct signal_s {
205  crm_trigger_t trigger; // trigger that invoked source (must be first)
206  void (*handler) (int sig); // signal handler
207  int signal; // signal that was received
208 } crm_signal_t;
209 
210 // Table to associate signal handlers with signal numbers
211 static crm_signal_t *crm_signals[NSIG];
212 
224 static gboolean
225 crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata)
226 {
227  crm_signal_t *sig = (crm_signal_t *) source;
228 
229  if(sig->signal != SIGCHLD) {
230  crm_notice("Caught '%s' signal "CRM_XS" %d (%s handler)",
231  strsignal(sig->signal), sig->signal,
232  (sig->handler? "invoking" : "no"));
233  }
234 
235  sig->trigger.trigger = FALSE;
236  if (sig->handler) {
237  sig->handler(sig->signal);
238  }
239  return TRUE;
240 }
241 
251 static void
252 mainloop_signal_handler(int sig)
253 {
254  if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) {
255  mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]);
256  }
257 }
258 
259 // Functions implementing our custom glib source for signal handling
260 static GSourceFuncs crm_signal_funcs = {
261  crm_trigger_prepare,
262  crm_trigger_check,
263  crm_signal_dispatch,
264  crm_trigger_finalize,
265 };
266 
281 {
282  sigset_t mask;
283  struct sigaction sa;
284  struct sigaction old;
285 
286  if (sigemptyset(&mask) < 0) {
287  crm_err("Could not set handler for signal %d: %s",
288  sig, pcmk_strerror(errno));
289  return SIG_ERR;
290  }
291 
292  memset(&sa, 0, sizeof(struct sigaction));
293  sa.sa_handler = dispatch;
294  sa.sa_flags = SA_RESTART;
295  sa.sa_mask = mask;
296 
297  if (sigaction(sig, &sa, &old) < 0) {
298  crm_err("Could not set handler for signal %d: %s",
299  sig, pcmk_strerror(errno));
300  return SIG_ERR;
301  }
302  return old.sa_handler;
303 }
304 
305 static void
306 mainloop_destroy_signal_entry(int sig)
307 {
308  crm_signal_t *tmp = crm_signals[sig];
309 
310  crm_signals[sig] = NULL;
311 
312  crm_trace("Destroying signal %d", sig);
314 }
315 
327 gboolean
328 mainloop_add_signal(int sig, void (*dispatch) (int sig))
329 {
330  GSource *source = NULL;
331  int priority = G_PRIORITY_HIGH - 1;
332 
333  if (sig == SIGTERM) {
334  /* TERM is higher priority than other signals,
335  * signals are higher priority than other ipc.
336  * Yes, minus: smaller is "higher"
337  */
338  priority--;
339  }
340 
341  if (sig >= NSIG || sig < 0) {
342  crm_err("Signal %d is out of range", sig);
343  return FALSE;
344 
345  } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) {
346  crm_trace("Signal handler for %d is already installed", sig);
347  return TRUE;
348 
349  } else if (crm_signals[sig] != NULL) {
350  crm_err("Different signal handler for %d is already installed", sig);
351  return FALSE;
352  }
353 
354  CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource));
355  source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t));
356 
357  crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL);
358  CRM_ASSERT(crm_signals[sig] != NULL);
359 
360  crm_signals[sig]->handler = dispatch;
361  crm_signals[sig]->signal = sig;
362 
363  if (crm_signal_handler(sig, mainloop_signal_handler) == SIG_ERR) {
364  mainloop_destroy_signal_entry(sig);
365  return FALSE;
366  }
367 #if 0
368  /* If we want signals to interrupt mainloop's poll(), instead of waiting for
369  * the timeout, then we should call siginterrupt() below
370  *
371  * For now, just enforce a low timeout
372  */
373  if (siginterrupt(sig, 1) < 0) {
374  crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig);
375  }
376 #endif
377 
378  return TRUE;
379 }
380 
381 gboolean
383 {
384  if (sig >= NSIG || sig < 0) {
385  crm_err("Signal %d is out of range", sig);
386  return FALSE;
387 
388  } else if (crm_signal_handler(sig, NULL) == SIG_ERR) {
389  crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig);
390  return FALSE;
391 
392  } else if (crm_signals[sig] == NULL) {
393  return TRUE;
394  }
395  mainloop_destroy_signal_entry(sig);
396  return TRUE;
397 }
398 
399 static qb_array_t *gio_map = NULL;
400 
401 void
403 {
404  if (gio_map) {
405  qb_array_free(gio_map);
406  }
407 
408  for (int sig = 0; sig < NSIG; ++sig) {
409  mainloop_destroy_signal_entry(sig);
410  }
411 }
412 
413 /*
414  * libqb...
415  */
416 struct gio_to_qb_poll {
417  int32_t is_used;
418  guint source;
419  int32_t events;
420  void *data;
421  qb_ipcs_dispatch_fn_t fn;
422  enum qb_loop_priority p;
423 };
424 
425 static gboolean
426 gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data)
427 {
428  struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
429  gint fd = g_io_channel_unix_get_fd(gio);
430 
431  crm_trace("%p.%d %d", data, fd, condition);
432 
433  /* if this assert get's hit, then there is a race condition between
434  * when we destroy a fd and when mainloop actually gives it up */
435  CRM_ASSERT(adaptor->is_used > 0);
436 
437  return (adaptor->fn(fd, condition, adaptor->data) == 0);
438 }
439 
440 static void
441 gio_poll_destroy(gpointer data)
442 {
443  struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data;
444 
445  adaptor->is_used--;
446  CRM_ASSERT(adaptor->is_used >= 0);
447 
448  if (adaptor->is_used == 0) {
449  crm_trace("Marking adaptor %p unused", adaptor);
450  adaptor->source = 0;
451  }
452 }
453 
462 static gint
463 conv_prio_libqb2glib(enum qb_loop_priority prio)
464 {
465  gint ret = G_PRIORITY_DEFAULT;
466  switch (prio) {
467  case QB_LOOP_LOW:
468  ret = G_PRIORITY_LOW;
469  break;
470  case QB_LOOP_HIGH:
471  ret = G_PRIORITY_HIGH;
472  break;
473  default:
474  crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED",
475  prio);
476  /* fall-through */
477  case QB_LOOP_MED:
478  break;
479  }
480  return ret;
481 }
482 
491 static enum qb_ipcs_rate_limit
492 conv_libqb_prio2ratelimit(enum qb_loop_priority prio)
493 {
494  /* this is an inversion of what libqb's qb_ipcs_request_rate_limit does */
495  enum qb_ipcs_rate_limit ret = QB_IPCS_RATE_NORMAL;
496  switch (prio) {
497  case QB_LOOP_LOW:
498  ret = QB_IPCS_RATE_SLOW;
499  break;
500  case QB_LOOP_HIGH:
501  ret = QB_IPCS_RATE_FAST;
502  break;
503  default:
504  crm_trace("Invalid libqb's loop priority %d, assuming QB_LOOP_MED",
505  prio);
506  /* fall-through */
507  case QB_LOOP_MED:
508  break;
509  }
510  return ret;
511 }
512 
513 static int32_t
514 gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts,
515  void *data, qb_ipcs_dispatch_fn_t fn, int32_t add)
516 {
517  struct gio_to_qb_poll *adaptor;
518  GIOChannel *channel;
519  int32_t res = 0;
520 
521  res = qb_array_index(gio_map, fd, (void **)&adaptor);
522  if (res < 0) {
523  crm_err("Array lookup failed for fd=%d: %d", fd, res);
524  return res;
525  }
526 
527  crm_trace("Adding fd=%d to mainloop as adaptor %p", fd, adaptor);
528 
529  if (add && adaptor->source) {
530  crm_err("Adaptor for descriptor %d is still in-use", fd);
531  return -EEXIST;
532  }
533  if (!add && !adaptor->is_used) {
534  crm_err("Adaptor for descriptor %d is not in-use", fd);
535  return -ENOENT;
536  }
537 
538  /* channel is created with ref_count = 1 */
539  channel = g_io_channel_unix_new(fd);
540  if (!channel) {
541  crm_err("No memory left to add fd=%d", fd);
542  return -ENOMEM;
543  }
544 
545  if (adaptor->source) {
546  g_source_remove(adaptor->source);
547  adaptor->source = 0;
548  }
549 
550  /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */
551  evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR);
552 
553  adaptor->fn = fn;
554  adaptor->events = evts;
555  adaptor->data = data;
556  adaptor->p = p;
557  adaptor->is_used++;
558  adaptor->source =
559  g_io_add_watch_full(channel, conv_prio_libqb2glib(p), evts,
560  gio_read_socket, adaptor, gio_poll_destroy);
561 
562  /* Now that mainloop now holds a reference to channel,
563  * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
564  *
565  * This means that channel will be free'd by:
566  * g_main_context_dispatch()
567  * -> g_source_destroy_internal()
568  * -> g_source_callback_unref()
569  * shortly after gio_poll_destroy() completes
570  */
571  g_io_channel_unref(channel);
572 
573  crm_trace("Added to mainloop with gsource id=%d", adaptor->source);
574  if (adaptor->source > 0) {
575  return 0;
576  }
577 
578  return -EINVAL;
579 }
580 
581 static int32_t
582 gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts,
583  void *data, qb_ipcs_dispatch_fn_t fn)
584 {
585  return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_TRUE);
586 }
587 
588 static int32_t
589 gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts,
590  void *data, qb_ipcs_dispatch_fn_t fn)
591 {
592  return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_FALSE);
593 }
594 
595 static int32_t
596 gio_poll_dispatch_del(int32_t fd)
597 {
598  struct gio_to_qb_poll *adaptor;
599 
600  crm_trace("Looking for fd=%d", fd);
601  if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) {
602  if (adaptor->source) {
603  g_source_remove(adaptor->source);
604  adaptor->source = 0;
605  }
606  }
607  return 0;
608 }
609 
610 struct qb_ipcs_poll_handlers gio_poll_funcs = {
611  .job_add = NULL,
612  .dispatch_add = gio_poll_dispatch_add,
613  .dispatch_mod = gio_poll_dispatch_mod,
614  .dispatch_del = gio_poll_dispatch_del,
615 };
616 
617 static enum qb_ipc_type
618 pick_ipc_type(enum qb_ipc_type requested)
619 {
620  const char *env = getenv("PCMK_ipc_type");
621 
622  if (env && strcmp("shared-mem", env) == 0) {
623  return QB_IPC_SHM;
624  } else if (env && strcmp("socket", env) == 0) {
625  return QB_IPC_SOCKET;
626  } else if (env && strcmp("posix", env) == 0) {
627  return QB_IPC_POSIX_MQ;
628  } else if (env && strcmp("sysv", env) == 0) {
629  return QB_IPC_SYSV_MQ;
630  } else if (requested == QB_IPC_NATIVE) {
631  /* We prefer shared memory because the server never blocks on
632  * send. If part of a message fits into the socket, libqb
633  * needs to block until the remainder can be sent also.
634  * Otherwise the client will wait forever for the remaining
635  * bytes.
636  */
637  return QB_IPC_SHM;
638  }
639  return requested;
640 }
641 
642 qb_ipcs_service_t *
643 mainloop_add_ipc_server(const char *name, enum qb_ipc_type type,
644  struct qb_ipcs_service_handlers *callbacks)
645 {
646  return mainloop_add_ipc_server_with_prio(name, type, callbacks, QB_LOOP_MED);
647 }
648 
649 qb_ipcs_service_t *
650 mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type,
651  struct qb_ipcs_service_handlers *callbacks,
652  enum qb_loop_priority prio)
653 {
654  int rc = 0;
655  qb_ipcs_service_t *server = NULL;
656 
657  if (gio_map == NULL) {
658  gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1);
659  }
660 
661  server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks);
662 
663  if (server == NULL) {
664  crm_err("Could not create %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc);
665  return NULL;
666  }
667 
668  if (prio != QB_LOOP_MED) {
669  qb_ipcs_request_rate_limit(server, conv_libqb_prio2ratelimit(prio));
670  }
671 
672 #ifdef HAVE_IPCS_GET_BUFFER_SIZE
673  /* All clients should use at least ipc_buffer_max as their buffer size */
674  qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size());
675 #endif
676 
677  qb_ipcs_poll_handlers_set(server, &gio_poll_funcs);
678 
679  rc = qb_ipcs_run(server);
680  if (rc < 0) {
681  crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc);
682  return NULL;
683  }
684 
685  return server;
686 }
687 
688 void
689 mainloop_del_ipc_server(qb_ipcs_service_t * server)
690 {
691  if (server) {
692  qb_ipcs_destroy(server);
693  }
694 }
695 
696 struct mainloop_io_s {
697  char *name;
698  void *userdata;
699 
700  int fd;
701  guint source;
702  crm_ipc_t *ipc;
703  GIOChannel *channel;
704 
705  int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata);
706  int (*dispatch_fn_io) (gpointer userdata);
707  void (*destroy_fn) (gpointer userdata);
708 
709 };
710 
711 static gboolean
712 mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data)
713 {
714  gboolean keep = TRUE;
715  mainloop_io_t *client = data;
716 
717  CRM_ASSERT(client->fd == g_io_channel_unix_get_fd(gio));
718 
719  if (condition & G_IO_IN) {
720  if (client->ipc) {
721  long rc = 0;
722  int max = 10;
723 
724  do {
725  rc = crm_ipc_read(client->ipc);
726  if (rc <= 0) {
727  crm_trace("Message acquisition from %s[%p] failed: %s (%ld)",
728  client->name, client, pcmk_strerror(rc), rc);
729 
730  } else if (client->dispatch_fn_ipc) {
731  const char *buffer = crm_ipc_buffer(client->ipc);
732 
733  crm_trace("New message from %s[%p] = %ld (I/O condition=%d)", client->name, client, rc, condition);
734  if (client->dispatch_fn_ipc(buffer, rc, client->userdata) < 0) {
735  crm_trace("Connection to %s no longer required", client->name);
736  keep = FALSE;
737  }
738  }
739 
740  } while (keep && rc > 0 && --max > 0);
741 
742  } else {
743  crm_trace("New message from %s[%p] %u", client->name, client, condition);
744  if (client->dispatch_fn_io) {
745  if (client->dispatch_fn_io(client->userdata) < 0) {
746  crm_trace("Connection to %s no longer required", client->name);
747  keep = FALSE;
748  }
749  }
750  }
751  }
752 
753  if (client->ipc && crm_ipc_connected(client->ipc) == FALSE) {
754  crm_err("Connection to %s closed " CRM_XS "client=%p condition=%d",
755  client->name, client, condition);
756  keep = FALSE;
757 
758  } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) {
759  crm_trace("The connection %s[%p] has been closed (I/O condition=%d)",
760  client->name, client, condition);
761  keep = FALSE;
762 
763  } else if ((condition & G_IO_IN) == 0) {
764  /*
765  #define GLIB_SYSDEF_POLLIN =1
766  #define GLIB_SYSDEF_POLLPRI =2
767  #define GLIB_SYSDEF_POLLOUT =4
768  #define GLIB_SYSDEF_POLLERR =8
769  #define GLIB_SYSDEF_POLLHUP =16
770  #define GLIB_SYSDEF_POLLNVAL =32
771 
772  typedef enum
773  {
774  G_IO_IN GLIB_SYSDEF_POLLIN,
775  G_IO_OUT GLIB_SYSDEF_POLLOUT,
776  G_IO_PRI GLIB_SYSDEF_POLLPRI,
777  G_IO_ERR GLIB_SYSDEF_POLLERR,
778  G_IO_HUP GLIB_SYSDEF_POLLHUP,
779  G_IO_NVAL GLIB_SYSDEF_POLLNVAL
780  } GIOCondition;
781 
782  A bitwise combination representing a condition to watch for on an event source.
783 
784  G_IO_IN There is data to read.
785  G_IO_OUT Data can be written (without blocking).
786  G_IO_PRI There is urgent data to read.
787  G_IO_ERR Error condition.
788  G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets).
789  G_IO_NVAL Invalid request. The file descriptor is not open.
790  */
791  crm_err("Strange condition: %d", condition);
792  }
793 
794  /* keep == FALSE results in mainloop_gio_destroy() being called
795  * just before the source is removed from mainloop
796  */
797  return keep;
798 }
799 
800 static void
801 mainloop_gio_destroy(gpointer c)
802 {
803  mainloop_io_t *client = c;
804  char *c_name = strdup(client->name);
805 
806  /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c
807  * client->channel will still have ref_count > 0... should be == 1
808  */
809  crm_trace("Destroying client %s[%p]", c_name, c);
810 
811  if (client->ipc) {
812  crm_ipc_close(client->ipc);
813  }
814 
815  if (client->destroy_fn) {
816  void (*destroy_fn) (gpointer userdata) = client->destroy_fn;
817 
818  client->destroy_fn = NULL;
819  destroy_fn(client->userdata);
820  }
821 
822  if (client->ipc) {
823  crm_ipc_t *ipc = client->ipc;
824 
825  client->ipc = NULL;
826  crm_ipc_destroy(ipc);
827  }
828 
829  crm_trace("Destroyed client %s[%p]", c_name, c);
830 
831  free(client->name); client->name = NULL;
832  free(client);
833 
834  free(c_name);
835 }
836 
838 mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata,
839  struct ipc_client_callbacks *callbacks)
840 {
841  mainloop_io_t *client = NULL;
842  crm_ipc_t *conn = crm_ipc_new(name, max_size);
843 
844  if (conn && crm_ipc_connect(conn)) {
845  int32_t fd = crm_ipc_get_fd(conn);
846 
847  client = mainloop_add_fd(name, priority, fd, userdata, NULL);
848  }
849 
850  if (client == NULL) {
851  crm_perror(LOG_TRACE, "Connection to %s failed", name);
852  if (conn) {
853  crm_ipc_close(conn);
854  crm_ipc_destroy(conn);
855  }
856  return NULL;
857  }
858 
859  client->ipc = conn;
860  client->destroy_fn = callbacks->destroy;
861  client->dispatch_fn_ipc = callbacks->dispatch;
862  return client;
863 }
864 
865 void
867 {
868  mainloop_del_fd(client);
869 }
870 
871 crm_ipc_t *
873 {
874  if (client) {
875  return client->ipc;
876  }
877  return NULL;
878 }
879 
881 mainloop_add_fd(const char *name, int priority, int fd, void *userdata,
882  struct mainloop_fd_callbacks * callbacks)
883 {
884  mainloop_io_t *client = NULL;
885 
886  if (fd >= 0) {
887  client = calloc(1, sizeof(mainloop_io_t));
888  if (client == NULL) {
889  return NULL;
890  }
891  client->name = strdup(name);
892  client->userdata = userdata;
893 
894  if (callbacks) {
895  client->destroy_fn = callbacks->destroy;
896  client->dispatch_fn_io = callbacks->dispatch;
897  }
898 
899  client->fd = fd;
900  client->channel = g_io_channel_unix_new(fd);
901  client->source =
902  g_io_add_watch_full(client->channel, priority,
903  (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback,
904  client, mainloop_gio_destroy);
905 
906  /* Now that mainloop now holds a reference to channel,
907  * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
908  *
909  * This means that channel will be free'd by:
910  * g_main_context_dispatch() or g_source_remove()
911  * -> g_source_destroy_internal()
912  * -> g_source_callback_unref()
913  * shortly after mainloop_gio_destroy() completes
914  */
915  g_io_channel_unref(client->channel);
916  crm_trace("Added connection %d for %s[%p].%d", client->source, client->name, client, fd);
917  } else {
918  errno = EINVAL;
919  }
920 
921  return client;
922 }
923 
924 void
926 {
927  if (client != NULL) {
928  crm_trace("Removing client %s[%p]", client->name, client);
929  if (client->source) {
930  /* Results in mainloop_gio_destroy() being called just
931  * before the source is removed from mainloop
932  */
933  g_source_remove(client->source);
934  }
935  }
936 }
937 
938 static GListPtr child_list = NULL;
939 
940 pid_t
942 {
943  return child->pid;
944 }
945 
946 const char *
948 {
949  return child->desc;
950 }
951 
952 int
954 {
955  return child->timeout;
956 }
957 
958 void *
960 {
961  return child->privatedata;
962 }
963 
964 void
966 {
967  child->privatedata = NULL;
968 }
969 
970 /* good function name */
971 static void
972 child_free(mainloop_child_t *child)
973 {
974  if (child->timerid != 0) {
975  crm_trace("Removing timer %d", child->timerid);
976  g_source_remove(child->timerid);
977  child->timerid = 0;
978  }
979  free(child->desc);
980  free(child);
981 }
982 
983 /* terrible function name */
984 static int
985 child_kill_helper(mainloop_child_t *child)
986 {
987  int rc;
988  if (child->flags & mainloop_leave_pid_group) {
989  crm_debug("Kill pid %d only. leave group intact.", child->pid);
990  rc = kill(child->pid, SIGKILL);
991  } else {
992  crm_debug("Kill pid %d's group", child->pid);
993  rc = kill(-child->pid, SIGKILL);
994  }
995 
996  if (rc < 0) {
997  if (errno != ESRCH) {
998  crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid);
999  }
1000  return -errno;
1001  }
1002  return 0;
1003 }
1004 
1005 static gboolean
1006 child_timeout_callback(gpointer p)
1007 {
1008  mainloop_child_t *child = p;
1009  int rc = 0;
1010 
1011  child->timerid = 0;
1012  if (child->timeout) {
1013  crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid);
1014  return FALSE;
1015  }
1016 
1017  rc = child_kill_helper(child);
1018  if (rc == -ESRCH) {
1019  /* Nothing left to do. pid doesn't exist */
1020  return FALSE;
1021  }
1022 
1023  child->timeout = TRUE;
1024  crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid);
1025 
1026  child->timerid = g_timeout_add(5000, child_timeout_callback, child);
1027  return FALSE;
1028 }
1029 
1030 static bool
1031 child_waitpid(mainloop_child_t *child, int flags)
1032 {
1033  int rc = 0;
1034  int core = 0;
1035  int signo = 0;
1036  int status = 0;
1037  int exitcode = 0;
1038  bool callback_needed = true;
1039 
1040  rc = waitpid(child->pid, &status, flags);
1041  if (rc == 0) { // WNOHANG in flags, and child status is not available
1042  crm_trace("Child process %d (%s) still active",
1043  child->pid, child->desc);
1044  callback_needed = false;
1045 
1046  } else if (rc != child->pid) {
1047  /* According to POSIX, possible conditions:
1048  * - child->pid was non-positive (process group or any child),
1049  * and rc is specific child
1050  * - errno ECHILD (pid does not exist or is not child)
1051  * - errno EINVAL (invalid flags)
1052  * - errno EINTR (caller interrupted by signal)
1053  *
1054  * @TODO Handle these cases more specifically.
1055  */
1056  signo = SIGCHLD;
1057  exitcode = 1;
1058  crm_notice("Wait for child process %d (%s) interrupted: %s",
1059  child->pid, child->desc, pcmk_strerror(errno));
1060 
1061  } else if (WIFEXITED(status)) {
1062  exitcode = WEXITSTATUS(status);
1063  crm_trace("Child process %d (%s) exited with status %d",
1064  child->pid, child->desc, exitcode);
1065 
1066  } else if (WIFSIGNALED(status)) {
1067  signo = WTERMSIG(status);
1068  crm_trace("Child process %d (%s) exited with signal %d (%s)",
1069  child->pid, child->desc, signo, strsignal(signo));
1070 
1071 #ifdef WCOREDUMP // AIX, SunOS, maybe others
1072  } else if (WCOREDUMP(status)) {
1073  core = 1;
1074  crm_err("Child process %d (%s) dumped core",
1075  child->pid, child->desc);
1076 #endif
1077 
1078  } else { // flags must contain WUNTRACED and/or WCONTINUED to reach this
1079  crm_trace("Child process %d (%s) stopped or continued",
1080  child->pid, child->desc);
1081  callback_needed = false;
1082  }
1083 
1084  if (callback_needed && child->callback) {
1085  child->callback(child, child->pid, core, signo, exitcode);
1086  }
1087  return callback_needed;
1088 }
1089 
1090 static void
1091 child_death_dispatch(int signal)
1092 {
1093  for (GList *iter = child_list; iter; ) {
1094  GList *saved = iter;
1095  mainloop_child_t *child = iter->data;
1096 
1097  iter = iter->next;
1098  if (child_waitpid(child, WNOHANG)) {
1099  crm_trace("Removing completed process %d from child list",
1100  child->pid);
1101  child_list = g_list_remove_link(child_list, saved);
1102  g_list_free(saved);
1103  child_free(child);
1104  }
1105  }
1106 }
1107 
1108 static gboolean
1109 child_signal_init(gpointer p)
1110 {
1111  crm_trace("Installed SIGCHLD handler");
1112  /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */
1113  mainloop_add_signal(SIGCHLD, child_death_dispatch);
1114 
1115  /* In case they terminated before the signal handler was installed */
1116  child_death_dispatch(SIGCHLD);
1117  return FALSE;
1118 }
1119 
1120 gboolean
1122 {
1123  GListPtr iter;
1124  mainloop_child_t *child = NULL;
1125  mainloop_child_t *match = NULL;
1126  /* It is impossible to block SIGKILL, this allows us to
1127  * call waitpid without WNOHANG flag.*/
1128  int waitflags = 0, rc = 0;
1129 
1130  for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) {
1131  child = iter->data;
1132  if (pid == child->pid) {
1133  match = child;
1134  }
1135  }
1136 
1137  if (match == NULL) {
1138  return FALSE;
1139  }
1140 
1141  rc = child_kill_helper(match);
1142  if(rc == -ESRCH) {
1143  /* It's gone, but hasn't shown up in waitpid() yet. Wait until we get
1144  * SIGCHLD and let handler clean it up as normal (so we get the correct
1145  * return code/status). The blocking alternative would be to call
1146  * child_waitpid(match, 0).
1147  */
1148  crm_trace("Waiting for signal that child process %d completed",
1149  match->pid);
1150  return TRUE;
1151 
1152  } else if(rc != 0) {
1153  /* If KILL for some other reason set the WNOHANG flag since we
1154  * can't be certain what happened.
1155  */
1156  waitflags = WNOHANG;
1157  }
1158 
1159  if (!child_waitpid(match, waitflags)) {
1160  /* not much we can do if this occurs */
1161  return FALSE;
1162  }
1163 
1164  child_list = g_list_remove(child_list, match);
1165  child_free(match);
1166  return TRUE;
1167 }
1168 
1169 /* Create/Log a new tracked process
1170  * To track a process group, use -pid
1171  *
1172  * @TODO Using a non-positive pid (i.e. any child, or process group) would
1173  * likely not be useful since we will free the child after the first
1174  * completed process.
1175  */
1176 void
1177 mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *privatedata, enum mainloop_child_flags flags,
1178  void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode))
1179 {
1180  static bool need_init = TRUE;
1181  mainloop_child_t *child = g_new(mainloop_child_t, 1);
1182 
1183  child->pid = pid;
1184  child->timerid = 0;
1185  child->timeout = FALSE;
1186  child->privatedata = privatedata;
1187  child->callback = callback;
1188  child->flags = flags;
1189 
1190  if(desc) {
1191  child->desc = strdup(desc);
1192  }
1193 
1194  if (timeout) {
1195  child->timerid = g_timeout_add(timeout, child_timeout_callback, child);
1196  }
1197 
1198  child_list = g_list_append(child_list, child);
1199 
1200  if(need_init) {
1201  need_init = FALSE;
1202  /* SIGCHLD processing has to be invoked from mainloop.
1203  * We do not want it to be possible to both add a child pid
1204  * to mainloop, and have the pid's exit callback invoked within
1205  * the same callstack. */
1206  g_timeout_add(1, child_signal_init, NULL);
1207  }
1208 }
1209 
1210 void
1211 mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata,
1212  void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode))
1213 {
1214  mainloop_child_add_with_flags(pid, timeout, desc, privatedata, 0, callback);
1215 }
1216 
1217 struct mainloop_timer_s {
1218  guint id;
1219  guint period_ms;
1220  bool repeat;
1221  char *name;
1222  GSourceFunc cb;
1223  void *userdata;
1224 };
1225 
1226 static gboolean
1227 mainloop_timer_cb(gpointer user_data)
1228 {
1229  int id = 0;
1230  bool repeat = FALSE;
1231  struct mainloop_timer_s *t = user_data;
1232 
1233  CRM_ASSERT(t != NULL);
1234 
1235  id = t->id;
1236  t->id = 0; /* Ensure it's unset during callbacks so that
1237  * mainloop_timer_running() works as expected
1238  */
1239 
1240  if(t->cb) {
1241  crm_trace("Invoking callbacks for timer %s", t->name);
1242  repeat = t->repeat;
1243  if(t->cb(t->userdata) == FALSE) {
1244  crm_trace("Timer %s complete", t->name);
1245  repeat = FALSE;
1246  }
1247  }
1248 
1249  if(repeat) {
1250  /* Restore if repeating */
1251  t->id = id;
1252  }
1253 
1254  return repeat;
1255 }
1256 
1257 bool
1259 {
1260  if(t && t->id != 0) {
1261  return TRUE;
1262  }
1263  return FALSE;
1264 }
1265 
1266 void
1268 {
1270  if(t && t->period_ms > 0) {
1271  crm_trace("Starting timer %s", t->name);
1272  t->id = g_timeout_add(t->period_ms, mainloop_timer_cb, t);
1273  }
1274 }
1275 
1276 void
1278 {
1279  if(t && t->id != 0) {
1280  crm_trace("Stopping timer %s", t->name);
1281  g_source_remove(t->id);
1282  t->id = 0;
1283  }
1284 }
1285 
1286 guint
1288 {
1289  guint last = 0;
1290 
1291  if(t) {
1292  last = t->period_ms;
1293  t->period_ms = period_ms;
1294  }
1295 
1296  if(t && t->id != 0 && last != t->period_ms) {
1298  }
1299  return last;
1300 }
1301 
1303 mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata)
1304 {
1305  mainloop_timer_t *t = calloc(1, sizeof(mainloop_timer_t));
1306 
1307  if(t) {
1308  if(name) {
1309  t->name = crm_strdup_printf("%s-%u-%d", name, period_ms, repeat);
1310  } else {
1311  t->name = crm_strdup_printf("%p-%u-%d", t, period_ms, repeat);
1312  }
1313  t->id = 0;
1314  t->period_ms = period_ms;
1315  t->repeat = repeat;
1316  t->cb = cb;
1317  t->userdata = userdata;
1318  crm_trace("Created timer %s with %p %p", t->name, userdata, t->userdata);
1319  }
1320  return t;
1321 }
1322 
1323 void
1325 {
1326  if(t) {
1327  crm_trace("Destroying timer %s", t->name);
1329  free(t->name);
1330  free(t);
1331  }
1332 }
1333 
1334 /*
1335  * Helpers to make sure certain events aren't lost at shutdown
1336  */
1337 
1338 static gboolean
1339 drain_timeout_cb(gpointer user_data)
1340 {
1341  bool *timeout_popped = (bool*) user_data;
1342 
1343  *timeout_popped = TRUE;
1344  return FALSE;
1345 }
1346 
1359 void
1360 pcmk_drain_main_loop(GMainLoop *mloop, guint timer_ms, bool (*check)(guint))
1361 {
1362  bool timeout_popped = FALSE;
1363  guint timer = 0;
1364  GMainContext *ctx = NULL;
1365 
1366  CRM_CHECK(mloop && check, return);
1367 
1368  ctx = g_main_loop_get_context(mloop);
1369  if (ctx) {
1370  time_t start_time = time(NULL);
1371 
1372  timer = g_timeout_add(timer_ms, drain_timeout_cb, &timeout_popped);
1373  while (!timeout_popped
1374  && check(timer_ms - (time(NULL) - start_time) * 1000)) {
1375  g_main_context_iteration(ctx, TRUE);
1376  }
1377  }
1378  if (!timeout_popped && (timer > 0)) {
1379  g_source_remove(timer);
1380  }
1381 }
1382 
1383 // Deprecated functions kept only for backward API compatibility
1384 gboolean crm_signal(int sig, void (*dispatch) (int sig));
1385 
1386 /*
1387  * \brief Use crm_signal_handler() instead
1388  * \deprecated
1389  */
1390 gboolean
1391 crm_signal(int sig, void (*dispatch) (int sig))
1392 {
1393  return crm_signal_handler(sig, dispatch) != SIG_ERR;
1394 }
1395 
#define LOG_TRACE
Definition: logging.h:36
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:233
bool crm_ipc_connect(crm_ipc_t *client)
Establish an IPC connection to a Pacemaker component.
Definition: ipc.c:1158
A dumping ground.
#define crm_notice(fmt, args...)
Definition: logging.h:365
const char * pcmk_strerror(int rc)
Definition: results.c:55
struct signal_s crm_signal_t
#define crm_crit(fmt, args...)
Definition: logging.h:362
gboolean mainloop_add_signal(int sig, void(*dispatch)(int sig))
Definition: mainloop.c:328
mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks *callbacks)
Definition: mainloop.c:881
void mainloop_timer_start(mainloop_timer_t *t)
Definition: mainloop.c:1267
guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms)
Definition: mainloop.c:1287
mainloop_child_flags
Definition: mainloop.h:26
void mainloop_timer_del(mainloop_timer_t *t)
Definition: mainloop.c:1324
gboolean mainloop_child_kill(pid_t pid)
Definition: mainloop.c:1121
int crm_ipc_get_fd(crm_ipc_t *client)
Definition: ipc.c:1263
void(* sighandler_t)(int)
Definition: mainloop.h:48
qb_ipcs_service_t * mainloop_add_ipc_server_with_prio(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks, enum qb_loop_priority prio)
Start server-side API end-point, hooked into the internal event loop.
Definition: mainloop.c:650
void pcmk_drain_main_loop(GMainLoop *mloop, guint timer_ms, bool(*check)(guint))
Process main loop events while a certain condition is met.
Definition: mainloop.c:1360
struct mainloop_timer_s mainloop_timer_t
Definition: mainloop.h:34
struct mainloop_io_s mainloop_io_t
Definition: mainloop.h:32
void mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *userdata, enum mainloop_child_flags, void(*callback)(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode))
Definition: mainloop.c:1177
struct mainloop_child_s mainloop_child_t
Definition: mainloop.h:33
void mainloop_set_trigger(crm_trigger_t *source)
Definition: mainloop.c:171
void mainloop_cleanup(void)
Definition: mainloop.c:402
int(* dispatch)(gpointer userdata)
Definition: mainloop.h:115
pid_t mainloop_child_pid(mainloop_child_t *child)
Definition: mainloop.c:941
long crm_ipc_read(crm_ipc_t *client)
Definition: ipc.c:1373
uint32_t pid
Definition: internal.h:81
gboolean mainloop_destroy_trigger(crm_trigger_t *source)
Definition: mainloop.c:179
struct qb_ipcs_poll_handlers gio_poll_funcs
Definition: mainloop.c:610
void mainloop_timer_stop(mainloop_timer_t *t)
Definition: mainloop.c:1277
Wrappers for and extensions to glib mainloop.
const char * crm_ipc_buffer(crm_ipc_t *client)
Definition: ipc.c:1420
struct trigger_s crm_trigger_t
Definition: mainloop.h:31
uint32_t id
Definition: internal.h:80
void(* destroy)(gpointer)
Definition: mainloop.h:75
#define crm_warn(fmt, args...)
Definition: logging.h:364
int rc
Definition: pcmk_fence.c:34
#define crm_debug(fmt, args...)
Definition: logging.h:368
void * mainloop_child_userdata(mainloop_child_t *child)
Definition: mainloop.c:959
struct crm_ipc_s crm_ipc_t
Definition: ipc.h:58
void(* destroy)(gpointer userdata)
Definition: mainloop.h:116
gboolean crm_signal(int sig, void(*dispatch)(int sig))
Definition: mainloop.c:1391
#define crm_trace(fmt, args...)
Definition: logging.h:369
sighandler_t crm_signal_handler(int sig, sighandler_t dispatch)
Definition: mainloop.c:280
Wrappers for and extensions to libxml2.
crm_trigger_t * mainloop_add_trigger(int priority, int(*dispatch)(gpointer user_data), gpointer userdata)
Definition: mainloop.c:159
void mainloop_del_ipc_client(mainloop_io_t *client)
Definition: mainloop.c:866
unsigned int crm_ipc_default_buffer_size(void)
Definition: ipc.c:71
void crm_ipc_destroy(crm_ipc_t *client)
Definition: ipc.c:1240
bool mainloop_timer_running(mainloop_timer_t *t)
Definition: mainloop.c:1258
bool crm_ipc_connected(crm_ipc_t *client)
Definition: ipc.c:1277
#define CRM_XS
Definition: logging.h:54
void mainloop_child_add(pid_t pid, int timeout, const char *desc, void *userdata, void(*callback)(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode))
Definition: mainloop.c:1211
#define crm_perror(level, fmt, args...)
Send a system error message to both the log and stderr.
Definition: logging.h:314
void mainloop_trigger_complete(crm_trigger_t *trig)
Definition: mainloop.c:147
crm_ipc_t * mainloop_get_ipc_client(mainloop_io_t *client)
Definition: mainloop.c:872
#define crm_err(fmt, args...)
Definition: logging.h:363
#define CRM_ASSERT(expr)
Definition: results.h:42
void mainloop_clear_child_userdata(mainloop_child_t *child)
Definition: mainloop.c:965
crm_ipc_t * crm_ipc_new(const char *name, size_t max_size)
Definition: ipc.c:1128
char data[0]
Definition: internal.h:90
void mainloop_del_fd(mainloop_io_t *client)
Definition: mainloop.c:925
void mainloop_del_ipc_server(qb_ipcs_service_t *server)
Definition: mainloop.c:689
mainloop_timer_t * mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata)
Definition: mainloop.c:1303
gboolean mainloop_destroy_signal(int sig)
Definition: mainloop.c:382
mainloop_io_t * mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, struct ipc_client_callbacks *callbacks)
Definition: mainloop.c:838
qb_ipcs_service_t * mainloop_add_ipc_server(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers *callbacks)
Definition: mainloop.c:643
char * name
Definition: pcmk_fence.c:30
void crm_ipc_close(crm_ipc_t *client)
Definition: ipc.c:1225
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
const char * mainloop_child_name(mainloop_child_t *child)
Definition: mainloop.c:947
unsigned int timeout
Definition: pcmk_fence.c:31
GList * GListPtr
Definition: crm.h:214
int(* dispatch)(const char *buffer, ssize_t length, gpointer userdata)
Definition: mainloop.h:74
uint64_t flags
Definition: remote.c:149
enum crm_ais_msg_types type
Definition: internal.h:83
int mainloop_child_timeout(mainloop_child_t *child)
Definition: mainloop.c:953