This source file includes the following definitions.
- sysrq_trigger
- panic_local
- panic_sbd
- pcmk__panic
- pcmk__locate_sbd
- pcmk__get_sbd_timeout
- pcmk__get_sbd_sync_resource_startup
- pcmk__auto_watchdog_timeout
- pcmk__valid_sbd_timeout
1
2
3
4
5
6
7
8
9
10 #include <crm_internal.h>
11
12 #include <sched.h>
13 #include <sys/ioctl.h>
14 #include <sys/reboot.h>
15
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <unistd.h>
19 #include <ctype.h>
20 #include <dirent.h>
21 #include <signal.h>
22
23 #ifdef _POSIX_MEMLOCK
24 # include <sys/mman.h>
25 #endif
26
/* PID of the sbd daemon as last determined by pcmk__locate_sbd();
 * 0 until one has been found (pcmk__locate_sbd() resets negative results to 0)
 */
27 static pid_t sbd_pid = 0;
28
/*!
 * \internal
 * \brief Write one command character to the kernel's sysrq trigger file
 *
 * This is a no-op unless SUPPORT_PROCFS is enabled at build time. Any failure
 * (open, write, or close) is logged as a warning and otherwise ignored, so the
 * caller can carry on with whatever fallback it has.
 *
 * \param[in] t  Command character to write to /proc/sysrq-trigger
 */
static void
sysrq_trigger(char t)
{
#if SUPPORT_PROCFS
    FILE *procf = fopen("/proc/sysrq-trigger", "a");

    if (procf == NULL) {
        crm_perror(LOG_WARNING, "Opening sysrq-trigger failed");
        return;
    }
    crm_info("sysrq-trigger: %c", t);

    if (fprintf(procf, "%c\n", t) < 0) {
        crm_perror(LOG_WARNING, "Writing to sysrq-trigger failed");
        fclose(procf);
        return;
    }

    /* stdio may buffer the character, in which case a rejected write is only
     * reported when the stream is flushed by fclose()
     */
    if (fclose(procf) != 0) {
        crm_perror(LOG_WARNING, "Writing to sysrq-trigger failed");
    }
#else
    (void) t; // Nothing to do without procfs support
#endif
}
47
48
49
50
51
52
53 static void
54 panic_local(void)
55 {
56 int rc = pcmk_ok;
57 uid_t uid = geteuid();
58 pid_t ppid = getppid();
59
60 if(uid != 0 && ppid > 1) {
61
62
63
64
65
66
67 crm_emerg("Signaling parent %lld to panic", (long long) ppid);
68 crm_exit(CRM_EX_PANIC);
69 return;
70
71 } else if (uid != 0) {
72 #if SUPPORT_PROCFS
73
74
75
76
77 union sigval signal_value;
78
79 memset(&signal_value, 0, sizeof(signal_value));
80 ppid = pcmk__procfs_pid_of("pacemakerd");
81 crm_emerg("Signaling pacemakerd[%lld] to panic", (long long) ppid);
82
83 if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) {
84 crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic",
85 (long long) ppid);
86 }
87 #endif
88
89
90 crm_exit(CRM_EX_PANIC);
91 return;
92 }
93
94
95
96 if (pcmk__str_eq("crash", getenv("PCMK_panic_action"), pcmk__str_casei)) {
97 sysrq_trigger('c');
98 } else {
99 sysrq_trigger('b');
100 }
101
102 reboot(RB_AUTOBOOT);
103 rc = errno;
104
105 crm_emerg("Reboot failed, escalating to parent %lld: %s " CRM_XS " rc=%d",
106 (long long) ppid, pcmk_rc_str(rc), rc);
107
108 if(ppid > 1) {
109
110 exit(CRM_EX_PANIC);
111 } else {
112
113 exit(CRM_EX_FATAL);
114 }
115 }
116
117
118
119
120
121 static void
122 panic_sbd(void)
123 {
124 union sigval signal_value;
125 pid_t ppid = getppid();
126
127 crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid);
128
129 memset(&signal_value, 0, sizeof(signal_value));
130
131 if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) {
132 crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate",
133 (long long) sbd_pid);
134 panic_local();
135 }
136
137 if(ppid > 1) {
138
139 exit(CRM_EX_PANIC);
140 } else {
141
142 exit(CRM_EX_FATAL);
143 }
144 }
145
146
147
148
149
150
151
152
153
154
155 void
156 pcmk__panic(const char *origin)
157 {
158 static struct qb_log_callsite *panic_cs = NULL;
159
160 if (panic_cs == NULL) {
161 panic_cs = qb_log_callsite_get(__func__, __FILE__, "panic-delay",
162 LOG_TRACE, __LINE__, crm_trace_nonlog);
163 }
164
165
166 (void) pcmk__locate_sbd();
167
168 if (panic_cs && panic_cs->targets) {
169
170 crm_emerg("Shutting down instead of panicking the node "
171 CRM_XS " origin=%s sbd=%lld parent=%d",
172 origin, (long long) sbd_pid, getppid());
173 crm_exit(CRM_EX_FATAL);
174 return;
175 }
176
177 if(sbd_pid > 1) {
178 crm_emerg("Signaling sbd[%lld] to panic the system: %s",
179 (long long) sbd_pid, origin);
180 panic_sbd();
181
182 } else {
183 crm_emerg("Panicking the system directly: %s", origin);
184 panic_local();
185 }
186 }
187
188
189
190
191
192 pid_t
193 pcmk__locate_sbd(void)
194 {
195 char *pidfile = NULL;
196 char *sbd_path = NULL;
197 int rc;
198
199 if(sbd_pid > 1) {
200 return sbd_pid;
201 }
202
203
204 pidfile = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid");
205 sbd_path = crm_strdup_printf("%s/sbd", SBIN_DIR);
206
207
208 rc = pcmk__pidfile_matches(pidfile, 0, sbd_path, &sbd_pid);
209 if (rc == pcmk_rc_ok) {
210 crm_trace("SBD detected at pid %lld (via PID file %s)",
211 (long long) sbd_pid, pidfile);
212
213 #if SUPPORT_PROCFS
214 } else {
215
216 sbd_pid = pcmk__procfs_pid_of("sbd");
217 crm_trace("SBD detected at pid %lld (via procfs)",
218 (long long) sbd_pid);
219 #endif
220 }
221
222 if(sbd_pid < 0) {
223 sbd_pid = 0;
224 crm_trace("SBD not detected");
225 }
226
227 free(pidfile);
228 free(sbd_path);
229
230 return sbd_pid;
231 }
232
/*!
 * \internal
 * \brief Get the SBD watchdog timeout from the environment
 *
 * The SBD_WATCHDOG_TIMEOUT environment variable is parsed with
 * crm_get_msec() the first time this is called; the result is remembered for
 * later calls.
 *
 * \return Parsed value of SBD_WATCHDOG_TIMEOUT
 */
long
pcmk__get_sbd_timeout(void)
{
    static long cached_timeout = -2; // -2 marks "not read yet"

    if (cached_timeout != -2) {
        return cached_timeout;
    }

    cached_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT"));
    return cached_timeout;
}
243
/*!
 * \internal
 * \brief Check whether SBD_SYNC_RESOURCE_STARTUP is set to a true value
 *
 * The environment variable is evaluated with crm_is_true() on the first call
 * only; later calls return the remembered result.
 *
 * \return Result of crm_is_true() on the SBD_SYNC_RESOURCE_STARTUP value
 */
bool
pcmk__get_sbd_sync_resource_startup(void)
{
    static bool already_read = false;
    static bool cached_value = false;

    if (already_read) {
        return cached_value;
    }

    cached_value = crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP"));
    already_read = true;
    return cached_value;
}
258
/*!
 * \internal
 * \brief Calculate a watchdog timeout from the SBD timeout
 *
 * \return Twice the value of pcmk__get_sbd_timeout() if that is positive,
 *         otherwise 0
 */
long
pcmk__auto_watchdog_timeout(void)
{
    long sbd_timeout = pcmk__get_sbd_timeout();

    if (sbd_timeout <= 0) {
        return 0;
    }
    return 2 * sbd_timeout;
}
266
267 bool
268 pcmk__valid_sbd_timeout(const char *value)
269 {
270 long st_timeout = value? crm_get_msec(value) : 0;
271
272 if (st_timeout < 0) {
273 st_timeout = pcmk__auto_watchdog_timeout();
274 crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)",
275 st_timeout, value);
276 }
277
278 if (st_timeout == 0) {
279 crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)",
280 value? value : "default");
281
282 } else if (pcmk__locate_sbd() == 0) {
283 crm_emerg("Shutting down: stonith-watchdog-timeout configured (%s) "
284 "but SBD not active", (value? value : "auto"));
285 crm_exit(CRM_EX_FATAL);
286 return false;
287
288 } else {
289 long sbd_timeout = pcmk__get_sbd_timeout();
290
291 if (st_timeout < sbd_timeout) {
292 crm_emerg("Shutting down: stonith-watchdog-timeout (%s) too short "
293 "(must be >%ldms)", value, sbd_timeout);
294 crm_exit(CRM_EX_FATAL);
295 return false;
296 }
297 crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms",
298 value, sbd_timeout);
299 }
300 return true;
301 }