1 /*
2 * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This software is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18 #ifndef FSA_DEFINES__H
19 # define FSA_DEFINES__H
20
/*======================================
 * States the DC/CRMd can be in
 *
 * The enumerator values are written out explicitly; they are exactly the
 * values the compiler would assign implicitly (0, 1, 2, ...).
 *======================================*/
enum crmd_fsa_state {
    /* Nothing happening */
    S_IDLE = 0,

    /* Take part in the election algorithm as described in the comment
     * that follows this enum
     */
    S_ELECTION = 1,

    /* Integrate the status of new nodes (which is all of them if we have
     * just been elected DC) to form a complete and up-to-date picture of
     * the CIB
     */
    S_INTEGRATION = 2,

    /* NOTE(review): the previous comment on this state was a verbatim copy
     * of the S_INTEGRATION text. Presumably this is the state in which the
     * join of the integrated nodes is finalized -- confirm.
     */
    S_FINALIZE_JOIN = 3,

    /* We are in crmd/slave mode */
    S_NOT_DC = 4,

    /* Determine next stable state of the cluster */
    S_POLICY_ENGINE = 5,

    /* Something bad happened, check everything is ok before continuing
     * and attempt to recover if required
     */
    S_RECOVERY = 6,

    /* We were the DC, but now we aren't anymore, possibly by our own
     * request, and we should release all unnecessary sub-systems, finish
     * any pending actions, do general cleanup and unset anything that
     * makes us think we are special :)
     */
    S_RELEASE_DC = 7,

    /* We are just starting out */
    S_STARTING = 8,

    /* We are not a full/active member yet */
    S_PENDING = 9,

    /* We are in the final stages of shutting down */
    S_STOPPING = 10,

    /* We are going to shutdown, this is the equiv of "Sending TERM signal
     * to all processes" in Linux and in worst case scenarios could be
     * considered a self STONITH
     */
    S_TERMINATE = 11,

    /* Attempt to make the calculated next stable state of the cluster a
     * reality
     */
    S_TRANSITION_ENGINE = 12,

    /* Freeze - don't do anything.
     * Something bad happened that needs the admin to fix.
     * Wait for I_ELECTION.
     */
    S_HALT = 13,

    /* ----------- Last entry found in table is above ---------- */

    /* This is an illegal FSA state (must be last) */
    S_ILLEGAL = 14
};

/* Number of legal states; S_ILLEGAL is the first value past them. */
# define MAXSTATE S_ILLEGAL
76 /*
A state diagram can be constructed from crmd_fsa.dot with the
following command:
79
80 dot -Tpng crmd_fsa.dot > crmd_fsa.png
81
82 Description:
83
84 Once we start and do some basic sanity checks, we go into the
85 S_NOT_DC state and await instructions from the DC or input from
86 the CCM which indicates the election algorithm needs to run.
87
If the election algorithm is triggered we enter the S_ELECTION state
from where we can either go back to the S_NOT_DC state or progress
to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC
but aren't anymore).
92
93 The election algorithm has been adapted from
94 http://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR521
95
Loosely known as the Bully Algorithm, its major points are:
- Election is initiated by any node (N) that notices that the
controller is no longer responding
99 - Concurrent multiple elections are possible
100 - Algorithm
101 + N sends ELECTION messages to all nodes that occur earlier in
102 the CCM's membership list.
103 + If no one responds, N wins and becomes controller
104 + N sends out CONTROLLER messages to all other nodes in the
105 partition
106 + If one of higher-ups answers, it takes over. N is done.
107
Once the election is complete, if we are the DC, we enter the
S_INTEGRATION state which is a DC-in-waiting style state. We are
the DC, but we shouldn't do anything yet because we may not have an
up-to-date picture of the cluster. There may of course be times
when this fails, so we should go back to the S_RECOVERY stage and
check everything is ok. We may also end up here if a new node came
online, since each node is authoritative on itself and we would want
to incorporate its information into the CIB.
116
Once we have the latest CIB, we then enter the S_POLICY_ENGINE state
where we invoke the Policy Engine. It is possible that, between
invoking the Policy Engine and receiving an answer, we receive
more input. In this case we would discard the original result and
invoke it again.
122
123 Once we are satisfied with the output from the Policy Engine we
124 enter S_TRANSITION_ENGINE and feed the Policy Engine's output to the
125 Transition Engine who attempts to make the Policy Engine's
126 calculation a reality. If the transition completes successfully,
127 we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the
128 current unstable state and try again.
129
Of course we may be asked to shutdown at any time, however we must
progress to S_NOT_DC before doing so. Once we have handed over DC
duties to another node, we can then shut down like everyone else,
that is by asking the DC for permission and waiting for it to take
all our resources away.
135
The case where we are the DC and the only node in the cluster is a
special case and handled as an escalation which takes us to
S_SHUTDOWN (NOTE: no such state is defined in enum crmd_fsa_state;
presumably S_STOPPING is meant). Similarly, if any other point in the
shutdown fails or stalls, this is escalated and we end up in S_TERMINATE.
140
141 At any point, the CRMd/DC can relay messages for its sub-systems,
142 but outbound messages (from sub-systems) should probably be blocked
143 until S_INTEGRATION (for the DC case) or the join protocol has
144 completed (for the CRMd case)
145
146 */
147
/*======================================
 *
 * Inputs/Events/Stimuli to be given to the finite state machine
 *
 * Some of these are true events, and others are synthesised based on
 * the "register" (see below) and the contents or source of messages.
 *
 * At this point, my plan is to have a loop of some sort that keeps
 * going until receiving I_NULL
 *
 * The enumerator values are written out explicitly (they equal the
 * values the compiler would assign implicitly). They replace the old
 * positional marker comments, which had drifted: I_ROUTER is 19, not
 * 20, and I_PENDING is 31, not 30.
 *
 *======================================*/
enum crmd_fsa_input {
    /* Nothing happened */
    I_NULL = 0,

    /* An update to the CIB occurred */
    I_CIB_OP = 1,
    /* An update to the CIB occurred */
    I_CIB_UPDATE = 2,
    /* We have lost communication with the DC */
    I_DC_TIMEOUT = 3,
    /* Someone started an election */
    I_ELECTION = 4,
    /* The Policy Engine needs to be invoked */
    I_PE_CALC = 5,
    /* The election completed and we were not elected, but we were the DC
     * beforehand
     */
    I_RELEASE_DC = 6,
    /* The election completed and we were (re-)elected DC */
    I_ELECTION_DC = 7,
    /* Something bad happened (more serious than I_FAIL) and may not have
     * been due to the action being performed. For example, we may have
     * lost our connection to the CIB.
     */
    I_ERROR = 8,
    /* The action failed to complete successfully */
    I_FAIL = 9,

    I_INTEGRATED = 10,
    I_FINALIZED = 11,
    /* A node has entered the cluster */
    I_NODE_JOIN = 12,
    /* We are not and were not the DC before or after the current
     * operation or state
     */
    I_NOT_DC = 13,
    /* The recovery process completed successfully */
    I_RECOVERED = 14,
    /* We could not give up DC status for some reason */
    I_RELEASE_FAIL = 15,
    /* We are no longer the DC */
    I_RELEASE_SUCCESS = 16,
    /* The current set of actions needs to be restarted */
    I_RESTART = 17,
    /* Some non-resource, non-ccm action is required of us, eg. ping
     * NOTE(review): this text does not obviously match the name -- confirm
     */
    I_TE_SUCCESS = 18,
    /* Do our job as router and forward this to the right place */
    I_ROUTER = 19,

    /* We are asking to shutdown */
    I_SHUTDOWN = 20,
    /* We have been told to shutdown */
    I_STOP = 21,
    /* Actually exit */
    I_TERMINATE = 22,
    I_STARTUP = 23,
    /* The action completed successfully */
    I_PE_SUCCESS = 24,

    /* The DC is offering membership */
    I_JOIN_OFFER = 25,
    /* The client is requesting membership */
    I_JOIN_REQUEST = 26,
    /* If not the DC: The result of a join request
     * Else: A client is responding with its local state info
     */
    I_JOIN_RESULT = 27,

    /* We may be waiting for an async task to "happen" and until it does,
     * we can't do anything else
     */
    I_WAIT_FOR_EVENT = 28,

    /* The DC is telling us that it is alive and well */
    I_DC_HEARTBEAT = 29,

    I_LRM_EVENT = 30,

    I_PENDING = 31,
    I_HALT = 32,

    /* ------------ Last input found in table is above ----------- */

    /* This is an illegal value for an FSA input (must be last) */
    I_ILLEGAL = 33
};

/* Number of legal inputs; I_ILLEGAL is the first value past them. */
# define MAXINPUT I_ILLEGAL

/* Alias: I_MESSAGE is the same input as I_ROUTER. */
# define I_MESSAGE I_ROUTER
234
/*======================================
 *
 * actions
 *
 * Every action is one bit of an unsigned long long mask, written below
 * as (1ULL << bit) so the bit position is visible. Bits 35, 38-40, 43
 * and 63 are not assigned in this list.
 *
 * Some of the actions below will always occur together for now, but I can
 * foresee that this may not always be the case. So I've split them up so
 * that if they ever do need to be called independently in the future, it
 * won't be a problem.
 *
 * For example, separating A_LRM_CONNECT from A_STARTUP might be useful
 * if we ever try to recover from a faulty or disconnected LRM.
 *
 *======================================*/

/* Don't do anything */
# define A_NOTHING                  0ULL

/* -- Startup actions -- */
/* Hook to perform any actions (other than starting the CIB,
 * connecting to HA or the CCM) that might be needed as part
 * of the startup.
 */
# define A_STARTUP                  (1ULL << 0)
/* Hook to perform any actions that might be needed after
 * startup is successful.
 */
# define A_STARTED                  (1ULL << 1)
/* Connect to Heartbeat */
# define A_HA_CONNECT               (1ULL << 2)
# define A_HA_DISCONNECT            (1ULL << 3)

# define A_INTEGRATE_TIMER_START    (1ULL << 4)
# define A_INTEGRATE_TIMER_STOP     (1ULL << 5)
# define A_FINALIZE_TIMER_START     (1ULL << 6)
# define A_FINALIZE_TIMER_STOP      (1ULL << 7)

/* -- Election actions -- */
# define A_DC_TIMER_START           (1ULL << 8)
# define A_DC_TIMER_STOP            (1ULL << 9)
# define A_ELECTION_COUNT           (1ULL << 10)
# define A_ELECTION_VOTE            (1ULL << 11)

# define A_ELECTION_START           (1ULL << 12)

/* -- Message processing -- */
/* Process the queue of requests */
# define A_MSG_PROCESS              (1ULL << 13)
/* Send the message to the correct recipient */
# define A_MSG_ROUTE                (1ULL << 14)

/* Send a welcome message to new node(s) */
# define A_DC_JOIN_OFFER_ONE        (1ULL << 15)

/* -- Server Join protocol actions -- */
/* Send a welcome message to all nodes */
# define A_DC_JOIN_OFFER_ALL        (1ULL << 16)
/* Process the remote node's ack of our join message */
# define A_DC_JOIN_PROCESS_REQ      (1ULL << 17)
/* Send out the results of the Join phase */
# define A_DC_JOIN_FINALIZE         (1ULL << 18)
/* NOTE(review): the previous comment here duplicated the one on
 * A_DC_JOIN_FINALIZE; the intended description is not visible in this
 * header -- confirm what this action does.
 */
# define A_DC_JOIN_PROCESS_ACK      (1ULL << 19)

/* -- Client Join protocol actions -- */
# define A_CL_JOIN_QUERY            (1ULL << 20)
# define A_CL_JOIN_ANNOUNCE         (1ULL << 21)
/* Request membership to the DC list */
# define A_CL_JOIN_REQUEST          (1ULL << 22)
/* Did the DC accept or reject the request */
# define A_CL_JOIN_RESULT           (1ULL << 23)

/* -- Recovery, DC start/stop -- */
/* Something bad happened, try to recover */
# define A_RECOVER                  (1ULL << 24)
/* Hook to perform any actions (apart from starting the TE, PE
 * and gathering the latest CIB) that might be necessary before
 * giving up the responsibilities of being the DC.
 */
# define A_DC_RELEASE               (1ULL << 25)
/* (no description given) */
# define A_DC_RELEASED              (1ULL << 26)
/* Hook to perform any actions (apart from starting the TE, PE
 * and gathering the latest CIB) that might be necessary before
 * taking over the responsibilities of being the DC.
 */
# define A_DC_TAKEOVER              (1ULL << 27)

/* -- Shutdown actions -- */
# define A_SHUTDOWN                 (1ULL << 28)
# define A_STOP                     (1ULL << 29)
# define A_EXIT_0                   (1ULL << 30)
# define A_EXIT_1                   (1ULL << 31)

# define A_SHUTDOWN_REQ             (1ULL << 32)
# define A_ELECTION_CHECK           (1ULL << 33)
# define A_DC_JOIN_FINAL            (1ULL << 34)

/* -- CCM actions -- */
# define A_CCM_CONNECT              (1ULL << 36)
# define A_CCM_DISCONNECT           (1ULL << 37)

/* -- CIB actions -- */
# define A_CIB_START                (1ULL << 41)
# define A_CIB_STOP                 (1ULL << 42)

/* -- Transition Engine actions -- */
/* Attempt to reach the newly calculated cluster state. This is
 * only called once per transition (except if it is asked to
 * stop the transition or start a new one).
 * Once given a cluster state to reach, the TE will determine
 * tasks that can be performed in parallel, execute them, wait
 * for replies and then determine the next set until the new
 * state is reached or no further tasks can be taken.
 */
# define A_TE_INVOKE                (1ULL << 44)
# define A_TE_START                 (1ULL << 45)
# define A_TE_STOP                  (1ULL << 46)
# define A_TE_CANCEL                (1ULL << 47)
# define A_TE_HALT                  (1ULL << 48)

/* -- Policy Engine actions -- */
/* Calculate the next state for the cluster. This is only
 * invoked once per needed calculation.
 */
# define A_PE_INVOKE                (1ULL << 49)
# define A_PE_START                 (1ULL << 50)
# define A_PE_STOP                  (1ULL << 51)

/* -- Misc actions -- */
/* Add a system generated "block" so that resources aren't moved
 * to or are actively moved away from the affected node. This
 * way we can return quickly even if busy with other things.
 */
# define A_NODE_BLOCK               (1ULL << 52)
/* Update our information in the local CIB */
# define A_UPDATE_NODESTATUS        (1ULL << 53)
# define A_CIB_BUMPGEN              (1ULL << 54)
# define A_READCONFIG               (1ULL << 55)

/* -- LRM Actions -- */
/* Connect to the Local Resource Manager */
# define A_LRM_CONNECT              (1ULL << 56)
/* Disconnect from the Local Resource Manager */
# define A_LRM_DISCONNECT           (1ULL << 57)
# define A_LRM_INVOKE               (1ULL << 58)
# define A_LRM_EVENT                (1ULL << 59)

/* -- Logging actions -- */
# define A_LOG                      (1ULL << 60)
# define A_ERROR                    (1ULL << 61)
# define A_WARN                     (1ULL << 62)

/* -- Composite masks: ORs of the single-bit actions above -- */
# define O_EXIT \
    (A_SHUTDOWN | A_STOP | A_CCM_DISCONNECT | A_LRM_DISCONNECT \
     | A_HA_DISCONNECT | A_EXIT_0 | A_CIB_STOP)
# define O_RELEASE \
    (A_DC_TIMER_STOP | A_DC_RELEASE | A_PE_STOP | A_TE_STOP | A_DC_RELEASED)
# define O_PE_RESTART       (A_PE_START | A_PE_STOP)
# define O_TE_RESTART       (A_TE_START | A_TE_STOP)
# define O_CIB_RESTART      (A_CIB_START | A_CIB_STOP)
# define O_LRM_RECONNECT    (A_LRM_CONNECT | A_LRM_DISCONNECT)
# define O_DC_TIMER_RESTART (A_DC_TIMER_STOP | A_DC_TIMER_START)
/*======================================
 *
 * "register" contents
 *
 * Things we may want to remember regardless of which state we are in.
 *
 * These also count as inputs for synthesizing I_*
 *
 * Each flag is one bit of an unsigned long long, written as
 * (1ULL << bit). Bits 5, 19 and 30 are not assigned in this list.
 *
 *======================================*/
/* Are we the DC? */
# define R_THE_DC           (1ULL << 0)
/* Are we starting up? */
# define R_STARTING         (1ULL << 1)
/* Are we trying to shut down? */
# define R_SHUTDOWN         (1ULL << 2)
/* Should we restart? */
# define R_STAYDOWN         (1ULL << 3)

/* Have we completed the join process */
# define R_JOIN_OK          (1ULL << 4)
# define R_READ_CONFIG      (1ULL << 6)
/* Does the PE need to be invoked at the next appropriate point? */
# define R_INVOKE_PE        (1ULL << 7)

/* Is the CIB connected? */
# define R_CIB_CONNECTED    (1ULL << 8)
/* Is the Policy Engine connected? */
# define R_PE_CONNECTED     (1ULL << 9)
/* Is the Transition Engine connected? */
# define R_TE_CONNECTED     (1ULL << 10)
/* Is the Local Resource Manager connected? */
# define R_LRM_CONNECTED    (1ULL << 11)

/* Is the CIB required? */
# define R_CIB_REQUIRED     (1ULL << 12)
/* Is the Policy Engine required? */
# define R_PE_REQUIRED      (1ULL << 13)
/* Is the Transition Engine required? */
# define R_TE_REQUIRED      (1ULL << 14)
/* Is the Stonith daemon required? */
# define R_ST_REQUIRED      (1ULL << 15)

/* Have we calculated the CIB? */
# define R_CIB_DONE         (1ULL << 16)
/* Do we have an up-to-date CIB */
# define R_HAVE_CIB         (1ULL << 17)
/* Have we asked for an up-to-date CIB */
# define R_CIB_ASKED        (1ULL << 18)

/* Have we got CCM data yet */
# define R_MEMBERSHIP       (1ULL << 20)
/* Have we got T_CL_STATUS data yet */
# define R_PEER_DATA        (1ULL << 21)

/* Did we sign out of our own accord */
# define R_HA_DISCONNECTED  (1ULL << 22)
/* Did we sign out of our own accord */
# define R_CCM_DISCONNECTED (1ULL << 23)

/* Are there Requests waiting for processing? */
# define R_REQ_PEND         (1ULL << 24)
/* Has the PE been invoked and we're awaiting a reply? */
# define R_PE_PEND          (1ULL << 25)
/* Has the TE been invoked and we're awaiting completion? */
# define R_TE_PEND          (1ULL << 26)
/* Do we have clients waiting on a response? If so perhaps we
 * shouldn't stop yet
 */
# define R_RESP_PEND        (1ULL << 27)

/* (no description given) */
# define R_IN_TRANSITION    (1ULL << 28)
/* Have we sent a stop action to all resources in preparation for
 * shutting down
 */
# define R_SENT_RSC_STOP    (1ULL << 29)

# define R_IN_RECOVERY      (1ULL << 31)
468
/*
 * Magic return code used within the CRMd to signal a direct nack,
 * i.e. the operation is invalid in the current state.
 */
# define CRM_DIRECT_NACK_RC (99)
474
/*
 * Causes associated with FSA activity.
 * NOTE(review): presumably records what triggered an FSA input; how the
 * values are used is not visible in this header -- confirm at the call
 * sites. Values are written out explicitly and equal the implicit ones.
 */
enum crmd_fsa_cause {
    C_UNKNOWN              = 0,
    C_STARTUP              = 1,
    C_IPC_MESSAGE          = 2,
    C_HA_MESSAGE           = 3,
    C_CCM_CALLBACK         = 4,
    C_CRMD_STATUS_CALLBACK = 5,
    C_LRM_OP_CALLBACK      = 6,
    C_LRM_MONITOR_CALLBACK = 7,
    C_TIMER_POPPED         = 8,
    C_SHUTDOWN             = 9,
    C_HEARTBEAT_FAILED     = 10,
    C_SUBSYSTEM_CONNECT    = 11,
    C_HA_DISCONNECT        = 12,
    C_FSA_INTERNAL         = 13,
    C_ILLEGAL              = 14
};
492
/*
 * String conversion helpers for the FSA types; the definitions live
 * outside this header.
 * NOTE(review): presumably each returns a printable name for its
 * argument -- confirm against the definitions.
 * NOTE(review): fsa_action2string() takes a signed long long, while the
 * A_* action masks are unsigned long long constants.
 */
const char *fsa_input2string(enum crmd_fsa_input input);
const char *fsa_state2string(enum crmd_fsa_state state);
const char *fsa_cause2string(enum crmd_fsa_cause cause);
const char *fsa_action2string(long long action);
497
498 #endif