/*
 * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#ifndef FSA_DEFINES__H
#  define FSA_DEFINES__H

/*======================================
 *	States the DC/CRMd can be in
 *
 * Enumerators are numbered implicitly from 0; S_ILLEGAL must stay last
 * because MAXSTATE is defined in terms of it.
 *======================================*/
enum crmd_fsa_state {
    S_IDLE = 0,                 /* Nothing happening */

    S_ELECTION,                 /* Take part in the election algorithm as
                                 * described below
                                 */
    S_INTEGRATION,              /* integrate the status of new nodes (which is
                                 * all of them if we have just been elected DC)
                                 * to form a complete and up-to-date picture of
                                 * the CIB
                                 */
    S_FINALIZE_JOIN,            /* integrate the status of new nodes (which is
                                 * all of them if we have just been elected DC)
                                 * to form a complete and up-to-date picture of
                                 * the CIB
                                 *
                                 * NOTE(review): this text is identical to the
                                 * S_INTEGRATION description; presumably this
                                 * state is the later phase of the join -
                                 * confirm and reword.
                                 */
    S_NOT_DC,                   /* we are in crmd/slave mode */
    S_POLICY_ENGINE,            /* Determine next stable state of the cluster */
    S_RECOVERY,                 /* Something bad happened, check everything is ok
                                 * before continuing and attempt to recover if
                                 * required
                                 */
    S_RELEASE_DC,               /* we were the DC, but now we aren't anymore,
                                 * possibly by our own request, and we should
                                 * release all unnecessary sub-systems, finish
                                 * any pending actions, do general cleanup and
                                 * unset anything that makes us think we are
                                 * special :)
                                 */
    S_STARTING,                 /* we are just starting out */
    S_PENDING,                  /* we are not a full/active member yet */
    S_STOPPING,                 /* We are in the final stages of shutting down */
    S_TERMINATE,                /* We are going to shutdown, this is the equiv of
                                 * "Sending TERM signal to all processes" in Linux
                                 * and in worst case scenarios could be considered
                                 * a self STONITH
                                 */
    S_TRANSITION_ENGINE,        /* Attempt to make the calculated next stable
                                 * state of the cluster a reality
                                 */

    S_HALT,                     /* Freeze - don't do anything
                                 * Something bad happened that needs the admin to fix
                                 * Wait for I_ELECTION
                                 */

    /*  ----------- Last input found in table is above ---------- */
    S_ILLEGAL                   /* This is an illegal FSA state */
        /* (must be last) */
};

/* Number of legal states; also the value of the S_ILLEGAL sentinel. */
#  define MAXSTATE	S_ILLEGAL
/*
      A state diagram can be constructed from crmd_fsa.dot with the
      following command:

      dot -Tpng crmd_fsa.dot > crmd_fsa.png

Description:

      Once we start and do some basic sanity checks, we go into the
      S_NOT_DC state and await instructions from the DC or input from
      the CCM which indicates the election algorithm needs to run.

      If the election algorithm is triggered we enter the S_ELECTION state
      from where we can either go back to the S_NOT_DC state or progress
      to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC
      but aren't anymore).

      The election algorithm has been adapted from
      http://www.cs.indiana.edu/cgi-bin/techreports/TRNNN.cgi?trnum=TR521

      Loosely known as the Bully Algorithm, its major points are:
      - Election is initiated by any node (N) that notices that the
        controller is no longer responding
      - Concurrent multiple elections are possible
      - Algorithm
	  + N sends ELECTION messages to all nodes that occur earlier in
	  the CCM's membership list.
	  + If no one responds, N wins and becomes controller
	  + N sends out CONTROLLER messages to all other nodes in the
	  partition
	  + If one of higher-ups answers, it takes over. N is done.

      Once the election is complete, if we are the DC, we enter the
      S_INTEGRATION state which is a DC-in-waiting style state.  We are
      the DC, but we shouldn't do anything yet because we may not have an
      up-to-date picture of the cluster.  There may of course be times
      when this fails, so we should go back to the S_RECOVERY stage and
      check everything is ok.  We may also end up here if a new node came
      online, since each node is authoritative on itself and we would want
      to incorporate its information into the CIB.

      Once we have the latest CIB, we then enter the S_POLICY_ENGINE state
      where we invoke the Policy Engine. It is possible that between
      invoking the Policy Engine and receiving an answer, that we receive
      more input. In this case we would discard the original result and
      invoke it again.

      Once we are satisfied with the output from the Policy Engine we
      enter S_TRANSITION_ENGINE and feed the Policy Engine's output to the
      Transition Engine who attempts to make the Policy Engine's
      calculation a reality.  If the transition completes successfully,
      we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the
      current unstable state and try again.

      Of course we may be asked to shutdown at any time, however we must
      progress to S_NOT_DC before doing so.  Once we have handed over DC
      duties to another node, we can then shut down like everyone else,
      that is by asking the DC for permission and waiting for it to take
      all our resources away.

      The case where we are the DC and the only node in the cluster is a
      special case and handled as an escalation which takes us to
      S_SHUTDOWN.  Similarly if any other point in the shutdown
      fails or stalls, this is escalated and we end up in S_TERMINATE.

      NOTE(review): no S_SHUTDOWN state is defined in enum crmd_fsa_state
      above; presumably S_STOPPING is meant - confirm.

      At any point, the CRMd/DC can relay messages for its sub-systems,
      but outbound messages (from sub-systems) should probably be blocked
      until S_INTEGRATION (for the DC case) or the join protocol has
      completed (for the CRMd case)

*/

/*======================================
 *
 * 	Inputs/Events/Stimuli to be given to the finite state machine
 *
 *	Some of these are true events, and others are synthesised based on
 *	the "register" (see below) and the contents or source of messages.
 *
 *	At this point, my plan is to have a loop of some sort that keeps
 *	going until receiving I_NULL
 *
 * Enumerators are numbered implicitly from 0 (the numeric markers below
 * are reading aids only); I_ILLEGAL must stay last because MAXINPUT is
 * defined in terms of it.
 *
 *======================================*/
enum crmd_fsa_input {
/* 0 */
    I_NULL,                     /* Nothing happened */
/* 1 */

    I_CIB_OP,                   /* An update to the CIB occurred */
    I_CIB_UPDATE,               /* An update to the CIB occurred */
    I_DC_TIMEOUT,               /* We have lost communication with the DC */
    I_ELECTION,                 /* Someone started an election */
    I_PE_CALC,                  /* The Policy Engine needs to be invoked */
    I_RELEASE_DC,               /* The election completed and we were not
                                 * elected, but we were the DC beforehand
                                 */
    I_ELECTION_DC,              /* The election completed and we were (re-)elected
                                 * DC
                                 */
    I_ERROR,                    /* Something bad happened (more serious than
                                 * I_FAIL) and may not have been due to the action
                                 * being performed.  For example, we may have lost
                                 * our connection to the CIB.
                                 */
/* 9 */
    I_FAIL,                     /* The action failed to complete successfully */
    I_INTEGRATED,
    I_FINALIZED,
    I_NODE_JOIN,                /* A node has entered the cluster */
    I_NOT_DC,                   /* We are not and were not the DC before or after
                                 * the current operation or state
                                 */
    I_RECOVERED,                /* The recovery process completed successfully */
    I_RELEASE_FAIL,             /* We could not give up DC status for some reason
                                 */
    I_RELEASE_SUCCESS,          /* We are no longer the DC */
    I_RESTART,                  /* The current set of actions needs to be
                                 * restarted
                                 */
    I_TE_SUCCESS,               /* Some non-resource, non-ccm action is required
                                 * of us, eg. ping
                                 */
    I_ROUTER,                   /* Do our job as router and forward this to the
                                 * right place
                                 */
/* 20 */
    I_SHUTDOWN,                 /* We are asking to shutdown */
    I_STOP,                     /* We have been told to shutdown */
    I_TERMINATE,                /* Actually exit */
    I_STARTUP,
    I_PE_SUCCESS,               /* The action completed successfully */

    I_JOIN_OFFER,               /* The DC is offering membership */
    I_JOIN_REQUEST,             /* The client is requesting membership */
    I_JOIN_RESULT,              /* If not the DC: The result of a join request
                                 * Else: A client is responding with its local state info
                                 */

    I_WAIT_FOR_EVENT,           /* we may be waiting for an async task to "happen"
                                 * and until it does, we can't do anything else
                                 */

    I_DC_HEARTBEAT,             /* The DC is telling us that it is alive and well */

/* 30 */
    I_LRM_EVENT,

    I_PENDING,
    I_HALT,

    /*  ------------ Last input found in table is above ----------- */
    I_ILLEGAL                   /* This is an illegal value for an FSA input */
        /* (must be last) */
};

/* Number of legal inputs; also the value of the I_ILLEGAL sentinel. */
#  define MAXINPUT	I_ILLEGAL

/* Alias: I_MESSAGE is the same input as I_ROUTER. */
#  define I_MESSAGE	I_ROUTER

/*======================================
 *
 * actions
 *
 * Some of the actions below will always occur together for now, but I can
 * foresee that this may not always be the case.  So I've split them up so
 * that if they ever do need to be called independently in the future, it
 * won't be a problem.
 *
 * For example, separating A_LRM_CONNECT from A_STARTUP might be useful
 * if we ever try to recover from a faulty or disconnected LRM.
 *
 * Every A_* value is a distinct single bit of an unsigned long long, so
 * actions can be combined with bitwise OR (see the O_* sets further down).
 *
 *======================================*/

         /* Don't do anything */
#  define A_NOTHING			0x0000000000000000ULL

/* -- Startup actions -- */
        /* Hook to perform any actions (other than starting the CIB,
         * connecting to HA or the CCM) that might be needed as part
         * of the startup.
         */
#  define A_STARTUP			0x0000000000000001ULL
        /* Hook to perform any actions that might be needed
         * after startup is successful.
         */
#  define A_STARTED			0x0000000000000002ULL
        /* Connect to Heartbeat */
#  define A_HA_CONNECT			0x0000000000000004ULL
#  define A_HA_DISCONNECT		0x0000000000000008ULL

#  define A_INTEGRATE_TIMER_START	0x0000000000000010ULL
#  define A_INTEGRATE_TIMER_STOP	0x0000000000000020ULL
#  define A_FINALIZE_TIMER_START	0x0000000000000040ULL
#  define A_FINALIZE_TIMER_STOP		0x0000000000000080ULL

/* -- Election actions -- */
#  define A_DC_TIMER_START		0x0000000000000100ULL
#  define A_DC_TIMER_STOP		0x0000000000000200ULL
#  define A_ELECTION_COUNT		0x0000000000000400ULL
#  define A_ELECTION_VOTE		0x0000000000000800ULL

#  define A_ELECTION_START		0x0000000000001000ULL

/* -- Message processing -- */
        /* Process the queue of requests */
#  define A_MSG_PROCESS			0x0000000000002000ULL
        /* Send the message to the correct recipient */
#  define A_MSG_ROUTE			0x0000000000004000ULL

        /* Send a welcome message to new node(s) */
#  define A_DC_JOIN_OFFER_ONE		0x0000000000008000ULL

/* -- Server Join protocol actions -- */
        /* Send a welcome message to all nodes */
#  define A_DC_JOIN_OFFER_ALL		0x0000000000010000ULL
        /* Process the remote node's ack of our join message */
#  define A_DC_JOIN_PROCESS_REQ		0x0000000000020000ULL
        /* Send out the results of the Join phase */
#  define A_DC_JOIN_FINALIZE		0x0000000000040000ULL
        /* Send out the results of the Join phase
         *
         * NOTE(review): this text is identical to the A_DC_JOIN_FINALIZE
         * description; presumably this action handles a node's ack of
         * those results - confirm and reword.
         */
#  define A_DC_JOIN_PROCESS_ACK		0x0000000000080000ULL

/* -- Client Join protocol actions -- */
#  define A_CL_JOIN_QUERY		0x0000000000100000ULL
#  define A_CL_JOIN_ANNOUNCE		0x0000000000200000ULL
        /* Request membership to the DC list */
#  define A_CL_JOIN_REQUEST		0x0000000000400000ULL
        /* Did the DC accept or reject the request */
#  define A_CL_JOIN_RESULT		0x0000000000800000ULL

/* -- Recovery, DC start/stop -- */
        /* Something bad happened, try to recover */
#  define A_RECOVER			0x0000000001000000ULL
        /* Hook to perform any actions (apart from starting, the TE, PE
         * and gathering the latest CIB) that might be necessary before
         * giving up the responsibilities of being the DC.
         */
#  define A_DC_RELEASE			0x0000000002000000ULL
        /* */
#  define A_DC_RELEASED			0x0000000004000000ULL
        /* Hook to perform any actions (apart from starting, the TE, PE
         * and gathering the latest CIB) that might be necessary before
         * taking over the responsibilities of being the DC.
         */
#  define A_DC_TAKEOVER			0x0000000008000000ULL

/* -- Shutdown actions -- */
#  define A_SHUTDOWN			0x0000000010000000ULL
#  define A_STOP			0x0000000020000000ULL
#  define A_EXIT_0			0x0000000040000000ULL
#  define A_EXIT_1			0x0000000080000000ULL

#  define A_SHUTDOWN_REQ		0x0000000100000000ULL
#  define A_ELECTION_CHECK		0x0000000200000000ULL
#  define A_DC_JOIN_FINAL		0x0000000400000000ULL

/* -- CCM actions -- */
#  define A_CCM_CONNECT			0x0000001000000000ULL
#  define A_CCM_DISCONNECT		0x0000002000000000ULL

/* -- CIB actions -- */
#  define A_CIB_START			0x0000020000000000ULL
#  define A_CIB_STOP			0x0000040000000000ULL

/* -- Transition Engine actions -- */
        /* Attempt to reach the newly calculated cluster state.  This is
         * only called once per transition (except if it is asked to
         * stop the transition or start a new one).
         * Once given a cluster state to reach, the TE will determine
         * tasks that can be performed in parallel, execute them, wait
         * for replies and then determine the next set until the new
         * state is reached or no further tasks can be taken.
         */
#  define A_TE_INVOKE			0x0000100000000000ULL
#  define A_TE_START			0x0000200000000000ULL
#  define A_TE_STOP			0x0000400000000000ULL
#  define A_TE_CANCEL			0x0000800000000000ULL
#  define A_TE_HALT			0x0001000000000000ULL

/* -- Policy Engine actions -- */
        /* Calculate the next state for the cluster.  This is only
         * invoked once per needed calculation.
         */
#  define A_PE_INVOKE			0x0002000000000000ULL
#  define A_PE_START			0x0004000000000000ULL
#  define A_PE_STOP			0x0008000000000000ULL
/* -- Misc actions -- */
        /* Add a system-generated "block" so that resources aren't moved
         * to or are actively moved away from the affected node.  This
         * way we can return quickly even if busy with other things.
         */
#  define A_NODE_BLOCK			0x0010000000000000ULL
        /* Update our information in the local CIB */
#  define A_UPDATE_NODESTATUS		0x0020000000000000ULL
#  define A_CIB_BUMPGEN			0x0040000000000000ULL
#  define A_READCONFIG			0x0080000000000000ULL

/* -- LRM Actions -- */
        /* Connect to the Local Resource Manager */
#  define A_LRM_CONNECT			0x0100000000000000ULL
        /* Disconnect from the Local Resource Manager */
#  define A_LRM_DISCONNECT		0x0200000000000000ULL
#  define A_LRM_INVOKE			0x0400000000000000ULL
#  define A_LRM_EVENT			0x0800000000000000ULL

/* -- Logging actions -- */
#  define A_LOG				0x1000000000000000ULL
#  define A_ERROR			0x2000000000000000ULL
#  define A_WARN			0x4000000000000000ULL

/* -- Composite action sets: bitwise OR of the A_* flags listed -- */
#  define O_EXIT (A_SHUTDOWN|A_STOP|A_CCM_DISCONNECT|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP)
#  define O_RELEASE  (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED)
#  define O_PE_RESTART		(A_PE_START|A_PE_STOP)
#  define O_TE_RESTART		(A_TE_START|A_TE_STOP)
#  define O_CIB_RESTART		(A_CIB_START|A_CIB_STOP)
#  define O_LRM_RECONNECT	(A_LRM_CONNECT|A_LRM_DISCONNECT)
#  define O_DC_TIMER_RESTART	(A_DC_TIMER_STOP|A_DC_TIMER_START)
/*======================================
 *
 * "register" contents
 *
 * Things we may want to remember regardless of which state we are in.
 *
 * These also count as inputs for synthesizing I_*
 *
 * Every R_* value is a distinct single bit; not every bit position is
 * assigned.
 *
 *======================================*/
#  define	R_THE_DC	0x00000001ULL   /* Are we the DC? */
#  define	R_STARTING	0x00000002ULL   /* Are we starting up? */
#  define	R_SHUTDOWN	0x00000004ULL   /* Are we trying to shut down? */
#  define	R_STAYDOWN	0x00000008ULL   /* Should we restart? */

#  define R_JOIN_OK	0x00000010ULL   /* Have we completed the join process */
#  define	R_READ_CONFIG	0x00000040ULL
#  define	R_INVOKE_PE	0x00000080ULL   /* Does the PE need to be invoked at
                                           the next appropriate point? */

#  define	R_CIB_CONNECTED	0x00000100ULL   /* Is the CIB connected? */
#  define	R_PE_CONNECTED	0x00000200ULL   /* Is the Policy Engine connected? */
#  define	R_TE_CONNECTED	0x00000400ULL   /* Is the Transition Engine connected? */
#  define	R_LRM_CONNECTED	0x00000800ULL   /* Is the Local Resource Manager
                                           connected? */

#  define	R_CIB_REQUIRED	0x00001000ULL   /* Is the CIB required? */
#  define	R_PE_REQUIRED	0x00002000ULL   /* Is the Policy Engine required? */
#  define	R_TE_REQUIRED	0x00004000ULL   /* Is the Transition Engine required? */
#  define	R_ST_REQUIRED	0x00008000ULL   /* Is the Stonith daemon required? */

#  define	R_CIB_DONE	0x00010000ULL   /* Have we calculated the CIB? */
#  define R_HAVE_CIB	0x00020000ULL   /* Do we have an up-to-date CIB */
#  define R_CIB_ASKED	0x00040000ULL   /* Have we asked for an up-to-date CIB */

#  define R_MEMBERSHIP	0x00100000ULL   /* Have we got CCM data yet */
#  define R_PEER_DATA	0x00200000ULL   /* Have we got T_CL_STATUS data yet */

#  define R_HA_DISCONNECTED  0x00400000ULL      /* did we sign out of our own accord */
#  define R_CCM_DISCONNECTED 0x00800000ULL      /* did we sign out of our own accord */

#  define	R_REQ_PEND	0x01000000ULL   /* Are there Requests waiting for
                                           processing? */
#  define	R_PE_PEND	0x02000000ULL   /* Has the PE been invoked and we're
                                           awaiting a reply? */
#  define	R_TE_PEND	0x04000000ULL   /* Has the TE been invoked and we're
                                           awaiting completion? */
#  define	R_RESP_PEND	0x08000000ULL   /* Do we have clients waiting on a
                                           response? if so perhaps we shouldn't
                                           stop yet */

#  define R_IN_TRANSITION	0x10000000ULL   /*  */
#  define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all
                                         * resources in preparation for
                                         * shutting down */

#  define R_IN_RECOVERY	0x80000000ULL

/*
 * Magic RC used within CRMd to indicate direct nacks
 * (operation is invalid in current state)
 */
#  define CRM_DIRECT_NACK_RC (99)

/*
 * Enumerators are numbered implicitly from 0; C_ILLEGAL is the trailing
 * sentinel.
 * NOTE(review): the name suggests these identify what caused an FSA input
 * to be raised - confirm against the callers.
 */
enum crmd_fsa_cause {
    C_UNKNOWN = 0,
    C_STARTUP,
    C_IPC_MESSAGE,
    C_HA_MESSAGE,
    C_CCM_CALLBACK,
    C_CRMD_STATUS_CALLBACK,
    C_LRM_OP_CALLBACK,
    C_LRM_MONITOR_CALLBACK,
    C_TIMER_POPPED,
    C_SHUTDOWN,
    C_HEARTBEAT_FAILED,
    C_SUBSYSTEM_CONNECT,
    C_HA_DISCONNECT,
    C_FSA_INTERNAL,
    C_ILLEGAL
};

/*
 * String conversion helpers for the enums and action flags above; they
 * are only declared here, the definitions live elsewhere.
 *
 * NOTE(review): fsa_action2string() takes a (signed) long long while the
 * A_* masks are unsigned long long.  Every mask defined in this header is
 * at most 0x4000000000000000 and so fits; a future action using bit 63
 * would not.
 */
extern const char *fsa_input2string(enum crmd_fsa_input input);
extern const char *fsa_state2string(enum crmd_fsa_state state);
extern const char *fsa_cause2string(enum crmd_fsa_cause cause);
extern const char *fsa_action2string(long long action);

#endif