1 /*
2 * Copyright 2009-2025 the Pacemaker project contributors
3 *
4 * This source code is licensed under the GNU General Public License version 2
5 * or later (GPLv2+) WITHOUT ANY WARRANTY.
6 */
7
8 #include <stdint.h> // uint32_t, uint64_t
9 #include <libxml/tree.h> // xmlNode
10
11 #include <crm/common/mainloop.h>
12 #include <crm/cluster.h>
13 #include <crm/stonith-ng.h>
14 #include <crm/fencing/internal.h>
15
/*!
 * \internal
 * \brief Check whether target has already been fenced recently
 *
 * \param[in] tolerance  Number of seconds to look back in time
 * \param[in] target     Name of node to search for
 * \param[in] action     Action we want to match
 *
 * \return TRUE if an equivalent fencing operation took place in the last
 *         \p tolerance seconds, FALSE otherwise
 */
gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
28
/*!
 * \internal
 * \brief Bit flags describing a \c fenced_device_t: which actions it supports,
 *        how it was registered, and its current state
 *
 * Every enumerator except \c fenced_df_none occupies its own bit, so values
 * can be combined into a single flag group.
 */
enum fenced_device_flags {
    //! No flag set (has no effect)
    fenced_df_none              = UINT32_C(0),

    /* Supported actions */

    //! The list action is supported by the device
    fenced_df_supports_list     = UINT32_C(1) << 0,

    //! The on action is supported by the device
    fenced_df_supports_on       = UINT32_C(1) << 1,

    //! The reboot action is supported by the device
    fenced_df_supports_reboot   = UINT32_C(1) << 2,

    //! The status action is supported by the device
    fenced_df_supports_status   = UINT32_C(1) << 3,

    /* Configuration and state */

    //! Newly joined nodes are automatically unfenced using this device
    fenced_df_auto_unfence      = UINT32_C(1) << 4,

    //! A list, status, or monitor action has succeeded for the device on this node
    fenced_df_verified          = UINT32_C(1) << 5,

    /* Registration source */

    //! Registration came in through the stonith API
    fenced_df_api_registered    = UINT32_C(1) << 6,

    //! Registration came in through the fencer's CIB diff callback
    fenced_df_cib_registered    = UINT32_C(1) << 7,

    //! The CIB changed and the device has not been re-registered yet
    fenced_df_dirty             = UINT32_C(1) << 8,
};
64
/*!
 * \internal
 * \brief Turn on flags in a fencing device's flag group
 *
 * Asserts that \p device is not NULL, then replaces <tt>device->flags</tt>
 * with the value returned by \c pcmk__set_flags_as(). That call receives the
 * calling function name and line number, \c LOG_TRACE, the label
 * "Fence device", the device ID, the current flags, \p set_flags, and the
 * stringified \p set_flags expression.
 *
 * \param[in,out] device     Device whose flags to set (\c fenced_device_t)
 * \param[in]     set_flags  Group of <tt>enum fenced_device_flags</tt> to set
 *
 * \note \p device is evaluated more than once.
 */
#define fenced_device_set_flags(device, set_flags) do {             \
        pcmk__assert((device) != NULL);                             \
        (device)->flags = pcmk__set_flags_as(__func__,              \
                                             __LINE__,              \
                                             LOG_TRACE,             \
                                             "Fence device",        \
                                             (device)->id,          \
                                             (device)->flags,       \
                                             set_flags,             \
                                             #set_flags);           \
    } while (0)
79
/*!
 * \internal
 * \brief Turn off flags in a fencing device's flag group
 *
 * Asserts that \p device is not NULL, then replaces <tt>device->flags</tt>
 * with the value returned by \c pcmk__clear_flags_as(). That call receives
 * the calling function name and line number, \c LOG_TRACE, the label
 * "Fence device", the device ID, the current flags, \p clear_flags, and the
 * stringified \p clear_flags expression.
 *
 * \param[in,out] device       Device whose flags to clear (\c fenced_device_t)
 * \param[in]     clear_flags  Group of <tt>enum fenced_device_flags</tt> to
 *                             clear
 *
 * \note \p device is evaluated more than once.
 */
#define fenced_device_clear_flags(device, clear_flags) do {         \
        pcmk__assert((device) != NULL);                             \
        (device)->flags = pcmk__clear_flags_as(__func__,            \
                                               __LINE__,            \
                                               LOG_TRACE,           \
                                               "Fence device",      \
                                               (device)->id,        \
                                               (device)->flags,     \
                                               clear_flags,         \
                                               #clear_flags);       \
    } while (0)
96
/*!
 * \internal
 * \brief Bit flags selecting which kinds of notifications a fencer client gets
 *
 * Every enumerator except \c fenced_nf_none occupies its own bit.
 */
enum fenced_notify_flags {
    //! No flag set (has no effect)
    fenced_nf_none              = UINT32_C(0),

    //! Results of fencing operations
    fenced_nf_fence_result      = UINT32_C(1) << 0,

    // @TODO Consider notifying about device registrations via the CIB
    //! Fencing device registrations made via the fencer API
    fenced_nf_device_registered = UINT32_C(1) << 1,

    // @TODO Consider notifying about device removals via the CIB
    //! Fencing device removals made via the fencer API
    fenced_nf_device_removed    = UINT32_C(1) << 2,

    //! Changes to the fencing history
    fenced_nf_history_changed   = UINT32_C(1) << 3,

    /* @FIXME A comment in stonith_fence_history() says its check is not
     * conclusive: it may send a "history synced" notification when the history
     * has not been synced. Hence "might have been synced" below. Try to find a
     * better test.
     */
    //! The fencing history might have been synced
    fenced_nf_history_synced    = UINT32_C(1) << 4,
};
127
/* Takes a notification type string and returns an enum fenced_notify_flags
 * value. NOTE(review): presumably returns fenced_nf_none for an unrecognized
 * type -- confirm against the definition.
 */
enum fenced_notify_flags fenced_parse_notify_flag(const char *type);
129
/* A fencing device. The flags member is manipulated via
 * fenced_device_set_flags() and fenced_device_clear_flags(), which also pass
 * the id member along as the "Fence device" identifier.
 */
typedef struct {
    char *id;           // Device ID
    char *agent;        // NOTE(review): presumably the fence agent name -- confirm
    char *namespace;    // NOTE(review): presumably the agent's namespace -- confirm

    /*! List of actions that must execute on the target node. Used for unfencing */
    GString *on_target_actions;
    GList *targets;
    time_t targets_age; // NOTE(review): presumably when targets was last refreshed -- confirm

    uint32_t flags; // Group of enum fenced_device_flags

    GHashTable *params;
    GHashTable *aliases;
    GList *pending_ops;
    mainloop_timer_t *timer;
    crm_trigger_t *work;
    xmlNode *agent_metadata;
    const char *default_host_arg;
} fenced_device_t;
150
/* Phase of a fencing operation, used as an index into certain per-phase
 * arrays. Most operations have a single phase, so the phase stays at zero.
 * Some reboots, however, are remapped to "off" followed by "on"; then the
 * "reboot" itself is phase 0, "off" is phase 1, and "on" is phase 2.
 */
enum st_remap_phase {
    st_phase_requested  = 0,    // The action as originally requested
    st_phase_off        = 1,    // "off" half of a remapped reboot
    st_phase_on         = 2,    // "on" half of a remapped reboot
    st_phase_max        = 3     // Number of phases (not itself a phase)
};
162
/* A remote fencing operation: its identity, target and action, timers and
 * timeouts, requesting client, query results, topology progress, and result.
 */
typedef struct remote_fencing_op_s {
    /* @TODO Abstract the overlap with async_command_t (some members have
     * different names for the same thing), which should allow reducing
     * duplication in some functions
     */

    /*! The unique ID associated with this operation */
    char *id;
    /*! The node this operation will fence */
    char *target;
    /*! The fencing action to perform on the target (reboot, on, off) */
    char *action;

    /*! When the fencing action was recorded (seconds since epoch) */
    time_t created;

    /*! Whether the final notifications have been sent to local stonith clients */
    gboolean notify_sent;
    /*! The number of query replies received */
    guint replies;
    /*! The number of query replies expected */
    guint replies_expected;
    /*! Whether this node owns control of this operation */
    gboolean owner;
    /*! After the query is complete, this is the high-level timer that expires
     * the entire operation */
    guint op_timer_total;
    /*! This timer expires the current fencing request. Many fencing
     * requests may exist in a single operation */
    guint op_timer_one;
    /*! This timer expires the query request sent out to determine
     * which nodes have which devices, and which targets those devices can
     * fence */
    guint query_timer;
    /*! This is the default timeout to use for each fencing device if no
     * custom timeout is received in the query */
    gint base_timeout;
    /*! This is the calculated total timeout an operation can take before
     * expiring. This is calculated by adding together all the timeout
     * values associated with the devices this fencing operation may call */
    gint total_timeout;

    /*!
     * Fencing delay (in seconds) requested by API client (used by controller to
     * implement \c PCMK_OPT_PRIORITY_FENCING_DELAY). A value of -1 means
     * disable all configured delays.
     */
    int client_delay;

    /*! Delegate is the node being asked to perform a fencing action
     * on behalf of the node that owns the remote operation. Some operations
     * will involve multiple delegates. This value represents the final delegate
     * that is used. */
    char *delegate;
    /*! The point at which the remote operation completed */
    time_t completed;
    //! Group of enum stonith_call_options associated with this operation
    uint32_t call_options;

    /*! The current state of the remote operation. This indicates
     * what stage the op is in: query, exec, done, duplicate, failed. */
    enum op_state state;
    /*! The node that owns the remote operation */
    char *originator;
    /*! The local client ID that initiated the fencing request */
    char *client_id;
    /*! The client's call_id that initiated the fencing request */
    int client_callid;
    /*! The name of the client that initiated the fencing request */
    char *client_name;
    /*! List of the received query results for all the nodes in the cpg group */
    GList *query_results;
    /*! The original request that initiated the remote stonith operation */
    xmlNode *request;

    /*! The current topology level being executed */
    guint level;
    /*! The current operation phase being executed */
    enum st_remap_phase phase;

    /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
    GList *automatic_list;
    /*! List of all devices at the currently executing topology level */
    GList *devices_list;
    /*! Current entry in the topology device list */
    GList *devices;

    /*! List of duplicate operations attached to this operation. Once this operation
     * completes, the duplicate operations will be closed out as well. */
    GList *duplicates;

    /*! The point at which the remote operation completed (nsec).
     * NOTE(review): presumably the sub-second part accompanying \c completed
     * -- confirm against set_fencing_completed(). */
    long long completed_nsec;

    /*! The (potentially intermediate) result of the operation */
    pcmk__action_result_t result;
} remote_fencing_op_t;
258
/* Takes a read-only remote fencing operation and a flag saying whether the
 * operation was merged. NOTE(review): presumably broadcasts op->result to
 * peers -- confirm against the definition.
 */
void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged);
260
/* The way in which the user identified the target of a topology level */
enum fenced_target_by {
    fenced_target_by_unknown    = -1,   // Invalid or not yet parsed
    fenced_target_by_name       = 0,    // By target name
    fenced_target_by_pattern    = 1,    // By a pattern matching target names
    fenced_target_by_attribute  = 2,    // By a node attribute/value on target
};
268
/*
 * Complex fencing requirements are specified via fencing topologies.
 * A topology consists of levels; each level is a list of fencing devices.
 * Topologies are stored in a hash table by node name. When a node needs to be
 * fenced, if it has an entry in the topology table, the levels are tried
 * sequentially, and the devices in each level are tried sequentially.
 * Fencing is considered successful as soon as any level succeeds;
 * a level is considered successful if all its devices succeed.
 * Essentially, all devices at a given level are "and-ed" and the
 * levels are "or-ed".
 *
 * This structure is used for the topology table entries.
 * Topology levels start from 1, so levels[0] is unused and always NULL.
 */
typedef struct stonith_topology_s {
    enum fenced_target_by kind; // How target was specified

    /*! Node name regex or attribute name=value for which topology applies */
    char *target;

    /* NOTE(review): the next three members look like the parsed pieces of the
     * target specification (value, pattern, attribute name); confirm against
     * the code that fills them in.
     */
    char *target_value;
    char *target_pattern;
    char *target_attribute;

    /*! Names of fencing devices at each topology level */
    GList *levels[ST__LEVEL_COUNT];

} stonith_topology_t;
296
// NOTE(review): nsig is presumably the number of the triggering signal -- confirm
void stonith_shutdown(int nsig);

// Fencing device table: setup, teardown, query, and iteration
void fenced_init_device_table(void);
void fenced_free_device_table(void);
bool fenced_has_watchdog_device(void);
void fenced_foreach_device(GHFunc fn, gpointer user_data);
void fenced_foreach_device_remove(GHRFunc fn);

// Setup and teardown of other fencer-wide data
void init_topology_list(void);
void free_topology_list(void);
void free_stonith_remote_op_list(void);
void init_stonith_remote_op_hash_table(GHashTable **table);
void free_metadata_cache(void);
void fenced_unregister_handlers(void);

// Request handling
void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
                     xmlNode *op_request, const char *remote_peer);

/* Device registration and removal. NOTE(review): from_cib presumably tells
 * CIB-driven changes apart from API requests (compare
 * fenced_df_cib_registered and fenced_df_api_registered) -- confirm.
 */
int fenced_device_register(const xmlNode *dev, bool from_cib);

void stonith_device_remove(const char *id, bool from_cib);
318
319 char *stonith_level_key(const xmlNode *msg, enum fenced_target_by);
320 void fenced_register_level(xmlNode *msg, pcmk__action_result_t *result);
321 void fenced_unregister_level(xmlNode *msg, pcmk__action_result_t *result);
322
// Topology lookup by host name
stonith_topology_t *find_topology_for_host(const char *host);

// Replies and notifications
void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
                    int call_options);

xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data,
                                const pcmk__action_result_t *result);

void
do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);

void fenced_send_notification(const char *type,
                              const pcmk__action_result_t *result,
                              xmlNode *data);
void fenced_send_config_notification(const char *op,
                                     const pcmk__action_result_t *result,
                                     const char *desc);

// Remote fencing operations
remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client,
                                                xmlNode *request,
                                                gboolean manual_ack);

void fenced_process_fencing_reply(xmlNode *msg);

int process_remote_stonith_query(xmlNode * msg);

/* NOTE(review): declared as returning void * rather than
 * remote_fencing_op_t * -- confirm the intended return type with the definition
 */
void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);

// Fencing history
void stonith_fence_history(xmlNode *msg, xmlNode **output,
                           const char *remote_peer, int options);

void stonith_fence_history_trim(void);

bool fencing_peer_active(pcmk__node_status_t *peer);

void set_fencing_completed(remote_fencing_op_t * op);

int fenced_handle_manual_confirmation(const pcmk__client_t *client,
                                      xmlNode *msg);

// Device queries by device ID
const char *fenced_device_reboot_action(const char *device_id);
bool fenced_device_supports_on(const char *device_id);

// Node queries by node name
gboolean node_has_attr(const char *node, const char *name, const char *value);

gboolean node_does_watchdog_fencing(const char *node);

// CIB-related setup and cleanup
void fencing_topology_init(void);
void setup_cib(void);
void fenced_cib_cleanup(void);

// Scheduler-related setup, local node name accessors, and cleanup
int fenced_scheduler_init(void);
void fenced_set_local_node(const char *node_name);
const char *fenced_get_local_node(void);
void fenced_scheduler_cleanup(void);
void fenced_scheduler_run(xmlNode *cib);
379
380 /*!
381 * \internal
382 * \brief Get the device flag to use with a given action when searching devices
383 *
384 * \param[in] action Action to check
385 *
386 * \return \c fenced_df_supports_on if \p action is "on", otherwise
387 * \c fenced_df_none
388 */
389 static inline uint32_t
390 fenced_support_flag(const char *action)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
391 {
392 if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
393 return fenced_df_supports_on;
394 }
395 return fenced_df_none;
396 }
397
/* Globals shared across the fencer's source files (declared here, defined
 * elsewhere)
 */
extern GHashTable *topology;
extern long long stonith_watchdog_timeout_ms;   // In milliseconds, per the name
extern GList *stonith_watchdog_targets;
extern GHashTable *stonith_remote_op_list;
extern crm_exit_t exit_code;
extern gboolean stonith_shutdown_flag;