/*
 * Copyright 2009-2023 the Pacemaker project contributors
 *
 * This source code is licensed under the GNU General Public License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include <stdbool.h>                // bool
#include <stdint.h>                 // uint32_t, uint64_t
#include <time.h>                   // time_t

#include <crm/common/mainloop.h>

11 /*!
12 * \internal
13 * \brief Check whether target has already been fenced recently
14 *
15 * \param[in] tolerance Number of seconds to look back in time
16 * \param[in] target Name of node to search for
17 * \param[in] action Action we want to match
18 *
19 * \return TRUE if an equivalent fencing operation took place in the last
20 * \p tolerance seconds, FALSE otherwise
21 */
22 gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
23
24 typedef struct stonith_device_s {
25 char *id;
26 char *agent;
27 char *namespace;
28
29 /*! list of actions that must execute on the target node. Used for unfencing */
30 GString *on_target_actions;
31 GList *targets;
32 time_t targets_age;
33 gboolean has_attr_map;
34
35 // Whether target's nodeid should be passed as a parameter to the agent
36 gboolean include_nodeid;
37
38 /* whether the cluster should automatically unfence nodes with the device */
39 gboolean automatic_unfencing;
40 guint priority;
41
42 uint32_t flags; // Group of enum st_device_flags
43
44 GHashTable *params;
45 GHashTable *aliases;
46 GList *pending_ops;
47 mainloop_timer_t *timer;
48 crm_trigger_t *work;
49 xmlNode *agent_metadata;
50
51 /*! A verified device is one that has contacted the
52 * agent successfully to perform a monitor operation */
53 gboolean verified;
54
55 gboolean cib_registered;
56 gboolean api_registered;
57 gboolean dirty;
58 } stonith_device_t;
59
/* These values are used to index certain arrays by "phase". Usually an
 * operation has only one "phase", so phase is always zero. However, some
 * reboots are remapped to "off" then "on", in which case "reboot" will be
 * phase 0, "off" will be phase 1 and "on" will be phase 2.
 */
enum st_remap_phase {
    st_phase_requested = 0,     //!< The action as originally requested
    st_phase_off       = 1,     //!< "off" half of a remapped reboot
    st_phase_on        = 2,     //!< "on" half of a remapped reboot
    st_phase_max       = 3      //!< Number of phases (array size, not a phase)
};

72 typedef struct remote_fencing_op_s {
73 /* The unique id associated with this operation */
74 char *id;
75 /*! The node this operation will fence */
76 char *target;
77 /*! The fencing action to perform on the target. (reboot, on, off) */
78 char *action;
79
80 /*! When was the fencing action recorded (seconds since epoch) */
81 time_t created;
82
83 /*! Marks if the final notifications have been sent to local stonith clients. */
84 gboolean notify_sent;
85 /*! The number of query replies received */
86 guint replies;
87 /*! The number of query replies expected */
88 guint replies_expected;
89 /*! Does this node own control of this operation */
90 gboolean owner;
91 /*! After query is complete, This the high level timer that expires the entire operation */
92 guint op_timer_total;
93 /*! This timer expires the current fencing request. Many fencing
94 * requests may exist in a single operation */
95 guint op_timer_one;
96 /*! This timer expires the query request sent out to determine
97 * what nodes are contain what devices, and who those devices can fence */
98 guint query_timer;
99 /*! This is the default timeout to use for each fencing device if no
100 * custom timeout is received in the query. */
101 gint base_timeout;
102 /*! This is the calculated total timeout an operation can take before
103 * expiring. This is calculated by adding together all the timeout
104 * values associated with the devices this fencing operation may call */
105 gint total_timeout;
106
107 /*! Requested fencing delay.
108 * Value -1 means disable any static/random fencing delays. */
109 int delay;
110
111 /*! Delegate is the node being asked to perform a fencing action
112 * on behalf of the node that owns the remote operation. Some operations
113 * will involve multiple delegates. This value represents the final delegate
114 * that is used. */
115 char *delegate;
116 /*! The point at which the remote operation completed */
117 time_t completed;
118 //! Group of enum stonith_call_options associated with this operation
119 uint32_t call_options;
120
121 /*! The current state of the remote operation. This indicates
122 * what stage the op is in, query, exec, done, duplicate, failed. */
123 enum op_state state;
124 /*! The node that owns the remote operation */
125 char *originator;
126 /*! The local client id that initiated the fencing request */
127 char *client_id;
128 /*! The client's call_id that initiated the fencing request */
129 int client_callid;
130 /*! The name of client that initiated the fencing request */
131 char *client_name;
132 /*! List of the received query results for all the nodes in the cpg group */
133 GList *query_results;
134 /*! The original request that initiated the remote stonith operation */
135 xmlNode *request;
136
137 /*! The current topology level being executed */
138 guint level;
139 /*! The current operation phase being executed */
140 enum st_remap_phase phase;
141
142 /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
143 GList *automatic_list;
144 /*! List of all devices at the currently executing topology level */
145 GList *devices_list;
146 /*! Current entry in the topology device list */
147 GList *devices;
148
149 /*! List of duplicate operations attached to this operation. Once this operation
150 * completes, the duplicate operations will be closed out as well. */
151 GList *duplicates;
152
153 /*! The point at which the remote operation completed(nsec) */
154 long long completed_nsec;
155
156 /*! The (potentially intermediate) result of the operation */
157 pcmk__action_result_t result;
158 } remote_fencing_op_t;
159
160 void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged);
161
/* Fencer-specific client flags
 *
 * Each nonzero value is a single bit. Bits 1 and 3 are not assigned to any
 * flag in this enum; existing values must keep their positions.
 */
enum st_client_flags {
    st_callback_unknown               =  UINT64_C(0),
    st_callback_notify_fence          = (UINT64_C(1) << 0),
    st_callback_device_add            = (UINT64_C(1) << 2),
    st_callback_device_del            = (UINT64_C(1) << 4),
    st_callback_notify_history        = (UINT64_C(1) << 5),
    st_callback_notify_history_synced = (UINT64_C(1) << 6)
};

// How the user specified the target of a topology level
enum fenced_target_by {
    fenced_target_by_unknown = -1,  //!< Invalid or not yet parsed
    fenced_target_by_name,          //!< By target name
    fenced_target_by_pattern,       //!< By a pattern matching target names
    fenced_target_by_attribute,     //!< By a node attribute/value on target
};

180 /*
181 * Complex fencing requirements are specified via fencing topologies.
182 * A topology consists of levels; each level is a list of fencing devices.
183 * Topologies are stored in a hash table by node name. When a node needs to be
184 * fenced, if it has an entry in the topology table, the levels are tried
185 * sequentially, and the devices in each level are tried sequentially.
186 * Fencing is considered successful as soon as any level succeeds;
187 * a level is considered successful if all its devices succeed.
188 * Essentially, all devices at a given level are "and-ed" and the
189 * levels are "or-ed".
190 *
191 * This structure is used for the topology table entries.
192 * Topology levels start from 1, so levels[0] is unused and always NULL.
193 */
194 typedef struct stonith_topology_s {
195 enum fenced_target_by kind; // How target was specified
196
197 /*! Node name regex or attribute name=value for which topology applies */
198 char *target;
199 char *target_value;
200 char *target_pattern;
201 char *target_attribute;
202
203 /*! Names of fencing devices at each topology level */
204 GList *levels[ST_LEVEL_MAX];
205
206 } stonith_topology_t;
207
208 void init_device_list(void);
209 void free_device_list(void);
210 void init_topology_list(void);
211 void free_topology_list(void);
212 void free_stonith_remote_op_list(void);
213 void init_stonith_remote_op_hash_table(GHashTable **table);
214 void free_metadata_cache(void);
215 void fenced_unregister_handlers(void);
216
217 uint64_t get_stonith_flag(const char *name);
218
219 void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
220 xmlNode *op_request, const char *remote_peer);
221
222 int stonith_device_register(xmlNode *msg, gboolean from_cib);
223
224 void stonith_device_remove(const char *id, bool from_cib);
225
226 char *stonith_level_key(const xmlNode *msg, enum fenced_target_by);
227 void fenced_register_level(xmlNode *msg, char **desc,
228 pcmk__action_result_t *result);
229 void fenced_unregister_level(xmlNode *msg, char **desc,
230 pcmk__action_result_t *result);
231
232 stonith_topology_t *find_topology_for_host(const char *host);
233
234 void do_local_reply(xmlNode *notify_src, pcmk__client_t *client,
235 int call_options);
236
237 xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data,
238 const pcmk__action_result_t *result);
239
240 void
241 do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
242
243 void fenced_send_notification(const char *type,
244 const pcmk__action_result_t *result,
245 xmlNode *data);
246 void fenced_send_device_notification(const char *op,
247 const pcmk__action_result_t *result,
248 const char *desc);
249 void fenced_send_level_notification(const char *op,
250 const pcmk__action_result_t *result,
251 const char *desc);
252
253 remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client,
254 xmlNode *request,
255 gboolean manual_ack);
256
257 void fenced_process_fencing_reply(xmlNode *msg);
258
259 int process_remote_stonith_query(xmlNode * msg);
260
261 void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
262
263 void stonith_fence_history(xmlNode *msg, xmlNode **output,
264 const char *remote_peer, int options);
265
266 void stonith_fence_history_trim(void);
267
268 bool fencing_peer_active(crm_node_t *peer);
269
270 void set_fencing_completed(remote_fencing_op_t * op);
271
272 int fenced_handle_manual_confirmation(const pcmk__client_t *client,
273 xmlNode *msg);
274 void fencer_metadata(void);
275
276 const char *fenced_device_reboot_action(const char *device_id);
277 bool fenced_device_supports_on(const char *device_id);
278
279 gboolean node_has_attr(const char *node, const char *name, const char *value);
280
281 gboolean node_does_watchdog_fencing(const char *node);
282
283 static inline void
284 fenced_set_protocol_error(pcmk__action_result_t *result)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
285 {
286 pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
287 "Fencer API request missing required information (bug?)");
288 }
289
290 /*!
291 * \internal
292 * \brief Get the device flag to use with a given action when searching devices
293 *
294 * \param[in] action Action to check
295 *
296 * \return st_device_supports_on if \p action is "on", otherwise
297 * st_device_supports_none
298 */
299 static inline uint32_t
300 fenced_support_flag(const char *action)
/* ![[previous]](../icons/left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
301 {
302 if (pcmk__str_eq(action, "on", pcmk__str_none)) {
303 return st_device_supports_on;
304 }
305 return st_device_supports_none;
306 }
307
308 extern char *stonith_our_uname;
309 extern gboolean stand_alone;
310 extern GHashTable *device_list;
311 extern GHashTable *topology;
312 extern long stonith_watchdog_timeout_ms;
313 extern GList *stonith_watchdog_targets;
314
315 extern GHashTable *stonith_remote_op_list;