1 /*
2 * Copyright 2009-2022 the Pacemaker project contributors
3 *
4 * This source code is licensed under the GNU General Public License version 2
5 * or later (GPLv2+) WITHOUT ANY WARRANTY.
6 */
7
8 #include <stdint.h> // uint32_t, uint64_t
9 #include <crm/common/mainloop.h>
10
11 /*!
12 * \internal
13 * \brief Check whether target has already been fenced recently
14 *
15 * \param[in] tolerance Number of seconds to look back in time
16 * \param[in] target Name of node to search for
17 * \param[in] action Action we want to match
18 *
19 * \return TRUE if an equivalent fencing operation took place in the last
20 * \p tolerance seconds, FALSE otherwise
21 */
22 gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
23
24 typedef struct stonith_device_s {
25 char *id;
26 char *agent;
27 char *namespace;
28
29 /*! list of actions that must execute on the target node. Used for unfencing */
30 char *on_target_actions;
31 GList *targets;
32 time_t targets_age;
33 gboolean has_attr_map;
34 /* should nodeid parameter for victim be included in agent arguments */
35 gboolean include_nodeid;
36 /* whether the cluster should automatically unfence nodes with the device */
37 gboolean automatic_unfencing;
38 guint priority;
39
40 uint32_t flags; // Group of enum st_device_flags
41
42 GHashTable *params;
43 GHashTable *aliases;
44 GList *pending_ops;
45 mainloop_timer_t *timer;
46 crm_trigger_t *work;
47 xmlNode *agent_metadata;
48
49 /*! A verified device is one that has contacted the
50 * agent successfully to perform a monitor operation */
51 gboolean verified;
52
53 gboolean cib_registered;
54 gboolean api_registered;
55 gboolean dirty;
56 } stonith_device_t;
57
/* These values are used to index certain arrays by "phase". Usually an
 * operation has only one "phase", so phase is always zero. However, some
 * reboots are remapped to "off" then "on", in which case "reboot" will be
 * phase 0, "off" will be phase 1 and "on" will be phase 2.
 *
 * st_phase_max is the number of phase values defined above.
 */
enum st_remap_phase {
    st_phase_requested = 0,
    st_phase_off       = 1,
    st_phase_on        = 2,
    st_phase_max       = 3
};
69
70 typedef struct remote_fencing_op_s {
71 /* The unique id associated with this operation */
72 char *id;
73 /*! The node this operation will fence */
74 char *target;
75 /*! The fencing action to perform on the target. (reboot, on, off) */
76 char *action;
77
78 /*! When was the fencing action recorded (seconds since epoch) */
79 time_t created;
80
81 /*! Marks if the final notifications have been sent to local stonith clients. */
82 gboolean notify_sent;
83 /*! The number of query replies received */
84 guint replies;
85 /*! The number of query replies expected */
86 guint replies_expected;
87 /*! Does this node own control of this operation */
88 gboolean owner;
89 /*! After query is complete, This the high level timer that expires the entire operation */
90 guint op_timer_total;
91 /*! This timer expires the current fencing request. Many fencing
92 * requests may exist in a single operation */
93 guint op_timer_one;
94 /*! This timer expires the query request sent out to determine
95 * what nodes are contain what devices, and who those devices can fence */
96 guint query_timer;
97 /*! This is the default timeout to use for each fencing device if no
98 * custom timeout is received in the query. */
99 gint base_timeout;
100 /*! This is the calculated total timeout an operation can take before
101 * expiring. This is calculated by adding together all the timeout
102 * values associated with the devices this fencing operation may call */
103 gint total_timeout;
104
105 /*! Requested fencing delay.
106 * Value -1 means disable any static/random fencing delays. */
107 int delay;
108
109 /*! Delegate is the node being asked to perform a fencing action
110 * on behalf of the node that owns the remote operation. Some operations
111 * will involve multiple delegates. This value represents the final delegate
112 * that is used. */
113 char *delegate;
114 /*! The point at which the remote operation completed */
115 time_t completed;
116 //! Group of enum stonith_call_options associated with this operation
117 uint32_t call_options;
118
119 /*! The current state of the remote operation. This indicates
120 * what stage the op is in, query, exec, done, duplicate, failed. */
121 enum op_state state;
122 /*! The node that owns the remote operation */
123 char *originator;
124 /*! The local client id that initiated the fencing request */
125 char *client_id;
126 /*! The client's call_id that initiated the fencing request */
127 int client_callid;
128 /*! The name of client that initiated the fencing request */
129 char *client_name;
130 /*! List of the received query results for all the nodes in the cpg group */
131 GList *query_results;
132 /*! The original request that initiated the remote stonith operation */
133 xmlNode *request;
134
135 /*! The current topology level being executed */
136 guint level;
137 /*! The current operation phase being executed */
138 enum st_remap_phase phase;
139
140 /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
141 GList *automatic_list;
142 /*! List of all devices at the currently executing topology level */
143 GList *devices_list;
144 /*! Current entry in the topology device list */
145 GList *devices;
146
147 /*! List of duplicate operations attached to this operation. Once this operation
148 * completes, the duplicate operations will be closed out as well. */
149 GList *duplicates;
150
151 /*! The point at which the remote operation completed(nsec) */
152 long long completed_nsec;
153
154 /*! The (potentially intermediate) result of the operation */
155 pcmk__action_result_t result;
156 } remote_fencing_op_t;
157
158 void fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged);
159
// Fencer-specific client flags (bit positions 1 and 3 are not assigned here)
enum st_client_flags {
    st_callback_unknown               =  UINT64_C(0),
    st_callback_notify_fence          = (UINT64_C(1) << 0),
    st_callback_device_add            = (UINT64_C(1) << 2),
    st_callback_device_del            = (UINT64_C(1) << 4),
    st_callback_notify_history        = (UINT64_C(1) << 5),
    st_callback_notify_history_synced = (UINT64_C(1) << 6)
};
169
// How the user specified the target of a topology level
enum fenced_target_by {
    fenced_target_by_unknown = -1,  // Invalid or not yet parsed
    fenced_target_by_name,          // By target name
    fenced_target_by_pattern,       // By a pattern matching target names
    fenced_target_by_attribute,     // By a node attribute/value on target
};
177
178 /*
179 * Complex fencing requirements are specified via fencing topologies.
180 * A topology consists of levels; each level is a list of fencing devices.
181 * Topologies are stored in a hash table by node name. When a node needs to be
182 * fenced, if it has an entry in the topology table, the levels are tried
183 * sequentially, and the devices in each level are tried sequentially.
184 * Fencing is considered successful as soon as any level succeeds;
185 * a level is considered successful if all its devices succeed.
186 * Essentially, all devices at a given level are "and-ed" and the
187 * levels are "or-ed".
188 *
189 * This structure is used for the topology table entries.
190 * Topology levels start from 1, so levels[0] is unused and always NULL.
191 */
192 typedef struct stonith_topology_s {
193 enum fenced_target_by kind; // How target was specified
194
195 /*! Node name regex or attribute name=value for which topology applies */
196 char *target;
197 char *target_value;
198 char *target_pattern;
199 char *target_attribute;
200
201 /*! Names of fencing devices at each topology level */
202 GList *levels[ST_LEVEL_MAX];
203
204 } stonith_topology_t;
205
206 void init_device_list(void);
207 void free_device_list(void);
208 void init_topology_list(void);
209 void free_topology_list(void);
210 void free_stonith_remote_op_list(void);
211 void init_stonith_remote_op_hash_table(GHashTable **table);
212 void free_metadata_cache(void);
213 void fenced_unregister_handlers(void);
214
215 uint64_t get_stonith_flag(const char *name);
216
217 void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
218 xmlNode *op_request, const char *remote_peer);
219
220 int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib);
221
222 void stonith_device_remove(const char *id, bool from_cib);
223
224 char *stonith_level_key(xmlNode * msg, int mode);
225 void fenced_register_level(xmlNode *msg, char **desc,
226 pcmk__action_result_t *result);
227 void fenced_unregister_level(xmlNode *msg, char **desc,
228 pcmk__action_result_t *result);
229
230 stonith_topology_t *find_topology_for_host(const char *host);
231
232 void do_local_reply(xmlNode *notify_src, pcmk__client_t *client,
233 int call_options);
234
235 xmlNode *fenced_construct_reply(xmlNode *request, xmlNode *data,
236 pcmk__action_result_t *result);
237
238 void
239 do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
240
241 void fenced_send_notification(const char *type,
242 const pcmk__action_result_t *result,
243 xmlNode *data);
244 void fenced_send_device_notification(const char *op,
245 const pcmk__action_result_t *result,
246 const char *desc);
247 void fenced_send_level_notification(const char *op,
248 const pcmk__action_result_t *result,
249 const char *desc);
250
251 remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client,
252 xmlNode *request,
253 gboolean manual_ack);
254
255 void fenced_process_fencing_reply(xmlNode *msg);
256
257 int process_remote_stonith_query(xmlNode * msg);
258
259 void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
260
261 void stonith_fence_history(xmlNode *msg, xmlNode **output,
262 const char *remote_peer, int options);
263
264 void stonith_fence_history_trim(void);
265
266 bool fencing_peer_active(crm_node_t *peer);
267
268 void set_fencing_completed(remote_fencing_op_t * op);
269
270 int fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg);
271
272 gboolean node_has_attr(const char *node, const char *name, const char *value);
273
274 gboolean node_does_watchdog_fencing(const char *node);
275
276 static inline void
277 fenced_set_protocol_error(pcmk__action_result_t *result)
/* ![[previous]](../icons/n_left.png)
![[next]](../icons/n_right.png)
![[first]](../icons/n_first.png)
![[last]](../icons/n_last.png)
![[top]](../icons/top.png)
![[bottom]](../icons/bottom.png)
![[index]](../icons/index.png)
*/
278 {
279 pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
280 "Fencer API request missing required information (bug?)");
281 }
282
283 extern char *stonith_our_uname;
284 extern gboolean stand_alone;
285 extern GHashTable *device_list;
286 extern GHashTable *topology;
287 extern long stonith_watchdog_timeout_ms;
288 extern GList *stonith_watchdog_targets;
289
290 extern GHashTable *stonith_remote_op_list;