1 #include <crm/common/mainloop.h>
2
3 /*!
4 * \internal
5 * \brief Check to see if target was fenced in the last few seconds.
6 * \param tolerance, The number of seconds to look back in time
7 * \param target, The node to search for
8 * \param action, The action we want to match.
9 *
10 * \retval FALSE, not match
11 * \retval TRUE, fencing operation took place in the last 'tolerance' number of seconds.
12 */
13 gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);
14
/*!
 * Flag values for a fencing device (see the "flags" member of
 * stonith_device_t). Every enumerator occupies its own bit, so the values
 * can be combined with bitwise OR.
 *
 * NOTE(review): judging by the names, each flag records that the device
 * supports the corresponding action (list / status / reboot) -- confirm
 * against the code that sets them.
 */
enum st_device_flags {
    st_device_supports_list   = 1 << 0,     /* 0x0001 */
    st_device_supports_status = 1 << 1,     /* 0x0002 */
    st_device_supports_reboot = 1 << 2,     /* 0x0004 */
};
21
22 typedef struct stonith_device_s {
23 char *id;
24 char *agent;
25 char *namespace;
26
27 /*! list of actions that must execute on the target node. Used for unfencing */
28 char *on_target_actions;
29 GListPtr targets;
30 time_t targets_age;
31 gboolean has_attr_map;
32 /* should nodeid parameter for victim be included in agent arguments */
33 gboolean include_nodeid;
34 /* whether the cluster should automatically unfence nodes with the device */
35 gboolean automatic_unfencing;
36 guint priority;
37
38 enum st_device_flags flags;
39
40 GHashTable *params;
41 GHashTable *aliases;
42 GList *pending_ops;
43 crm_trigger_t *work;
44 xmlNode *agent_metadata;
45
46 /*! A verified device is one that has contacted the
47 * agent successfully to perform a monitor operation */
48 gboolean verified;
49
50 gboolean cib_registered;
51 gboolean api_registered;
52 } stonith_device_t;
53
/*!
 * Phase of a fencing operation; the values serve as indexes into certain
 * per-phase arrays.
 *
 * Most operations consist of a single phase, and for them the phase stays
 * at zero. A reboot, however, may be remapped to "off" followed by "on":
 * in that case "reboot" is phase 0, "off" is phase 1 and "on" is phase 2.
 */
enum st_remap_phase {
    st_phase_requested = 0,     /* the action as requested */
    st_phase_off       = 1,     /* "off" half of a remapped reboot */
    st_phase_on        = 2,     /* "on" half of a remapped reboot */
    st_phase_max       = 3      /* one past the last phase; presumably the
                                 * size of the phase-indexed arrays */
};
65
66 typedef struct remote_fencing_op_s {
67 /* The unique id associated with this operation */
68 char *id;
69 /*! The node this operation will fence */
70 char *target;
71 /*! The fencing action to perform on the target. (reboot, on, off) */
72 char *action;
73
74 /*! When was the fencing action recorded (seconds since epoch) */
75 time_t created;
76
77 /*! Marks if the final notifications have been sent to local stonith clients. */
78 gboolean notify_sent;
79 /*! The number of query replies received */
80 guint replies;
81 /*! The number of query replies expected */
82 guint replies_expected;
83 /*! Does this node own control of this operation */
84 gboolean owner;
85 /*! After query is complete, This the high level timer that expires the entire operation */
86 guint op_timer_total;
87 /*! This timer expires the current fencing request. Many fencing
88 * requests may exist in a single operation */
89 guint op_timer_one;
90 /*! This timer expires the query request sent out to determine
91 * what nodes are contain what devices, and who those devices can fence */
92 guint query_timer;
93 /*! This is the default timeout to use for each fencing device if no
94 * custom timeout is received in the query. */
95 gint base_timeout;
96 /*! This is the calculated total timeout an operation can take before
97 * expiring. This is calculated by adding together all the timeout
98 * values associated with the devices this fencing operation may call */
99 gint total_timeout;
100
101 /*! Delegate is the node being asked to perform a fencing action
102 * on behalf of the node that owns the remote operation. Some operations
103 * will involve multiple delegates. This value represents the final delegate
104 * that is used. */
105 char *delegate;
106 /*! The point at which the remote operation completed */
107 time_t completed;
108 /*! The stonith_call_options associated with this remote operation */
109 long long call_options;
110
111 /*! The current state of the remote operation. This indicates
112 * what stage the op is in, query, exec, done, duplicate, failed. */
113 enum op_state state;
114 /*! The node that owns the remote operation */
115 char *originator;
116 /*! The local client id that initiated the fencing request */
117 char *client_id;
118 /*! The client's call_id that initiated the fencing request */
119 int client_callid;
120 /*! The name of client that initiated the fencing request */
121 char *client_name;
122 /*! List of the received query results for all the nodes in the cpg group */
123 GListPtr query_results;
124 /*! The original request that initiated the remote stonith operation */
125 xmlNode *request;
126
127 /*! The current topology level being executed */
128 guint level;
129 /*! The current operation phase being executed */
130 enum st_remap_phase phase;
131
132 /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
133 GListPtr automatic_list;
134 /*! List of all devices at the currently executing topology level */
135 GListPtr devices_list;
136 /*! Current entry in the topology device list */
137 GListPtr devices;
138
139 /*! List of duplicate operations attached to this operation. Once this operation
140 * completes, the duplicate operations will be closed out as well. */
141 GListPtr duplicates;
142
143 } remote_fencing_op_t;
144
145 /*
146 * Complex fencing requirements are specified via fencing topologies.
147 * A topology consists of levels; each level is a list of fencing devices.
148 * Topologies are stored in a hash table by node name. When a node needs to be
149 * fenced, if it has an entry in the topology table, the levels are tried
150 * sequentially, and the devices in each level are tried sequentially.
151 * Fencing is considered successful as soon as any level succeeds;
152 * a level is considered successful if all its devices succeed.
153 * Essentially, all devices at a given level are "and-ed" and the
154 * levels are "or-ed".
155 *
156 * This structure is used for the topology table entries.
157 * Topology levels start from 1, so levels[0] is unused and always NULL.
158 */
159 typedef struct stonith_topology_s {
160 int kind;
161
162 /*! Node name regex or attribute name=value for which topology applies */
163 char *target;
164 char *target_value;
165 char *target_pattern;
166 char *target_attribute;
167
168 /*! Names of fencing devices at each topology level */
169 GListPtr levels[ST_LEVEL_MAX];
170
171 } stonith_topology_t;
172
173 long long get_stonith_flag(const char *name);
174
175 void stonith_command(crm_client_t * client, uint32_t id, uint32_t flags,
176 xmlNode * op_request, const char *remote_peer);
177
178 int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib);
179
180 int stonith_device_remove(const char *id, gboolean from_cib);
181
182 char *stonith_level_key(xmlNode * msg, int mode);
183 int stonith_level_kind(xmlNode * msg);
184 int stonith_level_register(xmlNode * msg, char **desc);
185
186 int stonith_level_remove(xmlNode * msg, char **desc);
187
188 stonith_topology_t *find_topology_for_host(const char *host);
189
190 void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply,
191 gboolean from_peer);
192
193 xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data,
194 int rc);
195
196 void
197 do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);
198
199 void do_stonith_notify(int options, const char *type, int result, xmlNode * data);
200 void do_stonith_notify_device(int options, const char *op, int rc, const char *desc);
201 void do_stonith_notify_level(int options, const char *op, int rc, const char *desc);
202
203 remote_fencing_op_t *initiate_remote_stonith_op(crm_client_t * client, xmlNode * request,
204 gboolean manual_ack);
205
206 int process_remote_stonith_exec(xmlNode * msg);
207
208 int process_remote_stonith_query(xmlNode * msg);
209
210 void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);
211
212 int stonith_fence_history(xmlNode * msg, xmlNode ** output);
213
214 void free_device(gpointer data);
215
216 void free_topology_entry(gpointer data);
217
218 bool fencing_peer_active(crm_node_t *peer);
219
220 int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op);
221
222 void unfence_cb(GPid pid, int rc, const char *output, gpointer user_data);
223
224 gboolean string_in_list(GListPtr list, const char *item);
225
226 gboolean node_has_attr(const char *node, const char *name, const char *value);
227
228 void
229 schedule_internal_command(const char *origin,
230 stonith_device_t * device,
231 const char *action,
232 const char *victim,
233 int timeout,
234 void *internal_user_data,
235 void (*done_cb) (GPid pid, int rc, const char *output,
236 gpointer user_data));
237
/* NOTE(review): presumably looks up the name of the peer with the given
 * node id; whether the caller owns the returned string is not visible
 * from this header -- confirm.
 */
char *stonith_get_peer_name(unsigned int nodeid);
239
240 extern char *stonith_our_uname;
241 extern gboolean stand_alone;
242 extern GHashTable *device_list;
243 extern GHashTable *topology;
244 extern long stonith_watchdog_timeout_ms;
245
246 extern GHashTable *known_peer_names;