root/crmd/join_client.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. update_dc_expected
  2. do_cl_join_query
  3. do_cl_join_announce
  4. do_cl_join_offer_respond
  5. join_query_callback
  6. set_join_state
  7. do_cl_join_finalize_respond

   1 /*
   2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This software is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 #include <crm_internal.h>
  19 
  20 #include <crm/crm.h>
  21 #include <crm/cib.h>
  22 #include <crm/msg_xml.h>
  23 #include <crm/common/xml.h>
  24 
  25 #include <crmd_fsa.h>
  26 #include <crmd_messages.h>
  27 
   28 int reannounce_count = 0;   /* starts at 0; not referenced in the code shown here -- NOTE(review): presumably counts join re-announcements, confirm against other users */
   29 void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);   /* forward declaration: do_cl_join_offer_respond() registers this callback before its definition appears */
   30 
   31 extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);   /* declared extern here; not called in the code shown here */
  32 
  33 /*!
  34  * \internal
  35  * \brief Remember if DC is shutting down as we join
  36  *
  37  * If we're joining while the current DC is shutting down, update its expected
  38  * state, so we don't fence it if we become the new DC. (We weren't a peer
  39  * when it broadcast its shutdown request.)
  40  *
  41  * \param[in] msg  A join message from the DC
  42  */
  43 static void
  44 update_dc_expected(xmlNode *msg)
     /* [previous][next][first][last][top][bottom][index][help] */
  45 {
  46     if (fsa_our_dc && crm_is_true(crm_element_value(msg, F_CRM_DC_LEAVING))) {
  47         crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc);
  48 
  49         crm_update_peer_expected(__FUNCTION__, dc_node, CRMD_JOINSTATE_DOWN);
  50     }
  51 }
  52 
  53 /*      A_CL_JOIN_QUERY         */
  54 /* is there a DC out there? */
  55 void
  56 do_cl_join_query(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
  57                  enum crmd_fsa_cause cause,
  58                  enum crmd_fsa_state cur_state,
  59                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
  60 {
  61     xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
  62                                   CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
  63 
  64     sleep(1);                   /* give the CCM time to propogate to the DC */
  65     update_dc(NULL);            /* Unset any existing value so that the result is not discarded */
  66     crm_debug("Querying for a DC");
  67     send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
  68     free_xml(req);
  69 }
  70 
  71 /*       A_CL_JOIN_ANNOUNCE     */
  72 
  73 /* this is kind of a workaround for the fact that we may not be around or
  74  * are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL
  75  */
  76 void
  77 do_cl_join_announce(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
  78                     enum crmd_fsa_cause cause,
  79                     enum crmd_fsa_state cur_state,
  80                     enum crmd_fsa_input current_input, fsa_data_t * msg_data)
  81 {
  82     /* Once we hear from the DC, we can stop the timer
  83      *
  84      * This timer was started either on startup or when a node
  85      * left the CCM list
  86      */
  87 
  88     /* don't announce if we're in one of these states */
  89     if (cur_state != S_PENDING) {
  90         crm_warn("Not announcing cluster join because in state %s",
  91                  fsa_state2string(cur_state));
  92         return;
  93     }
  94 
  95     if (AM_I_OPERATIONAL) {
  96         /* send as a broadcast */
  97         xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
  98                                       CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
  99 
 100         crm_debug("Announcing availability");
 101         update_dc(NULL);
 102         send_cluster_message(NULL, crm_msg_crmd, req, FALSE);
 103         free_xml(req);
 104 
 105     } else {
 106         /* Delay announce until we have finished local startup */
 107         crm_warn("Delaying announce of cluster join until local startup is complete");
 108         return;
 109     }
 110 }
 111 
 112 static int query_call_id = 0;
 113 
 114 /*       A_CL_JOIN_REQUEST      */
 115 /* aka. accept the welcome offer */
 116 void
 117 do_cl_join_offer_respond(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 118                          enum crmd_fsa_cause cause,
 119                          enum crmd_fsa_state cur_state,
 120                          enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 121 {
 122     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
 123     const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
 124     const char *join_id = crm_element_value(input->msg, F_CRM_JOIN_ID);
 125 
 126 #if 0
 127     if (we are sick) {
 128         log error;
 129 
 130         /* save the request for later? */
 131         return;
 132     }
 133 #endif
 134 
 135     crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s",
 136               welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID));
 137 
 138     /* we only ever want the last one */
 139     if (query_call_id > 0) {
 140         crm_trace("Cancelling previous join query: %d", query_call_id);
 141         remove_cib_op_callback(query_call_id, FALSE);
 142         query_call_id = 0;
 143     }
 144 
 145     if (update_dc(input->msg) == FALSE) {
 146         crm_warn("Discarding cluster join offer from node %s (expected %s)",
 147                  welcome_from, fsa_our_dc);
 148         return;
 149     }
 150 
 151     update_dc_expected(input->msg);
 152 
 153     CRM_LOG_ASSERT(input != NULL);
 154     query_call_id =
 155         fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children);
 156     fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback);
 157     crm_trace("Registered join query callback: %d", query_call_id);
 158 
 159     register_fsa_action(A_DC_TIMER_STOP);
 160 }
 161 
 162 void
 163 join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 164 {
 165     char *join_id = user_data;
 166     xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE);
 167 
 168     CRM_LOG_ASSERT(join_id != NULL);
 169 
 170     if (query_call_id != call_id) {
 171         crm_trace("Query %d superseded", call_id);
 172         goto done;
 173     }
 174 
 175     query_call_id = 0;
 176     if(rc != pcmk_ok || output == NULL) {
 177         crm_err("Could not retrieve version details for join-%s: %s (%d)",
 178                 join_id, pcmk_strerror(rc), rc);
 179         register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__);
 180 
 181     } else if (fsa_our_dc == NULL) {
 182         crm_debug("Membership is in flux, not continuing join-%s", join_id);
 183 
 184     } else {
 185         xmlNode *reply = NULL;
 186 
 187         crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc);
 188         copy_in_properties(generation, output);
 189 
 190         reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc,
 191                                CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
 192 
 193         crm_xml_add(reply, F_CRM_JOIN_ID, join_id);
 194         send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
 195         free_xml(reply);
 196     }
 197 
 198   done:
 199     free_xml(generation);
 200 }
 201 
 202 static void
 203 set_join_state(const char * start_state)
     /* [previous][next][first][last][top][bottom][index][help] */
 204 {
 205     if (safe_str_eq(start_state, "standby")) {
 206         crm_notice("Forcing node %s to join in %s state per configured environment",
 207                    fsa_our_uname, start_state);
 208         update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
 209                              NULL, NULL, NULL, "standby", "on", TRUE, NULL, NULL);
 210 
 211     } else if (safe_str_eq(start_state, "online")) {
 212         crm_notice("Forcing node %s to join in %s state per configured environment",
 213                    fsa_our_uname, start_state);
 214         update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid,
 215                              NULL, NULL, NULL, "standby", "off", TRUE, NULL, NULL);
 216 
 217     } else if (safe_str_eq(start_state, "default")) {
 218         crm_debug("Not forcing a starting state on node %s", fsa_our_uname);
 219 
 220     } else {
 221         crm_warn("Unrecognized start state '%s', using 'default' (%s)",
 222                  start_state, fsa_our_uname);
 223     }
 224 }
 225 
 226 /*      A_CL_JOIN_RESULT        */
 227 /* aka. this is notification that we have (or have not) been accepted */
 228 void
 229 do_cl_join_finalize_respond(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 230                             enum crmd_fsa_cause cause,
 231                             enum crmd_fsa_state cur_state,
 232                             enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 233 {
 234     xmlNode *tmp1 = NULL;
 235     gboolean was_nack = TRUE;
 236     static gboolean first_join = TRUE;
 237     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
 238     const char *start_state = daemon_option("node_start_state");
 239 
 240     int join_id = -1;
 241     const char *op = crm_element_value(input->msg, F_CRM_TASK);
 242     const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK);
 243     const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM);
 244 
 245     if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) {
 246         crm_trace("Ignoring op=%s message", op);
 247         return;
 248     }
 249 
 250     /* calculate if it was an ack or a nack */
 251     if (crm_is_true(ack_nack)) {
 252         was_nack = FALSE;
 253     }
 254 
 255     crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id);
 256 
 257     if (was_nack) {
 258         crm_err("Shutting down because cluster join with leader %s failed "
 259                 CRM_XS" join-%d NACK'd", welcome_from, join_id);
 260         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 261         return;
 262     }
 263 
 264     if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) {
 265         crm_warn("Discarding our own welcome - we're no longer the DC");
 266         return;
 267     }
 268 
 269     if (update_dc(input->msg) == FALSE) {
 270         crm_warn("Discarding %s from node %s (expected from %s)",
 271                  op, welcome_from, fsa_our_dc);
 272         return;
 273     }
 274 
 275     update_dc_expected(input->msg);
 276 
 277     /* send our status section to the DC */
 278     tmp1 = do_lrm_query(TRUE, fsa_our_uname);
 279     if (tmp1 != NULL) {
 280         xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc,
 281                                         CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
 282 
 283         crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id);
 284 
 285         crm_debug("Confirming join-%d: sending local operation history to %s",
 286                   join_id, fsa_our_dc);
 287 
 288         /*
 289          * If this is the node's first join since the crmd started on it,
 290          * set its initial state (standby or member) according to the user's
 291          * preference.
 292          *
 293          * We do not clear the LRM history here. Even if the DC failed to do it
 294          * when we last left, removing them here creates a race condition if the
 295          * crmd is being recovered. Instead of a list of active resources from
 296          * the lrmd, we may end up with a blank status section. If we are _NOT_
 297          * lucky, we will probe for the "wrong" instance of anonymous clones and
 298          * end up with multiple active instances on the machine.
 299          */
 300         if (first_join && is_not_set(fsa_input_register, R_SHUTDOWN)) {
 301             first_join = FALSE;
 302 #if !HAVE_ATOMIC_ATTRD
 303             /* c9d1c3cd made this unnecessary for atomic attrd.
 304              * This means that the issue addressed by that commit is still
 305              * present for legacy attrd, but given legacy attrd's imminent
 306              * demise, this is preferable to making intrusive changes to it.
 307              */
 308             erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0);
 309             update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE);
 310             update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE);
 311 #endif
 312             if (start_state) {
 313                 set_join_state(start_state);
 314             }
 315         }
 316 
 317         send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE);
 318         free_xml(reply);
 319 
 320         if (AM_I_DC == FALSE) {
 321             register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__);
 322 #if !HAVE_ATOMIC_ATTRD
 323             /* Ask attrd to write all attributes to disk. This is not needed for
 324              * atomic attrd because atomic attrd does a peer sync and write-out
 325              * when winning an election.
 326              */
 327             update_attrd(NULL, NULL, NULL, NULL, FALSE);
 328 #endif
 329         }
 330 
 331         free_xml(tmp1);
 332 
 333     } else {
 334         crm_err("Could not confirm join-%d with %s: Local operation history failed",
 335                 join_id, fsa_our_dc);
 336         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 337     }
 338 }

/* [previous][next][first][last][top][bottom][index][help] */