root/crmd/callbacks.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. crmd_ha_connection_destroy
  2. crmd_ha_msg_filter
  3. peer_update_callback
  4. crmd_cib_connection_destroy
  5. crm_fsa_trigger

   1 /*
   2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This software is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 
  19 #include <crm_internal.h>
  20 
  21 #include <sys/param.h>
  22 #include <crm/crm.h>
  23 #include <string.h>
  24 #include <crmd_fsa.h>
  25 
  26 #include <crm/msg_xml.h>
  27 #include <crm/common/xml.h>
  28 
  29 #include <crm/cluster.h>
  30 #include <crm/cib.h>
  31 
  32 #include <crmd.h>
  33 #include <crmd_messages.h>
  34 #include <crmd_callbacks.h>
  35 #include <crmd_lrm.h>
  36 #include <tengine.h>
  37 #include <membership.h>
  38 
  39 void crmd_ha_connection_destroy(gpointer user_data);
  40 
  41 /* From join_dc... */
  42 extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
  43 
  44 void
  45 crmd_ha_connection_destroy(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  46 {
  47     crm_trace("Invoked");
  48     if (is_set(fsa_input_register, R_HA_DISCONNECTED)) {
  49         /* we signed out, so this is expected */
  50         crm_info("Heartbeat disconnection complete");
  51         return;
  52     }
  53 
  54     crm_crit("Lost connection to heartbeat service!");
  55     register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL);
  56     trigger_fsa(fsa_source);
  57 }
  58 
  59 void
  60 crmd_ha_msg_filter(xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
  61 {
  62     if (AM_I_DC) {
  63         const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
  64 
  65         if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) {
  66             const char *from = crm_element_value(msg, F_ORIG);
  67 
  68             if (safe_str_neq(from, fsa_our_uname)) {
  69                 int level = LOG_INFO;
  70                 const char *op = crm_element_value(msg, F_CRM_TASK);
  71 
  72                 /* make sure the election happens NOW */
  73                 if (fsa_state != S_ELECTION) {
  74                     ha_msg_input_t new_input;
  75 
  76                     level = LOG_WARNING;
  77                     new_input.msg = msg;
  78                     register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
  79                                            __FUNCTION__);
  80                 }
  81 
  82                 do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
  83                 goto done;
  84             }
  85         }
  86 
  87     } else {
  88         const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
  89 
  90         if (safe_str_eq(sys_to, CRM_SYSTEM_DC)) {
  91             return;
  92         }
  93     }
  94 
  95     /* crm_log_xml_trace("HA[inbound]", msg); */
  96     route_message(C_HA_MESSAGE, msg);
  97 
  98   done:
  99     trigger_fsa(fsa_source);
 100 }
 101 
 102 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")
 103 
 104 void
 105 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
     /* [previous][next][first][last][top][bottom][index][help] */
 106 {
 107     uint32_t old = 0;
 108     uint32_t changed = 0;
 109     bool appeared = FALSE;
 110     bool is_remote = is_set(node->flags, crm_remote_node);
 111     const char *status = NULL;
 112 
 113     /* Crmd waits to receive some information from the membership layer before
 114      * declaring itself operational. If this is being called for a cluster node,
 115      * indicate that we have it.
 116      */
 117     if (!is_remote) {
 118         set_bit(fsa_input_register, R_PEER_DATA);
 119     }
 120 
 121     if (node->uname == NULL) {
 122         return;
 123     }
 124 
 125     switch (type) {
 126         case crm_status_uname:
 127             /* If we've never seen the node, then it also won't be in the status section */
 128             crm_info("%s node %s is now %s",
 129                      (is_remote? "Remote" : "Cluster"),
 130                      node->uname, state_text(node->state));
 131             return;
 132 
 133         case crm_status_rstate:
 134         case crm_status_nstate:
 135             /* This callback should not be called unless the state actually
 136              * changed, but here's a failsafe just in case.
 137              */
 138             CRM_CHECK(safe_str_neq(data, node->state), return);
 139 
 140             crm_info("%s node %s is now %s (was %s)",
 141                      (is_remote? "Remote" : "Cluster"),
 142                      node->uname, state_text(node->state), state_text(data));
 143 
 144             if (safe_str_eq(CRM_NODE_MEMBER, node->state)) {
 145                 appeared = TRUE;
 146                 if (!is_remote) {
 147                     remove_stonith_cleanup(node->uname);
 148                 }
 149             }
 150 
 151             crmd_alert_node_event(node);
 152             break;
 153 
 154         case crm_status_processes:
 155             if (data) {
 156                 old = *(const uint32_t *)data;
 157                 changed = node->processes ^ old;
 158             }
 159 
 160             status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS;
 161             crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)",
 162                      node->uname, peer2text(proc_flags), status,
 163                      AM_I_DC ? "true" : crm_str(fsa_our_dc), changed);
 164 
 165             if ((changed & proc_flags) == 0) {
 166                 /* Peer process did not change */
 167                 crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags);
 168                 return;
 169             } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) {
 170                 crm_trace("Not connected");
 171                 return;
 172             } else if (fsa_state == S_STOPPING) {
 173                 crm_trace("Stopping");
 174                 return;
 175             }
 176 
 177             appeared = (node->processes & proc_flags) != 0;
 178             if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) {
 179                 /* Did we get evicted? */
 180                 crm_notice("Our peer connection failed");
 181                 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
 182 
 183             } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) {
 184                 /* Did the DC leave us? */
 185                 crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc);
 186                 register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
 187 
 188                 /* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
 189                  * want to fence it. Newer DCs will send their shutdown request
 190                  * to all peers, who will update the DC's expected state to
 191                  * down, thus avoiding fencing. We can safely erase the DC's
 192                  * transient attributes when it leaves in that case. However,
 193                  * the only way to avoid fencing older DCs is to leave the
 194                  * transient attributes intact until it rejoins.
 195                  */
 196                 if (compare_version(fsa_our_dc_version, "3.0.9") > 0) {
 197                     erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
 198                 }
 199 
 200             } else if(AM_I_DC && appeared == FALSE) {
 201                 crm_info("Peer %s left us", node->uname);
 202                 erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
 203             }
 204             break;
 205     }
 206 
 207     if (AM_I_DC) {
 208         xmlNode *update = NULL;
 209         int flags = node_update_peer;
 210         gboolean alive = is_remote? appeared : crm_is_peer_active(node);
 211         crm_action_t *down = match_down_event(node->uuid, appeared);
 212 
 213         crm_trace("Alive=%d, appeared=%d, down=%d",
 214                   alive, appeared, (down? down->id : -1));
 215 
 216         if (alive && type == crm_status_processes) {
 217             register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
 218         }
 219 
 220         if (down) {
 221             const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
 222 
 223             if (safe_str_eq(task, CRM_OP_FENCE)) {
 224 
 225                 /* tengine_stonith_callback() confirms fence actions */
 226                 crm_trace("Updating CIB %s stonithd reported fencing of %s complete",
 227                           (down->confirmed? "after" : "before"), node->uname);
 228 
 229             } else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
 230                 crm_notice("%s of peer %s is complete "CRM_XS" op=%d",
 231                            task, node->uname, down->id);
 232 
 233                 /* down->confirmed = TRUE; */
 234                 stop_te_timer(down->timer);
 235 
 236                 if (!is_remote) {
 237                     flags |= node_update_join | node_update_expected;
 238                     crmd_peer_down(node, FALSE);
 239                     check_join_state(fsa_state, __FUNCTION__);
 240                 }
 241 
 242                 update_graph(transition_graph, down);
 243                 trigger_graph();
 244 
 245             } else {
 246                 crm_trace("Node %s is %salive, was expected to %s (op %d)",
 247                           node->uname, (alive? "" : "not "), task, down->id);
 248             }
 249 
 250         } else if (appeared == FALSE) {
 251             crm_notice("Stonith/shutdown of %s not matched", node->uname);
 252 
 253             if (!is_remote) {
 254                 crm_update_peer_join(__FUNCTION__, node, crm_join_none);
 255                 check_join_state(fsa_state, __FUNCTION__);
 256             }
 257 
 258             abort_transition(INFINITY, tg_restart, "Node failure", NULL);
 259             fail_incompletable_actions(transition_graph, node->uuid);
 260 
 261         } else {
 262             crm_trace("Node %s came up, was not expected to be down",
 263                       node->uname);
 264         }
 265 
 266         if (is_remote) {
 267             /* A pacemaker_remote node won't have its cluster status updated
 268              * in the CIB by membership-layer callbacks, so do it here.
 269              */
 270             flags |= node_update_cluster;
 271 
 272             /* Trigger resource placement on newly integrated nodes */
 273             if (appeared) {
 274                 abort_transition(INFINITY, tg_restart,
 275                                  "pacemaker_remote node integrated", NULL);
 276             }
 277         }
 278 
 279         /* Update the CIB node state */
 280         update = create_node_state_update(node, flags, NULL, __FUNCTION__);
 281         fsa_cib_anon_update(XML_CIB_TAG_STATUS, update,
 282                             cib_scope_local | cib_quorum_override | cib_can_create);
 283         free_xml(update);
 284     }
 285 
 286     trigger_fsa(fsa_source);
 287 }
 288 
 289 void
 290 crmd_cib_connection_destroy(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292     CRM_CHECK(user_data == fsa_cib_conn,;);
 293 
 294     crm_trace("Invoked");
 295     trigger_fsa(fsa_source);
 296     fsa_cib_conn->state = cib_disconnected;
 297 
 298     if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) {
 299         crm_info("Connection to the CIB terminated...");
 300         return;
 301     }
 302 
 303     /* eventually this will trigger a reconnect, not a shutdown */
 304     crm_err("Connection to the CIB terminated...");
 305     register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
 306     clear_bit(fsa_input_register, R_CIB_CONNECTED);
 307 
 308     return;
 309 }
 310 
 311 gboolean
 312 crm_fsa_trigger(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 313 {
 314     crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue));
 315     s_crmd_fsa(C_FSA_INTERNAL);
 316     crm_trace("Exited  (queue len: %d)", g_list_length(fsa_message_queue));
 317     return TRUE;
 318 }

/* [previous][next][first][last][top][bottom][index][help] */