root/daemons/controld/controld_attrd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. controld_close_attrd_ipc
  2. log_attrd_error
  3. update_attrd_helper
  4. update_attrd
  5. update_attrd_remote_node_removed
  6. update_attrd_clear_failures

   1 /*
   2  * Copyright 2006-2020 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <crm/crm.h>
  13 #include <crm/common/attrd_internal.h>
  14 #include <crm/msg_xml.h>
  15 
  16 #include <pacemaker-controld.h>
  17 
  /* Connection to pacemaker-attrd shared by every function in this file.
   * It is created on first use by update_attrd_helper() and is closed,
   * destroyed and reset to NULL by controld_close_attrd_ipc().
   */
  18 static crm_ipc_t *attrd_ipc = NULL;
  19 
  20 void
  21 controld_close_attrd_ipc()
     /* [previous][next][first][last][top][bottom][index][help] */
  22 {
  23     if (attrd_ipc) {
  24         crm_trace("Closing connection to pacemaker-attrd");
  25         crm_ipc_close(attrd_ipc);
  26         crm_ipc_destroy(attrd_ipc);
  27         attrd_ipc = NULL;
  28     }
  29 }
  30 
  31 static void
  32 log_attrd_error(const char *host, const char *name, const char *value,
     /* [previous][next][first][last][top][bottom][index][help] */
  33                 gboolean is_remote, char command, int rc)
  34 {
  35     const char *node_type = (is_remote? "Pacemaker Remote" : "cluster");
  36     gboolean shutting_down = pcmk_is_set(fsa_input_register, R_SHUTDOWN);
  37     const char *when = (shutting_down? " at shutdown" : "");
  38 
  39     switch (command) {
  40         case 0:
  41             crm_err("Could not clear failure attributes for %s on %s node %s%s: %s "
  42                     CRM_XS " rc=%d", (name? name : "all resources"), node_type,
  43                     host, when, pcmk_rc_str(rc), rc);
  44             break;
  45 
  46         case 'C':
  47             crm_err("Could not purge %s node %s in attribute manager%s: %s "
  48                     CRM_XS " rc=%d",
  49                     node_type, host, when, pcmk_rc_str(rc), rc);
  50             break;
  51 
  52         case 'U':
  53             /* We weren't able to update an attribute after several retries,
  54              * so something is horribly wrong with the attribute manager or the
  55              * underlying system.
  56              */
  57             do_crm_log(AM_I_DC? LOG_CRIT : LOG_ERR,
  58                        "Could not update attribute %s=%s for %s node %s%s: %s "
  59                        CRM_XS " rc=%d", name, value, node_type, host, when,
  60                        pcmk_rc_str(rc), rc);
  61 
  62 
  63             if (AM_I_DC) {
  64                 /* We are unable to provide accurate information to the
  65                  * scheduler, so allow another node to take over DC.
  66                  * @TODO Should we do this unconditionally on any failure?
  67                  */
  68                 crmd_exit(CRM_EX_FATAL);
  69 
  70             } else if (shutting_down) {
  71                 // Fast-track shutdown since unable to request via attribute
  72                 register_fsa_input(C_FSA_INTERNAL, I_FAIL, NULL);
  73             }
  74             break;
  75     }
  76 }
  77 
  78 static void
  79 update_attrd_helper(const char *host, const char *name, const char *value,
     /* [previous][next][first][last][top][bottom][index][help] */
  80                     const char *interval_spec, const char *user_name,
  81                     gboolean is_remote_node, char command)
  82 {
  83     int rc;
  84     int attrd_opts = pcmk__node_attr_none;
  85 
  86     if (is_remote_node) {
  87         pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
  88     }
  89 
  90     if (attrd_ipc == NULL) {
  91         attrd_ipc = crm_ipc_new(T_ATTRD, 0);
  92     }
  93 
  94     for (int attempt = 1; attempt <= 4; ++attempt) {
  95         rc = pcmk_rc_ok;
  96 
  97         // If we're not already connected, try to connect
  98         if (crm_ipc_connected(attrd_ipc) == FALSE) {
  99             if (attempt == 1) {
 100                 // Start with a clean slate
 101                 crm_ipc_close(attrd_ipc);
 102             }
 103             if (crm_ipc_connect(attrd_ipc) == FALSE) {
 104                 rc = errno;
 105             }
 106             crm_debug("Attribute manager connection attempt %d of 4: %s (%d)",
 107                       attempt, pcmk_rc_str(rc), rc);
 108         }
 109 
 110         if (rc == pcmk_rc_ok) {
 111             if (command) {
 112                 rc = pcmk__node_attr_request(attrd_ipc, command, host, name,
 113                                              value, XML_CIB_TAG_STATUS, NULL,
 114                                              NULL, user_name, attrd_opts);
 115             } else {
 116                  /* No command means clear fail count (name/value is really
 117                   * resource/operation)
 118                   */
 119                  rc = pcmk__node_attr_request_clear(attrd_ipc, host, name,
 120                                                     value, interval_spec,
 121                                                     user_name, attrd_opts);
 122             }
 123             crm_debug("Attribute manager request attempt %d of 4: %s (%d)",
 124                       attempt, pcmk_rc_str(rc), rc);
 125         }
 126 
 127         if (rc == pcmk_rc_ok) {
 128             // Success, we're done
 129             break;
 130 
 131         } else if ((rc != EAGAIN) && (rc != EALREADY)) {
 132             /* EAGAIN or EALREADY indicates a temporary block, so just try
 133              * again. Otherwise, close the connection for a clean slate.
 134              */
 135             crm_ipc_close(attrd_ipc);
 136         }
 137 
 138         /* @TODO If the attribute manager remains unavailable the entire time,
 139          * this function takes more than 6 seconds. Maybe set a timer for
 140          * retries, to let the main loop do other work.
 141          */
 142         if (attempt < 4) {
 143             sleep(attempt);
 144         }
 145     }
 146 
 147     if (rc != pcmk_rc_ok) {
 148         log_attrd_error(host, name, value, is_remote_node, command, rc);
 149     }
 150 }
 151 
 152 void
 153 update_attrd(const char *host, const char *name, const char *value,
     /* [previous][next][first][last][top][bottom][index][help] */
 154              const char *user_name, gboolean is_remote_node)
 155 {
 156     update_attrd_helper(host, name, value, NULL, user_name, is_remote_node,
 157                         'U');
 158 }
 159 
 160 void
 161 update_attrd_remote_node_removed(const char *host, const char *user_name)
     /* [previous][next][first][last][top][bottom][index][help] */
 162 {
 163     crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
 164               host);
 165     update_attrd_helper(host, NULL, NULL, NULL, user_name, TRUE, 'C');
 166 }
 167 
 168 void
 169 update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
     /* [previous][next][first][last][top][bottom][index][help] */
 170                             const char *interval_spec, gboolean is_remote_node)
 171 {
 172     const char *op_desc = NULL;
 173     const char *interval_desc = NULL;
 174     const char *node_type = is_remote_node? "Pacemaker Remote" : "cluster";
 175 
 176     if (op) {
 177         interval_desc = interval_spec? interval_spec : "nonrecurring";
 178         op_desc = op;
 179     } else {
 180         interval_desc = "all";
 181         op_desc = "operations";
 182     }
 183     crm_info("Asking pacemaker-attrd to clear failure of %s %s for %s on %s node %s",
 184              interval_desc, op_desc, rsc, node_type, host);
 185     update_attrd_helper(host, rsc, op, interval_spec, NULL, is_remote_node, 0);
 186 }

/* [previous][next][first][last][top][bottom][index][help] */