blob: 9c865607b3fbe30dfb2b1631c6747d41a90e3fd4 [file] [log] [blame]
#!/bin/bash
USAGE_TEXT="Usage: crm_failcount <command> [<options>]
Common options:
--help Display this text, then exit
--version Display version information, then exit
-V, --verbose Specify multiple times to increase debug output
-q, --quiet Print only the value (if querying)
Commands:
-G, --query Query the current value of the resource's fail count
-D, --delete Delete resource's recorded failures
Additional Options:
-r, --resource=value Name of the resource to use (required)
-n, --operation=value Name of operation to use (instead of all operations)
-I, --interval=value If operation is specified, its interval (MUST be in milliseconds)
-N, --node=value Use failcount on named node (instead of local node)"
HELP_TEXT="crm_failcount - Query or delete resource fail counts
$USAGE_TEXT"
exit_usage() {
if [ $# -gt 0 ]; then
echo "error: $@" >&2
fi
echo
echo "$USAGE_TEXT"
exit 1
}
warn() {
echo "warning: $@" >&2
}
interval_re() {
echo "^[[:blank:]]*([0-9]+)[[:blank:]]*(${1})[[:blank:]]*$"
}
# This function should follow crm_get_interval() as closely as possible
parse_interval() {
INT_S="$1"
INT_8601RE="^P(([0-9]+)Y)?(([0-9]+)M)?(([0-9]+)D)?T?(([0-9]+)H)?(([0-9]+)M)?(([0-9]+)S)?$"
if [[ $INT_S =~ $(interval_re "s|sec|") ]]; then
echo $(( ${BASH_REMATCH[1]} * 1000 ))
elif [[ $INT_S =~ $(interval_re "ms|msec") ]]; then
echo "${BASH_REMATCH[1]}"
elif [[ $INT_S =~ $(interval_re "m|min") ]]; then
echo $(( ${BASH_REMATCH[1]} * 60000 ))
elif [[ $INT_S =~ $(interval_re "h|hr") ]]; then
echo $(( ${BASH_REMATCH[1]} * 3600000 ))
elif [[ $INT_S =~ $(interval_re "us|usec") ]]; then
echo $(( ${BASH_REMATCH[1]} / 1000 ))
elif [[ $INT_S =~ ^P([0-9]+)W$ ]]; then
echo $(( ${BASH_REMATCH[1]} * 604800000 ))
elif [[ $INT_S =~ $INT_8601RE ]]; then
echo $(( ( ${BASH_REMATCH[2]:-0} * 31536000000 ) \
+ ( ${BASH_REMATCH[4]:-0} * 2592000000 ) \
+ ( ${BASH_REMATCH[6]:-0} * 86400000 ) \
+ ( ${BASH_REMATCH[8]:-0} * 3600000 ) \
+ ( ${BASH_REMATCH[10]:-0} * 60000 ) \
+ ( ${BASH_REMATCH[12]:-0} * 1000 ) ))
else
warn "Unrecognized interval, using 0"
echo "0"
fi
}
query_single_attr() {
QSR_TARGET="$1"
QSR_ATTR="$2"
crm_attribute $VERBOSE -Q --query -t status -d 0 \
-N "$QSR_TARGET" -n "$QSR_ATTR"
}
query_attr_sum() {
QAS_TARGET="$1"
QAS_PREFIX="$2"
# Build xpath to match all transient node attributes with prefix
QAS_XPATH="/cib/status/node_state[@uname='${QAS_TARGET}']"
QAS_XPATH="${QAS_XPATH}/transient_attributes/instance_attributes"
QAS_XPATH="${QAS_XPATH}/nvpair[starts-with(@name,'$QAS_PREFIX')]"
# Query attributes that match xpath
# @TODO We ignore stderr because we don't want "no results" to look
# like an error, but that also makes $VERBOSE pointless.
QAS_ALL=$(cibadmin --query --sync-call --local \
--xpath="$QAS_XPATH" 2>/dev/null)
# @TODO There is currently no reliable way to distinguish "no results"
# from actual CIB errors. For now, treat any error as "no results".
#
#if [ $? -ne 0 ]; then
# echo error >&2
# return
#fi
# Extract the attribute values (one per line) from the output
QAS_VALUE=$(echo "$QAS_ALL" | sed -n -e \
's/.*<nvpair.*value="\([0-9][0-9]*\|INFINITY\)".*>.*/\1/p')
# Sum the values
QAS_SUM=0
for i in 0 $QAS_VALUE; do
if [ "$i" = "INFINITY" ]; then
QAS_SUM="INFINITY"
break
else
QAS_SUM=$(($QAS_SUM + $i))
fi
done
if [ "$QAS_SUM" = "INFINITY" ]; then
echo $QAS_SUM
elif [ "$QAS_SUM" -ge 1000000 ]; then
echo "INFINITY"
else
echo $QAS_SUM
fi
}
query_failcount() {
QF_TARGET="$1"
QF_RESOURCE="$2"
QF_OPERATION="$3"
QF_INTERVAL="$4"
QF_ATTR_RSC="fail-count-${QF_RESOURCE}"
if [ -n "$QF_OPERATION" ]; then
QF_ATTR_DISPLAY="${QF_ATTR_RSC}#${QF_OPERATION}_${QF_INTERVAL}"
QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_DISPLAY")
else
QF_ATTR_DISPLAY="$QF_ATTR_RSC"
QF_COUNT=$(query_attr_sum "$QF_TARGET" "${QF_ATTR_RSC}#")
fi
# @COMPAT attributes set < 1.1.17:
# If we didn't find any per-operation failcount,
# check whether there is a legacy per-resource failcount.
if [ "$QF_COUNT" = "0" ]; then
QF_COUNT=$(query_single_attr "$QF_TARGET" "$QF_ATTR_RSC")
if [ "$QF_COUNT" != "0" ]; then
QF_ATTR_DISPLAY="$QF_ATTR_RSC"
fi
fi
# Echo result (comparable to crm_attribute, for backward compatibility)
if [ -n "$QUIET" ]; then
echo $QF_COUNT
else
echo "scope=status name=$QF_ATTR_DISPLAY value=$QF_COUNT"
fi
}
clear_failcount() {
CF_TARGET="$1"
CF_RESOURCE="$2"
CF_OPERATION="$3"
CF_INTERVAL="$4"
if [ -n "$CF_OPERATION" ]; then
CF_OPERATION="-n $CF_OPERATION -I ${CF_INTERVAL}ms"
fi
crm_resource $QUIET $VERBOSE --cleanup \
-N "$CF_TARGET" -r "$CF_RESOURCE" $CF_OPERATION
}
QUIET=""
VERBOSE=""
command=""
resource=""
operation=""
interval="0"
target=$(crm_node -n 2>/dev/null)
SHORTOPTS="qDGQVN:U:v:i:l:r:n:I:"
LONGOPTS_COMMON="help,version,verbose,quiet"
LONGOPTS_COMMANDS="query,delete"
LONGOPTS_OTHER="resource:,node:,operation:,interval:"
LONGOPTS_COMPAT="delete-attr,get-value,resource-id:,uname:,lifetime:,attr-value:,attr-id:"
LONGOPTS="$LONGOPTS_COMMON,$LONGOPTS_COMMANDS,$LONGOPTS_OTHER,$LONGOPTS_COMPAT"
TEMP=$(getopt -o $SHORTOPTS --long $LONGOPTS -n crm_failcount -- "$@")
if [ $? -ne 0 ]; then
exit_usage
fi
eval set -- "$TEMP" # Quotes around $TEMP are essential
while true ; do
case "$1" in
--help)
echo "$HELP_TEXT"
exit 0
;;
--version)
crm_attribute --version
exit $?
;;
-q|-Q|--quiet)
QUIET="--quiet"
shift
;;
-V|--verbose)
VERBOSE="$VERBOSE $1"
shift
;;
-G|--query|--get-value)
command="--query"
shift
;;
-D|--delete|--delete-attr)
command="--delete"
shift
;;
-r|--resource|--resource-id)
resource="$2"
shift 2
;;
-n|--operation)
operation="$2"
shift 2
;;
-I|--interval)
interval="$2"
shift 2
;;
-N|--node|-U|--uname)
target="$2"
shift 2
;;
-v|--attr-value)
if [ "$2" = "0" ]; then
command="--delete"
else
warn "ignoring deprecated option '$1' with nonzero value"
fi
shift 2
;;
-i|--attr-id|-l|--lifetime)
warn "ignoring deprecated option '$1'"
shift 2
;;
--)
shift
break
;;
*)
exit_usage "unknown option '$1'"
;;
esac
done
[ -n "$command" ] || exit_usage "must specify a command"
[ -n "$resource" ] || exit_usage "resource name required"
[ -n "$target" ] || exit_usage "node name required"
interval=$(parse_interval $interval)
if [ "$command" = "--query" ]; then
query_failcount "$target" "$resource" "$operation" "$interval"
else
clear_failcount "$target" "$resource" "$operation" "$interval"
fi