#!/bin/bash

# Option summary printed by --help and after every usage error.
#
# The --interval description lists the formats parse_interval() accepts.
# A bare number is interpreted as SECONDS, not milliseconds, so the text
# must not tell users to pass milliseconds.
USAGE_TEXT="Usage: crm_failcount <command> [<options>]
Common options:
 --help                 Display this text, then exit
 --version              Display version information, then exit
 -V, --verbose          Specify multiple times to increase debug output
 -q, --quiet            Print only the value (if querying)

Commands:
 -G, --query            Query the current value of the resource's fail count
 -D, --delete           Delete resource's recorded failures

Additional Options:
 -r, --resource=value   Name of the resource to use (required)
 -n, --operation=value  Name of operation to use (instead of all operations)
 -I, --interval=value   If operation is specified, its interval; a bare number
                        is seconds, and ms, s, m, h, us suffixes or ISO 8601
                        durations (e.g. PT5M) are also accepted
 -N, --node=value       Use failcount on named node (instead of local node)"


# Full --help output: a one-line description followed by the usage summary.
HELP_TEXT="crm_failcount - Query or delete resource fail counts

$USAGE_TEXT"
| |
| |
# Report an optional error, show the usage summary, and terminate.
#
# Arguments: $@ - optional words of an error message; when present they are
#                 written to stderr as a single "error: ..." line
# Globals:   USAGE_TEXT (read)
# Outputs:   a blank line followed by USAGE_TEXT on stdout
# Exits:     always, with status 1
exit_usage() {
    [ $# -eq 0 ] || printf '%s\n' "error: $*" >&2
    printf '\n%s\n' "$USAGE_TEXT"
    exit 1
}
| |
# Write a "warning: ..." line to stderr.
#
# Arguments: $@ - words of the message, joined with single spaces
warn() {
    printf 'warning: %s\n' "$*" >&2
}
| |
# Print an extended regular expression that matches "<digits><unit>" with
# optional blanks before, between and after the two parts.
#
# Arguments: $1 - alternation of accepted unit spellings, e.g. "ms|msec"
# Outputs:   the regex on stdout; capture group 1 is the digit string and
#            capture group 2 is the unit
interval_re() {
    printf '^[[:blank:]]*([0-9]+)[[:blank:]]*(%s)[[:blank:]]*$\n' "$1"
}
| |
# Convert an interval specification to milliseconds.
#
# This function should follow crm_get_interval() as closely as possible
#
# Accepted forms (blanks are allowed around the number and the unit):
#   <n>, <n>s, <n>sec      seconds (a bare number is seconds)
#   <n>ms, <n>msec         milliseconds
#   <n>m, <n>min           minutes
#   <n>h, <n>hr            hours
#   <n>us, <n>usec         microseconds (integer division, rounds down)
#   P<n>W                  weeks
#   P[nY][nM][nD][T][nH][nM][nS]
#                          ISO 8601-style duration; a year counts as 365
#                          days and a month as 30 days
#
# Arguments: $1 - interval specification
# Outputs:   the interval in milliseconds on stdout; for an unrecognized
#            specification a warning goes to stderr and "0" to stdout
parse_interval() {
    local spec="$1"
    local iso8601_re="^P(([0-9]+)Y)?(([0-9]+)M)?(([0-9]+)D)?T?(([0-9]+)H)?(([0-9]+)M)?(([0-9]+)S)?$"

    # Every captured digit string is prefixed with "10#" inside $(( )).
    # Without it, bash reads a leading zero as octal: "010" would become 8
    # and "08" would abort with an arithmetic error.

    if [[ $spec =~ $(interval_re "s|sec|") ]]; then
        echo $(( 10#${BASH_REMATCH[1]} * 1000 ))

    elif [[ $spec =~ $(interval_re "ms|msec") ]]; then
        echo $(( 10#${BASH_REMATCH[1]} ))

    elif [[ $spec =~ $(interval_re "m|min") ]]; then
        echo $(( 10#${BASH_REMATCH[1]} * 60000 ))

    elif [[ $spec =~ $(interval_re "h|hr") ]]; then
        echo $(( 10#${BASH_REMATCH[1]} * 3600000 ))

    elif [[ $spec =~ $(interval_re "us|usec") ]]; then
        echo $(( 10#${BASH_REMATCH[1]} / 1000 ))

    elif [[ $spec =~ ^P([0-9]+)W$ ]]; then
        echo $(( 10#${BASH_REMATCH[1]} * 604800000 ))

    elif [[ $spec =~ $iso8601_re ]]; then
        # Even-numbered groups hold the digits of each component; a
        # component that is absent leaves its group empty, hence ":-0".
        echo $(( ( 10#${BASH_REMATCH[2]:-0} * 31536000000 ) \
               + ( 10#${BASH_REMATCH[4]:-0} * 2592000000 ) \
               + ( 10#${BASH_REMATCH[6]:-0} * 86400000 ) \
               + ( 10#${BASH_REMATCH[8]:-0} * 3600000 ) \
               + ( 10#${BASH_REMATCH[10]:-0} * 60000 ) \
               + ( 10#${BASH_REMATCH[12]:-0} * 1000 ) ))

    else
        warn "Unrecognized interval, using 0"
        echo "0"
    fi
}
| |
# Query one status-section attribute of a node through crm_attribute.
#
# Arguments: $1 - node name (passed as -N)
#            $2 - attribute name (passed as -n)
# Globals:   VERBOSE (read) - extra verbosity flags, possibly empty
# Outputs:   whatever crm_attribute prints; -d 0 asks it to use 0 as the
#            default value
query_single_attr() {
    local node="$1"
    local attr="$2"

    # VERBOSE is deliberately unquoted: it holds zero or more separate flags.
    crm_attribute $VERBOSE -Q --query -t status -d 0 -N "$node" -n "$attr"
}
| |
# Sum every transient node attribute whose name starts with a prefix.
#
# Arguments: $1 - node name (matched against node_state/@uname)
#            $2 - attribute name prefix
# Outputs:   the sum on stdout, or "INFINITY" if any value is INFINITY or
#            the sum reaches 1000000; "0" when nothing matches
query_attr_sum() {
    local node="$1"
    local prefix="$2"
    local xpath cib_xml value
    local total=0

    # XPath selecting all transient nvpairs of the node with that prefix
    xpath="/cib/status/node_state[@uname='${node}']"
    xpath="${xpath}/transient_attributes/instance_attributes"
    xpath="${xpath}/nvpair[starts-with(@name,'${prefix}')]"

    # @TODO stderr is discarded so that "no results" does not look like an
    # error, but that also makes $VERBOSE pointless.
    #
    # @TODO There is currently no reliable way to distinguish "no results"
    # from actual CIB errors, so the exit status is not checked and any
    # failure is treated as "no results".
    cib_xml=$(cibadmin --query --sync-call --local \
        --xpath="$xpath" 2>/dev/null)

    # sed prints one value (digits or INFINITY) per matching nvpair line;
    # add them up, stopping at the first INFINITY.
    while IFS= read -r value; do
        if [ "$value" = "INFINITY" ]; then
            total="INFINITY"
            break
        fi
        total=$(( total + value ))
    done < <(printf '%s\n' "$cib_xml" | sed -n -e \
        's/.*<nvpair.*value="\([0-9][0-9]*\|INFINITY\)".*>.*/\1/p')

    # A numeric sum of 1000000 or more is reported as INFINITY as well.
    if [ "$total" != "INFINITY" ] && [ "$total" -ge 1000000 ]; then
        total="INFINITY"
    fi
    echo "$total"
}
| |
# Print the fail count of a resource on a node.
#
# Arguments: $1 - node name
#            $2 - resource name
#            $3 - operation name; empty means "sum over all operations"
#            $4 - operation interval, used to build the attribute name
# Globals:   QUIET (read) - when non-empty only the bare value is printed
# Outputs:   "scope=status name=<attr> value=<count>" on stdout
#            (comparable to crm_attribute, for backward compatibility),
#            or just <count> in quiet mode
query_failcount() {
    local node="$1"
    local rsc="$2"
    local op="$3"
    local op_interval="$4"
    local legacy_attr="fail-count-${rsc}"
    local shown_attr="$legacy_attr"
    local count

    if [ -z "$op" ]; then
        # No operation given: add up all per-operation attributes
        count=$(query_attr_sum "$node" "${legacy_attr}#")
    else
        shown_attr="${legacy_attr}#${op}_${op_interval}"
        count=$(query_single_attr "$node" "$shown_attr")
    fi

    # @COMPAT attributes set < 1.1.17:
    # If we didn't find any per-operation failcount,
    # check whether there is a legacy per-resource failcount.
    if [ "$count" = "0" ]; then
        count=$(query_single_attr "$node" "$legacy_attr")
        [ "$count" = "0" ] || shown_attr="$legacy_attr"
    fi

    if [ -z "$QUIET" ]; then
        echo "scope=status name=$shown_attr value=$count"
    else
        # Unquoted: any whitespace inside the value collapses to single spaces
        echo $count
    fi
}
| |
# Clear recorded failures of a resource on a node via "crm_resource --cleanup".
#
# Arguments: $1 - node name
#            $2 - resource name
#            $3 - operation name; empty means all operations
#            $4 - operation interval in milliseconds (only used with $3)
# Globals:   QUIET, VERBOSE (read) - flags forwarded to crm_resource
clear_failcount() {
    local node="$1"
    local rsc="$2"
    local op="$3"
    local interval_ms="$4"
    local -a op_args=()

    # Keep the optional arguments in an array so that each one reaches
    # crm_resource as exactly one word, whatever characters it contains.
    if [ -n "$op" ]; then
        op_args=(-n "$op" -I "${interval_ms}ms")
    fi

    # QUIET and VERBOSE are deliberately unquoted: each is either empty or
    # a list of separate flags.
    crm_resource $QUIET $VERBOSE --cleanup \
        -N "$node" -r "$rsc" "${op_args[@]}"
}
| |
# ---------------------------------------------------------------------------
# Command-line parsing
# ---------------------------------------------------------------------------

# Flags forwarded to the helper commands
QUIET=""      # set to "--quiet" by -q / -Q / --quiet
VERBOSE=""    # grows by one " <flag>" for every -V / --verbose given

# Request state filled in from the options
command=""    # "--query" or "--delete"
resource=""
operation=""
interval="0"
target=$(crm_node -n 2>/dev/null)    # default node: whatever crm_node -n prints

SHORTOPTS="qDGQVN:U:v:i:l:r:n:I:"

LONGOPTS_COMMON="help,version,verbose,quiet"
LONGOPTS_COMMANDS="query,delete"
LONGOPTS_OTHER="resource:,node:,operation:,interval:"
# Option spellings still accepted for backward compatibility
LONGOPTS_COMPAT="delete-attr,get-value,resource-id:,uname:,lifetime:,attr-value:,attr-id:"

LONGOPTS="${LONGOPTS_COMMON},${LONGOPTS_COMMANDS},${LONGOPTS_OTHER},${LONGOPTS_COMPAT}"

# Let getopt normalize the command line; it reports bad options itself.
if ! TEMP=$(getopt -o "$SHORTOPTS" --long "$LONGOPTS" -n crm_failcount -- "$@"); then
    exit_usage
fi
eval set -- "$TEMP" # Quotes around $TEMP are essential

# Consume the normalized options up to the "--" that getopt appends.
while :; do
    case "$1" in
        --)
            shift
            break
            ;;

        # Informational options: act immediately and exit
        --help)
            echo "$HELP_TEXT"
            exit 0
            ;;
        --version)
            crm_attribute --version
            exit
            ;;

        # Flags without an argument
        -q|-Q|--quiet)             QUIET="--quiet";      shift ;;
        -V|--verbose)              VERBOSE="$VERBOSE $1"; shift ;;
        -G|--query|--get-value)    command="--query";    shift ;;
        -D|--delete|--delete-attr) command="--delete";   shift ;;

        # Options that take a value in $2
        -r|--resource|--resource-id) resource="$2";  shift 2 ;;
        -n|--operation)              operation="$2"; shift 2 ;;
        -I|--interval)               interval="$2";  shift 2 ;;
        -N|--node|-U|--uname)        target="$2";    shift 2 ;;

        # Deprecated options: setting the value to 0 means "delete",
        # everything else is ignored with a warning.
        -v|--attr-value)
            case "$2" in
                0) command="--delete" ;;
                *) warn "ignoring deprecated option '$1' with nonzero value" ;;
            esac
            shift 2
            ;;
        -i|--attr-id|-l|--lifetime)
            warn "ignoring deprecated option '$1'"
            shift 2
            ;;

        *)
            exit_usage "unknown option '$1'"
            ;;
    esac
done
| |
# A command, a resource and a node are mandatory; the node normally comes
# from the default assigned to $target unless an option overrode it.
[ -n "$command" ]  || exit_usage "must specify a command"
[ -n "$resource" ] || exit_usage "resource name required"
[ -n "$target" ]   || exit_usage "node name required"

# Normalize the interval to milliseconds. The argument must be quoted:
# parse_interval accepts blanks between number and unit (e.g. "5 min"), and
# an unquoted expansion would split that into two words so only "5" was parsed.
interval=$(parse_interval "$interval")

# Anything other than --query is a delete request.
if [ "$command" = "--query" ]; then
    query_failcount "$target" "$resource" "$operation" "$interval"
else
    clear_failcount "$target" "$resource" "$operation" "$interval"
fi