blob: aa06308d81fffa618b82eba4d7b22aa2309c9cc6 [file] [log] [blame] [edit]
#!/bin/bash
# This command counts the approximate frequency of each distinct reason for
# warning suppressions, in all files under the current directory.
# To invoke it, pass a type system name; for example:
# count-suppressions nullness
# If warning suppression text contains a colon, this script prints only
# the text before the colon, under the assumption that the initial text
# is a category name.
# This script is useful to determine the most frequent reasons for warning
# suppressions, to help checker developers decide what features to add to
# their type systems. However, use common.util.count.AnnotationStatistics
# to count the total number of warning suppressions (for example, to report
# in a paper), because this script gives only an approximate count.
# To debug, pass --debug as the first command-line argument.
# Debug output is off unless a later flag check turns it on.
debug=0
# Choose the sed/grep command names by platform: on macOS (darwin) the
# g-prefixed commands are used (presumably GNU sed/grep installed next to the
# system ones); everywhere else the plain names are used.
case "$OSTYPE" in
  darwin*)
    SED="gsed"
    GREP="ggrep"
    ;;
  *)
    SED="sed"
    GREP="grep"
    ;;
esac
# An optional leading --debug flag enables debug output; it is consumed here
# so that only the type system name remains in the positional parameters.
case "$1" in
  --debug)
    debug=1
    shift
    ;;
esac
# Exactly one positional argument (the type system name) must be left;
# otherwise print the usage line to stderr and fail.
[ "$#" -eq 1 ] || {
  echo "Usage: $0 TYPESYSTEM" >&2
  exit 1
}
# The argument is used as a fragment of the grep pattern built further down.
regex="$1"
# A compound checker name is replaced by an alternation that matches each of
# the checkers it comprises; any other name is used unchanged.
case "$regex" in
  nullness)
    regex="\(nullness\|initialization\|keyfor\)"
    ;;
  index)
    regex="\(index\|lessthan\|lowerbound\|samelen\|searchindex\|substringindex\|upperbound\)"
    ;;
esac
# Diagnostics
# echo "regex=${regex}"
# Scratch files: ${greplines} holds the raw matching lines and
# ${countedreasons} holds the per-reason tallies. Stop if either cannot be
# created, rather than continuing with an empty file name.
greplines=$(mktemp /tmp/count-suppressions."$(date +%Y%m%d-%H%M%S)"-XXX) || exit 1
countedreasons=$(mktemp /tmp/count-suppressions."$(date +%Y%m%d-%H%M%S)"-XXX) || {
  rm -f -- "${greplines}"
  exit 1
}
# Remove the scratch files when the script exits, so that an early exit does
# not leave them behind. In debug mode they are kept for inspection.
trap 'if [ "${debug:-0}" -eq 0 ]; then rm -f -- "${greplines}" "${countedreasons}"; fi' EXIT
# These are the two types of matching lines:
# * "checkername" or "checkername:..."
# This matches occurrences within @SuppressWarnings. The regex does not
# include "@SuppressWarnings" because it might appear on the previous line.
# * @AssumeAssertion(checkername)
# This grep command captures a few stray lines; users should ignore them.
# This grep command assumes that tests are not annotated, and it hard-codes ignoring "annotated-jdk", "jdk", and "true positive".
# Search every .java file under the current directory for candidate lines
# (each prefixed with its file name and line number), drop the lines that
# match any of the hard-coded exclusions, and save the rest in ${greplines}.
${GREP} -n --recursive --include='*.java' \
  "\"${regex}[:\"]\(.*[^;]\)\?\(\$\|//\)\|@AssumeAssertion(${regex})" \
  | grep -v \
    -e "@AnnotatedFor" \
    -e "/tests/" \
    -e "/annotated-jdk/" \
    -e "/jdk/" \
    -e "^jdk/" \
    -e "true positive" \
    > "${greplines}"
# Number of surviving lines; the denominator for the percentages printed later.
total=$(wc -l < "${greplines}")
## Don't output a total, to avoid people using this approximate count.
# echo "Total: $total"
# Reduce each matching line read from stdin to the category of its
# suppression reason, one per output line. Runs ${SED}, which must accept the
# GNU `\+` repetition operator. The expressions are applied in order:
#   1. Drop everything up to and including the last "// ", leaving the text
#      of a trailing comment.
#   2. For an @AssumeAssertion(checker) message, keep only the text that
#      follows it inside the string literal. The checker name may be any
#      length.
#   3. If the text has the form "category: explanation", keep the category.
#      The character before the colon must not be a digit, so that the
#      "file:line:" prefix added by grep -n is not mistaken for a category.
#   4. Strip trailing spaces.
_suppression_reasons() {
  "${SED}" \
    -e 's/.*\/\/ //g' \
    -e 's/.*@AssumeAssertion([^)]*)[ :]*\([^"]\+\)";/\1/g' \
    -e 's/\([^0-9]\): [^:].*/\1/' \
    -e 's/ \+$//'
}
# Tally identical reasons and list the most frequent first.
_suppression_reasons < "${greplines}" \
  | sort | uniq -c | sort -rg > "${countedreasons}"
# Add leading percentages to `uniq -c` output. Note that it rounds down to the nearest integer.
# (Digits after the decimal don't make a practical difference.)
# Print each `uniq -c` line read from stdin prefixed by its percentage of the
# total, truncated to an integer.
# Arguments:
#   $1 - total number of matching lines (the denominator)
#   $2 - 0 to print "PERCENT<TAB>REASON"; any other integer to print
#        "PERCENT<TAB>COUNT<TAB>REASON"
# Uses only shell builtins, so no cut or bc process is started per line.
_print_percentages() {
  local total_count=$1 show_counts=$2
  local line count content percent
  # `read` without IFS= strips the leading padding that `uniq -c` emits, so
  # the count is the first space-delimited field of the line.
  while read -r line; do
    count=${line%% *}
    content=${line#* }
    # Integer division truncates, i.e. rounds down for these non-negative values.
    percent=$((100 * count / total_count))
    if [ "$show_counts" -eq "0" ]; then
      printf "%d%%\t%s\n" "$percent" "$content"
    else
      printf "%d%%\t%s\t%s\n" "$percent" "$count" "$content"
    fi
  done
}
_print_percentages "$total" "$debug" < "${countedreasons}"
# Normal runs delete the intermediate files; debug runs keep them and report
# the total and the file locations instead.
case "$debug" in
  0)
    rm -f "${greplines}" "${countedreasons}"
    ;;
  *)
    echo "Total is $total, Intermediate output is in: ${greplines} ${countedreasons}"
    ;;
esac