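#!/bin/bash
# Check spell-checker suggestions against a list of common misspellings.
# Format of the original word list (one entry per line):
#   word->word1, ...
# Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
# The base name of the data files can be overridden with the INPUT
# environment variable.
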
# Base name of the data files read below ($input.3, $input.4, $input.5);
# set the INPUT environment variable to use a different word list.
input=${INPUT:-List_of_common_misspellings.txt}

#######################################
# Score ranked spelling suggestions against the expected corrections.
# Arguments:
#   $1 - suggestions file: row N holds the tab-separated suggestions for
#        test word N; the rank of a suggestion is its field position.
#   $2 - expected file, aligned row-by-row with $1: field 2 is the expected
#        correction, field 3 an optional second one. Field 1 is not read
#        here (presumably the misspelled word -- confirm against the data).
# Outputs:
#   The rows for which no expected correction was suggested, a separator
#   line, then six summary lines.
# Returns:
#   1 if either file is unreadable, otherwise the exit status of awk.
#######################################
check() {
  local suggestions=$1
  local expected=$2
  local path

  for path in "$suggestions" "$expected"; do
    if [[ ! -r "$path" ]]; then
      printf 'check: cannot read %s\n' "$path" >&2
      return 1
    fi
  done

  # The suggestions arrive on stdin (operand "-") so that the awk program can
  # tell the two inputs apart by FILENAME even when the first one is empty.
  awk '
    BEGIN {
      FS = "\t"
      # NOTE(review): maxord is never updated, so this line always reports 0.
      # Kept as is because its intended meaning is not clear from the code.
      maxord = 0
      # Start every counter at 0 so that an untouched one prints "0", not "".
      rows = 0; missed_rows = 0; wanted = 0; missing = 0; rank_sum = 0
      missed = ""
    }

    # First input: remember the rank of every suggestion, per row. Only the
    # first occurrence counts, so a repeated suggestion keeps its best rank.
    FILENAME == "-" {
      for (i = 1; i <= NF; i++)
        if (!((NR, $i) in rank))
          rank[NR, $i] = i
      rows = NR
      next
    }

    # Second input: NR keeps counting across files, so NR - rows is the
    # matching row number in the suggestions.
    {
      row = NR - rows
      has2 = ($3 != "")
      r1 = ((row, $2) in rank) ? rank[row, $2] : 0
      # Look the second correction up only when the row actually has one.
      r2 = (has2 && ((row, $3) in rank)) ? rank[row, $3] : 0

      wanted += 1 + has2
      if (!r1 && !r2) {
        missed_rows++
        missed = missed "\n" $0
      }
      # Count every missing correction, so that wanted - missing is exactly
      # the number of corrections whose ranks were summed.
      if (!r1) missing++
      if (has2 && !r2) missing++
      rank_sum += r1 + r2
    }

    END {
      found = wanted - missing
      # Avoid a fatal division by zero when nothing was found.
      if (found > 0)
        avg = rank_sum / found
      else
        avg = "n/a"

      print "Missed rows", missed
      print "======================================="
      print maxord, "max. suggestion for a word"
      print rows, "input rows"
      print missed_rows, "missing rows"
      print wanted, "expected suggestions"
      print missing, "missing suggestions"
      print avg, "average ranking"
    }
  ' - "$expected" < "$suggestions"
}

# Score the suggestions in $input.4 (reported as aspell) only when that file
# exists; the suggestions in $input.5 (reported as hunspell) are always
# scored. $input.3 holds the expected corrections for both.
if [[ -f "$input.4" ]]; then
  check "$input.4" "$input.3" >result.aspell
fi
check "$input.5" "$input.3" >result.hunspell

# Show the six summary lines of each report. result.aspell is shown whenever
# it exists, so it may come from an earlier run.
if [[ -f result.aspell ]]; then
  tail -n 6 result.aspell
fi
tail -n 6 result.hunspell