add header analysis scripts to the utils directory

Maybe someone will find them useful...
This commit is contained in:
Chris Beck 2014-07-06 22:56:03 -04:00
parent 0ebaae5acf
commit d7ea8f9656
3 changed files with 185 additions and 0 deletions

117
utils/headers/build_headers.sh Executable file
View file

@ -0,0 +1,117 @@
#!/bin/bash
# Print the help text for build_headers.sh on stdout and terminate the script.
#
# Arguments:
#   $1 - exit status to terminate with (optional). Defaults to 1, so a bare
#        "usage" call keeps reporting failure; pass 0 when help was
#        explicitly requested.
# Outputs:
#   The help text on stdout.
# Returns:
#   Never returns; always exits the shell with the chosen status.
usage() {
  local status=${1:-1}
  # Literal tab used to lay out the option table inside the heredoc.
  local tab=$'\t'

  cat <<EOF
Usage: $0 [OPTIONS]

Computes all the (deep) header dependencies for each file (compilation unit) in
the wesnoth project.

The calculated dependency lists are placed out of tree, in a subdirectory
'headers' of the root of the repository, in order that they may be conveniently
grepped or similar.

A ranking of most commonly used headers is generated, in header_rank.log, based
on the number of compilation units which use the header.

The tool expects the current working directory to be the root directory of the
repository.

Options:
${tab}-h${tab}Shows this help.
${tab}-s${tab}Show source dependencies.
${tab}-b${tab}Show boost dependencies.
${tab}-i${tab}Show all /usr/include dependencies.
${tab}-y${tab}Show all /usr/bin (system) dependencies.

${tab}By default *all* dependencies are shown.
${tab}If multiple flags are passed, the OR of these is shown.

${tab}-m arg${tab}Use a custom pattern. Pass a regexp as an argument to
${tab}${tab}match against the paths of included files.
${tab}${tab}Can't use this with other options.


Example Usage:

${tab}./build_headers.sh -s

EOF
  exit "$status"
}
# ---------------------------------------------------------------------------
# Main body: turn the command line into a sed filter, run clang++ -H over
# every .cpp file below src/, store the filtered, de-duplicated header list
# of each compilation unit in headers/<same relative path>, and finally rank
# the headers by how many of those lists mention them (header_rank.log).
# Must be run from the root of the repository.
# ---------------------------------------------------------------------------
echo "Reading options..."

# Basic-regex fragments, one per selectable class of include path. The '/'
# characters are escaped because the result is spliced into a sed /.../
# address further down.
dir_pattern=""
src_pattern="\(src\/\)"
boost_pattern="\(\/usr\/include\/boost\/\)"
incl_pattern="\(\/usr\/include\/\)"
bin_pattern="\(\/usr\/bin\/\)"

# Set to 1 when -m supplied the pattern, in which case it is used verbatim.
custom_pattern=0

# Append the alternative $1 to dir_pattern, OR-ing it with what is there.
add_pattern() {
  if [[ -n "$dir_pattern" ]]; then
    dir_pattern+="\|"
  fi
  dir_pattern+="$1"
}

while getopts ":hsbiym:" opt; do
  case "$opt" in
    h)
      # usage terminates the script itself; the exit is only a safety net.
      usage 0
      exit 0
      ;;
    s)
      echo "Adding source includes..."
      add_pattern "$src_pattern"
      ;;
    b)
      echo "Adding boost includes..."
      add_pattern "$boost_pattern"
      ;;
    i)
      echo "Adding /usr/include includes..."
      add_pattern "$incl_pattern"
      ;;
    y)
      echo "Adding bin includes..."
      add_pattern "$bin_pattern"
      ;;
    m)
      echo "Matching against pattern:"
      dir_pattern="$OPTARG"
      custom_pattern=1
      echo "$dir_pattern"
      ;;
    :)
      # Silent getopts mode (leading ':') reports a missing argument here.
      echo "Option -$OPTARG requires an argument." >&2
      exit 1
      ;;
    \?)
      echo "Unknown option: -$OPTARG" >&2
      exit 1
      ;;
  esac
done
shift $((OPTIND - 1))
echo "Final pattern:" "$dir_pattern"

# sed program that keeps only the wanted header paths.
#  - no pattern: keep everything (the documented default);
#  - flags:      group the alternatives so that '^' anchors all of them,
#                not just the first one;
#  - -m:         use the caller's pattern exactly as given, anchored at '^'.
if [[ -z "$dir_pattern" ]]; then
  filter_expr='p'
elif (( custom_pattern )); then
  filter_expr="/^${dir_pattern}/p"
else
  filter_expr="/^\\(${dir_pattern}\\)/p"
fi

# Include search paths handed to clang++; kept as an array so that every
# flag reaches the compiler as exactly one argument.
include_flags=(
  -Isrc
  -I/usr/include/SDL
  -I/usr/include
  -I/usr/include/pango-1.0
  -I/usr/include/cairo
  -I/usr/include/glib-2.0
  -I/usr/lib/x86_64-linux-gnu/glib-2.0/include
  -I/usr/include/pixman-1
  -I/usr/include/freetype2
  -I/usr/include/libpng12
  -I/usr/include/dbus-1.0
  -I/usr/lib/x86_64-linux-gnu/dbus-1.0/include
  -I/usr/include/fribidi
)

echo "Building header include database in wesnoth/headers/..."
if [[ ! -d src ]]; then
  echo "error: no 'src' directory here; run this from the repository root." >&2
  exit 1
fi
mkdir -p headers
pwd

# NUL-delimited traversal so that unusual file names cannot be split.
while IFS= read -r -d '' unit; do
  echo "$unit"
  out_file="headers/${unit#src/}"
  mkdir -p "$(dirname "$out_file")"

  # clang++ -H lists every included header on stderr, prefixed by one dot
  # per nesting level. -fsyntax-only stops after parsing: only that listing
  # is needed, not object code or a link step. The pipeline keeps the
  # listing lines, strips the dots, collapses "dir/../" path segments,
  # applies the filter and stores the sorted unique result.
  clang++ -H -fsyntax-only "${include_flags[@]}" "$unit" 2>&1 >/dev/null </dev/null \
    | sed -n '/^\.*\. / p' \
    | sed -e 's/^\.* //g' \
          -e ':loop' \
          -e 's|/[[:alnum:]_.-]*/\.\./|/|g' \
          -e 't loop' \
    | sed -n "$filter_expr" \
    | sort -u >"$out_file"
done < <(find src/ -name "*.cpp" -type f -print0)

echo "ranking headers"
# One line per (unit, header) pair goes in; counting identical lines gives
# the number of compilation units that use each header.
find headers/ -type f -exec cat {} + \
  | sort \
  | uniq -c \
  | sort -k1 --numeric --reverse >"header_rank.log"
echo "wrote to header_rank.log"
echo "Finished."

53
utils/headers/header_times.sh Executable file
View file

@ -0,0 +1,53 @@
#!/bin/bash
# Compute header times.
#
# Takes the path of an scons build log produced with debug=time as first and
# only argument. Every per-unit header list below headers/ is copied to
# headers-annotated/, each of its lines is tagged with the build time the log
# reports for that compilation unit, and the times are summed per header into
# header_time_rank.log (largest total first).
# Must be run from the root of the repository.
set -e

if [[ "$#" -ne 1 ]]; then
  tab=$'\t'
  cat <<EOF
Usage: $0  [scons-log-file]

Ranks headers according to the aggregate build time of compilation units which
read them. In other words, the ranking answers the question 'if I stopped a
wesnoth build at a random point in time, which headers are most likely to have
been read by that compilation unit?'

Expects to take the name of a log file from an scons build (with debug=time
option passed in) as first and only arg. This file must be located at the root
of the wesnoth repository directory, and the argument should just be its name
and extension.

Expects the current working directory to be the root directory of the repo.

Example Usage:

${tab}./build_headers.sh -s
${tab}./header_times.sh travis_log_sample.log

EOF
  exit 1
fi

log_file=$1
if [[ ! -f "$log_file" ]]; then
  echo "error: log file '$log_file' not found." >&2
  exit 1
fi
if [[ ! -d headers ]]; then
  echo "error: no 'headers' directory here; run build_headers.sh first." >&2
  exit 1
fi

# -f: the directory does not exist on the first run, which must not trip set -e.
rm -rf headers-annotated
cp -fR headers headers-annotated

# Collect the file list up front: the loop below rewrites and deletes files
# inside the tree that find is walking.
annotated_files=()
while IFS= read -r -d '' path; do
  annotated_files+=("$path")
done < <(find headers-annotated/ -name "*.cpp" -type f -print0)

for annotated in "${annotated_files[@]}"; do
  unit="src/${annotated#headers-annotated/}"
  echo "$unit"
  # The unit name is used as a regex below; make its dots literal.
  unit_re=${unit//./\\.}

  if grep -q -- "${unit_re}\$" "$log_file"; then
    # For every log line ending in " <unit>", take the line that follows it
    # and pull out its last space-delimited number. A unit that shows up
    # more than once contributes the sum of its times, so header_time is
    # always a single token (or empty when no number was found).
    header_time=$(
      sed -n '\| '"$unit_re"'$| { N; s|.*\n||p; }' "$log_file" \
        | sed -n 's/.*\( [0-9.]* \).*/\1/p' \
        | awk '
            NR == 1 { first = $1 }
            { total += $1 }
            END {
              if (NR == 1) print first
              else if (NR > 1) printf "%.6f\n", total
            }'
    )
    # Append the time as a second column to every header line of this unit.
    sed -i "s/\$/ ${header_time}/" "$annotated"
  else
    # Unit was not built in this log: it must not influence the ranking.
    rm -- "$annotated"
  fi
done

echo "Summing results..."
# Column 1 is the header path, column 2 the time of one unit that read it.
find headers-annotated/ -name "*.cpp" -type f -exec cat {} + \
  | awk '
      { arr[$1] += $2 }
      END {
        for (key in arr) printf("%s\t%s\n", arr[key], key)
      }' \
  | sort -k1 --numeric --reverse >"header_time_rank.log"
echo "wrote to header_time_rank.log"
echo "Finished."
less header_time_rank.log

15
utils/headers/readme Normal file
View file

@ -0,0 +1,15 @@
This directory contains tools to help analyze header dependencies.
The first script is "build_headers.sh". It runs clang with the -H
option over every .cpp file in the source directory to generate
header dependencies, and builds an out-of-tree copy of each file
(under "headers/") containing a listing of all the headers it
directly or indirectly includes. You may run it with various
filters for level of detail.
build_headers.sh also generates a sorted "rank" file,
header_rank.log, which indicates how many compilation units
include each header.
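The second script is "header_times.sh". It takes a compilation log
generated by scons with the debug=time option (such as found on
travis), and builds a second ranking, header_time_rank.log, in
which each header is weighted by the total build time of the
compilation units that include it. Run build_headers.sh first: it
works on a copy of the "headers/" directory.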