add header analysis scripts to the utils directory

Maybe someone will find them useful...
2014-07-06 22:56:03 -04:00 · 2014-07-06 22:56:03 -04:00 · d7ea8f9656
commit d7ea8f9656
parent 0ebaae5acf
3 changed files with 185 additions and 0 deletions
--- a/utils/headers/build_headers.sh
+++ b/utils/headers/build_headers.sh
@ -0,0 +1,117 @@
+#!/bin/bash
+
+usage()
+{
+  echo "Usage:" $0 "[OPTIONS]"
+  echo
+  echo "Computes all the (deep) header dependencies for each file (compilation unit) in"
+  echo "the wesnoth project."
+  echo
+  echo "The calculated dependency lists are placed out of tree, in a subdirectory"
+  echo "'headers' of the root of the repostory, in order that they may be conveniently"
+  echo "grepped or similar."
+  echo
+  echo "A ranking of most commonly used headers is generated, in header_rank.log, based"
+  echo "on the number of compilation units which use the header."
+  echo
+  echo "The tool expects the current working directory to be the root directory of the"
+  echo "repository."
+  echo
+  echo -e "Options:"
+  echo -e "\t-h\tShows this help."
+  echo -e "\t-s\tShow source dependencies."
+  echo -e "\t-b\tShow boost dependencies."
+  echo -e "\t-i\tShow all /usr/include dependencies."
+  echo -e "\t-y\tShow all /usr/bin (system) dependencies."
+  echo
+  echo -e "\tBy default *all* dependencies are shown."
+  echo -e "\tIf multiple flags are passed, the OR of these is shown."
+  echo
+  echo -e "\t-m arg\tUse a custom pattern. Pass a regexp as an argument to"
+  echo -e "\t\tmatch against the paths of included files."
+  echo -e "\t\tCan't use this with other options."
+  echo
+  echo
+  echo "Example Usage:"
+  echo
+  echo -e "\t./build_headers.sh -s"
+  echo
+  exit 1;
+}
+
+echo "Reading options..."
+dir_pattern=""
+src_pattern="\(src\/\)"
+boost_pattern="\(\/usr\/include\/boost\/\)"
+incl_pattern="\(\/usr\/include\/\)"
+bin_pattern="\(\/usr\/bin\/\)"
+
+while getopts ":hsbiym:" Option
+do
+  case $Option in
+    h )
+      usage
+      exit 0;
+      ;;
+    s )
+      echo "Adding source includes..."
+      if [ -n "$dir_pattern" ]; then
+          dir_pattern+="\|"
+      fi
+      dir_pattern+="$src_pattern"
+      ;;
+    b )
+      echo "Adding boost includes..."
+      if [ -n "$dir_pattern" ]; then
+          dir_pattern+="\|"
+      fi
+      dir_pattern+="$boost_pattern"
+      ;;
+    i )
+      echo "Adding /usr/include includes..."
+      if [ -n "$dir_pattern" ]; then
+          dir_pattern+="\|"
+      fi
+      dir_pattern+="$incl_pattern"
+      ;;
+    y )
+      echo "Adding bin includes..."
+      if [ -n "$dir_pattern" ]; then
+          dir_pattern+="\|"
+      fi
+      dir_pattern+="$bin_pattern"
+      ;;
+    m )
+      echo "Matching against pattern:" 
+      dir_pattern="$OPTARG"
+      echo "$dir_pattern"
+      ;;
+  esac
+done
+shift $(($OPTIND - 1))
+
+echo "Final pattern:" "$dir_pattern"
+
+INCLUDE_STR="-Isrc -I/usr/include/SDL -I/usr/include -I/usr/include/pango-1.0 -I/usr/include/cairo -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -I/usr/include/pixman-1 -I/usr/include/freetype2 -I/usr/include/libpng12 -I/usr/include/dbus-1.0 -I/usr/lib/x86_64-linux-gnu/dbus-1.0/include -I/usr/include/fribidi"
+
+echo "Building header include database in wesnoth/headers/..."
+[ -d headers ] || mkdir headers
+pwd
+#find src/ -type f -print0 | xargs -0 ./build_header.sh
+cd src
+for file in `find . -name "*.cpp" -type f -print0 | xargs -0`; do
+    if [ ! -f ../headers/"$file" ]; then
+        mkdir -p ../headers/"$file"
+        rmdir ../headers/"$file"
+    fi
+    cd ..
+    echo "src/${file:2}"
+    #read -p "asdf"
+    clang++ -H $INCLUDE_STR "src/${file:2}" 2>&1 >/dev/null | sed -n '/^\.*\. / p' | sed -e 's/^\.* //g' -e ':loop' -e 's|/[[:alnum:]_-\.]*/\.\./|/|g' -e 't loop' | sed -n '/^'"$dir_pattern"'/ p' | sort | uniq >headers/"${file:2}"
+    cd src
+done
+cd ..
+echo "ranking headers"
+find headers/ -type f -exec cat {} + | sort | uniq -c | sort -k1 --numeric --reverse > "header_rank.log"
+echo "wrote to header_rank.log"
+echo "Finished."
--- a/utils/headers/header_times.sh
+++ b/utils/headers/header_times.sh
@ -0,0 +1,53 @@
+#!/bin/bash
+#Compute header times. Takes an scons build log with debug=time on, file name as first and only arg.
+set -e
+
+if [[ "$#" -ne 1 ]]; then
+    echo "Usage:" $0 " [scons-log-file]"
+    echo
+    echo "Ranks headers according to the aggregate build time of compilation units which"
+    echo "read them. In other words, the ranking answers the question 'if I stopped a"
+    echo "wesnoth build at a random point in time, which headers are most likely to have"
+    echo "been read by that compilation unit?'"
+    echo
+    echo "Expects to take the name of a log file from an scons build (with debug=time"
+    echo "option passed in) as first and only arg. This file must be located at the root"
+    echo "of the wesnoth repository directory, and the argument should just be its name"
+    echo "and extension."
+    echo 
+    echo "Expects the current working directory to be the root directory of the repo."
+    echo
+    echo "Example Usage:"
+    echo
+    echo -e "\t./build_headers.sh -s"
+    echo -e "\t./header_times.sh travis_log_sample.log"
+    echo
+    exit 1;
+fi
+
+rm -r headers-annotated
+cp -fR headers headers-annotated
+cd headers-annotated/
+for file in `find . -name "*.cpp" -type f -print0 | xargs -0`; do
+    echo "src/${file:2}"
+    if grep -q "src/${file:2}$" "../$1"; then
+	#echo "match:" '\_ src/'"${file:2}$"'_ { N; s/.*\n//p; }'
+        header_time=$(cat "../$1" | sed -n '\| src/'"${file:2}$"'| { N; s|.*\n||p; }' | sed -n 's/.*\( [0-9\.]* \).*/\1/p' )
+        #echo "header time:" "$header_time"
+        sed -i 's/^.*$/& '"$header_time"'/' "${file:2}"
+    else
+        rm "${file:2}"
+    fi
+done
+cd ..
+echo "Summing results..."
+# Add up the second field of every annotated line per distinct first field,
+# then write "<sum><TAB><first field>" lines ordered by descending sum.
+find headers-annotated/ -name "*.cpp" -type f -exec cat {} + \
+   | sort -s -g -k 1,1 \
+   | awk '
+       { total[$1] += $2 }
+       END {
+         for (hdr in total)
+           printf("%s\t%s\n", total[hdr], hdr)
+       }' \
+   | sort -k1 --numeric --reverse > "header_time_rank.log"
+echo "wrote to header_time_rank.log"
+echo "Finished."
+# Open the resulting ranking in the pager.
+less header_time_rank.log
--- a/utils/headers/readme
+++ b/utils/headers/readme
@ -0,0 +1,15 @@
+This directory contains tools to help analyze header dependencies.
+
+The first script is "build_headers". It runs clang with the -H option
+over the entire source directory to generate header dependencies, and
+builds an out-of-tree mirror of the source files in which each file
+holds a listing of all headers that the corresponding source file
+directly or indirectly includes. You may run it with various filters
+for level of detail.
+
+"build_headers" also generates a sorted "rank" file which indicates
+how many compilation units include each header.
+
+The second script is "header_times". It takes a compilation log,
+generated by scons with debug=time option (such as found on
+travis), and builds a second ranking in which compilation units
+are weighted by the total time that they take.