Travis: Cache toolchain

This should give a significant boost to Travis speeds, because most of the
compile time is spent building the toolchain over and over again.
However, the toolchain (or libc or libm) changes only rarely,
so most rebuilds can skip this step.

The hashing has been put into a separate file to keep it
as decoupled as possible from BuildIt.sh.
This commit is contained in:
Ben Wiederhake 2020-03-07 17:38:16 +01:00 committed by Andreas Kling
parent 0edae63cc0
commit 36ba0a35ee
Notes: sideshowbarker 2024-07-19 08:50:07 +09:00
4 changed files with 182 additions and 3 deletions

View file

@ -9,6 +9,7 @@ compiler:
cache:
directories:
- /var/cache/apt/archives/*.deb
- Toolchain/Cache/
notifications:
irc:
@ -30,7 +31,8 @@ before_install:
script:
- cd Toolchain
- ./BuildIt.sh
- TRY_USE_LOCAL_TOOLCHAIN=y ./BuildIt.sh
- cd ../Kernel
- ./makeall.sh
- ../Meta/lint-shell-scripts.sh
- du -ch ../Toolchain/Cache/* || true

View file

@ -1,3 +1,5 @@
# Created by QEMU build
config-temp
config.log
# For caching the entire toolchain (useful on Travis)
Cache/

View file

@ -1,8 +1,10 @@
#!/usr/bin/env bash
set -e
# This file will need to be run in bash, for now.
# === CONFIGURATION AND SETUP ===
# Absolute path of the directory containing this script, independent of the
# caller's working directory (BASH_SOURCE requires bash, hence the note above).
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
echo "$DIR"
@ -43,6 +45,44 @@ GCC_NAME="gcc-$GCC_VERSION"
GCC_PKG="${GCC_NAME}.tar.gz"
GCC_BASE_URL="http://ftp.gnu.org/gnu/gcc"
# === CHECK CACHE AND REUSE ===
pushd "$DIR"
# Opt-in cache reuse: only attempted when the caller exports
# TRY_USE_LOCAL_TOOLCHAIN=y (the Travis 'script' step does this).
if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then
echo "Checking cached toolchain:"
# Everything that can influence the produced toolchain goes into this config
# string: host system, target triple, this script's own hash, the build
# tools/flags, and the pinned binutils/gcc versions and tarball checksums.
# NOTE(review): $MD5SUM is intentionally unquoted here — it may expand to a
# multi-word invocation such as "md5 -q" on BSDs; confirm before quoting.
DEPS_CONFIG="
uname=$(uname),TARGET=${TARGET},
BuildItHash=$($MD5SUM $(basename $0)),
MAKE=${MAKE},MD5SUM=${MD5SUM},NPROC=${NPROC},
CC=${CC},CXX=${CXX},with_gmp=${with_gmp},LDFLAGS=${LDFLAGS},
BINUTILS_VERSION=${BINUTILS_VERSION},BINUTILS_MD5SUM=${BINUTILS_MD5SUM},
GCC_VERSION=${GCC_VERSION},GCC_MD5SUM=${GCC_MD5SUM}"
echo "Config is:${DEPS_CONFIG}"
# ComputeDependenciesHash.sh combines the config above (fed via stdin) with
# the hashes of every file the toolchain build depends on (libc/libm etc.).
if ! DEPS_HASH=$($DIR/ComputeDependenciesHash.sh $MD5SUM <<<"${DEPS_CONFIG}"); then
echo "Dependency hashing failed"
echo "Will rebuild toolchain from scratch, and NOT SAVE THE RESULT."
echo "Someone should look into this, but for now it'll work, albeit inefficient."
# Should be empty anyway, but just to make sure:
# (an empty DEPS_HASH also tells the save step at the end to skip caching)
DEPS_HASH=""
elif [ -r "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then
echo "Cache at Cache/ToolchainLocal_${DEPS_HASH}.tar.gz exists!"
echo "Extracting toolchain from cache:"
tar xzf "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz"
echo "Done 'building' the toolchain."
# Cache hit: the prebuilt toolchain is now in place; skip the entire build.
exit 0
else
echo "Cache at Cache/ToolchainLocal_${DEPS_HASH}.tar.gz does not exist."
echo "Will rebuild toolchain from scratch, and save the result."
fi
fi
popd
# === DOWNLOAD AND PATCH ===
pushd "$DIR/Tarballs"
# Verify the binutils tarball against the pinned md5 before using it.
md5="$($MD5SUM $BINUTILS_PKG | cut -f1 -d' ')"
echo "bu md5='$md5'"
@ -98,8 +138,10 @@ pushd "$DIR/Tarballs"
popd
mkdir -p "$PREFIX"
# === COMPILE AND INSTALL ===
# NOTE(review): 'mkdir -p "$PREFIX"' appears twice in this hunk. mkdir -p is
# idempotent so this is harmless, but one of the two is likely redundant.
mkdir -p "$PREFIX"
mkdir -p "$DIR/Build/binutils"
mkdir -p "$DIR/Build/gcc"
@ -162,3 +204,25 @@ pushd "$DIR/Build/"
popd
popd
# === SAVE TO CACHE ===
# Mirror image of the "CHECK CACHE AND REUSE" section above: after a full
# build, pack the freshly built toolchain so the next CI run can reuse it.
pushd "$DIR"
if [ "${TRY_USE_LOCAL_TOOLCHAIN}" = "y" ] ; then
# NOTE(review): the tarball below is already gzip-compressed ('tar czf'),
# so the earlier TODO about adding -z appears to be resolved.
echo "Caching toolchain:"
if [ -z "${DEPS_HASH}" ] ; then
# DEPS_HASH is emptied in the cache-check section when hashing fails.
echo "NOT SAVED, because hashing failed."
echo "It's computed in the beginning; see there for the error message."
elif [ -e "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" ] ; then
# Note: This checks for *existence*. Initially we checked for
# *readability*. If Travis borks permissions, there's not much we can do.
echo "Cache exists but was not used?!"
echo "Not touching cache then."
else
mkdir -p Cache/
# Local/ holds the installed cross-toolchain (binutils + gcc).
tar czf "Cache/ToolchainLocal_${DEPS_HASH}.tar.gz" Local/
fi
fi
popd

View file

@ -0,0 +1,111 @@
#!/usr/bin/env bash
# Computes a single hash summarizing everything the toolchain build depends
# on: the caller's config string (read from stdin), this script itself, and
# every file that libc/libm (and therefore libstdc++) transitively include.
# The final hash is printed as a single line on stdout; all diagnostics go
# to stderr.
# Usage: echo "YOURCONFIG" | ./ComputeDependenciesHash.sh <HASH-INVOCATION>
# where <HASH-INVOCATION> is the hashing command to run, e.g. 'md5sum' on
# Linux or 'md5 -q' on BSDs — hence it may be multiple arguments.
set -eu
# This file will need to be run in bash, for now.
if [ $# -lt 1 ] ; then
echo "USAGE: echo \"YOURCONFIG\" | $0 <HASH-INVOCATION>" >&2
echo "Example: echo \"uname=Linux,TARGET=i686-pc-serenity\" | $0 md5sum" >&2
echo "Example: echo \"uname=OpenBSD,TARGET=i686-pc-serenity\" | $0 md5 -q" >&2
exit 1
fi
# Work from the repository root (parent of Toolchain/); sanity-check the
# location by probing for the LICENSE file.
DIR=$( cd "$( dirname "$0" )" && pwd )
cd "${DIR}/.."
if [ ! -r LICENSE ] ; then
echo "$0: Got confused by the directories, giving up." >&2
exit 1
fi
# Ensure cleanup of the temporary edge-list file on every exit path.
DEPLIST_FILE=$(mktemp /tmp/serenity_deps_XXXXXXXX.lst)
function finish {
rm -f "${DEPLIST_FILE}"
}
trap finish EXIT
# libstdc++ depends on libc and libm, so we pessimistically assume it depends
# on *all* of their implementation and recursive dependencies.
# Scan all files for potential dependencies.
# Thinking in graphs, this computes the edge list:
# each output line is "includer:includee", with the includee rewritten to a
# repo-relative path by the sed rules below, or prefixed UNRESOLVED_* when
# no rewrite rule matched (e.g. system headers like <chrono>).
cat <(find AK/ Libraries/ Servers/ Kernel/ -name '*.h') \
<(find Libraries/LibC/ Libraries/LibM/ -name '*.cpp' ! -name 'Test*.cpp' ) | \
xargs grep -F '#include ' | \
sed -r \
-e 's,^(.*/)([^/]+:)#include "(.*)",\1\2\1\3,' \
-e 's^#include <(Kernel/.*)>^\1^' \
-e 's^#include <(AK/.*)>^\1^' \
-e 's^#include <(Lib[A-Za-z]+/.*)>^Libraries/\1^' \
-e 's^#include <((bits|netinet|sys|arpa|net)/.*)>^Libraries/LibC/\1^' \
-e 's^#include <fd_set.h>^Libraries/LibC/fd_set.h^' \
-e 's^#include <([a-z]{3,10}(_numbers)?\.h)>^Libraries/LibC/\1^' \
-e 's^#include <([A-Z][a-z]+Server/.*)>^Servers/\1^' \
-e 's^#include <(.*)>^UNRESOLVED_I/\1^' \
-e 's^#include "(.*)"^UNRESOLVED_L/\1^' > "${DEPLIST_FILE}"
# Some #include's cannot be resolved, like <chrono>. However, these are only
# a problem if they turn up as a transitive dependency of libc and libm.
# We will check for that when the time comes.
# The initial guess is pessimistic: *all* of libc and libm.
FILE_LIST=$(find Libraries/LibC/ Libraries/LibM/ \( -name '*.cpp' -o -name '*.c' -o -name '*.h' \) ! -name 'Test*')
echo "$0: Exploring dependencies of libstdc++" >&2
FILE_LIST_COMPLETE="n"
# In each iteration, we extend FILE_LIST by the dependencies not listed yet in
# FILE_LIST. Note that the results are always semantically the same,
# but the order depends on the initial `find` runs.
# (Fixed-point iteration over the edge list, capped at 10 rounds; see below.)
for _ in $(seq 10) ; do
# Turn the newline-separated FILE_LIST into one alternation regex (a|b|c),
# escaping dots so filenames match literally.
FILE_REGEX=$(echo "${FILE_LIST}" | sed -zr -e 's,\n$,,' -e 's,\.,\\.,g' -e 's,\n,|,g')
# Frontier: edges whose source is already listed but whose target is not.
FURTHER_FILE_LIST=$(grep -P "^(${FILE_REGEX}):" "${DEPLIST_FILE}" | grep -Pv ":(${FILE_REGEX})\$" | sed -re 's,^.*:(.*)$,\1,' | sort -u)
if [ -n "${FURTHER_FILE_LIST}" ] ; then
# FILE_LIST should grow to a maximum of "number of all .cpp and .c and .h files",
# i.e. roughly 700 lines. This should be manageable, even as the project grows.
FILE_LIST="${FILE_LIST}
${FURTHER_FILE_LIST}"
else
FILE_LIST_COMPLETE="y"
break
fi
done
FURTHER_FILE_LIST=""
FILE_REGEX=""
if [ "${FILE_LIST_COMPLETE}" != "y" ] ; then
# Dependency chains might grow very long. Also, if for some reason we fail
# to filter out the already listed files, the FILE_LIST would grow
# exponentially. Both of these unpleasant cases are handled by capping the
# iteration count to 10 and giving up:
echo "$0: Dependencies don't seem to converge, giving up." >&2
exit 1
fi
# Sort for reproducibility.
FILE_LIST=$(echo "${FILE_LIST}" | LC_ALL=C sort -u)
# Fail hard if any unresolvable #include ended up in the transitive closure.
if grep -F 'UNRESOLVED' <<EOLIST >&2 ; then
${FILE_LIST}
EOLIST
# NOTE(review): unlike every other diagnostic in this script, this message
# goes to stdout (the channel the caller captures) — probably meant for >&2.
echo "$0: Unresolved dependency, giving up."
exit 1
fi
echo "$0: Computing hashes" >&2
# "$@" is the md5sum invocation. The piping might hide non-zero exit-codes,
# but thankfully only the first command can reasonably fail.
# Also, abuse the deplist file as a temporary buffer.
# (stdin carries the caller's config string; see BuildIt.sh.)
cat /dev/stdin > "${DEPLIST_FILE}"
# Hash every dependency file, this script itself, and the config buffer.
HASHES=$(xargs "$@" <<EOLIST
${FILE_LIST}
Toolchain/ComputeDependenciesHash.sh
${DEPLIST_FILE}
EOLIST
)
# Caller (probably BuildIt.sh) should inject its own hash via stdin.
# Mask the temporary (= non-reproducible) name of the DEPLIST_FILE:
HASHES=$(echo "${HASHES}" | sed -re 's,/tmp/serenity_deps_........\.lst,CONFIG,')
echo "$0: Hashes are:" >&2
echo "${HASHES}" >&2
echo "$0: Toolchain hash:" >&2
# Hash-of-hashes: the single line on stdout is the final toolchain hash;
# tee mirrors it to stderr so it also shows up in the build log.
cat <<EOHASH | "$@" - | cut -f1 -d' ' | tee /dev/stderr
${HASHES}
EOHASH
echo "$0: Great success!" >&2