aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoel Rosdahl <joel@rosdahl.net>2020-02-20 21:24:15 +0100
committerJoel Rosdahl <joel@rosdahl.net>2020-02-23 21:31:55 +0100
commite8444a3893cb339d97e25fd70496e448b3da18bb (patch)
tree6a0704b3ccfb7cfefc98d3339b79ccabb859c136
parentcc26cb33156f53cb66b1f70a9f2430f7a8ceae57 (diff)
downloadccache-e8444a3893cb339d97e25fd70496e448b3da18bb.tar.gz
ccache-e8444a3893cb339d97e25fd70496e448b3da18bb.tar.bz2
ccache-e8444a3893cb339d97e25fd70496e448b3da18bb.zip
Don’t use realpath(3) for normalization when computing relative paths
The current working directory (CWD) can come from two sources: Either the return value of getcwd(3) (“actual CWD” below) or the environment variable $PWD (“apparent CWD” below). The former is returned by e.g. $(CURDIR) in Makefiles and by “pwd -P” and is always in normalized form (no “.” or “..” parts or extra slashes). The latter is returned by “echo $PWD” or “pwd” and can potentially be in unnormalized form on some systems. The actual CWD and apparent CWD may also differ if there are symlinks in the path. Absolute paths to files given to ccache can therefore be based on either of these CWD forms. When computing relative paths under the base directory the CWD needs be in normalized form for the algorithm to be reasonably simple. 2df269a3 solved a bug with an unnormalized apparent CWD by using realpath(3) for normalization. Using realpath also makes the algorithm correct in the presence of symlinks. It however also means that all symlinks (both in CWD and in command line arguments) are dereferenced. The downside of this is that if either of the symlink targets contain specific names (such as build ID, date, username or similar) then the relative paths will also contain those specific path names, leading to cache misses. Solve this by: - Performing normalization without using realpath, i.e. without expanding symlinks. - Computing a relative path based on normalized CWD and normalized path. - Checking whether the relative path resolves to the same i-node as the original path. If it does, use it, otherwise just use the original path (and take a potential cache miss). - Doing the above calculation both for the actual and the apparent CWD and choose the best one. This solves the problem that PR #491 intended to address in a better way.
-rw-r--r--src/Util.hpp4
-rw-r--r--src/ccache.cpp46
-rw-r--r--test/suites/basedir.bash87
3 files changed, 109 insertions, 28 deletions
diff --git a/src/Util.hpp b/src/Util.hpp
index 447f7f88..26d3af55 100644
--- a/src/Util.hpp
+++ b/src/Util.hpp
@@ -115,7 +115,7 @@ void for_each_level_1_subdir(const std::string& cache_dir,
const ProgressReceiver& progress_receiver);
// Return current working directory (CWD) as returned from getcwd(3) (i.e.,
-// canonical path without symlink parts). Returns the empty string on error.
+// normalized path without symlink parts). Returns the empty string on error.
std::string get_actual_cwd();
// Return current working directory (CWD) by reading the environment variable
@@ -228,7 +228,7 @@ std::string read_file(const std::string& path);
std::string read_link(const std::string& path);
#endif
-// Return a canonicalized absolute path of `path`. On error (e.g. if the `path`
+// Return a normalized absolute path of `path`. On error (e.g. if the `path`
// doesn't exist) the empty string is returned if return_empty_on_error is true,
// otherwise `path` unmodified.
std::string real_path(const std::string& path,
diff --git a/src/ccache.cpp b/src/ccache.cpp
index 8ceb6c0b..70707c00 100644
--- a/src/ccache.cpp
+++ b/src/ccache.cpp
@@ -634,32 +634,36 @@ make_relative_path(const Context& ctx, string_view path)
}
#endif
- // Util::real_path only works for existing paths, so if path doesn't exist,
- // try x_dirname(path) and assemble the path afterwards. We only bother to try
- // canonicalizing one of these two paths since a compiler path argument
- // typically only makes sense if path or x_dirname(path) exists.
-
+ // The algorithm for computing relative paths below only works for existing
+ // paths. If the path doesn't exist, find the first ancestor directory that
+ // does exist and assemble the path again afterwards.
+ string_view original_path = path;
std::string path_suffix;
+ Stat path_stat;
+ while (!(path_stat = Stat::stat(std::string(path)))) {
+ path = Util::dir_name(path);
+ }
+ path_suffix = std::string(original_path.substr(path.length()));
- if (!Stat::stat(std::string(path))) {
- // path doesn't exist.
- string_view dir = Util::dir_name(path);
- // Find the nearest existing directory in path.
- while (!Stat::stat(std::string(dir))) {
- dir = Util::dir_name(dir);
- }
- path_suffix = std::string(path.substr(dir.length()));
- path = dir;
+ std::string path_str(path);
+ std::string normalized_path = Util::normalize_absolute_path(path_str);
+ std::vector<std::string> relpath_candidates = {
+ Util::get_relative_path(ctx.actual_cwd, normalized_path),
+ Util::get_relative_path(ctx.apparent_cwd, normalized_path),
+ };
+ // Move best (= shortest) match first:
+ if (relpath_candidates[0].length() > relpath_candidates[1].length()) {
+ std::swap(relpath_candidates[0], relpath_candidates[1]);
}
- std::string canon_path = Util::real_path(std::string(path), true);
- if (!canon_path.empty()) {
- std::string relpath = Util::get_relative_path(ctx.actual_cwd, canon_path);
- return relpath + path_suffix;
- } else {
- // path doesn't exist, so leave it as it is.
- return std::string(path);
+ for (const auto& relpath : relpath_candidates) {
+ if (Stat::stat(relpath).same_inode_as(path_stat)) {
+ return relpath + path_suffix;
+ }
}
+
+ // No match so nothing else to do than to return the unmodified path.
+ return std::string(original_path);
}
// This function reads and hashes a file. While doing this, it also does these
diff --git a/test/suites/basedir.bash b/test/suites/basedir.bash
index decfa893..d8513b36 100644
--- a/test/suites/basedir.bash
+++ b/test/suites/basedir.bash
@@ -49,18 +49,17 @@ if ! $HOST_OS_WINDOWS && ! $HOST_OS_CYGWIN; then
TEST "Path normalization"
cd dir1
- CCACHE_BASEDIR="`pwd`" $CCACHE_COMPILE -I`pwd`/include -c src/test.c
+ CCACHE_DEBUG=1 CCACHE_BASEDIR="`pwd`" $CCACHE_COMPILE -I$(pwd)/include -c src/test.c
+ mv test.o*text first.text
expect_stat 'cache hit (direct)' 0
expect_stat 'cache hit (preprocessed)' 0
expect_stat 'cache miss' 1
mkdir subdir
- ln -s `pwd`/include subdir/symlink
# Rewriting triggered by CCACHE_BASEDIR should handle paths with multiple
- # slashes, redundant "/." parts and "foo/.." parts correctly. Note that the
- # ".." part of the path is resolved after the symlink has been resolved.
- CCACHE_BASEDIR=`pwd` $CCACHE_COMPILE -I`pwd`//./subdir/symlink/../include -c `pwd`/src/test.c
+ # slashes, redundant "/." parts and "foo/.." parts correctly.
+ CCACHE_DEBUG=1 CCACHE_BASEDIR=$(pwd) $CCACHE_COMPILE -I$(pwd)//./subdir/../include -c $(pwd)/src/test.c
expect_stat 'cache hit (direct)' 1
expect_stat 'cache hit (preprocessed)' 0
expect_stat 'cache miss' 1
@@ -113,6 +112,84 @@ EOF
fi
# -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS && ! $HOST_OS_CYGWIN; then
+ TEST "Symlinked build dir inside source dir"
+
+ mkdir build1
+ ln -s $(pwd)/build1 dir1/src/build
+
+ mkdir build2
+ ln -s $(pwd)/build2 dir2/src/build
+
+ # The file structure now looks like this:
+ #
+ # build1
+ # dir1/include/test.h
+ # dir1/src/test.c
+ # dir1/src/build -> /absolute/path/to/build1
+ #
+ # build2
+ # dir2/include/test.h
+ # dir2/src/test.c
+ # dir2/src/build -> /absolute/path/to/build2
+
+ cd dir1/src
+ CCACHE_DEBUG=1 CCACHE_BASEDIR=/ $CCACHE_COMPILE -I$(pwd)/../include -c $(pwd)/test.c -o $(pwd)/build/test.o
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache miss' 1
+
+ cd ../../dir2/src
+ # Apparent CWD:
+ CCACHE_DEBUG=1 CCACHE_BASEDIR=/ $CCACHE_COMPILE -I$(pwd)/../include -c $(pwd)/test.c -o $(pwd)/build/test.o
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache miss' 1
+
+ # Actual CWD (e.g. from $(CURDIR) in a Makefile):
+ CCACHE_DEBUG=1 CCACHE_BASEDIR=/ $CCACHE_COMPILE -I$(pwd -P)/../include -c $(pwd -P)/test.c -o $(pwd -P)/build/test.o
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache miss' 1
+fi
+
+ # -------------------------------------------------------------------------
+if ! $HOST_OS_WINDOWS && ! $HOST_OS_CYGWIN; then
+ TEST "Symlinked source dir inside build dir"
+
+ mkdir build1
+ ln -s $(pwd)/dir1 build1/src
+
+ mkdir build2
+ ln -s $(pwd)/dir2 build2/src
+
+ # The file structure now looks like this:
+ #
+ # build1
+ # build1/src -> /absolute/path/to/dir1
+ # dir1/include/test.h
+ # dir1/src/test.c
+ #
+ # build2
+ # build2/src -> /absolute/path/to/dir2
+ # dir2/include/test.h
+ # dir2/src/test.c
+
+ cd build1
+ CCACHE_BASEDIR=/ $CCACHE_COMPILE -I$(pwd)/src/include -c $(pwd)/src/src/test.c -o $(pwd)/test.o
+ expect_stat 'cache hit (direct)' 0
+ expect_stat 'cache miss' 1
+
+ cd ../build2
+ # Apparent CWD:
+ CCACHE_BASEDIR=/ $CCACHE_COMPILE -I$(pwd)/src/include -c $(pwd)/src/src/test.c -o $(pwd)/test.o
+ expect_stat 'cache hit (direct)' 1
+ expect_stat 'cache miss' 1
+
+ # Actual CWD:
+ CCACHE_BASEDIR=/ $CCACHE_COMPILE -I$(pwd -P)/src/include -c $(pwd -P)/src/src/test.c -o $(pwd -P)/test.o
+ expect_stat 'cache hit (direct)' 2
+ expect_stat 'cache miss' 1
+fi
+
+ # -------------------------------------------------------------------------
TEST "Rewriting in stderr"
cat <<EOF >stderr.h