author    Jeff Gaston <jeffrygaston@google.com>  2017-08-09 18:25:28 -0700
committer Jeff Gaston <jeffrygaston@google.com>  2017-08-10 12:20:30 -0700
commit    f1fd45e7848bd5a0be26ee61579d110637449930 (patch)
tree      a5fe80c254e534e0ce99c355f4a4ea0b69b66d7a /finder
parent    c3f32070dc7ba229a6f883b12e4799995d9f9526 (diff)
Revert "Revert "Cacheable, multithreaded finder.""
Bug: 62455338
Test: m -j

This reverts commit d1abeb9d982b11fdf4047176d213acc8197c375f.

Change-Id: I9f73031636157511b5f1c6ce8a205e9bc91669ff
Diffstat (limited to 'finder')
-rw-r--r--  finder/Android.bp     |   37
-rw-r--r--  finder/cmd/Android.bp |   29
-rw-r--r--  finder/cmd/finder.go  |  149
-rw-r--r--  finder/finder.go      | 1399
-rw-r--r--  finder/finder_test.go | 1573
5 files changed, 3187 insertions(+), 0 deletions(-)
diff --git a/finder/Android.bp b/finder/Android.bp
new file mode 100644
index 00000000..b5c0e130
--- /dev/null
+++ b/finder/Android.bp
@@ -0,0 +1,37 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//
+// fast, parallel, caching implementation of `find`
+//
+
+subdirs = [
+ "cmd"
+]
+
+bootstrap_go_package {
+ name: "soong-finder",
+ pkgPath: "android/soong/finder",
+ srcs: [
+ "finder.go",
+ ],
+ testSrcs: [
+ "finder_test.go",
+ ],
+ deps: [
+ "soong-fs",
+ ],
+}
+
+
diff --git a/finder/cmd/Android.bp b/finder/cmd/Android.bp
new file mode 100644
index 00000000..9dc84ae4
--- /dev/null
+++ b/finder/cmd/Android.bp
@@ -0,0 +1,29 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//
+// fast, parallel, caching implementation of `find`
+//
+
+blueprint_go_binary {
+ name: "finder",
+ srcs: [
+ "finder.go",
+ ],
+ deps: [
+ "soong-finder"
+ ],
+}
+
+
diff --git a/finder/cmd/finder.go b/finder/cmd/finder.go
new file mode 100644
index 00000000..9da16602
--- /dev/null
+++ b/finder/cmd/finder.go
@@ -0,0 +1,149 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "log"
+ "os"
+ "runtime/pprof"
+ "sort"
+ "strings"
+ "time"
+
+ "android/soong/finder"
+ "android/soong/fs"
+)
+
+var (
+ // configuration of what to find
+ excludeDirs string
+ filenamesToFind string
+ pruneFiles string
+
+ // other configuration
+ cpuprofile string
+ verbose bool
+ dbPath string
+ numIterations int
+)
+
+func init() {
+ flag.StringVar(&cpuprofile, "cpuprofile", "",
+ "filepath of profile file to write (optional)")
+ flag.BoolVar(&verbose, "v", false, "log additional information")
+ flag.StringVar(&dbPath, "db", "", "filepath of cache db")
+
+ flag.StringVar(&excludeDirs, "exclude-dirs", "",
+ "comma-separated list of directory names to exclude from search")
+ flag.StringVar(&filenamesToFind, "names", "",
+ "comma-separated list of filenames to find")
+ flag.StringVar(&pruneFiles, "prune-files", "",
+ "filenames that if discovered will exclude their entire directory "+
+ "(including sibling files and directories)")
+ flag.IntVar(&numIterations, "count", 1,
+		"number of times to run. This is intended for use with --cpuprofile,"+
+			" to increase profile accuracy")
+}
+
+var usage = func() {
+	fmt.Printf("usage: finder --names <fileName>[,<fileName>...] --db <dbPath> <searchDirectory> [<searchDirectory>...]\n")
+ flag.PrintDefaults()
+}
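+
+// An illustrative invocation (all values are examples only):
+//
+//	finder --names Android.bp,Android.mk --db /tmp/finder-db \
+//	    --exclude-dirs .git,.repo --prune-files .out-dir /path/to/src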
+
+func main() {
+ err := run()
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "%v\n", err.Error())
+ os.Exit(1)
+ }
+}
+
+func stringToList(input string) []string {
+ return strings.Split(input, ",")
+}
+
+func run() error {
+ startTime := time.Now()
+ flag.Parse()
+
+ if cpuprofile != "" {
+ f, err := os.Create(cpuprofile)
+ if err != nil {
+ return fmt.Errorf("Error opening cpuprofile: %s", err)
+ }
+ pprof.StartCPUProfile(f)
+ defer f.Close()
+ defer pprof.StopCPUProfile()
+ }
+
+ var writer io.Writer
+ if verbose {
+ writer = os.Stderr
+ } else {
+ writer = ioutil.Discard
+ }
+
+ // TODO: replace Lshortfile with Llongfile when bug 63821638 is done
+ logger := log.New(writer, "", log.Ldate|log.Lmicroseconds|log.Lshortfile)
+
+ logger.Printf("Finder starting at %v\n", startTime)
+
+ rootPaths := flag.Args()
+ if len(rootPaths) < 1 {
+ usage()
+ return fmt.Errorf(
+ "Must give at least one <searchDirectory>")
+ }
+
+ workingDir, err := os.Getwd()
+ if err != nil {
+ return err
+ }
+ params := finder.CacheParams{
+ WorkingDirectory: workingDir,
+ RootDirs: rootPaths,
+ ExcludeDirs: stringToList(excludeDirs),
+ PruneFiles: stringToList(pruneFiles),
+ IncludeFiles: stringToList(filenamesToFind),
+ }
+ if dbPath == "" {
+ usage()
+ return errors.New("Param 'db' must be nonempty")
+ }
+ matches := []string{}
+ for i := 0; i < numIterations; i++ {
+ matches = runFind(params, logger)
+ }
+ findDuration := time.Since(startTime)
+	logger.Printf("Found these %v inodes in %v:\n", len(matches), findDuration)
+ sort.Strings(matches)
+ for _, match := range matches {
+ fmt.Println(match)
+ }
+ logger.Printf("End of %v inodes\n", len(matches))
+ logger.Printf("Finder completed in %v\n", time.Since(startTime))
+ return nil
+}
+
+func runFind(params finder.CacheParams, logger *log.Logger) (paths []string) {
+ service := finder.New(params, fs.OsFs, logger, dbPath)
+ defer service.Shutdown()
+ return service.FindAll()
+}
diff --git a/finder/finder.go b/finder/finder.go
new file mode 100644
index 00000000..ad85ee9a
--- /dev/null
+++ b/finder/finder.go
@@ -0,0 +1,1399 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package finder
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "runtime"
+ "sort"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "android/soong/fs"
+ "errors"
+)
+
+// This file provides a Finder struct that can quickly search for files satisfying
+// certain criteria.
+// This Finder gets its speed partially from parallelism and partially from caching.
+// If a Stat call returns the same result as last time, then it means Finder
+// can skip the ReadDir call for that dir.
+
+// The primary data structure used by the finder is the field Finder.nodes,
+// which is a tree of nodes of type *pathMap.
+// Each node represents a directory on disk, along with its stats, subdirectories,
+// and contained files.
+
+// The common use case for the Finder is that the caller creates a Finder and gives
+// it the same query that was given to it in the previous execution.
+// In this situation, the major events that take place are:
+// 1. The Finder begins to load its db
+// 2. The Finder begins to stat the directories mentioned in its db (using multiple threads)
+// Calling Stat on each of these directories is generally a large fraction of the total time
+// 3. The Finder begins to construct a separate tree of nodes in each of its threads
+// 4. The Finder merges the individual node trees into the main node tree
+// 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date
+// These ReadDir calls might prompt additional Stat calls, etc
+// 6. The Finder waits for all loading to complete
+// 7. The Finder searches the cache for files matching the user's query (using multiple threads)
+
+// These are the invariants regarding concurrency:
+// 1. The public methods of Finder are threadsafe.
+// The public methods are only performance-optimized for one caller at a time, however.
+// For the moment, multiple concurrent callers shouldn't expect any better performance than
+// multiple serial callers.
+// 2. While building the node tree, only one thread may ever access the <children> collection of a
+// *pathMap at once.
+// a) The thread that accesses the <children> collection is the thread that discovers the
+// children (by reading them from the cache or by having received a response to ReadDir).
+// 1) Consequently, the thread that discovers the children also spawns requests to stat
+// subdirs.
+// b) Consequently, while building the node tree, no thread may do a lookup of its
+// *pathMap via filepath because another thread may be adding children to the
+// <children> collection of an ancestor node. Additionally, in rare cases, another thread
+// may be removing children from an ancestor node if the children were only discovered to
+// be irrelevant after calling ReadDir (which happens if a prune-file was just added).
+// 3. No query will begin to be serviced until all loading (both reading the db
+// and scanning the filesystem) is complete.
+// Tests indicate that it only takes about 10% as long to search the in-memory cache as to
+// generate it, making this not a huge loss in performance.
+// 4. The parsing of the db and the initial setup of the pathMap tree must complete before
+// beginning to call listDirSync (because listDirSync can create new entries in the pathMap)
+
+// see cmd/finder.go or finder_test.go for usage examples
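+// As a minimal sketch (all parameter values are illustrative):
+//
+//	logger := log.New(os.Stderr, "", log.LstdFlags)
+//	f := New(CacheParams{
+//		WorkingDirectory: "/src",
+//		RootDirs:         []string{"/src"},
+//		ExcludeDirs:      []string{".git"},
+//		PruneFiles:       []string{".out-dir"},
+//		IncludeFiles:     []string{"Android.bp"},
+//	}, fs.OsFs, logger, "/tmp/finder-db")
+//	defer f.Shutdown()
+//	matches := f.FindNamed("Android.bp")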
+
+// Update versionString whenever making a backwards-incompatible change to the cache file format
+const versionString = "Android finder version 1"
+
+// a CacheParams specifies which files and directories the user wishes be scanned and
+// potentially added to the cache
+type CacheParams struct {
+ // WorkingDirectory is used as a base for any relative file paths given to the Finder
+ WorkingDirectory string
+
+ // RootDirs are the root directories used to initiate the search
+ RootDirs []string
+
+ // ExcludeDirs are directory names that if encountered are removed from the search
+ ExcludeDirs []string
+
+ // PruneFiles are file names that if encountered prune their entire directory
+ // (including siblings)
+ PruneFiles []string
+
+ // IncludeFiles are file names to include as matches
+ IncludeFiles []string
+}
+
+// a cacheConfig stores the inputs that determine what should be included in the cache
+type cacheConfig struct {
+ CacheParams
+
+ // FilesystemView is a unique identifier telling which parts of which file systems
+ // are readable by the Finder. In practice its value is essentially username@hostname.
+ // FilesystemView is set to ensure that a cache file copied to another host or
+ // found by another user doesn't inadvertently get reused.
+ FilesystemView string
+}
+
+func (p *cacheConfig) Dump() ([]byte, error) {
+ bytes, err := json.Marshal(p)
+ return bytes, err
+}
+
+// a cacheMetadata stores version information about the cache
+type cacheMetadata struct {
+ // The Version enables the Finder to determine whether it can even parse the file
+ // If the version changes, the entire cache file must be regenerated
+ Version string
+
+ // The CacheParams enables the Finder to determine whether the parameters match
+ // If the CacheParams change, the Finder can choose how much of the cache file to reuse
+ // (although in practice, the Finder will probably choose to ignore the entire file anyway)
+ Config cacheConfig
+}
+
+type Logger interface {
+ Output(calldepth int, s string) error
+}
+
+// the Finder is the main struct that callers will want to use
+type Finder struct {
+ // configuration
+ DbPath string
+ numDbLoadingThreads int
+ numSearchingThreads int
+ cacheMetadata cacheMetadata
+ logger Logger
+ filesystem fs.FileSystem
+
+ // temporary state
+ threadPool *threadPool
+ mutex sync.Mutex
+
+ // non-temporary state
+ modifiedFlag int32
+ nodes pathMap
+}
+
+// New creates a new Finder for use
+func New(cacheParams CacheParams, filesystem fs.FileSystem,
+ logger Logger, dbPath string) *Finder {
+
+ numThreads := runtime.NumCPU() * 2
+ numDbLoadingThreads := numThreads
+ numSearchingThreads := numThreads
+
+ metadata := cacheMetadata{
+ Version: versionString,
+ Config: cacheConfig{
+ CacheParams: cacheParams,
+ FilesystemView: filesystem.ViewId(),
+ },
+ }
+
+ finder := &Finder{
+ numDbLoadingThreads: numDbLoadingThreads,
+ numSearchingThreads: numSearchingThreads,
+ cacheMetadata: metadata,
+ logger: logger,
+ filesystem: filesystem,
+
+ nodes: *newPathMap("/"),
+ DbPath: dbPath,
+ }
+
+ finder.loadFromFilesystem()
+
+ finder.verbosef("Done parsing db\n")
+ return finder
+}
+
+// FindAll searches for every cached file
+func (f *Finder) FindAll() []string {
+ return f.FindAt("/")
+}
+
+// FindAt searches for every cached file under <rootDir>
+func (f *Finder) FindAt(rootDir string) []string {
+ filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
+ return entries.DirNames, entries.FileNames
+ }
+ return f.FindMatching(rootDir, filter)
+}
+
+// FindNamed searches for every cached file named <fileName>
+func (f *Finder) FindNamed(fileName string) []string {
+ return f.FindNamedAt("/", fileName)
+}
+
+// FindNamedAt searches under <rootPath> for every file named <fileName>
+// A caller might use FindNamedAt instead of FindNamed to limit the search
+// to a subset of the cache
+func (f *Finder) FindNamedAt(rootPath string, fileName string) []string {
+ filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
+ matches := []string{}
+ for _, foundName := range entries.FileNames {
+ if foundName == fileName {
+ matches = append(matches, foundName)
+ }
+ }
+ return entries.DirNames, matches
+
+ }
+ return f.FindMatching(rootPath, filter)
+}
+
+// FindFirstNamed searches for every file named <fileName>
+// Whenever it finds a match, it stops searching subdirectories
+func (f *Finder) FindFirstNamed(fileName string) []string {
+ return f.FindFirstNamedAt("/", fileName)
+}
+
+// FindFirstNamedAt searches for every file named <fileName>
+// Whenever it finds a match, it stops searching subdirectories
+func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string {
+ filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
+ matches := []string{}
+ for _, foundName := range entries.FileNames {
+ if foundName == fileName {
+ matches = append(matches, foundName)
+ }
+ }
+
+ if len(matches) > 0 {
+ return []string{}, matches
+ }
+ return entries.DirNames, matches
+ }
+ return f.FindMatching(rootPath, filter)
+}
+
+// FindMatching is the most general exported function for searching for files in the cache
+// The WalkFunc will be invoked repeatedly, and each invocation is expected to return
+// the directory names to descend into and the file names to treat as matches.
+// WalkFunc will be invoked potentially many times in parallel, and must be threadsafe.
+func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string {
+ // set up some parameters
+ scanStart := time.Now()
+ var isRel bool
+ workingDir := f.cacheMetadata.Config.WorkingDirectory
+
+ isRel = !filepath.IsAbs(rootPath)
+ if isRel {
+ rootPath = filepath.Join(workingDir, rootPath)
+ }
+
+ rootPath = filepath.Clean(rootPath)
+
+ // ensure nothing else is using the Finder
+ f.verbosef("FindMatching waiting for finder to be idle\n")
+ f.lock()
+ defer f.unlock()
+
+ node := f.nodes.GetNode(rootPath, false)
+ if node == nil {
+ f.verbosef("No data for path %v ; apparently not included in cache params: %v\n",
+ rootPath, f.cacheMetadata.Config.CacheParams)
+ // path is not found; don't do a search
+ return []string{}
+ }
+
+ // search for matching files
+ f.verbosef("Finder finding %v using cache\n", rootPath)
+ results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads)
+
+ // format and return results
+ if isRel {
+ for i := 0; i < len(results); i++ {
+ results[i] = strings.Replace(results[i], workingDir+"/", "", 1)
+ }
+ }
+ sort.Strings(results)
+ f.verbosef("Found %v files under %v in %v using cache\n",
+ len(results), rootPath, time.Since(scanStart))
+ return results
+}
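+
+// An illustrative call, equivalent to FindAt("/src"):
+//
+//	paths := f.FindMatching("/src", func(entries DirEntries) ([]string, []string) {
+//		return entries.DirNames, entries.FileNames
+//	})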
+
+// Shutdown saves the contents of the Finder to its database file
+func (f *Finder) Shutdown() {
+ f.verbosef("Shutting down\n")
+ if f.wasModified() {
+ err := f.dumpDb()
+ if err != nil {
+ f.verbosef("%v\n", err)
+ }
+ } else {
+ f.verbosef("Skipping dumping unmodified db\n")
+ }
+}
+
+// End of public api
+
+// joinCleanPaths is like filepath.Join but is faster because
+// joinCleanPaths doesn't have to support paths ending in "/" or containing ".."
+func joinCleanPaths(base string, leaf string) string {
+ if base == "" {
+ return leaf
+ }
+ if base == "/" {
+ return base + leaf
+ }
+ if leaf == "" {
+ return base
+ }
+ return base + "/" + leaf
+}
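+
+// For example: joinCleanPaths("/", "tmp") == "/tmp" and
+// joinCleanPaths("/tmp", "a") == "/tmp/a".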
+
+func (f *Finder) verbosef(format string, args ...interface{}) {
+ f.logger.Output(2, fmt.Sprintf(format, args...))
+}
+
+// loadFromFilesystem populates the in-memory cache based on the contents of the filesystem
+func (f *Finder) loadFromFilesystem() {
+ f.threadPool = newThreadPool(f.numDbLoadingThreads)
+
+ err := f.startFromExternalCache()
+ if err != nil {
+ f.startWithoutExternalCache()
+ }
+
+ startTime := time.Now()
+ f.verbosef("Waiting for pending requests to complete\n")
+ f.threadPool.Wait()
+ f.verbosef("Is idle after %v\n", time.Now().Sub(startTime))
+ f.threadPool = nil
+}
+
+func (f *Finder) startFind(path string) {
+ if !filepath.IsAbs(path) {
+ path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
+ }
+ node := f.nodes.GetNode(path, true)
+ f.statDirAsync(node)
+}
+
+func (f *Finder) lock() {
+ f.mutex.Lock()
+}
+
+func (f *Finder) unlock() {
+ f.mutex.Unlock()
+}
+
+// a statResponse is the relevant portion of the response from the filesystem to a Stat call
+type statResponse struct {
+ ModTime int64
+ Inode uint64
+ Device uint64
+}
+
+// a pathAndStats stores a path and its stats
+type pathAndStats struct {
+ statResponse
+
+ Path string
+}
+
+// a dirFullInfo stores all of the relevant information we know about a directory
+type dirFullInfo struct {
+ pathAndStats
+
+ FileNames []string
+}
+
+// a PersistedDirInfo is the information about a dir that we save to our cache on disk
+type PersistedDirInfo struct {
+ // These field names are short because they are repeated many times in the output json file
+ P string // path
+ T int64 // modification time
+ I uint64 // inode number
+ F []string // relevant filenames contained
+}
+
+// a PersistedDirs is the information that we persist for a group of dirs
+type PersistedDirs struct {
+ // the device on which each directory is stored
+ Device uint64
+ // the common root path to which all contained dirs are relative
+ Root string
+ // the directories themselves
+ Dirs []PersistedDirInfo
+}
+
+// a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time
+type CacheEntry []PersistedDirs
+
+// a DirEntries lists the files and directories contained directly within a specific directory
+type DirEntries struct {
+ Path string
+
+ // elements of DirNames are just the dir names; they don't include any '/' character
+ DirNames []string
+ // elements of FileNames are just the file names; they don't include '/' character
+ FileNames []string
+}
+
+// a WalkFunc is the type that is passed into various Find functions for determining which
+// directories the caller wishes be walked. The WalkFunc is expected to decide which
+// directories to walk and which files to consider as matches to the original query.
+type WalkFunc func(DirEntries) (dirs []string, files []string)
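+
+// An illustrative WalkFunc (not part of the API) that matches files by suffix
+// while walking every subdirectory:
+//
+//	var bpFilter WalkFunc = func(entries DirEntries) (dirs []string, files []string) {
+//		matches := []string{}
+//		for _, name := range entries.FileNames {
+//			if strings.HasSuffix(name, ".bp") {
+//				matches = append(matches, name)
+//			}
+//		}
+//		return entries.DirNames, matches
+//	}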
+
+// a mapNode stores the relevant stats about a directory to be stored in a pathMap
+type mapNode struct {
+ statResponse
+ FileNames []string
+}
+
+// a pathMap implements the directory tree structure of nodes
+type pathMap struct {
+ mapNode
+
+ path string
+
+ children map[string]*pathMap
+
+ // number of descendent nodes, including self
+ approximateNumDescendents int
+}
+
+func newPathMap(path string) *pathMap {
+ result := &pathMap{path: path, children: make(map[string]*pathMap, 4),
+ approximateNumDescendents: 1}
+ return result
+}
+
+// GetNode returns the node at <path>
+func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap {
+ if len(path) > 0 && path[0] == '/' {
+ path = path[1:]
+ }
+
+ node := m
+ for {
+ if path == "" {
+ return node
+ }
+
+ index := strings.Index(path, "/")
+ var firstComponent string
+ if index >= 0 {
+ firstComponent = path[:index]
+ path = path[index+1:]
+ } else {
+ firstComponent = path
+ path = ""
+ }
+
+ child, found := node.children[firstComponent]
+
+ if !found {
+ if createIfNotFound {
+ child = node.newChild(firstComponent)
+ } else {
+ return nil
+ }
+ }
+
+ node = child
+ }
+}
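+
+// For example, m.GetNode("/a/b", true) walks from m through child "a" to
+// child "b", creating any nodes that don't exist yet.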
+
+func (m *pathMap) newChild(name string) (child *pathMap) {
+ path := joinCleanPaths(m.path, name)
+ newChild := newPathMap(path)
+ m.children[name] = newChild
+
+ return m.children[name]
+}
+
+func (m *pathMap) UpdateNumDescendents() int {
+ count := 1
+ for _, child := range m.children {
+ count += child.approximateNumDescendents
+ }
+ m.approximateNumDescendents = count
+ return count
+}
+
+func (m *pathMap) UpdateNumDescendentsRecursive() {
+ for _, child := range m.children {
+ child.UpdateNumDescendentsRecursive()
+ }
+ m.UpdateNumDescendents()
+}
+
+func (m *pathMap) MergeIn(other *pathMap) {
+ for key, theirs := range other.children {
+ ours, found := m.children[key]
+ if found {
+ ours.MergeIn(theirs)
+ } else {
+ m.children[key] = theirs
+ }
+ }
+ if other.ModTime != 0 {
+ m.mapNode = other.mapNode
+ }
+ m.UpdateNumDescendents()
+}
+
+func (m *pathMap) DumpAll() []dirFullInfo {
+ results := []dirFullInfo{}
+ m.dumpInto("", &results)
+ return results
+}
+
+func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) {
+ *results = append(*results,
+ dirFullInfo{
+ pathAndStats{statResponse: m.statResponse, Path: path},
+ m.FileNames},
+ )
+ for key, child := range m.children {
+ childPath := joinCleanPaths(path, key)
+ if len(childPath) == 0 || childPath[0] != '/' {
+ childPath = "/" + childPath
+ }
+ child.dumpInto(childPath, results)
+ }
+}
+
+// a semaphore can be locked by up to <capacity> callers at once
+type semaphore struct {
+ pool chan bool
+}
+
+func newSemaphore(capacity int) *semaphore {
+ return &semaphore{pool: make(chan bool, capacity)}
+}
+
+func (l *semaphore) Lock() {
+ l.pool <- true
+}
+
+func (l *semaphore) Unlock() {
+ <-l.pool
+}
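+
+// For example, a semaphore created with newSemaphore(2) admits two concurrent
+// holders; a third call to Lock() blocks until one of them calls Unlock().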
+
+// A threadPool runs goroutines and supports throttling and waiting.
+// Without throttling, Go may exhaust the maximum number of various resources, such as
+// threads or file descriptors, and crash the program.
+type threadPool struct {
+ receivedRequests sync.WaitGroup
+ activeRequests semaphore
+}
+
+func newThreadPool(maxNumConcurrentThreads int) *threadPool {
+ return &threadPool{
+ receivedRequests: sync.WaitGroup{},
+ activeRequests: *newSemaphore(maxNumConcurrentThreads),
+ }
+}
+
+// Run requests to run the given function in its own goroutine
+func (p *threadPool) Run(function func()) {
+ p.receivedRequests.Add(1)
+ // If Run() was called from within a goroutine spawned by this threadPool,
+ // then we may need to return from Run() before having capacity to actually
+ // run <function>.
+ //
+ // It's possible that the body of <function> contains a statement (such as a syscall)
+ // that will cause Go to pin it to a thread, or will contain a statement that uses
+ // another resource that is in short supply (such as a file descriptor), so we can't
+ // actually run <function> until we have capacity.
+ //
+ // However, the semaphore used for synchronization is implemented via a channel and
+ // shouldn't require a new thread for each access.
+ go func() {
+ p.activeRequests.Lock()
+ function()
+ p.activeRequests.Unlock()
+ p.receivedRequests.Done()
+ }()
+}
+
+// Wait waits until all goroutines are done, just like sync.WaitGroup's Wait
+func (p *threadPool) Wait() {
+ p.receivedRequests.Wait()
+}
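+
+// An illustrative use (dirsToStat and statDir are hypothetical):
+//
+//	pool := newThreadPool(runtime.NumCPU())
+//	for _, dir := range dirsToStat {
+//		dir := dir // capture a per-iteration copy
+//		pool.Run(func() { statDir(dir) })
+//	}
+//	pool.Wait()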
+
+func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) {
+ // group each dirFullInfo by its Device, to avoid having to repeat it in the output
+ dirsByDevice := map[uint64][]PersistedDirInfo{}
+ for _, entry := range dirInfos {
+ _, found := dirsByDevice[entry.Device]
+ if !found {
+ dirsByDevice[entry.Device] = []PersistedDirInfo{}
+ }
+ dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device],
+ PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames})
+ }
+
+ cacheEntry := CacheEntry{}
+
+ for device, infos := range dirsByDevice {
+ // find common prefix
+ prefix := ""
+ if len(infos) > 0 {
+ prefix = infos[0].P
+ }
+ for _, info := range infos {
+ for !strings.HasPrefix(info.P+"/", prefix+"/") {
+ prefix = filepath.Dir(prefix)
+ }
+ }
+ // remove common prefix
+ for i := range infos {
+ suffix := strings.Replace(infos[i].P, prefix, "", 1)
+ if len(suffix) > 0 && suffix[0] == '/' {
+ suffix = suffix[1:]
+ }
+ infos[i].P = suffix
+ }
+
+ // turn the map (keyed by device) into a list of structs with labeled fields
+ // this is to improve readability of the output
+ cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos})
+ }
+
+ // convert to json.
+ // it would save some space to use a different format than json for the db file,
+ // but the space and time savings are small, and json is easy for humans to read
+ bytes, err := json.Marshal(cacheEntry)
+ return bytes, err
+}
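+
+// For example (illustrative paths on a single device), the dirs /src/a,
+// /src/a/b and /src/c share the common root /src, so they serialize as
+// approximately:
+//
+//	[{"Device":1,"Root":"/src","Dirs":[
+//		{"P":"a","T":...,"I":...,"F":[...]},
+//		{"P":"a/b", ...}, {"P":"c", ...}]}]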
+
+func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) {
+ var cacheEntry CacheEntry
+ err := json.Unmarshal(bytes, &cacheEntry)
+ if err != nil {
+ return nil, err
+ }
+
+ // convert from a CacheEntry to a []dirFullInfo (by copying a few fields)
+ capacity := 0
+ for _, element := range cacheEntry {
+ capacity += len(element.Dirs)
+ }
+ nodes := make([]dirFullInfo, capacity)
+ count := 0
+ for _, element := range cacheEntry {
+ for _, dir := range element.Dirs {
+ path := joinCleanPaths(element.Root, dir.P)
+
+ nodes[count] = dirFullInfo{
+ pathAndStats: pathAndStats{
+ statResponse: statResponse{
+ ModTime: dir.T, Inode: dir.I, Device: element.Device,
+ },
+ Path: path},
+ FileNames: dir.F}
+ count++
+ }
+ }
+ return nodes, nil
+}
+
+// We use the following separator byte to distinguish individually parseable blocks of json
+// because we know this separator won't appear in the json that we're parsing.
+//
+// The newline byte can only appear in a UTF-8 stream if the newline character appears, because:
+// - The newline character is encoded as "0000 1010" in binary ("0a" in hex)
+// - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte
+// character.
+//
+// We know that the newline character will never appear in our json string, because:
+// - If a newline character appears as part of a data string, then json encoding will
+// emit two characters instead: '\' and 'n'.
+// - The json encoder that we use doesn't emit the optional newlines between any of its
+// other outputs.
+const lineSeparator = byte('\n')
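+
+// Consequently the db file is laid out as one parseable unit per line
+// (schematically):
+//
+//	Android finder version 1
+//	{"WorkingDirectory":"/src", ..., "FilesystemView":"user@host"}
+//	[{"Device":...,"Root":...,"Dirs":[...]}]
+//	[{"Device":...,"Root":...,"Dirs":[...]}]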
+
+func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) {
+ return reader.ReadBytes(lineSeparator)
+}
+
+// validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder
+func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool {
+ cacheVersionBytes, err := f.readLine(cacheReader)
+ if err != nil {
+ f.verbosef("Failed to read database header; database is invalid\n")
+ return false
+ }
+ if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator {
+ cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1]
+ }
+ cacheVersionString := string(cacheVersionBytes)
+ currentVersion := f.cacheMetadata.Version
+ if cacheVersionString != currentVersion {
+ f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion)
+ return false
+ }
+
+ cacheParamBytes, err := f.readLine(cacheReader)
+ if err != nil {
+ f.verbosef("Failed to read database search params; database is invalid\n")
+ return false
+ }
+
+ if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator {
+ cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1]
+ }
+
+ currentParamBytes, err := f.cacheMetadata.Config.Dump()
+ if err != nil {
+ panic("Finder failed to serialize its parameters")
+ }
+ cacheParamString := string(cacheParamBytes)
+ currentParamString := string(currentParamBytes)
+ if cacheParamString != currentParamString {
+ f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString)
+ return false
+ }
+ return true
+}
+
+// loadBytes compares the cache info in <data> to the state of the filesystem
+// loadBytes returns a map representing <data> and also a slice of dirs that need to be re-walked
+func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) {
+
+ helperStartTime := time.Now()
+
+ cachedNodes, err := f.parseCacheEntry(data)
+ if err != nil {
+ return nil, nil, fmt.Errorf("Failed to parse block %v: %v\n", id, err.Error())
+ }
+
+ unmarshalDate := time.Now()
+ f.verbosef("Unmarshaled %v objects for %v in %v\n",
+ len(cachedNodes), id, unmarshalDate.Sub(helperStartTime))
+
+ tempMap := newPathMap("/")
+ stats := make([]statResponse, len(cachedNodes))
+
+ for i, node := range cachedNodes {
+ // check the file system for an updated timestamp
+ stats[i] = f.statDirSync(node.Path)
+ }
+
+ dirsToWalk = []string{}
+ for i, cachedNode := range cachedNodes {
+ updated := stats[i]
+ // save the cached value
+ container := tempMap.GetNode(cachedNode.Path, true)
+ container.mapNode = mapNode{statResponse: updated}
+
+ // if the metadata changed and the directory still exists, then
+ // make a note to walk it later
+ if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 {
+ f.setModified()
+ // make a note that the directory needs to be walked
+ dirsToWalk = append(dirsToWalk, cachedNode.Path)
+ } else {
+ container.mapNode.FileNames = cachedNode.FileNames
+ }
+ }
+ // count the number of nodes to improve our understanding of the shape of the tree,
+ // thereby improving parallelism of subsequent searches
+ tempMap.UpdateNumDescendentsRecursive()
+
+ f.verbosef("Statted inodes of block %v in %v\n", id, time.Now().Sub(unmarshalDate))
+ return tempMap, dirsToWalk, nil
+}
+
+// startFromExternalCache loads the cache database from disk
+// startFromExternalCache waits to return until the load of the cache db is complete, but
+// startFromExternalCache does not wait for every listDir() or statDir() request to complete
+func (f *Finder) startFromExternalCache() (err error) {
+ startTime := time.Now()
+ dbPath := f.DbPath
+
+ // open cache file and validate its header
+ reader, err := f.filesystem.Open(dbPath)
+ if err != nil {
+ return errors.New("No data to load from database\n")
+ }
+ bufferedReader := bufio.NewReader(reader)
+ if !f.validateCacheHeader(bufferedReader) {
+ return errors.New("Cache header does not match")
+ }
+ f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath)
+
+ // read the file and spawn threads to process it
+ nodesToWalk := [][]*pathMap{}
+ mainTree := newPathMap("/")
+
+ // read the blocks and stream them into <blockChannel>
+ type dataBlock struct {
+ id int
+ err error
+ data []byte
+ }
+ blockChannel := make(chan dataBlock, f.numDbLoadingThreads)
+ readBlocks := func() {
+ index := 0
+ for {
+ // It takes some time to unmarshal the input from json, so we want
+ // to unmarshal it in parallel. In order to find valid places to
+ // break the input, we scan for the line separators that we inserted
+ // (for this purpose) when we dumped the database.
+ data, err := f.readLine(bufferedReader)
+ var response dataBlock
+ done := false
+ if err != nil && err != io.EOF {
+ response = dataBlock{id: index, err: err, data: nil}
+ done = true
+ } else {
+ done = (err == io.EOF)
+ response = dataBlock{id: index, err: nil, data: data}
+ }
+ blockChannel <- response
+ index++
+ duration := time.Since(startTime)
+ f.verbosef("Read block %v after %v\n", index, duration)
+ if done {
+ f.verbosef("Read %v blocks in %v\n", index, duration)
+ close(blockChannel)
+ return
+ }
+ }
+ }
+ go readBlocks()
+
+ // Read from <blockChannel> and stream the responses into <resultChannel>.
+ type workResponse struct {
+ id int
+ err error
+ tree *pathMap
+ updatedDirs []string
+ }
+ resultChannel := make(chan workResponse)
+ processBlocks := func() {
+ numProcessed := 0
+ threadPool := newThreadPool(f.numDbLoadingThreads)
+ for {
+ // get a block to process
+ block, received := <-blockChannel
+ if !received {
+ break
+ }
+
+ if block.err != nil {
+ resultChannel <- workResponse{err: block.err}
+ break
+ }
+ numProcessed++
+ // wait until there is CPU available to process it
+ threadPool.Run(
+ func() {
+ processStartTime := time.Now()
+ f.verbosef("Starting to process block %v after %v\n",
+ block.id, processStartTime.Sub(startTime))
+ tempMap, updatedDirs, err := f.loadBytes(block.id, block.data)
+ var response workResponse
+ if err != nil {
+ f.verbosef(
+ "Block %v failed to parse with error %v\n",
+ block.id, err)
+ response = workResponse{err: err}
+ } else {
+ response = workResponse{
+ id: block.id,
+ err: nil,
+ tree: tempMap,
+ updatedDirs: updatedDirs,
+ }
+ }
+ f.verbosef("Processed block %v in %v\n",
+ block.id, time.Since(processStartTime),
+ )
+ resultChannel <- response
+ },
+ )
+ }
+ threadPool.Wait()
+ f.verbosef("Finished processing %v blocks in %v\n",
+ numProcessed, time.Since(startTime))
+ close(resultChannel)
+ }
+ go processBlocks()
+
+ // Read from <resultChannel> and use the results
+ combineResults := func() (err error) {
+ for {
+ result, received := <-resultChannel
+ if !received {
+ break
+ }
+ if err != nil {
+ // In case of an error, wait for work to complete before
+ // returning the error. This ensures that any subsequent
+ // work doesn't need to compete for resources (and possibly
+ // fail due to, for example, a filesystem limit on the number of
+ // concurrently open files) with past work.
+ continue
+ }
+ if result.err != nil {
+ err = result.err
+ continue
+ }
+ // update main tree
+ mainTree.MergeIn(result.tree)
+ // record any new directories that we will need to Stat()
+ updatedNodes := make([]*pathMap, len(result.updatedDirs))
+ for j, dir := range result.updatedDirs {
+ node := mainTree.GetNode(dir, false)
+ updatedNodes[j] = node
+ }
+ nodesToWalk = append(nodesToWalk, updatedNodes)
+ }
+ return err
+ }
+ err = combineResults()
+ if err != nil {
+ return err
+ }
+
+ f.nodes = *mainTree
+
+ // after having loaded the entire db and therefore created entries for
+ // the directories we know of, now it's safe to start calling ReadDir on
+ // any updated directories
+ for i := range nodesToWalk {
+ f.listDirsAsync(nodesToWalk[i])
+ }
+ f.verbosef("Loaded db and statted its contents in %v\n", time.Since(startTime))
+ return err
+}
+
+// startWithoutExternalCache starts scanning the filesystem according to the cache config
+// startWithoutExternalCache should be called if startFromExternalCache is not applicable
+func (f *Finder) startWithoutExternalCache() {
+ configDirs := f.cacheMetadata.Config.RootDirs
+
+ // clean paths
+ candidates := make([]string, len(configDirs))
+ for i, dir := range configDirs {
+ candidates[i] = filepath.Clean(dir)
+ }
+ // remove duplicates
+ dirsToScan := make([]string, 0, len(configDirs))
+ for _, candidate := range candidates {
+ include := true
+ for _, included := range dirsToScan {
+ if included == "/" || strings.HasPrefix(candidate+"/", included+"/") {
+ include = false
+ break
+ }
+ }
+ if include {
+ dirsToScan = append(dirsToScan, candidate)
+ }
+ }
+
+ // start searching finally
+ for _, path := range dirsToScan {
+ f.verbosef("Starting find of %v\n", path)
+ f.startFind(path)
+ }
+}
+
+// isInfoUpToDate tells whether <new> can confirm that results computed at <old> are still valid
+func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) {
+ if old.Inode != new.Inode {
+ return false
+ }
+ if old.ModTime != new.ModTime {
+ return false
+ }
+ if old.Device != new.Device {
+ return false
+ }
+ return true
+}
+
+func (f *Finder) wasModified() bool {
+ return atomic.LoadInt32(&f.modifiedFlag) > 0
+}
+
+func (f *Finder) setModified() {
+ var newVal int32
+ newVal = 1
+ atomic.StoreInt32(&f.modifiedFlag, newVal)
+}
+
+// sortedDirEntries exports directory entries to facilitate dumping them to the external cache
+func (f *Finder) sortedDirEntries() []dirFullInfo {
+ startTime := time.Now()
+ nodes := make([]dirFullInfo, 0)
+ for _, node := range f.nodes.DumpAll() {
+ if node.ModTime != 0 {
+ nodes = append(nodes, node)
+ }
+ }
+ discoveryDate := time.Now()
+ f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime))
+ less := func(i int, j int) bool {
+ return nodes[i].Path < nodes[j].Path
+ }
+ sort.Slice(nodes, less)
+ sortDate := time.Now()
+ f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate))
+
+ return nodes
+}
+
+// serializeDb converts the cache database into a form to save to disk
+func (f *Finder) serializeDb() ([]byte, error) {
+ // sort dir entries
+ var entryList = f.sortedDirEntries()
+
+ // Generate an output file that can be conveniently loaded using the same number of threads
+ // as were used in this execution (because presumably that will be the number of threads
+ // used in the next execution too)
+
+ // generate header
+ header := []byte{}
+ header = append(header, []byte(f.cacheMetadata.Version)...)
+ header = append(header, lineSeparator)
+ configDump, err := f.cacheMetadata.Config.Dump()
+ if err != nil {
+ return nil, err
+ }
+ header = append(header, configDump...)
+
+ // serialize individual blocks in parallel
+ numBlocks := f.numDbLoadingThreads
+ if numBlocks > len(entryList) {
+ numBlocks = len(entryList)
+ }
+ blocks := make([][]byte, 1+numBlocks)
+ blocks[0] = header
+ blockMin := 0
+ wg := sync.WaitGroup{}
+ var errLock sync.Mutex
+
+ for i := 1; i <= numBlocks; i++ {
+ // identify next block
+ blockMax := len(entryList) * i / numBlocks
+ block := entryList[blockMin:blockMax]
+
+ // process block
+ wg.Add(1)
+ go func(index int, block []dirFullInfo) {
+ byteBlock, subErr := f.serializeCacheEntry(block)
+ f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock))
+ if subErr != nil {
+ f.verbosef("%v\n", subErr.Error())
+ errLock.Lock()
+ err = subErr
+ errLock.Unlock()
+ } else {
+ blocks[index] = byteBlock
+ }
+ wg.Done()
+ }(i, block)
+
+ blockMin = blockMax
+ }
+
+ wg.Wait()
+
+ if err != nil {
+ return nil, err
+ }
+
+ content := bytes.Join(blocks, []byte{lineSeparator})
+
+ return content, nil
+}
+
+// dumpDb saves the cache database to disk
+func (f *Finder) dumpDb() error {
+ startTime := time.Now()
+ f.verbosef("Dumping db\n")
+
+ tempPath := f.DbPath + ".tmp"
+
+ bytes, err := f.serializeDb()
+ if err != nil {
+ return err
+ }
+ serializeDate := time.Now()
+ f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime))
+ // dump file and atomically move
+ err = f.filesystem.WriteFile(tempPath, bytes, 0777)
+ if err != nil {
+ return err
+ }
+ err = f.filesystem.Rename(tempPath, f.DbPath)
+ if err != nil {
+ return err
+ }
+
+ f.verbosef("Wrote db in %v\n", time.Now().Sub(serializeDate))
+ return nil
+}
+
+func (f *Finder) statDirAsync(dir *pathMap) {
+ node := dir
+ path := dir.path
+ f.threadPool.Run(
+ func() {
+ updatedStats := f.statDirSync(path)
+
+ if !f.isInfoUpToDate(node.statResponse, updatedStats) {
+ node.mapNode = mapNode{
+ statResponse: updatedStats,
+ FileNames: []string{},
+ }
+ f.setModified()
+ if node.statResponse.ModTime != 0 {
+ // modification time was updated, so re-scan for
+ // child directories
+ f.listDirAsync(dir)
+ }
+ }
+ },
+ )
+}
+
+func (f *Finder) statDirSync(path string) statResponse {
+
+ fileInfo, err := f.filesystem.Lstat(path)
+
+ var stats statResponse
+ if err != nil {
+ // in case of a failure to stat the directory, treat the directory as missing (modTime = 0)
+ return stats
+ }
+ modTime := fileInfo.ModTime()
+ stats = statResponse{}
+ inode, err := f.filesystem.InodeNumber(fileInfo)
+ if err != nil {
+ panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error()))
+ }
+ stats.Inode = inode
+ device, err := f.filesystem.DeviceNumber(fileInfo)
+ if err != nil {
+ panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error()))
+ }
+ stats.Device = device
+ permissionsChangeTime, err := f.filesystem.PermTime(fileInfo)
+
+ if err != nil {
+ panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error()))
+ }
+ // We're only interested in knowing whether anything about the directory
+ // has changed since last check, so we use the latest of the two
+ // modification times (content modification (mtime) and
+ // permission modification (ctime))
+ if permissionsChangeTime.After(modTime) {
+ modTime = permissionsChangeTime
+ }
+ stats.ModTime = modTime.UnixNano()
+
+ return stats
+}
+
+// pruneCacheCandidates removes the items that we don't want to include in our persistent cache
+func (f *Finder) pruneCacheCandidates(items *DirEntries) {
+
+ for _, fileName := range items.FileNames {
+ for _, abortedName := range f.cacheMetadata.Config.PruneFiles {
+ if fileName == abortedName {
+ items.FileNames = []string{}
+ items.DirNames = []string{}
+ return
+ }
+ }
+ }
+
+ // remove any files that aren't the ones we want to include
+ writeIndex := 0
+ for _, fileName := range items.FileNames {
+ // include only these files
+ for _, includedName := range f.cacheMetadata.Config.IncludeFiles {
+ if fileName == includedName {
+ items.FileNames[writeIndex] = fileName
+ writeIndex++
+ break
+ }
+ }
+ }
+ // resize
+ items.FileNames = items.FileNames[:writeIndex]
+
+ writeIndex = 0
+ for _, dirName := range items.DirNames {
+ items.DirNames[writeIndex] = dirName
+ // ignore other dirs that are known to not be inputs to the build process
+ include := true
+ for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs {
+ if dirName == excludedName {
+ // don't include
+ include = false
+ break
+ }
+ }
+ if include {
+ writeIndex++
+ }
+ }
+ // resize
+ items.DirNames = items.DirNames[:writeIndex]
+}
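+
+// For example, with PruneFiles = [".out-dir"] (illustrative), a directory
+// containing a file named ".out-dir" reports no files and no subdirectories
+// at all; otherwise its FileNames are narrowed to IncludeFiles and any
+// DirNames listed in ExcludeDirs are dropped.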
+
+func (f *Finder) listDirsAsync(nodes []*pathMap) {
+ f.threadPool.Run(
+ func() {
+ for i := range nodes {
+ f.listDirSync(nodes[i])
+ }
+ },
+ )
+}
+
+func (f *Finder) listDirAsync(node *pathMap) {
+ f.threadPool.Run(
+ func() {
+ f.listDirSync(node)
+ },
+ )
+}
+
+func (f *Finder) listDirSync(dir *pathMap) {
+ path := dir.path
+ children, err := f.filesystem.ReadDir(path)
+
+ if err != nil {
+ // if listing the contents of the directory fails (presumably due to
+ // permission denied), then treat the directory as empty
+ children = []os.FileInfo{}
+ }
+
+ var subdirs []string
+ var subfiles []string
+
+ for _, child := range children {
+ linkBits := child.Mode() & os.ModeSymlink
+ isLink := linkBits != 0
+ if child.IsDir() {
+ if !isLink {
+ // Skip symlink dirs.
+ // We don't have to support symlink dirs because
+ // that would cause duplicates.
+ subdirs = append(subdirs, child.Name())
+ }
+ } else {
+ // We do have to support symlink files because the link name might be
+ // different than the target name
+ // (for example, Android.bp -> build/soong/root.bp)
+ subfiles = append(subfiles, child.Name())
+ }
+
+ }
+ parentNode := dir
+
+ entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles}
+ f.pruneCacheCandidates(entry)
+
+ // create a pathMap node for each relevant subdirectory
+ relevantChildren := map[string]*pathMap{}
+ for _, subdirName := range entry.DirNames {
+ childNode, found := parentNode.children[subdirName]
+ // if we already knew of this directory, then we already have a request pending to Stat it
+ // if we didn't already know of this directory, then we must Stat it now
+ if !found {
+ childNode = parentNode.newChild(subdirName)
+ f.statDirAsync(childNode)
+ }
+ relevantChildren[subdirName] = childNode
+ }
+ // Note that in rare cases, it's possible that we're reducing the set of
+ // children via this statement, if these are all true:
+ // 1. we previously had a cache that knew about subdirectories of parentNode
+ // 2. the user created a prune-file (described in pruneCacheCandidates)
+ // inside <parentNode>, which specifies that the contents of parentNode
+ // are to be ignored.
+ // The fact that it's possible to remove children here means that *pathMap structs
+ // must not be looked up from f.nodes by filepath (and instead must be accessed by
+ // direct pointer) until after every listDirSync completes
+ parentNode.FileNames = entry.FileNames
+ parentNode.children = relevantChildren
+
+}
+
+// listMatches takes a node and a function that specifies which subdirectories and
+// files to include, and listMatches returns the matches
+func (f *Finder) listMatches(node *pathMap,
+ filter WalkFunc) (subDirs []*pathMap, filePaths []string) {
+ entries := DirEntries{
+ FileNames: node.FileNames,
+ }
+ entries.DirNames = make([]string, 0, len(node.children))
+ for childName := range node.children {
+ entries.DirNames = append(entries.DirNames, childName)
+ }
+
+ dirNames, fileNames := filter(entries)
+
+ subDirs = []*pathMap{}
+ filePaths = make([]string, 0, len(fileNames))
+ for _, fileName := range fileNames {
+ filePaths = append(filePaths, joinCleanPaths(node.path, fileName))
+ }
+ subDirs = make([]*pathMap, 0, len(dirNames))
+ for _, childName := range dirNames {
+ child, ok := node.children[childName]
+ if ok {
+ subDirs = append(subDirs, child)
+ }
+ }
+
+ return subDirs, filePaths
+}
+
+// findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache.
+func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc,
+ approxNumThreads int) []string {
+
+ if approxNumThreads < 2 {
+ // Done spawning threads; process remaining directories
+ return f.findInCacheSinglethreaded(node, filter)
+ }
+
+ totalWork := 0
+ for _, child := range node.children {
+ totalWork += child.approximateNumDescendents
+ }
+ childrenResults := make(chan []string, len(node.children))
+
+ subDirs, filePaths := f.listMatches(node, filter)
+
+ // process child directories
+ for _, child := range subDirs {
+ numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork
+ childProcessor := func(child *pathMap) {
+ childResults := f.findInCacheMultithreaded(child, filter, numChildThreads)
+ childrenResults <- childResults
+ }
+ // If we're allowed to use more than 1 thread to process this directory,
+ // then instead we use 1 thread for each subdirectory.
+ // It would be strange to spawn threads for only some subdirectories.
+ go childProcessor(child)
+ }
+
+ // collect results
+ for i := 0; i < len(subDirs); i++ {
+ childResults := <-childrenResults
+ filePaths = append(filePaths, childResults...)
+ }
+ close(childrenResults)
+
+ return filePaths
+}
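+
+// For example, with approxNumThreads = 8 and two children having roughly 300
+// and 100 descendents, the children are allotted 8*300/400 = 6 and
+// 8*100/400 = 2 threads respectively.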
+
+// findInCacheSinglethreaded synchronously searches the cache for all matching file paths
+// Note: findInCacheSinglethreaded runs 2x to 4x as fast as a recursive version by being iterative
+func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string {
+ if node == nil {
+ return []string{}
+ }
+
+ nodes := []*pathMap{node}
+ matches := []string{}
+
+ for len(nodes) > 0 {
+ currentNode := nodes[0]
+ nodes = nodes[1:]
+
+ subDirs, filePaths := f.listMatches(currentNode, filter)
+
+ nodes = append(nodes, subDirs...)
+
+ matches = append(matches, filePaths...)
+ }
+ return matches
+}
diff --git a/finder/finder_test.go b/finder/finder_test.go
new file mode 100644
index 00000000..60e5eb28
--- /dev/null
+++ b/finder/finder_test.go
@@ -0,0 +1,1573 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package finder
+
+import (
+ "fmt"
+ "log"
+ "path/filepath"
+ "reflect"
+ "testing"
+
+ "sort"
+
+ "io/ioutil"
+
+ "android/soong/fs"
+ "runtime/debug"
+ "time"
+)
+
+// some utils for tests to use
+func newFs() *fs.MockFs {
+ return fs.NewMockFs(map[string][]byte{})
+}
+
+func newFinder(t *testing.T, filesystem *fs.MockFs, cacheParams CacheParams) *Finder {
+ cachePath := "/finder/finder-db"
+ cacheDir := filepath.Dir(cachePath)
+ filesystem.MkDirs(cacheDir)
+ if cacheParams.WorkingDirectory == "" {
+ cacheParams.WorkingDirectory = "/cwd"
+ }
+
+ logger := log.New(ioutil.Discard, "", 0)
+ finder := New(cacheParams, filesystem, logger, cachePath)
+ return finder
+}
+
+func finderWithSameParams(t *testing.T, original *Finder) *Finder {
+ return New(
+ original.cacheMetadata.Config.CacheParams,
+ original.filesystem,
+ original.logger,
+ original.DbPath)
+}
+
+func write(t *testing.T, path string, content string, filesystem *fs.MockFs) {
+ parent := filepath.Dir(path)
+ filesystem.MkDirs(parent)
+ err := filesystem.WriteFile(path, []byte(content), 0777)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+}
+
+func create(t *testing.T, path string, filesystem *fs.MockFs) {
+ write(t, path, "hi", filesystem)
+}
+
+func delete(t *testing.T, path string, filesystem *fs.MockFs) {
+ err := filesystem.Remove(path)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+}
+
+func removeAll(t *testing.T, path string, filesystem *fs.MockFs) {
+ err := filesystem.RemoveAll(path)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+}
+
+func move(t *testing.T, oldPath string, newPath string, filesystem *fs.MockFs) {
+ err := filesystem.Rename(oldPath, newPath)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+}
+
+func link(t *testing.T, newPath string, oldPath string, filesystem *fs.MockFs) {
+ parentPath := filepath.Dir(newPath)
+ err := filesystem.MkDirs(parentPath)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+ err = filesystem.Symlink(oldPath, newPath)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+}
+func read(t *testing.T, path string, filesystem *fs.MockFs) string {
+ reader, err := filesystem.Open(path)
+ if err != nil {
+		t.Fatal(err.Error())
+ }
+ bytes, err := ioutil.ReadAll(reader)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+ return string(bytes)
+}
+func modTime(t *testing.T, path string, filesystem *fs.MockFs) time.Time {
+ stats, err := filesystem.Lstat(path)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+ return stats.ModTime()
+}
+func setReadable(t *testing.T, path string, readable bool, filesystem *fs.MockFs) {
+ err := filesystem.SetReadable(path, readable)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+}
+func fatal(t *testing.T, message string) {
+ t.Error(message)
+ debug.PrintStack()
+ t.FailNow()
+}
+func assertSameResponse(t *testing.T, actual []string, expected []string) {
+ sort.Strings(actual)
+ sort.Strings(expected)
+ if !reflect.DeepEqual(actual, expected) {
+ fatal(
+ t,
+ fmt.Sprintf(
+ "Expected Finder to return these %v paths:\n %v,\ninstead returned these %v paths: %v\n",
+ len(expected), expected, len(actual), actual),
+ )
+ }
+}
+
+func assertSameStatCalls(t *testing.T, actual []string, expected []string) {
+ sort.Strings(actual)
+ sort.Strings(expected)
+
+ if !reflect.DeepEqual(actual, expected) {
+ fatal(
+ t,
+ fmt.Sprintf(
+ "Finder made incorrect Stat calls.\n"+
+ "Actual:\n"+
+ "%v\n"+
+ "Expected:\n"+
+ "%v\n"+
+ "\n",
+ actual, expected),
+ )
+ }
+}
+func assertSameReadDirCalls(t *testing.T, actual []string, expected []string) {
+ sort.Strings(actual)
+ sort.Strings(expected)
+
+ if !reflect.DeepEqual(actual, expected) {
+ fatal(
+ t,
+ fmt.Sprintf(
+ "Finder made incorrect ReadDir calls.\n"+
+ "Actual:\n"+
+ "%v\n"+
+ "Expected:\n"+
+ "%v\n"+
+ "\n",
+ actual, expected),
+ )
+ }
+}
+
+// runSimpleTest creates a few files, searches for findme.txt, and checks for the expected matches
+func runSimpleTest(t *testing.T, existentPaths []string, expectedMatches []string) {
+ filesystem := newFs()
+ root := "/tmp"
+ filesystem.MkDirs(root)
+ for _, path := range existentPaths {
+ create(t, filepath.Join(root, path), filesystem)
+ }
+
+ finder := newFinder(t,
+ filesystem,
+ CacheParams{
+ "/cwd",
+ []string{root},
+ nil,
+ nil,
+ []string{"findme.txt", "skipme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt(root, "findme.txt")
+ absoluteMatches := []string{}
+ for i := range expectedMatches {
+ absoluteMatches = append(absoluteMatches, filepath.Join(root, expectedMatches[i]))
+ }
+ assertSameResponse(t, foundPaths, absoluteMatches)
+}
+
+// end of utils, start of individual tests
+
+func TestSingleFile(t *testing.T) {
+ runSimpleTest(t,
+ []string{"findme.txt"},
+ []string{"findme.txt"},
+ )
+}
+
+func TestIncludeFiles(t *testing.T) {
+ runSimpleTest(t,
+ []string{"findme.txt", "skipme.txt"},
+ []string{"findme.txt"},
+ )
+}
+
+func TestNestedDirectories(t *testing.T) {
+ runSimpleTest(t,
+ []string{"findme.txt", "skipme.txt", "subdir/findme.txt", "subdir/skipme.txt"},
+ []string{"findme.txt", "subdir/findme.txt"},
+ )
+}
+
+func TestEmptyDirectory(t *testing.T) {
+ runSimpleTest(t,
+ []string{},
+ []string{},
+ )
+}
+
+func TestEmptyPath(t *testing.T) {
+ filesystem := newFs()
+ root := "/tmp"
+ create(t, filepath.Join(root, "findme.txt"), filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{root},
+ IncludeFiles: []string{"findme.txt", "skipme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("", "findme.txt")
+
+ assertSameResponse(t, foundPaths, []string{})
+}
+
+func TestFilesystemRoot(t *testing.T) {
+ filesystem := newFs()
+ root := "/"
+ createdPath := "/findme.txt"
+ create(t, createdPath, filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{root},
+ IncludeFiles: []string{"findme.txt", "skipme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt(root, "findme.txt")
+
+ assertSameResponse(t, foundPaths, []string{createdPath})
+}
+
+func TestNonexistentPath(t *testing.T) {
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp/IDontExist"},
+ IncludeFiles: []string{"findme.txt", "skipme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("/tmp/IAlsoDontExist", "findme.txt")
+
+ assertSameResponse(t, foundPaths, []string{})
+}
+
+func TestExcludeDirs(t *testing.T) {
+ filesystem := newFs()
+ create(t, "/tmp/exclude/findme.txt", filesystem)
+ create(t, "/tmp/exclude/subdir/findme.txt", filesystem)
+ create(t, "/tmp/subdir/exclude/findme.txt", filesystem)
+ create(t, "/tmp/subdir/subdir/findme.txt", filesystem)
+ create(t, "/tmp/subdir/findme.txt", filesystem)
+ create(t, "/tmp/findme.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ ExcludeDirs: []string{"exclude"},
+ IncludeFiles: []string{"findme.txt", "skipme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+
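+ // directories named "exclude" should be skipped entirely, along with everything beneath them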
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt",
+ "/tmp/subdir/findme.txt",
+ "/tmp/subdir/subdir/findme.txt"})
+}
+
+func TestPruneFiles(t *testing.T) {
+ filesystem := newFs()
+ create(t, "/tmp/out/findme.txt", filesystem)
+ create(t, "/tmp/out/.ignore-out-dir", filesystem)
+ create(t, "/tmp/out/child/findme.txt", filesystem)
+
+ create(t, "/tmp/out2/.ignore-out-dir", filesystem)
+ create(t, "/tmp/out2/sub/findme.txt", filesystem)
+
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/include/findme.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ PruneFiles: []string{".ignore-out-dir"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+
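+ // any directory containing a file named ".ignore-out-dir" should be pruned, along with everything beneath it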
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt",
+ "/tmp/include/findme.txt"})
+}
+
+func TestRootDir(t *testing.T) {
+ filesystem := newFs()
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/subdir/findme.txt", filesystem)
+ create(t, "/tmp/b/findme.txt", filesystem)
+ create(t, "/tmp/b/subdir/findme.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp/a"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt")
+
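+ // only files under the configured root dir ("/tmp/a") should be reported; "/tmp/b" is never scanned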
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/a/findme.txt",
+ "/tmp/a/subdir/findme.txt"})
+}
+
+func TestUncachedDir(t *testing.T) {
+ filesystem := newFs()
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/subdir/findme.txt", filesystem)
+ create(t, "/tmp/b/findme.txt", filesystem)
+ create(t, "/tmp/b/subdir/findme.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/IDoNotExist"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+
+ foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt")
+ // If the caller queries for a file that is not in the cache, then computing the
+ // correct answer won't be fast, and it would be easy for the caller to
+ // fail to notice its slowness. Instead, we only ever search the cache for files
+ // to return, which enforces that we can determine which files will be
+ // interesting upfront.
+ assertSameResponse(t, foundPaths, []string{})
+
+ finder.Shutdown()
+}
+
+func TestSearchingForFilesExcludedFromCache(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/misc.txt", filesystem)
+
+ // set up the finder and run it
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "misc.txt")
+ // If the caller queries for a file that is not in the cache, then computing the
+ // correct answer won't be fast, and it would be easy for the caller to
+ // fail to notice its slowness. Instead, we only ever search the cache for files
+ // to return, which enforces that we can determine which files will be
+ // interesting upfront.
+ assertSameResponse(t, foundPaths, []string{})
+
+ finder.Shutdown()
+}
+
+func TestRelativeFilePaths(t *testing.T) {
+ filesystem := newFs()
+
+ create(t, "/tmp/ignore/hi.txt", filesystem)
+ create(t, "/tmp/include/hi.txt", filesystem)
+ create(t, "/cwd/hi.txt", filesystem)
+ create(t, "/cwd/a/hi.txt", filesystem)
+ create(t, "/cwd/a/a/hi.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/cwd", "/tmp/include"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
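+ // querying through a relative path should yield paths relative to the working directory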
+ foundPaths := finder.FindNamedAt("a", "hi.txt")
+ assertSameResponse(t, foundPaths,
+ []string{"a/hi.txt",
+ "a/a/hi.txt"})
+
+ foundPaths = finder.FindNamedAt("/tmp/include", "hi.txt")
+ assertSameResponse(t, foundPaths, []string{"/tmp/include/hi.txt"})
+
+ foundPaths = finder.FindNamedAt(".", "hi.txt")
+ assertSameResponse(t, foundPaths,
+ []string{"hi.txt",
+ "a/hi.txt",
+ "a/a/hi.txt"})
+
+ foundPaths = finder.FindNamedAt("/tmp/include", "hi.txt")
+ assertSameResponse(t, foundPaths, []string{"/tmp/include/hi.txt"})
+}
+
+// this test has to be run with the race detector (`go test -race src/android/soong/finder/*.go`)
+// for there to be much chance of it actually detecting any race that may be present
+func TestRootDirsContainedInOtherRootDirs(t *testing.T) {
+ filesystem := newFs()
+
+ create(t, "/tmp/a/b/c/d/e/f/g/h/i/j/findme.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/", "/a/b/c", "/a/b/c/d/e/f", "/a/b/c/d/e/f/g/h/i"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("/tmp/a", "findme.txt")
+
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/a/b/c/d/e/f/g/h/i/j/findme.txt"})
+}
+
+func TestFindFirst(t *testing.T) {
+ filesystem := newFs()
+ create(t, "/tmp/a/hi.txt", filesystem)
+ create(t, "/tmp/b/hi.txt", filesystem)
+ create(t, "/tmp/b/a/hi.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindFirstNamed("hi.txt")
+
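+ // the search should stop descending a branch once it finds a match, so "/tmp/b/a/hi.txt" is not expected in the response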
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/a/hi.txt",
+ "/tmp/b/hi.txt"},
+ )
+}
+
+func TestConcurrentFindSameDirectory(t *testing.T) {
+ filesystem := newFs()
+
+ // create a bunch of files and directories
+ paths := []string{}
+ for i := 0; i < 10; i++ {
+ parentDir := fmt.Sprintf("/tmp/%v", i)
+ for j := 0; j < 10; j++ {
+ filePath := filepath.Join(parentDir, fmt.Sprintf("%v/findme.txt", j))
+ paths = append(paths, filePath)
+ }
+ }
+ sort.Strings(paths)
+ for _, path := range paths {
+ create(t, path, filesystem)
+ }
+
+ // set up a finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ numTests := 20
+ results := make(chan []string, numTests)
+ // make several parallel calls to the finder
+ for i := 0; i < numTests; i++ {
+ go func() {
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ results <- foundPaths
+ }()
+ }
+
+ // check that each response was correct
+ for i := 0; i < numTests; i++ {
+ foundPaths := <-results
+ assertSameResponse(t, foundPaths, paths)
+ }
+}
+
+func TestConcurrentFindDifferentDirectories(t *testing.T) {
+ filesystem := newFs()
+
+ // create a bunch of files and directories
+ allFiles := []string{}
+ numSubdirs := 10
+ rootPaths := []string{}
+ queryAnswers := [][]string{}
+ for i := 0; i < numSubdirs; i++ {
+ parentDir := fmt.Sprintf("/tmp/%v", i)
+ rootPaths = append(rootPaths, parentDir)
+ queryAnswers = append(queryAnswers, []string{})
+ for j := 0; j < 10; j++ {
+ filePath := filepath.Join(parentDir, fmt.Sprintf("%v/findme.txt", j))
+ queryAnswers[i] = append(queryAnswers[i], filePath)
+ allFiles = append(allFiles, filePath)
+ }
+ sort.Strings(queryAnswers[i])
+ }
+ sort.Strings(allFiles)
+ for _, path := range allFiles {
+ create(t, path, filesystem)
+ }
+
+ // set up a finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ type testRun struct {
+ path string
+ foundMatches []string
+ correctMatches []string
+ }
+
+ numTests := numSubdirs + 1
+ testRuns := make(chan testRun, numTests)
+
+ searchAt := func(path string, correctMatches []string) {
+ foundPaths := finder.FindNamedAt(path, "findme.txt")
+ testRuns <- testRun{path, foundPaths, correctMatches}
+ }
+
+ // make several parallel calls to the finder
+ go searchAt("/tmp", allFiles)
+ for i := 0; i < len(rootPaths); i++ {
+ go searchAt(rootPaths[i], queryAnswers[i])
+ }
+
+ // check that each response was correct
+ for i := 0; i < numTests; i++ {
+ testRun := <-testRuns
+ assertSameResponse(t, testRun.foundMatches, testRun.correctMatches)
+ }
+}
+
+func TestStrangelyFormattedPaths(t *testing.T) {
+ filesystem := newFs()
+
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/b/findme.txt", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"//tmp//a//.."},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("//tmp//a//..", "findme.txt")
+
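+ // the response should contain normalized paths, even though the query used doubled slashes and ".."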
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/a/findme.txt",
+ "/tmp/b/findme.txt",
+ "/tmp/findme.txt"})
+}
+
+func TestCorruptedCacheHeader(t *testing.T) {
+ filesystem := newFs()
+
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ write(t, "/finder/finder-db", "sample header", filesystem)
+
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ defer finder.Shutdown()
+
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+
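+ // the unparseable cache contents should be discarded and the correct response computed from the filesystem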
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/a/findme.txt",
+ "/tmp/findme.txt"})
+}
+
+func TestCanUseCache(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ // check the response of the first finder
+ correctResponse := []string{"/tmp/a/findme.txt",
+ "/tmp/findme.txt"}
+ assertSameResponse(t, foundPaths, correctResponse)
+ finder.Shutdown()
+
+ // check results
+ cacheText := read(t, finder.DbPath, filesystem)
+ if len(cacheText) < 1 {
+ t.Fatalf("saved cache db is empty\n")
+ }
+ if len(filesystem.StatCalls) == 0 {
+ t.Fatal("No Stat calls recorded by mock filesystem")
+ }
+ if len(filesystem.ReadDirCalls) == 0 {
+ t.Fatal("No ReadDir calls recorded by filesystem")
+ }
+ statCalls := filesystem.StatCalls
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+ // check results
+ assertSameResponse(t, foundPaths, correctResponse)
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
+ assertSameStatCalls(t, filesystem.StatCalls, statCalls)
+
+ finder2.Shutdown()
+}
+
+func TestCorruptedCacheBody(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+
+ // check the response of the first finder
+ correctResponse := []string{"/tmp/a/findme.txt",
+ "/tmp/findme.txt"}
+ assertSameResponse(t, foundPaths, correctResponse)
+ numStatCalls := len(filesystem.StatCalls)
+ numReadDirCalls := len(filesystem.ReadDirCalls)
+
+ // load the cache file, corrupt it, and save it
+ cacheReader, err := filesystem.Open(finder.DbPath)
+ if err != nil {
+ t.Fatal(err)
+ }
+ cacheData, err := ioutil.ReadAll(cacheReader)
+ if err != nil {
+ t.Fatal(err)
+ }
+ cacheData = append(cacheData, []byte("DontMindMe")...)
+ filesystem.WriteFile(finder.DbPath, cacheData, 0777)
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+ // check results
+ assertSameResponse(t, foundPaths, correctResponse)
+ numNewStatCalls := len(filesystem.StatCalls)
+ numNewReadDirCalls := len(filesystem.ReadDirCalls)
+ // It's permissable to make more Stat calls with a corrupted cache because
+ // the Finder may restart once it detects corruption.
+ // However, it may have already issued many Stat calls.
+ // Because a corrupted db is not expected to be a common (or even a supported case),
+ // we don't care to optimize it and don't cache the already-issued Stat calls
+ if numNewReadDirCalls < numReadDirCalls {
+ t.Fatalf(
+ "Finder made fewer ReadDir calls with a corrupted cache (%v calls) than with no cache"+
+ " (%v calls)",
+ numNewReadDirCalls, numReadDirCalls)
+ }
+ if numNewStatCalls < numStatCalls {
+ t.Fatalf(
+ "Finder made fewer Stat calls with a corrupted cache (%v calls) than with no cache (%v calls)",
+ numNewStatCalls, numStatCalls)
+ }
+ finder2.Shutdown()
+}
+
+func TestStatCalls(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/a/findme.txt", filesystem)
+
+ // run finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+
+ // check response
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
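+ // with no preexisting cache, each directory should be statted and listed exactly once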
+ assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a"})
+}
+
+func TestFileAdded(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/ignoreme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/b/ignore.txt", filesystem)
+ create(t, "/tmp/b/c/nope.txt", filesystem)
+ create(t, "/tmp/b/c/d/irrelevant.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ filesystem.Clock.Tick()
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ create(t, "/tmp/b/c/findme.txt", filesystem)
+ filesystem.Clock.Tick()
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+
+ // check results
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt", "/tmp/b/c/findme.txt"})
+ assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b/c"})
+ finder2.Shutdown()
+}
+
+func TestDirectoriesAdded(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/ignoreme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/b/ignore.txt", filesystem)
+ create(t, "/tmp/b/c/nope.txt", filesystem)
+ create(t, "/tmp/b/c/d/irrelevant.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ create(t, "/tmp/b/c/new/findme.txt", filesystem)
+ create(t, "/tmp/b/c/new/new2/findme.txt", filesystem)
+ create(t, "/tmp/b/c/new/new2/ignoreme.txt", filesystem)
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+
+ // check results
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/a/findme.txt", "/tmp/b/c/new/findme.txt", "/tmp/b/c/new/new2/findme.txt"})
+ assertSameStatCalls(t, filesystem.StatCalls,
+ []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d", "/tmp/b/c/new", "/tmp/b/c/new/new2"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b/c", "/tmp/b/c/new", "/tmp/b/c/new/new2"})
+
+ finder2.Shutdown()
+}
+
+func TestDirectoryAndSubdirectoryBothUpdated(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/hi1.txt", filesystem)
+ create(t, "/tmp/a/hi1.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"hi1.txt", "hi2.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "hi1.txt")
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths, []string{"/tmp/hi1.txt", "/tmp/a/hi1.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ create(t, "/tmp/hi2.txt", filesystem)
+ create(t, "/tmp/a/hi2.txt", filesystem)
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindAll()
+
+ // check results
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/hi1.txt", "/tmp/hi2.txt", "/tmp/a/hi1.txt", "/tmp/a/hi2.txt"})
+ assertSameStatCalls(t, filesystem.StatCalls,
+ []string{"/tmp", "/tmp/a"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a"})
+
+ finder2.Shutdown()
+}
+
+func TestFileDeleted(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/ignoreme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/b/findme.txt", filesystem)
+ create(t, "/tmp/b/c/nope.txt", filesystem)
+ create(t, "/tmp/b/c/d/irrelevant.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt", "/tmp/b/findme.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ delete(t, "/tmp/b/findme.txt", filesystem)
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+
+ // check results
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/findme.txt"})
+ assertSameStatCalls(t, filesystem.StatCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/c", "/tmp/b/c/d"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b"})
+
+ finder2.Shutdown()
+}
+
+func TestDirectoriesDeleted(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/1/findme.txt", filesystem)
+ create(t, "/tmp/a/1/2/findme.txt", filesystem)
+ create(t, "/tmp/b/findme.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt",
+ "/tmp/a/findme.txt",
+ "/tmp/a/1/findme.txt",
+ "/tmp/a/1/2/findme.txt",
+ "/tmp/b/findme.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ removeAll(t, "/tmp/a/1", filesystem)
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+
+ // check results
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/b/findme.txt"})
+ // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped
+ // if the Finder detects the nonexistence of /tmp/a/1
+ // However, when resuming from cache, we don't want the Finder to necessarily wait
+ // to stat a directory until after statting its parent.
+ // So here we just include /tmp/a/1/2 in the list.
+ // The Finder is currently implemented to always restat every dir and
+ // to not short-circuit due to nonexistence of parents (but it will remove
+ // missing dirs from the cache for next time)
+ assertSameStatCalls(t, filesystem.StatCalls,
+ []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/a"})
+
+ finder2.Shutdown()
+}
+
+func TestDirectoriesMoved(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/1/findme.txt", filesystem)
+ create(t, "/tmp/a/1/2/findme.txt", filesystem)
+ create(t, "/tmp/b/findme.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt",
+ "/tmp/a/findme.txt",
+ "/tmp/a/1/findme.txt",
+ "/tmp/a/1/2/findme.txt",
+ "/tmp/b/findme.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ move(t, "/tmp/a", "/tmp/c", filesystem)
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+
+ // check results
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt",
+ "/tmp/b/findme.txt",
+ "/tmp/c/findme.txt",
+ "/tmp/c/1/findme.txt",
+ "/tmp/c/1/2/findme.txt"})
+ // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped
+ // if the Finder detects the nonexistence of /tmp/a/1
+ // However, when resuming from cache, we don't want the Finder to necessarily wait
+ // to stat a directory until after statting its parent.
+ // So here we just include /tmp/a/1/2 in the list.
+ // The Finder is currently implemented to always restat every dir and
+ // to not short-circuit due to nonexistence of parents (but it will remove
+ // missing dirs from the cache for next time)
+ assertSameStatCalls(t, filesystem.StatCalls,
+ []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b", "/tmp/c", "/tmp/c/1", "/tmp/c/1/2"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/c", "/tmp/c/1", "/tmp/c/1/2"})
+ finder2.Shutdown()
+}
+
+func TestDirectoriesSwapped(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/1/findme.txt", filesystem)
+ create(t, "/tmp/a/1/2/findme.txt", filesystem)
+ create(t, "/tmp/b/findme.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt",
+ "/tmp/a/findme.txt",
+ "/tmp/a/1/findme.txt",
+ "/tmp/a/1/2/findme.txt",
+ "/tmp/b/findme.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ move(t, "/tmp/a", "/tmp/temp", filesystem)
+ move(t, "/tmp/b", "/tmp/a", filesystem)
+ move(t, "/tmp/temp", "/tmp/b", filesystem)
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+
+ // check results
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt",
+ "/tmp/a/findme.txt",
+ "/tmp/b/findme.txt",
+ "/tmp/b/1/findme.txt",
+ "/tmp/b/1/2/findme.txt"})
+ // Technically, we don't care whether /tmp/a/1/2 gets Statted or gets skipped
+ // if the Finder detects the nonexistence of /tmp/a/1
+ // However, when resuming from cache, we don't want the Finder to necessarily wait
+ // to stat a directory until after statting its parent.
+ // So here we just include /tmp/a/1/2 in the list.
+ // The Finder is currently implemented to always restat every dir and
+ // to not short-circuit due to nonexistence of parents (but it will remove
+ // missing dirs from the cache for next time)
+ assertSameStatCalls(t, filesystem.StatCalls,
+ []string{"/tmp", "/tmp/a", "/tmp/a/1", "/tmp/a/1/2", "/tmp/b", "/tmp/b/1", "/tmp/b/1/2"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp", "/tmp/a", "/tmp/b", "/tmp/b/1", "/tmp/b/1/2"})
+ finder2.Shutdown()
+}
+
+// runFsReplacementTest tests a change modifying properties of the filesystem itself:
+// a change of the user, the hostname, or the device number.
+// runFsReplacementTest is a helper function called by other tests
+func runFsReplacementTest(t *testing.T, fs1 *fs.MockFs, fs2 *fs.MockFs) {
+ // setup fs1
+ create(t, "/tmp/findme.txt", fs1)
+ create(t, "/tmp/a/findme.txt", fs1)
+ create(t, "/tmp/a/a/findme.txt", fs1)
+
+ // setup fs2 to have the same directories but different files
+ create(t, "/tmp/findme.txt", fs2)
+ create(t, "/tmp/a/findme.txt", fs2)
+ create(t, "/tmp/a/a/ignoreme.txt", fs2)
+ create(t, "/tmp/a/b/findme.txt", fs2)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ fs1,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+ // check the response of the first finder
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/a/a/findme.txt"})
+
+ // copy the cache data from the first filesystem to the second
+ cacheContent := read(t, finder.DbPath, fs1)
+ write(t, finder.DbPath, cacheContent, fs2)
+
+ // run the second finder, with the same config and same cache contents but a different filesystem
+ finder2 := newFinder(
+ t,
+ fs2,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths = finder2.FindNamedAt("/tmp", "findme.txt")
+
+ // check results
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/findme.txt", "/tmp/a/findme.txt", "/tmp/a/b/findme.txt"})
+ assertSameStatCalls(t, fs2.StatCalls,
+ []string{"/tmp", "/tmp/a", "/tmp/a/a", "/tmp/a/b"})
+ assertSameReadDirCalls(t, fs2.ReadDirCalls,
+ []string{"/tmp", "/tmp/a", "/tmp/a/a", "/tmp/a/b"})
+ finder2.Shutdown()
+}
+
+func TestChangeOfDevice(t *testing.T) {
+ fs1 := newFs()
+ // the mock filesystem doesn't offer mounting controls as fine-grained as a real filesystem's, but this should be adequate
+ fs1.SetDeviceNumber(0)
+
+ fs2 := newFs()
+ fs2.SetDeviceNumber(1)
+
+ runFsReplacementTest(t, fs1, fs2)
+}
+
+func TestChangeOfUserOrHost(t *testing.T) {
+ fs1 := newFs()
+ fs1.SetViewId("me@here")
+
+ fs2 := newFs()
+ fs2.SetViewId("you@there")
+
+ runFsReplacementTest(t, fs1, fs2)
+}
+
+func TestConsistentCacheOrdering(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ for i := 0; i < 5; i++ {
+ create(t, fmt.Sprintf("/tmp/%v/findme.txt", i), filesystem)
+ }
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ finder.FindNamedAt("/tmp", "findme.txt")
+ finder.Shutdown()
+
+ // read db file
+ string1 := read(t, finder.DbPath, filesystem)
+
+ err := filesystem.Remove(finder.DbPath)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ // run another finder
+ finder2 := finderWithSameParams(t, finder)
+ finder2.FindNamedAt("/tmp", "findme.txt")
+ finder2.Shutdown()
+
+ string2 := read(t, finder.DbPath, filesystem)
+
+ if string1 != string2 {
+ t.Errorf("Running Finder twice generated two dbs not having identical contents.\n"+
+ "Content of first file:\n"+
+ "\n"+
+ "%v"+
+ "\n"+
+ "\n"+
+ "Content of second file:\n"+
+ "\n"+
+ "%v\n"+
+ "\n",
+ string1,
+ string2,
+ )
+ }
+}
+
+func TestNumSyscallsOfSecondFind(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/misc.txt", filesystem)
+
+ // set up the finder and run it once
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ assertSameResponse(t, foundPaths, []string{"/tmp/findme.txt", "/tmp/a/findme.txt"})
+
+ filesystem.ClearMetrics()
+
+ // run the finder again and confirm it doesn't check the filesystem
+ refoundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ assertSameResponse(t, refoundPaths, foundPaths)
+ assertSameStatCalls(t, filesystem.StatCalls, []string{})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
+
+ finder.Shutdown()
+}
+
+func TestChangingParamsOfSecondFind(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/findme.txt", filesystem)
+ create(t, "/tmp/a/findme.txt", filesystem)
+ create(t, "/tmp/a/metoo.txt", filesystem)
+
+ // set up the finder and run it once
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"findme.txt", "metoo.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "findme.txt")
+ assertSameResponse(t, foundPaths, []string{"/tmp/findme.txt", "/tmp/a/findme.txt"})
+
+ filesystem.ClearMetrics()
+
+ // run the finder again and confirm it gets the right answer without asking the filesystem
+ refoundPaths := finder.FindNamedAt("/tmp", "metoo.txt")
+ assertSameResponse(t, refoundPaths, []string{"/tmp/a/metoo.txt"})
+ assertSameStatCalls(t, filesystem.StatCalls, []string{})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
+
+ finder.Shutdown()
+}
+
+func TestSymlinkPointingToFile(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/a/hi.txt", filesystem)
+ create(t, "/tmp/a/ignoreme.txt", filesystem)
+ link(t, "/tmp/hi.txt", "a/hi.txt", filesystem)
+ link(t, "/tmp/b/hi.txt", "../a/hi.txt", filesystem)
+ link(t, "/tmp/c/hi.txt", "/tmp/hi.txt", filesystem)
+ link(t, "/tmp/d/hi.txt", "../a/bye.txt", filesystem)
+ link(t, "/tmp/d/bye.txt", "../a/hi.txt", filesystem)
+ link(t, "/tmp/e/bye.txt", "../a/bye.txt", filesystem)
+ link(t, "/tmp/f/hi.txt", "somethingThatDoesntExist", filesystem)
+
+ // set up the finder and run it once
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+ foundPaths := finder.FindNamedAt("/tmp", "hi.txt")
+ // should search based on the name of the link rather than the destination or validity of the link
+ correctResponse := []string{
+ "/tmp/a/hi.txt",
+ "/tmp/hi.txt",
+ "/tmp/b/hi.txt",
+ "/tmp/c/hi.txt",
+ "/tmp/d/hi.txt",
+ "/tmp/f/hi.txt",
+ }
+ assertSameResponse(t, foundPaths, correctResponse)
+ finder.Shutdown()
+}
+
+func TestSymlinkPointingToDirectory(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/dir/hi.txt", filesystem)
+ create(t, "/tmp/dir/ignoreme.txt", filesystem)
+
+ link(t, "/tmp/links/dir", "../dir", filesystem)
+ link(t, "/tmp/links/link", "../dir", filesystem)
+ link(t, "/tmp/links/broken", "nothingHere", filesystem)
+ link(t, "/tmp/links/recursive", "recursive", filesystem)
+
+ // set up the finder and run it once
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+
+ foundPaths := finder.FindNamedAt("/tmp", "hi.txt")
+
+ // should completely ignore symlinks that point to directories
+ correctResponse := []string{
+ "/tmp/dir/hi.txt",
+ }
+ assertSameResponse(t, foundPaths, correctResponse)
+ finder.Shutdown()
+}
+
+// TestAddPruneFile confirms that adding a prune-file (into a directory for which we
+// already had a cache) causes the directory to be ignored
+func TestAddPruneFile(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/out/hi.txt", filesystem)
+ create(t, "/tmp/out/a/hi.txt", filesystem)
+ create(t, "/tmp/hi.txt", filesystem)
+
+ // do find
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ PruneFiles: []string{".ignore-out-dir"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+
+ foundPaths := finder.FindNamedAt("/tmp", "hi.txt")
+
+ // check result
+ assertSameResponse(t, foundPaths,
+ []string{"/tmp/hi.txt",
+ "/tmp/out/hi.txt",
+ "/tmp/out/a/hi.txt"},
+ )
+ finder.Shutdown()
+
+ // modify filesystem
+ filesystem.Clock.Tick()
+ create(t, "/tmp/out/.ignore-out-dir", filesystem)
+ // run another find and check its result
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindNamedAt("/tmp", "hi.txt")
+ assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt"})
+ finder2.Shutdown()
+}
+
+func TestUpdatingDbIffChanged(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/a/hi.txt", filesystem)
+ create(t, "/tmp/b/bye.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+ foundPaths := finder.FindAll()
+ filesystem.Clock.Tick()
+ finder.Shutdown()
+ // check results
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt"})
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+ create(t, "/tmp/b/hi.txt", filesystem)
+ filesystem.Clock.Tick()
+ filesystem.ClearMetrics()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindAll()
+ finder2.Shutdown()
+ // check results
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt", "/tmp/b/hi.txt"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{"/tmp/b"})
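+ // the cache contents changed, so the db should have been rewritten at shutdown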
+ expectedDbWriteTime := filesystem.Clock.Time()
+ actualDbWriteTime := modTime(t, finder2.DbPath, filesystem)
+ if actualDbWriteTime != expectedDbWriteTime {
+ t.Fatalf("Expected to write db at %v, actually wrote db at %v\n",
+ expectedDbWriteTime, actualDbWriteTime)
+ }
+
+ // reset metrics
+ filesystem.ClearMetrics()
+
+ // run the third finder
+ finder3 := finderWithSameParams(t, finder2)
+ foundPaths = finder3.FindAll()
+
+ // check results
+ assertSameResponse(t, foundPaths, []string{"/tmp/a/hi.txt", "/tmp/b/hi.txt"})
+ assertSameReadDirCalls(t, filesystem.ReadDirCalls, []string{})
+ finder3.Shutdown()
+ actualDbWriteTime = modTime(t, finder3.DbPath, filesystem)
+ if actualDbWriteTime != expectedDbWriteTime {
+ t.Fatalf("Re-wrote db even when contents did not change")
+ }
+}
+
+func TestDirectoryNotPermitted(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/hi.txt", filesystem)
+ create(t, "/tmp/a/hi.txt", filesystem)
+ create(t, "/tmp/a/a/hi.txt", filesystem)
+ create(t, "/tmp/b/hi.txt", filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+ foundPaths := finder.FindAll()
+ filesystem.Clock.Tick()
+ finder.Shutdown()
+ allPaths := []string{"/tmp/hi.txt", "/tmp/a/hi.txt", "/tmp/a/a/hi.txt", "/tmp/b/hi.txt"}
+ // check results
+ assertSameResponse(t, foundPaths, allPaths)
+
+ // modify the filesystem
+ filesystem.Clock.Tick()
+
+ setReadable(t, "/tmp/a", false, filesystem)
+ filesystem.Clock.Tick()
+
+ // run the second finder
+ finder2 := finderWithSameParams(t, finder)
+ foundPaths = finder2.FindAll()
+ finder2.Shutdown()
+ // check results
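+ // the unreadable directory and everything beneath it should be omitted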
+ assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt", "/tmp/b/hi.txt"})
+
+ // modify the filesystem back
+ setReadable(t, "/tmp/a", true, filesystem)
+
+ // run the third finder
+ finder3 := finderWithSameParams(t, finder2)
+ foundPaths = finder3.FindAll()
+ finder3.Shutdown()
+ // check results
+ assertSameResponse(t, foundPaths, allPaths)
+}
+
+func TestFileNotPermitted(t *testing.T) {
+ // setup filesystem
+ filesystem := newFs()
+ create(t, "/tmp/hi.txt", filesystem)
+ setReadable(t, "/tmp/hi.txt", false, filesystem)
+
+ // run the first finder
+ finder := newFinder(
+ t,
+ filesystem,
+ CacheParams{
+ RootDirs: []string{"/tmp"},
+ IncludeFiles: []string{"hi.txt"},
+ },
+ )
+ foundPaths := finder.FindAll()
+ filesystem.Clock.Tick()
+ finder.Shutdown()
+ // check results
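+ // the file itself is unreadable, but matching is done by name, so it should still be reported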
+ assertSameResponse(t, foundPaths, []string{"/tmp/hi.txt"})
+}