From 6bbf9e29fcb069871a9153e845242ed6fe0e1b94 Mon Sep 17 00:00:00 2001 From: Shinichiro Hamaji Date: Tue, 19 Jan 2016 18:59:13 +0900 Subject: [C++] Run the regeneration check in parallel From ~1.5s to ~0.4s for an Android target. --- Makefile.ckati | 1 + regen.cc | 539 ++++++++++++++++++++++++++++++++++----------------------- stats.cc | 1 + thread.cc | 84 +++++++++ thread.h | 35 ++++ 5 files changed, 447 insertions(+), 213 deletions(-) create mode 100644 thread.cc create mode 100644 thread.h diff --git a/Makefile.ckati b/Makefile.ckati index 0ba8385..cd8a0f8 100644 --- a/Makefile.ckati +++ b/Makefile.ckati @@ -46,6 +46,7 @@ KATI_CXX_SRCS := \ stringprintf.cc \ strutil.cc \ symtab.cc \ + thread.cc \ timeutil.cc \ var.cc diff --git a/regen.cc b/regen.cc index 2c7cc73..81fd189 100644 --- a/regen.cc +++ b/regen.cc @@ -17,6 +17,9 @@ #include #include +#include +#include +#include #include "fileutil.h" #include "find.h" @@ -25,273 +28,383 @@ #include "ninja.h" #include "stats.h" #include "strutil.h" +#include "thread.h" -static bool ShouldIgnoreDirty(StringPiece s) { +namespace { + +#define RETURN_TRUE do { \ + if (g_flags.dump_kati_stamp) \ + needs_regen_ = true; \ + else \ + return true; \ + } while (0) + +bool ShouldIgnoreDirty(StringPiece s) { Pattern pat(g_flags.ignore_dirty_pattern); Pattern nopat(g_flags.no_ignore_dirty_pattern); return pat.Match(s) && !nopat.Match(s); } -bool NeedsRegen(double start_time, const string& orig_args) { - bool retval = false; -#define RETURN_TRUE do { \ - if (g_flags.dump_kati_stamp) \ - retval = true; \ - else \ - return true; \ - } while (0) +class StampChecker { + struct GlobResult { + string pat; + vector result; + }; -#define LOAD_INT(fp) ({ \ - int v = LoadInt(fp); \ - if (v < 0) { \ - fprintf(stderr, "incomplete kati_stamp, regenerating...\n"); \ - RETURN_TRUE; \ - } \ - v; \ - }) + struct ShellResult { + string cmd; + string result; + vector missing_dirs; + vector read_dirs; + bool has_condition; + }; -#define LOAD_STRING(fp, 
s) ({ \ - if (!LoadString(fp, s)) { \ - fprintf(stderr, "incomplete kati_stamp, regenerating...\n"); \ - RETURN_TRUE; \ - } \ - }) - - if (!Exists(GetNinjaFilename())) { - fprintf(stderr, "%s is missing, regenerating...\n", - GetNinjaFilename().c_str()); - return true; + public: + StampChecker() + : needs_regen_(false) { } - if (!Exists(GetNinjaShellScriptFilename())) { - fprintf(stderr, "%s is missing, regenerating...\n", - GetNinjaShellScriptFilename().c_str()); - return true; + + ~StampChecker() { + for (GlobResult* gr : globs_) { + delete gr; + } + for (ShellResult* sr : commands_) { + delete sr; + } } - const string& stamp_filename = GetNinjaStampFilename(); - FILE* fp = fopen(stamp_filename.c_str(), "rb+"); - if (!fp) { - if (g_flags.dump_kati_stamp) - printf("%s: %s\n", stamp_filename.c_str(), strerror(errno)); - return true; + bool NeedsRegen(double start_time, const string& orig_args) { + if (IsMissingOutputs()) + RETURN_TRUE; + + if (CheckStep1(orig_args)) + RETURN_TRUE; + + if (CheckStep2()) + RETURN_TRUE; + + if (!needs_regen_) { + FILE* fp = fopen(GetNinjaStampFilename().c_str(), "rb+"); + if (!fp) + return true; + ScopedFile sfp(fp); + if (fseek(fp, 0, SEEK_SET) < 0) + PERROR("fseek"); + size_t r = fwrite(&start_time, sizeof(start_time), 1, fp); + CHECK(r == 1); + } + return needs_regen_; } - ScopedFile sfp(fp); - double gen_time; - size_t r = fread(&gen_time, sizeof(gen_time), 1, fp); - if (r != 1) { - fprintf(stderr, "incomplete kati_stamp, regenerating...\n"); - RETURN_TRUE; + private: + bool IsMissingOutputs() { + if (!Exists(GetNinjaFilename())) { + fprintf(stderr, "%s is missing, regenerating...\n", + GetNinjaFilename().c_str()); + return true; + } + if (!Exists(GetNinjaShellScriptFilename())) { + fprintf(stderr, "%s is missing, regenerating...\n", + GetNinjaShellScriptFilename().c_str()); + return true; + } + return false; } - if (g_flags.dump_kati_stamp) - printf("Generated time: %f\n", gen_time); - - string s, s2; - int num_files = 
LOAD_INT(fp); - for (int i = 0; i < num_files; i++) { - LOAD_STRING(fp, &s); - double ts = GetTimestamp(s); - if (gen_time < ts) { - if (g_flags.regen_ignoring_kati_binary) { - string kati_binary; - GetExecutablePath(&kati_binary); - if (s == kati_binary) { - fprintf(stderr, "%s was modified, ignored.\n", s.c_str()); + + bool CheckStep1(const string& orig_args) { +#define LOAD_INT(fp) ({ \ + int v = LoadInt(fp); \ + if (v < 0) { \ + fprintf(stderr, "incomplete kati_stamp, regenerating...\n"); \ + RETURN_TRUE; \ + } \ + v; \ + }) + +#define LOAD_STRING(fp, s) ({ \ + if (!LoadString(fp, s)) { \ + fprintf(stderr, "incomplete kati_stamp, regenerating...\n"); \ + RETURN_TRUE; \ + } \ + }) + + const string& stamp_filename = GetNinjaStampFilename(); + FILE* fp = fopen(stamp_filename.c_str(), "rb"); + if (!fp) { + if (g_flags.dump_kati_stamp) + printf("%s: %s\n", stamp_filename.c_str(), strerror(errno)); + return true; + } + ScopedFile sfp(fp); + + double gen_time; + size_t r = fread(&gen_time, sizeof(gen_time), 1, fp); + gen_time_ = gen_time; + if (r != 1) { + fprintf(stderr, "incomplete kati_stamp, regenerating...\n"); + RETURN_TRUE; + } + if (g_flags.dump_kati_stamp) + printf("Generated time: %f\n", gen_time); + + string s, s2; + int num_files = LOAD_INT(fp); + for (int i = 0; i < num_files; i++) { + LOAD_STRING(fp, &s); + double ts = GetTimestamp(s); + if (gen_time < ts) { + if (g_flags.regen_ignoring_kati_binary) { + string kati_binary; + GetExecutablePath(&kati_binary); + if (s == kati_binary) { + fprintf(stderr, "%s was modified, ignored.\n", s.c_str()); + continue; + } + } + if (ShouldIgnoreDirty(s)) { + if (g_flags.dump_kati_stamp) + printf("file %s: ignored (%f)\n", s.c_str(), ts); continue; } - } - if (ShouldIgnoreDirty(s)) { if (g_flags.dump_kati_stamp) - printf("file %s: ignored (%f)\n", s.c_str(), ts); - continue; + printf("file %s: dirty (%f)\n", s.c_str(), ts); + else + fprintf(stderr, "%s was modified, regenerating...\n", s.c_str()); + RETURN_TRUE; + } 
else if (g_flags.dump_kati_stamp) { + printf("file %s: clean (%f)\n", s.c_str(), ts); } - if (g_flags.dump_kati_stamp) - printf("file %s: dirty (%f)\n", s.c_str(), ts); - else - fprintf(stderr, "%s was modified, regenerating...\n", s.c_str()); - RETURN_TRUE; - } else if (g_flags.dump_kati_stamp) { - printf("file %s: clean (%f)\n", s.c_str(), ts); } - } - int num_undefineds = LOAD_INT(fp); - for (int i = 0; i < num_undefineds; i++) { - LOAD_STRING(fp, &s); - if (getenv(s.c_str())) { - if (g_flags.dump_kati_stamp) { - printf("env %s: dirty (unset => %s)\n", s.c_str(), getenv(s.c_str())); - } else { - fprintf(stderr, "Environment variable %s was set, regenerating...\n", - s.c_str()); + int num_undefineds = LOAD_INT(fp); + for (int i = 0; i < num_undefineds; i++) { + LOAD_STRING(fp, &s); + if (getenv(s.c_str())) { + if (g_flags.dump_kati_stamp) { + printf("env %s: dirty (unset => %s)\n", s.c_str(), getenv(s.c_str())); + } else { + fprintf(stderr, "Environment variable %s was set, regenerating...\n", + s.c_str()); + } + RETURN_TRUE; + } else if (g_flags.dump_kati_stamp) { + printf("env %s: clean (unset)\n", s.c_str()); } - RETURN_TRUE; - } else if (g_flags.dump_kati_stamp) { - printf("env %s: clean (unset)\n", s.c_str()); } - } - int num_envs = LOAD_INT(fp); - for (int i = 0; i < num_envs; i++) { - LOAD_STRING(fp, &s); - StringPiece val(getenv(s.c_str())); - LOAD_STRING(fp, &s2); - if (val != s2) { - if (g_flags.dump_kati_stamp) { - printf("env %s: dirty (%s => %.*s)\n", - s.c_str(), s2.c_str(), SPF(val)); - } else { - fprintf(stderr, "Environment variable %s was modified (%s => %.*s), " - "regenerating...\n", - s.c_str(), s2.c_str(), SPF(val)); + int num_envs = LOAD_INT(fp); + for (int i = 0; i < num_envs; i++) { + LOAD_STRING(fp, &s); + StringPiece val(getenv(s.c_str())); + LOAD_STRING(fp, &s2); + if (val != s2) { + if (g_flags.dump_kati_stamp) { + printf("env %s: dirty (%s => %.*s)\n", + s.c_str(), s2.c_str(), SPF(val)); + } else { + fprintf(stderr, "Environment 
variable %s was modified (%s => %.*s), " + "regenerating...\n", + s.c_str(), s2.c_str(), SPF(val)); + } + RETURN_TRUE; + } else if (g_flags.dump_kati_stamp) { + printf("env %s: clean (%.*s)\n", s.c_str(), SPF(val)); } - RETURN_TRUE; - } else if (g_flags.dump_kati_stamp) { - printf("env %s: clean (%.*s)\n", s.c_str(), SPF(val)); } - } - { int num_globs = LOAD_INT(fp); string pat; for (int i = 0; i < num_globs; i++) { - COLLECT_STATS("glob time (regen)"); - LOAD_STRING(fp, &pat); -#if 0 - bool needs_reglob = false; - int num_dirs = LOAD_INT(fp); - for (int j = 0; j < num_dirs; j++) { - LOAD_STRING(fp, &s); - // TODO: Handle removed files properly. - needs_reglob |= gen_time < GetTimestamp(s); - } -#endif + GlobResult* gr = new GlobResult; + globs_.push_back(gr); + + LOAD_STRING(fp, &gr->pat); int num_files = LOAD_INT(fp); - vector* files; - Glob(pat.c_str(), &files); - sort(files->begin(), files->end()); - bool needs_regen = files->size() != static_cast(num_files); + gr->result.resize(num_files); for (int j = 0; j < num_files; j++) { + LOAD_STRING(fp, &gr->result[j]); + } + } + + int num_crs = LOAD_INT(fp); + for (int i = 0; i < num_crs; i++) { + ShellResult* sr = new ShellResult; + commands_.push_back(sr); + LOAD_STRING(fp, &sr->cmd); + LOAD_STRING(fp, &sr->result); + sr->has_condition = LOAD_INT(fp); + if (!sr->has_condition) + continue; + + int num_missing_dirs = LOAD_INT(fp); + for (int j = 0; j < num_missing_dirs; j++) { LOAD_STRING(fp, &s); - if (!needs_regen) { - if ((*files)[j] != s) { - needs_regen = true; - break; - } - } + sr->missing_dirs.push_back(s); } - if (needs_regen) { - if (ShouldIgnoreDirty(pat)) { - if (g_flags.dump_kati_stamp) { - printf("wildcard %s: ignored\n", pat.c_str()); - } - continue; + int num_read_dirs = LOAD_INT(fp); + for (int j = 0; j < num_read_dirs; j++) { + LOAD_STRING(fp, &s); + sr->read_dirs.push_back(s); + } + } + + LoadString(fp, &s); + if (orig_args != s) { + fprintf(stderr, "arguments changed, regenerating...\n"); + 
RETURN_TRUE; + } + + return needs_regen_; + } + + bool CheckGlobResult(const GlobResult* gr, string* err) { + COLLECT_STATS("glob time (regen)"); + vector* files; + Glob(gr->pat.c_str(), &files); + sort(files->begin(), files->end()); + bool needs_regen = files->size() != gr->result.size(); + for (size_t i = 0; i < gr->result.size(); i++) { + if (!needs_regen) { + if ((*files)[i] != gr->result[i]) { + needs_regen = true; + break; } + } + } + if (needs_regen) { + if (ShouldIgnoreDirty(gr->pat)) { if (g_flags.dump_kati_stamp) { - printf("wildcard %s: dirty\n", pat.c_str()); - } else { - fprintf(stderr, "wildcard(%s) was changed, regenerating...\n", - pat.c_str()); + printf("wildcard %s: ignored\n", gr->pat.c_str()); } - RETURN_TRUE; - } else if (g_flags.dump_kati_stamp) { - printf("wildcard %s: clean\n", pat.c_str()); + return false; + } + if (g_flags.dump_kati_stamp) { + printf("wildcard %s: dirty\n", gr->pat.c_str()); + } else { + *err = StringPrintf("wildcard(%s) was changed, regenerating...\n", + gr->pat.c_str()); } + } else if (g_flags.dump_kati_stamp) { + printf("wildcard %s: clean\n", gr->pat.c_str()); } + return needs_regen; } - int num_crs = LOAD_INT(fp); - for (int i = 0; i < num_crs; i++) { - string cmd, expected; - LOAD_STRING(fp, &cmd); - LOAD_STRING(fp, &expected); - - { - COLLECT_STATS("stat time (regen)"); - bool has_condition = LOAD_INT(fp); - if (has_condition) { - bool should_run_command = false; - - int num_missing_dirs = LOAD_INT(fp); - for (int j = 0; j < num_missing_dirs; j++) { - LOAD_STRING(fp, &s); - should_run_command |= Exists(s); - } - - int num_read_dirs = LOAD_INT(fp); - for (int j = 0; j < num_read_dirs; j++) { - LOAD_STRING(fp, &s); - // We assume we rarely do a significant change for the top - // directory which affects the results of find command. - if (s == "" || s == "." 
|| ShouldIgnoreDirty(s)) - continue; + bool ShouldRunCommand(const ShellResult* sr) { + if (!sr->has_condition) + return true; - struct stat st; - if (lstat(s.c_str(), &st) != 0) { - should_run_command = true; - continue; - } - double ts = GetTimestampFromStat(st); - if (gen_time < ts) { - should_run_command = true; - continue; - } - if (S_ISLNK(st.st_mode)) { - ts = GetTimestamp(s); - should_run_command |= (ts < 0 || gen_time < ts); - } - } + COLLECT_STATS("stat time (regen)"); + for (const string& dir : sr->missing_dirs) { + if (Exists(dir)) + return true; + } + for (const string& dir : sr->read_dirs) { + // We assume we rarely do a significant change for the top + // directory which affects the results of find command. + if (dir == "" || dir == "." || ShouldIgnoreDirty(dir)) + continue; - if (!should_run_command) { - if (g_flags.dump_kati_stamp) - printf("shell %s: clean (no rerun)\n", cmd.c_str()); - continue; - } + struct stat st; + if (lstat(dir.c_str(), &st) != 0) { + return true; + } + double ts = GetTimestampFromStat(st); + if (gen_time_ < ts) { + return true; + } + if (S_ISLNK(st.st_mode)) { + ts = GetTimestamp(dir); + if (ts < 0 || gen_time_ < ts) + return true; } } + return false; + } + + bool CheckShellResult(const ShellResult* sr, string* err) { + if (!ShouldRunCommand(sr)) { + if (g_flags.dump_kati_stamp) + printf("shell %s: clean (no rerun)\n", sr->cmd.c_str()); + return false; + } FindCommand fc; - if (fc.Parse(cmd) && !fc.chdir.empty() && ShouldIgnoreDirty(fc.chdir)) { + if (fc.Parse(sr->cmd) && + !fc.chdir.empty() && ShouldIgnoreDirty(fc.chdir)) { if (g_flags.dump_kati_stamp) - printf("shell %s: ignored\n", cmd.c_str()); - continue; + printf("shell %s: ignored\n", sr->cmd.c_str()); + return false; } - { - COLLECT_STATS_WITH_SLOW_REPORT("shell time (regen)", cmd.c_str()); - string result; - RunCommand("/bin/sh", cmd, RedirectStderr::DEV_NULL, &result); - FormatForCommandSubstitution(&result); - if (expected != result) { - if 
(g_flags.dump_kati_stamp) { - printf("shell %s: dirty\n", cmd.c_str()); - } else { - fprintf(stderr, "$(shell %s) was changed, regenerating...\n", - cmd.c_str()); -#if 0 - fprintf(stderr, "%s => %s\n", - expected.c_str(), result.c_str()); -#endif - } - RETURN_TRUE; - } else if (g_flags.dump_kati_stamp) { - printf("shell %s: clean (rerun)\n", cmd.c_str()); + COLLECT_STATS_WITH_SLOW_REPORT("shell time (regen)", sr->cmd.c_str()); + string result; + RunCommand("/bin/sh", sr->cmd, RedirectStderr::DEV_NULL, &result); + FormatForCommandSubstitution(&result); + if (sr->result != result) { + if (g_flags.dump_kati_stamp) { + printf("shell %s: dirty\n", sr->cmd.c_str()); + } else { + *err = StringPrintf("$(shell %s) was changed, regenerating...\n", + sr->cmd.c_str()); + //*err += StringPrintf("%s => %s\n", expected.c_str(), result.c_str()); } + return true; + } else if (g_flags.dump_kati_stamp) { + printf("shell %s: clean (rerun)\n", sr->cmd.c_str()); } + return false; } - LoadString(fp, &s); - if (orig_args != s) { - fprintf(stderr, "arguments changed, regenerating...\n"); - RETURN_TRUE; - } + bool CheckStep2() { + unique_ptr tp(NewThreadPool(g_flags.num_jobs)); + + tp->Submit([this]() { + string err; + // TODO: Make glob cache thread safe and create a task for each glob. 
+ for (GlobResult* gr : globs_) { + if (CheckGlobResult(gr, &err)) { + unique_lock lock(mu_); + if (!needs_regen_) { + needs_regen_ = true; + msg_ = err; + } + break; + } + } + }); + + for (ShellResult* sr : commands_) { + tp->Submit([this, sr]() { + string err; + if (CheckShellResult(sr, &err)) { + unique_lock lock(mu_); + if (!needs_regen_) { + needs_regen_ = true; + msg_ = err; + } + } + }); + } - if (!retval) { - if (fseek(fp, 0, SEEK_SET) < 0) - PERROR("fseek"); - size_t r = fwrite(&start_time, sizeof(start_time), 1, fp); - CHECK(r == 1); + tp->Wait(); + if (needs_regen_) { + fprintf(stderr, "%s", msg_.c_str()); + } + return needs_regen_; } - return retval; + private: + double gen_time_; + vector globs_; + vector commands_; + mutex mu_; + bool needs_regen_; + string msg_; +}; + +} // namespace + +bool NeedsRegen(double start_time, const string& orig_args) { + return StampChecker().NeedsRegen(start_time, orig_args); } diff --git a/stats.cc b/stats.cc index 847c61a..0e23e84 100644 --- a/stats.cc +++ b/stats.cc @@ -16,6 +16,7 @@ #include "stats.h" +#include #include #include "flags.h" diff --git a/thread.cc b/thread.cc new file mode 100644 index 0000000..1450466 --- /dev/null +++ b/thread.cc @@ -0,0 +1,84 @@ +// Copyright 2016 Google Inc. All rights reserved +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#include "thread.h"
+
+#include <condition_variable>
+#include <mutex>
+#include <stack>
+#include <thread>
+#include <vector>
+
+// ThreadPool backed by a fixed set of workers that drain a shared task stack.
+class ThreadPoolImpl : public ThreadPool {
+ public:
+  // Starts the workers; at least one, so submitted tasks are never stranded.
+  explicit ThreadPoolImpl(int num_threads)
+      : is_waiting_(false) {
+    for (int i = 0; i < num_threads || i == 0; i++) {
+      threads_.push_back(thread([this]() { Loop(); }));
+    }
+  }
+  // Joins workers that are still running: destroying a joinable std::thread
+  // calls std::terminate().
+  virtual ~ThreadPoolImpl() override {
+    ThreadPoolImpl::Wait();
+  }
+  // Queues |task|. Tasks are taken from a stack, so not in submission order.
+  virtual void Submit(function<void(void)> task) override {
+    unique_lock<mutex> lock(mu_);
+    tasks_.push(task);
+    cond_.notify_one();
+  }
+  // Blocks until every queued task has run and all workers have exited.
+  virtual void Wait() override {
+    {
+      unique_lock<mutex> lock(mu_);
+      is_waiting_ = true;
+      cond_.notify_all();
+    }
+    for (thread& th : threads_) {
+      th.join();
+    }
+    threads_.clear();  // Makes a repeated Wait() a no-op.
+  }
+
+ private:
+  // Worker body: pops and runs tasks until Wait() was called and none remain.
+  void Loop() {
+    while (true) {
+      function<void(void)> task;
+      {
+        unique_lock<mutex> lock(mu_);
+        // The predicate form re-checks the state after a spurious wakeup.
+        cond_.wait(lock, [this]() { return is_waiting_ || !tasks_.empty(); });
+        if (tasks_.empty())
+          return;
+        task = tasks_.top();
+        tasks_.pop();
+      }
+      task();  // Runs outside the lock so other workers can make progress.
+    }
+  }
+
+  vector<thread> threads_;
+  mutex mu_;  // Guards tasks_ and is_waiting_.
+  condition_variable cond_;
+  stack<function<void(void)>> tasks_;
+  bool is_waiting_;
+};
+// Creates a pool of |num_threads| workers (at least one); the caller owns it.
+ThreadPool* NewThreadPool(int num_threads) {
+  return new ThreadPoolImpl(num_threads);
+}
diff --git a/thread.h b/thread.h
new file mode 100644
index 0000000..8fd3842
--- /dev/null
+++ b/thread.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Google Inc. All rights reserved
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef THREAD_H_
+#define THREAD_H_
+
+#include <functional>
+
+using namespace std;
+
+// Interface of a pool of worker threads; obtain one from NewThreadPool().
+class ThreadPool {
+ public:
+  virtual ~ThreadPool() = default;
+  // Hands |task| to the pool to be executed.
+  virtual void Submit(function<void(void)> task) = 0;
+  // Blocks until the submitted tasks have run and the workers have finished.
+  virtual void Wait() = 0;
+ protected:
+  ThreadPool() = default;
+};
+// Returns a new pool of |num_threads| workers; the caller owns it.
+ThreadPool* NewThreadPool(int num_threads);
+#endif  // THREAD_H_
-- 
cgit v1.2.3