From 1df10c43eab5b8c483fbf882a0c8a6e5e59c73c0 Mon Sep 17 00:00:00 2001 From: Bertrand SIMONNET Date: Wed, 9 Sep 2015 10:39:51 -0700 Subject: metricsd: Persist the report to disk if an upload fails. If the metrics server is unreachable, we may need to resend a metrics report later. Instead of keeping the staged report in memory, save it to disk to avoid loosing data if the system restarts or crashes. BUG: 23033262 TEST: unit tests. Change-Id: Idd14964e40f022952469f47d675d8cda9586d7cd --- metricsd/constants.h | 2 + metricsd/uploader/upload_service.cc | 93 ++++++++++++++++++++------------ metricsd/uploader/upload_service.h | 11 +++- metricsd/uploader/upload_service_test.cc | 18 +++++-- 4 files changed, 83 insertions(+), 41 deletions(-) (limited to 'metricsd') diff --git a/metricsd/constants.h b/metricsd/constants.h index 021bc7361..717e5d2e2 100644 --- a/metricsd/constants.h +++ b/metricsd/constants.h @@ -23,6 +23,8 @@ static const char kMetricsEventsFileName[] = "uma-events"; static const char kMetricsGUIDFileName[] = "Sysinfo.GUID"; static const char kMetricsServer[] = "https://clients4.google.com/uma/v2"; static const char kConsentFileName[] = "enabled"; +static const char kStagedLogName[] = "staged_log"; +static const char kFailedUploadCountName[] = "failed_upload_count"; static const char kDefaultVersion[] = "0.0.0.0"; // System properties used. diff --git a/metricsd/uploader/upload_service.cc b/metricsd/uploader/upload_service.cc index 6d7fd540b..b630cecb8 100644 --- a/metricsd/uploader/upload_service.cc +++ b/metricsd/uploader/upload_service.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -45,6 +46,7 @@ UploadService::UploadService(SystemProfileSetter* setter, metrics_lib_(metrics_lib), histogram_snapshot_manager_(this), sender_(new HttpSender(server)), + failed_upload_count_(metrics::kFailedUploadCountName), testing_(false) { } @@ -60,6 +62,7 @@ void UploadService::Init(const base::TimeDelta& upload_interval, const base::FilePath& metrics_directory) { base::StatisticsRecorder::Initialize(); metrics_file_ = metrics_directory.Append(metrics::kMetricsEventsFileName); + staged_log_path_ = metrics_directory.Append(metrics::kStagedLogName); if (!testing_) { base::MessageLoop::current()->PostDelayedTask(FROM_HERE, @@ -71,8 +74,8 @@ void UploadService::Init(const base::TimeDelta& upload_interval, } void UploadService::StartNewLog() { - CHECK(!staged_log_) << "the staged log should be discarded before starting " - "a new metrics log"; + CHECK(!HasStagedLog()) << "the staged log should be discarded before " + << "starting a new metrics log"; MetricsLog* log = new MetricsLog(); current_log_.reset(log); } @@ -88,7 +91,11 @@ void UploadService::UploadEventCallback(const base::TimeDelta& interval) { } void UploadService::UploadEvent() { - if (staged_log_) { + // If the system shutdown or crashed while uploading a report, we may not have + // deleted an old log. + RemoveFailedLog(); + + if (HasStagedLog()) { // Previous upload failed, retry sending the logs. SendStagedLog(); return; @@ -100,47 +107,44 @@ void UploadService::UploadEvent() { StageCurrentLog(); // If a log is available for upload, upload it. - if (staged_log_) { + if (HasStagedLog()) { SendStagedLog(); } } void UploadService::SendStagedLog() { - CHECK(staged_log_) << "staged_log_ must exist to be sent"; - // If metrics are not enabled, discard the log and exit. if (!metrics_lib_->AreMetricsEnabled()) { LOG(INFO) << "Metrics disabled. Don't upload metrics samples."; - staged_log_.reset(); + base::DeleteFile(staged_log_path_, false); return; } - std::string log_text; - staged_log_->GetEncodedLog(&log_text); - if (!sender_->Send(log_text, base::SHA1HashString(log_text))) { - ++failed_upload_count_; - if (failed_upload_count_ <= kMaxFailedUpload) { - LOG(WARNING) << "log upload failed " << failed_upload_count_ - << " times. It will be retried later."; - return; - } - LOG(WARNING) << "log failed more than " << kMaxFailedUpload << " times."; + std::string staged_log; + CHECK(base::ReadFileToString(staged_log_path_, &staged_log)); + + // Increase the failed count in case the daemon crashes while sending the log. + failed_upload_count_.Add(1); + + if (!sender_->Send(staged_log, base::SHA1HashString(staged_log))) { + LOG(WARNING) << "log failed to upload"; } else { - LOG(INFO) << "uploaded " << log_text.length() << " bytes"; + VLOG(1) << "uploaded " << staged_log.length() << " bytes"; + base::DeleteFile(staged_log_path_, false); } - // Discard staged log. - staged_log_.reset(); + + RemoveFailedLog(); } void UploadService::Reset() { - staged_log_.reset(); + base::DeleteFile(staged_log_path_, false); current_log_.reset(); - failed_upload_count_ = 0; + failed_upload_count_.Set(0); } void UploadService::ReadMetrics() { - CHECK(!staged_log_) - << "cannot read metrics until the old logs have been discarded"; + CHECK(!HasStagedLog()) << "cannot read metrics until the old logs have been " + << "discarded"; ScopedVector vector; metrics::SerializationUtils::ReadAndTruncateMetricsFromFile( @@ -153,7 +157,7 @@ void UploadService::ReadMetrics() { AddSample(*sample); i++; } - DLOG(INFO) << i << " samples read"; + VLOG(1) << i << " samples read"; } void UploadService::AddSample(const metrics::MetricSample& sample) { @@ -217,19 +221,27 @@ void UploadService::RecordDelta(const base::HistogramBase& histogram, } void UploadService::StageCurrentLog() { - CHECK(!staged_log_) - << "staged logs must be discarded before another log can be staged"; - - if (!current_log_) return; + // If we haven't logged anything since the last upload, don't upload an empty + // report. + if (!current_log_) + return; - staged_log_.swap(current_log_); - staged_log_->CloseLog(); - if (!staged_log_->PopulateSystemProfile(system_profile_setter_.get())) { + scoped_ptr staged_log; + staged_log.swap(current_log_); + staged_log->CloseLog(); + if (!staged_log->PopulateSystemProfile(system_profile_setter_.get())) { LOG(WARNING) << "Error while adding metadata to the log. Discarding the " << "log."; - staged_log_.reset(); + return; + } + std::string encoded_log; + staged_log->GetEncodedLog(&encoded_log); + + failed_upload_count_.Set(0); + if (static_cast(encoded_log.size()) != base::WriteFile( + staged_log_path_, encoded_log.data(), encoded_log.size())) { + LOG(ERROR) << "failed to persist to " << staged_log_path_.value(); } - failed_upload_count_ = 0; } MetricsLog* UploadService::GetOrCreateCurrentLog() { @@ -238,3 +250,16 @@ MetricsLog* UploadService::GetOrCreateCurrentLog() { } return current_log_.get(); } + +bool UploadService::HasStagedLog() { + return base::PathExists(staged_log_path_); +} + +void UploadService::RemoveFailedLog() { + if (failed_upload_count_.Get() > kMaxFailedUpload) { + LOG(INFO) << "log failed more than " << kMaxFailedUpload << " times."; + CHECK(base::DeleteFile(staged_log_path_, false)) + << "failed to delete staged log at " << staged_log_path_.value(); + failed_upload_count_.Set(0); + } +} diff --git a/metricsd/uploader/upload_service.h b/metricsd/uploader/upload_service.h index d9c690df1..ea93d21a3 100644 --- a/metricsd/uploader/upload_service.h +++ b/metricsd/uploader/upload_service.h @@ -24,6 +24,7 @@ #include "base/metrics/histogram_snapshot_manager.h" #include "metrics/metrics_library.h" +#include "persistent_integer.h" #include "uploader/metrics_log.h" #include "uploader/sender.h" #include "uploader/system_profile_cache.h" @@ -146,6 +147,12 @@ class UploadService : public base::HistogramFlattener { // system information. void StageCurrentLog(); + // Returns true iff a log is staged. + bool HasStagedLog(); + + // Remove the staged log iff the upload failed more than |kMaxFailedUpload|. + void RemoveFailedLog(); + // Returns the current log. If there is no current log, creates it first. MetricsLog* GetOrCreateCurrentLog(); @@ -153,11 +160,11 @@ class UploadService : public base::HistogramFlattener { MetricsLibraryInterface* metrics_lib_; base::HistogramSnapshotManager histogram_snapshot_manager_; scoped_ptr sender_; - int failed_upload_count_; + chromeos_metrics::PersistentInteger failed_upload_count_; scoped_ptr current_log_; - scoped_ptr staged_log_; base::FilePath metrics_file_; + base::FilePath staged_log_path_; bool testing_; }; diff --git a/metricsd/uploader/upload_service_test.cc b/metricsd/uploader/upload_service_test.cc index d3b5289dc..fd8248ae7 100644 --- a/metricsd/uploader/upload_service_test.cc +++ b/metricsd/uploader/upload_service_test.cc @@ -24,6 +24,7 @@ #include "constants.h" #include "metrics_library_mock.h" +#include "persistent_integer.h" #include "serialization/metric_sample.h" #include "uploader/metrics_log.h" #include "uploader/mock/mock_system_profile_setter.h" @@ -38,6 +39,8 @@ class UploadServiceTest : public testing::Test { protected: virtual void SetUp() { CHECK(dir_.CreateUniqueTempDir()); + chromeos_metrics::PersistentInteger::SetMetricsDirectory( + dir_.path().value()); upload_service_.reset(new UploadService(new MockSystemProfileSetter(), &metrics_lib_, "", true)); @@ -45,9 +48,6 @@ class UploadServiceTest : public testing::Test { upload_service_->Init(base::TimeDelta::FromMinutes(30), dir_.path()); upload_service_->GatherHistograms(); upload_service_->Reset(); - - chromeos_metrics::PersistentInteger::SetMetricsDirectory( - dir_.path().value()); } scoped_ptr Crash(const std::string& name) { @@ -132,9 +132,17 @@ TEST_F(UploadServiceTest, DiscardLogsAfterTooManyFailedUpload) { upload_service_->UploadEvent(); } - EXPECT_TRUE(upload_service_->staged_log_); + EXPECT_TRUE(upload_service_->HasStagedLog()); upload_service_->UploadEvent(); - EXPECT_FALSE(upload_service_->staged_log_); + EXPECT_FALSE(upload_service_->HasStagedLog()); + + // Log a new sample. The failed upload counter should be reset. + upload_service_->AddSample(*Crash("user")); + for (int i = 0; i < UploadService::kMaxFailedUpload; i++) { + upload_service_->UploadEvent(); + } + // The log is not discarded after multiple failed uploads. + EXPECT_TRUE(upload_service_->HasStagedLog()); } TEST_F(UploadServiceTest, EmptyLogsAreNotSent) { -- cgit v1.2.3