summaryrefslogtreecommitdiffstats
path: root/metricsd
diff options
context:
space:
mode:
authorBertrand SIMONNET <bsimonnet@google.com>2015-09-09 10:39:51 -0700
committerBertrand SIMONNET <bsimonnet@google.com>2015-09-14 13:55:33 -0700
commit1df10c43eab5b8c483fbf882a0c8a6e5e59c73c0 (patch)
treecaf438fc99b8fb5a9139512db4cb9e4cc21ffe34 /metricsd
parent2765d0abccd0c754422332d114cdecc271888e2d (diff)
downloadcore-1df10c43eab5b8c483fbf882a0c8a6e5e59c73c0.tar.gz
core-1df10c43eab5b8c483fbf882a0c8a6e5e59c73c0.tar.bz2
core-1df10c43eab5b8c483fbf882a0c8a6e5e59c73c0.zip
metricsd: Persist the report to disk if an upload fails.
If the metrics server is unreachable, we may need to resend a metrics report later. Instead of keeping the staged report in memory, save it to disk to avoid loosing data if the system restarts or crashes. BUG: 23033262 TEST: unit tests. Change-Id: Idd14964e40f022952469f47d675d8cda9586d7cd
Diffstat (limited to 'metricsd')
-rw-r--r--metricsd/constants.h2
-rw-r--r--metricsd/uploader/upload_service.cc93
-rw-r--r--metricsd/uploader/upload_service.h11
-rw-r--r--metricsd/uploader/upload_service_test.cc18
4 files changed, 83 insertions, 41 deletions
diff --git a/metricsd/constants.h b/metricsd/constants.h
index 021bc7361..717e5d2e2 100644
--- a/metricsd/constants.h
+++ b/metricsd/constants.h
@@ -23,6 +23,8 @@ static const char kMetricsEventsFileName[] = "uma-events";
static const char kMetricsGUIDFileName[] = "Sysinfo.GUID";
static const char kMetricsServer[] = "https://clients4.google.com/uma/v2";
static const char kConsentFileName[] = "enabled";
+static const char kStagedLogName[] = "staged_log";
+static const char kFailedUploadCountName[] = "failed_upload_count";
static const char kDefaultVersion[] = "0.0.0.0";
// System properties used.
diff --git a/metricsd/uploader/upload_service.cc b/metricsd/uploader/upload_service.cc
index 6d7fd540b..b630cecb8 100644
--- a/metricsd/uploader/upload_service.cc
+++ b/metricsd/uploader/upload_service.cc
@@ -19,6 +19,7 @@
#include <string>
#include <base/bind.h>
+#include <base/files/file_util.h>
#include <base/logging.h>
#include <base/memory/scoped_vector.h>
#include <base/message_loop/message_loop.h>
@@ -45,6 +46,7 @@ UploadService::UploadService(SystemProfileSetter* setter,
metrics_lib_(metrics_lib),
histogram_snapshot_manager_(this),
sender_(new HttpSender(server)),
+ failed_upload_count_(metrics::kFailedUploadCountName),
testing_(false) {
}
@@ -60,6 +62,7 @@ void UploadService::Init(const base::TimeDelta& upload_interval,
const base::FilePath& metrics_directory) {
base::StatisticsRecorder::Initialize();
metrics_file_ = metrics_directory.Append(metrics::kMetricsEventsFileName);
+ staged_log_path_ = metrics_directory.Append(metrics::kStagedLogName);
if (!testing_) {
base::MessageLoop::current()->PostDelayedTask(FROM_HERE,
@@ -71,8 +74,8 @@ void UploadService::Init(const base::TimeDelta& upload_interval,
}
void UploadService::StartNewLog() {
- CHECK(!staged_log_) << "the staged log should be discarded before starting "
- "a new metrics log";
+ CHECK(!HasStagedLog()) << "the staged log should be discarded before "
+ << "starting a new metrics log";
MetricsLog* log = new MetricsLog();
current_log_.reset(log);
}
@@ -88,7 +91,11 @@ void UploadService::UploadEventCallback(const base::TimeDelta& interval) {
}
void UploadService::UploadEvent() {
- if (staged_log_) {
+ // If the system shutdown or crashed while uploading a report, we may not have
+ // deleted an old log.
+ RemoveFailedLog();
+
+ if (HasStagedLog()) {
// Previous upload failed, retry sending the logs.
SendStagedLog();
return;
@@ -100,47 +107,44 @@ void UploadService::UploadEvent() {
StageCurrentLog();
// If a log is available for upload, upload it.
- if (staged_log_) {
+ if (HasStagedLog()) {
SendStagedLog();
}
}
void UploadService::SendStagedLog() {
- CHECK(staged_log_) << "staged_log_ must exist to be sent";
-
// If metrics are not enabled, discard the log and exit.
if (!metrics_lib_->AreMetricsEnabled()) {
LOG(INFO) << "Metrics disabled. Don't upload metrics samples.";
- staged_log_.reset();
+ base::DeleteFile(staged_log_path_, false);
return;
}
- std::string log_text;
- staged_log_->GetEncodedLog(&log_text);
- if (!sender_->Send(log_text, base::SHA1HashString(log_text))) {
- ++failed_upload_count_;
- if (failed_upload_count_ <= kMaxFailedUpload) {
- LOG(WARNING) << "log upload failed " << failed_upload_count_
- << " times. It will be retried later.";
- return;
- }
- LOG(WARNING) << "log failed more than " << kMaxFailedUpload << " times.";
+ std::string staged_log;
+ CHECK(base::ReadFileToString(staged_log_path_, &staged_log));
+
+ // Increase the failed count in case the daemon crashes while sending the log.
+ failed_upload_count_.Add(1);
+
+ if (!sender_->Send(staged_log, base::SHA1HashString(staged_log))) {
+ LOG(WARNING) << "log failed to upload";
} else {
- LOG(INFO) << "uploaded " << log_text.length() << " bytes";
+ VLOG(1) << "uploaded " << staged_log.length() << " bytes";
+ base::DeleteFile(staged_log_path_, false);
}
- // Discard staged log.
- staged_log_.reset();
+
+ RemoveFailedLog();
}
void UploadService::Reset() {
- staged_log_.reset();
+ base::DeleteFile(staged_log_path_, false);
current_log_.reset();
- failed_upload_count_ = 0;
+ failed_upload_count_.Set(0);
}
void UploadService::ReadMetrics() {
- CHECK(!staged_log_)
- << "cannot read metrics until the old logs have been discarded";
+ CHECK(!HasStagedLog()) << "cannot read metrics until the old logs have been "
+ << "discarded";
ScopedVector<metrics::MetricSample> vector;
metrics::SerializationUtils::ReadAndTruncateMetricsFromFile(
@@ -153,7 +157,7 @@ void UploadService::ReadMetrics() {
AddSample(*sample);
i++;
}
- DLOG(INFO) << i << " samples read";
+ VLOG(1) << i << " samples read";
}
void UploadService::AddSample(const metrics::MetricSample& sample) {
@@ -217,19 +221,27 @@ void UploadService::RecordDelta(const base::HistogramBase& histogram,
}
void UploadService::StageCurrentLog() {
- CHECK(!staged_log_)
- << "staged logs must be discarded before another log can be staged";
-
- if (!current_log_) return;
+ // If we haven't logged anything since the last upload, don't upload an empty
+ // report.
+ if (!current_log_)
+ return;
- staged_log_.swap(current_log_);
- staged_log_->CloseLog();
- if (!staged_log_->PopulateSystemProfile(system_profile_setter_.get())) {
+ scoped_ptr<MetricsLog> staged_log;
+ staged_log.swap(current_log_);
+ staged_log->CloseLog();
+ if (!staged_log->PopulateSystemProfile(system_profile_setter_.get())) {
LOG(WARNING) << "Error while adding metadata to the log. Discarding the "
<< "log.";
- staged_log_.reset();
+ return;
+ }
+ std::string encoded_log;
+ staged_log->GetEncodedLog(&encoded_log);
+
+ failed_upload_count_.Set(0);
+ if (static_cast<int>(encoded_log.size()) != base::WriteFile(
+ staged_log_path_, encoded_log.data(), encoded_log.size())) {
+ LOG(ERROR) << "failed to persist to " << staged_log_path_.value();
}
- failed_upload_count_ = 0;
}
MetricsLog* UploadService::GetOrCreateCurrentLog() {
@@ -238,3 +250,16 @@ MetricsLog* UploadService::GetOrCreateCurrentLog() {
}
return current_log_.get();
}
+
+bool UploadService::HasStagedLog() {
+ return base::PathExists(staged_log_path_);
+}
+
+void UploadService::RemoveFailedLog() {
+ if (failed_upload_count_.Get() > kMaxFailedUpload) {
+ LOG(INFO) << "log failed more than " << kMaxFailedUpload << " times.";
+ CHECK(base::DeleteFile(staged_log_path_, false))
+ << "failed to delete staged log at " << staged_log_path_.value();
+ failed_upload_count_.Set(0);
+ }
+}
diff --git a/metricsd/uploader/upload_service.h b/metricsd/uploader/upload_service.h
index d9c690df1..ea93d21a3 100644
--- a/metricsd/uploader/upload_service.h
+++ b/metricsd/uploader/upload_service.h
@@ -24,6 +24,7 @@
#include "base/metrics/histogram_snapshot_manager.h"
#include "metrics/metrics_library.h"
+#include "persistent_integer.h"
#include "uploader/metrics_log.h"
#include "uploader/sender.h"
#include "uploader/system_profile_cache.h"
@@ -146,6 +147,12 @@ class UploadService : public base::HistogramFlattener {
// system information.
void StageCurrentLog();
+ // Returns true iff a log is staged.
+ bool HasStagedLog();
+
+ // Remove the staged log iff the upload failed more than |kMaxFailedUpload|.
+ void RemoveFailedLog();
+
// Returns the current log. If there is no current log, creates it first.
MetricsLog* GetOrCreateCurrentLog();
@@ -153,11 +160,11 @@ class UploadService : public base::HistogramFlattener {
MetricsLibraryInterface* metrics_lib_;
base::HistogramSnapshotManager histogram_snapshot_manager_;
scoped_ptr<Sender> sender_;
- int failed_upload_count_;
+ chromeos_metrics::PersistentInteger failed_upload_count_;
scoped_ptr<MetricsLog> current_log_;
- scoped_ptr<MetricsLog> staged_log_;
base::FilePath metrics_file_;
+ base::FilePath staged_log_path_;
bool testing_;
};
diff --git a/metricsd/uploader/upload_service_test.cc b/metricsd/uploader/upload_service_test.cc
index d3b5289dc..fd8248ae7 100644
--- a/metricsd/uploader/upload_service_test.cc
+++ b/metricsd/uploader/upload_service_test.cc
@@ -24,6 +24,7 @@
#include "constants.h"
#include "metrics_library_mock.h"
+#include "persistent_integer.h"
#include "serialization/metric_sample.h"
#include "uploader/metrics_log.h"
#include "uploader/mock/mock_system_profile_setter.h"
@@ -38,6 +39,8 @@ class UploadServiceTest : public testing::Test {
protected:
virtual void SetUp() {
CHECK(dir_.CreateUniqueTempDir());
+ chromeos_metrics::PersistentInteger::SetMetricsDirectory(
+ dir_.path().value());
upload_service_.reset(new UploadService(new MockSystemProfileSetter(),
&metrics_lib_, "", true));
@@ -45,9 +48,6 @@ class UploadServiceTest : public testing::Test {
upload_service_->Init(base::TimeDelta::FromMinutes(30), dir_.path());
upload_service_->GatherHistograms();
upload_service_->Reset();
-
- chromeos_metrics::PersistentInteger::SetMetricsDirectory(
- dir_.path().value());
}
scoped_ptr<metrics::MetricSample> Crash(const std::string& name) {
@@ -132,9 +132,17 @@ TEST_F(UploadServiceTest, DiscardLogsAfterTooManyFailedUpload) {
upload_service_->UploadEvent();
}
- EXPECT_TRUE(upload_service_->staged_log_);
+ EXPECT_TRUE(upload_service_->HasStagedLog());
upload_service_->UploadEvent();
- EXPECT_FALSE(upload_service_->staged_log_);
+ EXPECT_FALSE(upload_service_->HasStagedLog());
+
+ // Log a new sample. The failed upload counter should be reset.
+ upload_service_->AddSample(*Crash("user"));
+ for (int i = 0; i < UploadService::kMaxFailedUpload; i++) {
+ upload_service_->UploadEvent();
+ }
+ // The log is not discarded after multiple failed uploads.
+ EXPECT_TRUE(upload_service_->HasStagedLog());
}
TEST_F(UploadServiceTest, EmptyLogsAreNotSent) {