summaryrefslogtreecommitdiffstats
path: root/runtime/base/histogram.h
blob: 7544a9c91847c0b96685a531f80b3743f0ea8a39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef ART_RUNTIME_BASE_HISTOGRAM_H_
#define ART_RUNTIME_BASE_HISTOGRAM_H_

#include <string>
#include <vector>

#include <android-base/macros.h>

namespace art {

// Creates a data histogram  for a better understanding of statistical data.
// Histogram analysis goes beyond simple mean and standard deviation to provide
// percentiles values, describing where the $% of the input data lies.
// Designed to be simple and used with timing logger in art.

template <class Value> class Histogram {
  const double kAdjust;
  const size_t kInitialBucketCount;

 public:
  class CumulativeData {
    friend class Histogram<Value>;
    std::vector<uint64_t> freq_;
    std::vector<double> perc_;
  };

  // Used by the cumulative timing logger to search the histogram set using for an existing split
  // with the same name using CumulativeLogger::HistogramComparator.
  explicit Histogram(const char* name);
  // This is the expected constructor when creating new Histograms.
  Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
  void AddValue(Value);
  void AdjustAndAddValue(Value);  // Add a value after dividing it by kAdjust.
  // Builds the cumulative distribution function from the frequency data.
  // Accumulative summation of frequencies.
  // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
  // Accumulative summation of percentiles; which is the frequency / SampleSize
  // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
  void CreateHistogram(CumulativeData* data) const;
  // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
  void Reset();
  double Mean() const;
  double Variance() const;
  double Percentile(double per, const CumulativeData& data) const;
  void PrintConfidenceIntervals(std::ostream& os, double interval,
                                const CumulativeData& data) const;
  void PrintMemoryUse(std::ostream& os) const;
  void PrintBins(std::ostream& os, const CumulativeData& data) const;
  void DumpBins(std::ostream& os) const;
  Value GetRange(size_t bucket_idx) const;
  size_t GetBucketCount() const;

  uint64_t SampleSize() const {
    return sample_size_;
  }

  Value Sum() const {
    return sum_;
  }

  Value AdjustedSum() const {
    return sum_ * kAdjust;
  }

  Value Min() const {
    return min_value_added_;
  }

  Value Max() const {
    return max_value_added_;
  }

  Value BucketWidth() const {
    return bucket_width_;
  }

  const std::string& Name() const {
    return name_;
  }

 private:
  void Initialize();
  size_t FindBucket(Value val) const;
  void BucketiseValue(Value val);
  // Add more buckets to the histogram to fill in a new value that exceeded
  // the max_read_value_.
  void GrowBuckets(Value val);
  std::string name_;
  // Maximum number of buckets.
  const size_t max_buckets_;
  // Number of samples placed in histogram.
  size_t sample_size_;
  // Width of the bucket range. The lower the value is the more accurate
  // histogram percentiles are. Grows adaptively when we hit max buckets.
  Value bucket_width_;
  // How many occurrences of values fall within a bucket at index i where i covers the range
  // starting at  min_ + i * bucket_width_ with size bucket_size_.
  std::vector<uint32_t> frequency_;
  // Summation of all the elements inputed by the user.
  Value sum_;
  // Minimum value that can fit in the histogram. Fixed to zero for now.
  Value min_;
  // Maximum value that can fit in the histogram, grows adaptively.
  Value max_;
  // Summation of the values entered. Used to calculate variance.
  Value sum_of_squares_;
  // Maximum value entered in the histogram.
  Value min_value_added_;
  // Minimum value entered in the histogram.
  Value max_value_added_;

  DISALLOW_COPY_AND_ASSIGN(Histogram);
};
}  // namespace art

#endif  // ART_RUNTIME_BASE_HISTOGRAM_H_