diff options
-rw-r--r-- | src/base/statistics.cc | 83 | ||||
-rw-r--r-- | src/base/statistics.hh | 163 | ||||
-rw-r--r-- | src/base/stats/info.hh | 2 | ||||
-rw-r--r-- | src/base/stats/text.cc | 12 | ||||
-rw-r--r-- | src/unittest/stattest.cc | 112 |
5 files changed, 365 insertions, 7 deletions
diff --git a/src/base/statistics.cc b/src/base/statistics.cc index 1e108298a..95402a221 100644 --- a/src/base/statistics.cc +++ b/src/base/statistics.cc @@ -236,6 +236,89 @@ Vector2dInfo::enable() y_subnames.resize(y); } +void +HistStor::grow_out() +{ + int size = cvec.size(); + int zero = size / 2; // round down! + int top_half = zero + (size - zero + 1) / 2; // round up! + int bottom_half = (size - zero) / 2; // round down! + + // grow down + int low_pair = zero - 1; + for (int i = zero - 1; i >= bottom_half; i--) { + cvec[i] = cvec[low_pair]; + if (low_pair - 1 >= 0) + cvec[i] += cvec[low_pair - 1]; + low_pair -= 2; + } + assert(low_pair == 0 || low_pair == -1 || low_pair == -2); + + for (int i = bottom_half - 1; i >= 0; i--) + cvec[i] = Counter(); + + // grow up + int high_pair = zero; + for (int i = zero; i < top_half; i++) { + cvec[i] = cvec[high_pair]; + if (high_pair + 1 < size) + cvec[i] += cvec[high_pair + 1]; + high_pair += 2; + } + assert(high_pair == size || high_pair == size + 1); + + for (int i = top_half; i < size; i++) + cvec[i] = Counter(); + + max_bucket *= 2; + min_bucket *= 2; + bucket_size *= 2; +} + +void +HistStor::grow_convert() +{ + int size = cvec.size(); + int half = (size + 1) / 2; // round up! + //bool even = (size & 1) == 0; + + int pair = size - 1; + for (int i = size - 1; i >= half; --i) { + cvec[i] = cvec[pair]; + if (pair - 1 >= 0) + cvec[i] += cvec[pair - 1]; + pair -= 2; + } + + for (int i = half - 1; i >= 0; i--) + cvec[i] = Counter(); + + min_bucket = -max_bucket;// - (even ? bucket_size : 0); + bucket_size *= 2; +} + +void +HistStor::grow_up() +{ + int size = cvec.size(); + int half = (size + 1) / 2; // round up! + + int pair = 0; + for (int i = 0; i < half; i++) { + cvec[i] = cvec[pair]; + if (pair + 1 < size) + cvec[i] += cvec[pair + 1]; + pair += 2; + } + assert(pair == size || pair == size + 1); + + for (int i = half; i < size; i++) + cvec[i] = Counter(); + + max_bucket *= 2; + bucket_size *= 2; +} + Formula::Formula() { } diff --git a/src/base/statistics.hh b/src/base/statistics.hh index 529871dc4..579a7908e 100644 --- a/src/base/statistics.hh +++ b/src/base/statistics.hh @@ -1435,6 +1435,146 @@ class DistStor }; /** + * Templatized storage and interface for a histogram stat. + */ +class HistStor +{ + public: + /** The parameters for a distribution stat. */ + struct Params : public DistParams + { + /** The number of buckets.. */ + size_type buckets; + + Params() : DistParams(Hist) {} + }; + + private: + /** The minimum value to track. */ + Counter min_bucket; + /** The maximum value to track. */ + Counter max_bucket; + /** The number of entries in each bucket. */ + Counter bucket_size; + + /** The current sum. */ + Counter sum; + /** The sum of squares. */ + Counter squares; + /** The number of samples. */ + Counter samples; + /** Counter for each bucket. */ + VCounter cvec; + + public: + HistStor(Info *info) + : cvec(safe_cast<const Params *>(info->storageParams)->buckets) + { + reset(info); + } + + void grow_up(); + void grow_out(); + void grow_convert(); + + /** + * Add a value to the distribution for the given number of times. + * @param val The value to add. + * @param number The number of times to add the value. + */ + void + sample(Counter val, int number) + { + assert(min_bucket < max_bucket); + if (val < min_bucket) { + if (min_bucket == 0) + grow_convert(); + + while (val < min_bucket) + grow_out(); + } else if (val >= max_bucket + bucket_size) { + if (min_bucket == 0) { + while (val >= max_bucket + bucket_size) + grow_up(); + } else { + while (val >= max_bucket + bucket_size) + grow_out(); + } + } + + size_type index = + (int64_t)std::floor((val - min_bucket) / bucket_size); + + assert(index >= 0 && index < size()); + cvec[index] += number; + + sum += val * number; + squares += val * val * number; + samples += number; + } + + /** + * Return the number of buckets in this distribution. + * @return the number of buckets. + */ + size_type size() const { return cvec.size(); } + + /** + * Returns true if any calls to sample have been made. + * @return True if any values have been sampled. + */ + bool + zero() const + { + return samples == Counter(); + } + + void + prepare(Info *info, DistData &data) + { + const Params *params = safe_cast<const Params *>(info->storageParams); + + assert(params->type == Hist); + data.type = params->type; + data.min = min_bucket; + data.max = max_bucket + bucket_size - 1; + data.bucket_size = bucket_size; + + data.min_val = min_bucket; + data.max_val = max_bucket; + + int buckets = params->buckets; + data.cvec.resize(buckets); + for (off_type i = 0; i < buckets; ++i) + data.cvec[i] = cvec[i]; + + data.sum = sum; + data.squares = squares; + data.samples = samples; + } + + /** + * Reset stat value to default + */ + void + reset(Info *info) + { + const Params *params = safe_cast<const Params *>(info->storageParams); + min_bucket = 0; + max_bucket = params->buckets - 1; + bucket_size = 1; + + size_type size = cvec.size(); + for (off_type i = 0; i < size; ++i) + cvec[i] = Counter(); + + sum = Counter(); + squares = Counter(); + samples = Counter(); + } +}; + +/** * Templatized storage and interface for a distribution that calculates mean * and variance. */ @@ -2294,6 +2434,29 @@ class Distribution : public DistBase<Distribution, DistStor> }; /** + * A simple histogram stat. + * @sa Stat, DistBase, HistStor + */ +class Histogram : public DistBase<Histogram, HistStor> +{ + public: + /** + * Set the parameters of this histogram. @sa HistStor::Params + * @param size The number of buckets in the histogram + * @return A reference to this histogram. + */ + Histogram & + init(size_type size) + { + HistStor::Params *params = new HistStor::Params; + params->buckets = size; + this->setParams(params); + this->doInit(); + return this->self(); + } +}; + +/** * Calculates the mean and variance of all the samples. * @sa DistBase, SampleStor */ diff --git a/src/base/stats/info.hh b/src/base/stats/info.hh index 421ed4a55..b1c05eed1 100644 --- a/src/base/stats/info.hh +++ b/src/base/stats/info.hh @@ -164,7 +164,7 @@ class VectorInfo : public Info virtual Result total() const = 0; }; -enum DistType { Deviation, Dist }; +enum DistType { Deviation, Dist, Hist }; struct DistData { diff --git a/src/base/stats/text.cc b/src/base/stats/text.cc index 576f7e5d4..385b92a1a 100644 --- a/src/base/stats/text.cc +++ b/src/base/stats/text.cc @@ -377,11 +377,11 @@ DistPrint::operator()(ostream &stream) const size_t size = data.cvec.size(); Result total = 0.0; - if (data.underflow != NAN) + if (data.type == Dist && data.underflow != NAN) total += data.underflow; for (off_type i = 0; i < size; ++i) total += data.cvec[i]; - if (data.overflow != NAN) + if (data.type == Dist && data.overflow != NAN) total += data.overflow; if (total) { @@ -389,7 +389,7 @@ DistPrint::operator()(ostream &stream) const print.cdf = 0.0; } - if (data.underflow != NAN) { + if (data.type == Dist && data.underflow != NAN) { print.name = base + "underflows"; print.update(data.underflow, total); print(stream); @@ -410,7 +410,7 @@ DistPrint::operator()(ostream &stream) const print(stream); } - if (data.overflow != NAN) { + if (data.type == Dist && data.overflow != NAN) { print.name = base + "overflows"; print.update(data.overflow, total); print(stream); @@ -419,13 +419,13 @@ DistPrint::operator()(ostream &stream) const print.pdf = NAN; print.cdf = NAN; - if (data.min_val != NAN) { + if (data.type == Dist && data.min_val != NAN) { print.name = base + "min_value"; print.value = data.min_val; print(stream); } - if (data.max_val != NAN) { + if (data.type == Dist && data.max_val != NAN) { print.name = base + "max_value"; print.value = data.max_val; print(stream); diff --git a/src/unittest/stattest.cc b/src/unittest/stattest.cc index b676ed9bd..529511c71 100644 --- a/src/unittest/stattest.cc +++ b/src/unittest/stattest.cc @@ -134,6 +134,18 @@ main(int argc, char *argv[]) Vector2d s16; Value s17; Value s18; + Histogram h01; + Histogram h02; + Histogram h03; + Histogram h04; + Histogram h05; + Histogram h06; + Histogram h07; + Histogram h08; + Histogram h09; + Histogram h10; + Histogram h11; + Histogram h12; Formula f1; Formula f2; @@ -266,6 +278,77 @@ main(int argc, char *argv[]) .desc("this is stat 18") ; + h01 + .init(11) + .name("Histogram01") + .desc("this is histogram 1") + ; + + h02 + .init(10) + .name("Histogram02") + .desc("this is histogram 2") + ; + + h03 + .init(11) + .name("Histogram03") + .desc("this is histogram 3") + ; + + h04 + .init(10) + .name("Histogram04") + .desc("this is histogram 4") + ; + + h05 + .init(11) + .name("Histogram05") + .desc("this is histogram 5") + ; + + h06 + .init(10) + .name("Histogram06") + .desc("this is histogram 6") + ; + + h07 + .init(11) + .name("Histogram07") + .desc("this is histogram 7") + ; + + h08 + .init(10) + .name("Histogram08") + .desc("this is histogram 8") + ; + + h09 + .init(11) + .name("Histogram09") + .desc("this is histogram 9") + ; + + h10 + .init(10) + .name("Histogram10") + .desc("this is histogram 10") + ; + + h11 + .init(11) + .name("Histogram11") + .desc("this is histogram 11") + ; + + h12 + .init(10) + .name("Histogram12") + .desc("this is histogram 12") + ; f1 .name("Formula1") @@ -544,6 +627,35 @@ main(int argc, char *argv[]) s6.sample(102); s12.sample(100); + for (int i = 0; i < 100; i++) { + h01.sample(i); + h02.sample(i); + } + + for (int i = -100; i < 100; i++) { + h03.sample(i); + h04.sample(i); + } + + for (int i = -100; i < 1000; i++) { + h05.sample(i); + h06.sample(i); + } + + for (int i = 100; i >= -1000; i--) { + h07.sample(i); + h08.sample(i); + } + + for (int i = 0; i <= 1023; i++) { + h09.sample(i); + h10.sample(i); + } + + for (int i = -1024; i <= 1023; i++) { + h11.sample(i); + h12.sample(i); + } prepare(); |