CFEL - ASG Software Suite  2.5.0
CASS
statistics_calculator.hpp
Go to the documentation of this file.
1 //Copyright (C) 2013 Lutz Foucar
2 
3 /**
4  * @file statistics_calculator.hpp contains declarations of statistic calculators
5  *
6  * @author Lutz Foucar
7  */
8 
9 #ifndef _STATISTICS_CALCULATOR_H_
10 #define _STATISTICS_CALCULATOR_H_
11 
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iterator>
#include <vector>
15 
16 namespace cass
17 {
18 /** statistics calculator for a cummulative statistic
19  *
20  * This class is based on Knuth's algorithm
21  *
22  * Donald E. Knuth (1998).
23  * The Art of Computer Programming,
24  * volume 2: Seminumerical Algorithms,
25  * 3rd edn., p. 232.
26  * Boston: Addison-Wesley.
27  *
28  * @tparam Type type of the values for the average, defines the precision
29  *
30  * @author Lutz Foucar
31  */
template <typename Type>
class CummulativeStatisticsCalculator
{
public:
  /** define the value type */
  typedef Type value_type;

  /** define the type of the counter */
  typedef std::size_t counter_type;

  /** default constructor
   *
   * resets the values, so that a fresh object reports zero for every
   * statistic.
   */
  CummulativeStatisticsCalculator()
  {
    reset();
  }

  /** add a datum to the distribution
   *
   * updates the running mean, the running sum of squared deviations and the
   * plain sum in a single pass.
   *
   * @param datum The datum to be added
   */
  void addDatum(const value_type &datum)
  {
    ++_N;
    /** deviation from the mean before it is updated */
    const value_type delta(datum - _mean);
    _mean += (delta / static_cast<value_type>(_N));
    /** uses the deviation from the old and from the updated mean */
    _tmp += (delta * (datum - _mean));
    _sum += datum;
  }

  /** add a number of dati to the distribution
   *
   * Adds all elements in the half open range [first, last) to the
   * distribution.
   *
   * @param first The first in the range to be added
   * @param last The element after the last to be added
   */
  template <class InputIterator>
  void addDistribution(InputIterator first, InputIterator last)
  {
    for (; first != last; ++first)
      addDatum(*first);
  }

  /** retrieve the mean of the distribution
   *
   * @return mean of the distribution, 0 when no datum has been added
   */
  value_type mean() const
  {
    return _mean;
  }

  /** retrieve the variance of the distribution
   *
   * The accumulated sum of squared deviations is divided by (N - 1). With
   * less than two dati this divisor is not usable (the unsigned counter would
   * wrap around for N = 0 and the result would be 0/0 for N = 1), therefore 0
   * is returned in that case.
   *
   * @return variance of the distribution, 0 for less than two dati
   */
  value_type variance() const
  {
    if (_N < 2)
      return value_type(0);
    return (_tmp / static_cast<value_type>(_N - 1));
  }

  /** retrieve the standard deviation of the distribution
   *
   * @return square root of variance()
   */
  value_type stdv() const
  {
    /** allow std::sqrt for builtin types and ADL for user defined types */
    using std::sqrt;
    return sqrt(variance());
  }

  /** retrieve the sum of all dati that have been added
   *
   * @return sum of all dati
   */
  value_type sum() const
  {
    return _sum;
  }

  /** retrieve the number of dati that have been added
   *
   * @return counts
   */
  counter_type count() const
  {
    return _N;
  }

  /** reset the statistics
   *
   * sets the sum, the mean, the intermediate value and the counter to zero
   */
  void reset()
  {
    _sum = value_type(0);
    _mean = value_type(0);
    _tmp = value_type(0);
    _N = 0;
  }

private:
  /** the sum of all dati */
  value_type _sum;

  /** the current mean value */
  value_type _mean;

  /** the current sum of squared deviations that one calcs the stdv from */
  value_type _tmp;

  /** counter to see how many values have been added to the statistics */
  counter_type _N;
};
144 
145 
146 
147 /** statistics calculator for a cummulative statistic, removes outliers
148  *
149  * class uses the CummulativeStatisticsCalculator to calculate the mean and
150  * stdv, but removes the outliers from the statistics.
151  *
152  * @tparam Type type of the values for the average, defines the precision
153  *
154  * @author Lutz Foucar
155  */
156 template <typename Type>
158 {
159 public:
160  /** define the value type */
161  typedef Type value_type;
162 
163  /** define the statistics */
165 
166  /** define the type of container used for the values */
167  typedef std::vector<value_type> container_type;
168 
169  /** define a const iterator of the container */
170  typedef typename container_type::iterator iterator_t;
171 
172  /** define number of elements */
173  typedef typename std::iterator_traits<iterator_t>::difference_type count_type;
174 
175  /** constructor sets the signal to noise ratio
176  *
177  * @param snr the signal to noise ratio to detect outliers
178  */
179  CummulativeStatisticsNoOutlier(const value_type &snr)
180  : _snr(snr)
181  {}
182 
183  /** add a datum to the container
184  *
185  * @param datum The datum to be added
186  */
187  void addDatum(const value_type & datum)
188  {
189  _container.push_back(datum);
190  _containerChanged=true;
191  }
192 
193  /** add a number of dati to the distribution
194  *
195  * Adds all elements between first and last to the distribution, but not the
196  * last
197  *
198  * @param first The first in the range to be added
199  * @param last The element after last to be added
200  */
201  template <class InputIterator>
202  void addDistribution(InputIterator first, InputIterator last)
203  {
204  while (first != last)
205  addDatum(*first++);
206  }
207 
208  /** substitute the internal container with an outside one
209  *
210  * @param distribution The external container
211  */
212  void setDistribution(const container_type & distribution)
213  {
215  _containerChanged = true;
216  }
217 
218  /** retrieve the mean of the distribution without outliers
219  *
220  * @return mean of the distribution
221  */
222  value_type mean()
223  {
224  updateStat();
225  return _mean;
226  }
227 
228  /** retrieve the standart deviation of the distribution
229  *
230  * @return standart deviation of the distribution
231  */
232  value_type stdv()
233  {
234  updateStat();
235  return _stdv;
236  }
237 
238  /** retrieve the number of points used in the statistics
239  *
240  * @return number of points used in the statistics
241  */
242  count_type nbrPointsUsed()
243  {
244  updateStat();
245  return _nPoints;
246  }
247 
248  /** retrieve the number of outliers higher than the distribution used
249  *
250  * @return number of outliers higher than the distribution used
251  */
252  count_type nbrUpperOutliers()
253  {
254  updateStat();
255  return _nUpperOutliers;
256  }
257 
258  /** retrieve the number of outliers lower than the distribution used
259  *
260  * @return number of outliers lower than the distribution used
261  */
262  count_type nbrLowerOutliers()
263  {
264  updateStat();
265  return _nLowerOutliers;
266  }
267 
268  /** retrieve the total number of outliers
269  *
270  * @return total number of outliers
271  */
272  count_type nbrOutliers()
273  {
274  return nbrLowerOutliers() + nbrUpperOutliers();
275  }
276 
277  /** reset the container */
278  void reset()
279  {
280  _container.clear();
281  _containerChanged = true;
282  }
283 
284 private:
285  /** update the statistic values
286  *
287  * first calculate the mean and stdv of the container. Then remove
288  * outliers and reacalculate the mean and stdv. Do this until no outliers are
289  * present in the remaining distribution.
290  *
291  * @return mean without outliers of the distribution
292  */
293  void updateStat()
294  {
295  using namespace std;
296 
297 
298  if (!_containerChanged)
299  return;
300 
301  container_type c(_container);
302  sort(c.begin(),c.end());
303  iterator_t lowPos(c.begin());
304  iterator_t upPos(c.end());
305 
306  bool outliersdetected(false);
307  do
308  {
309  _stat.reset();
310  _stat.addDistribution(lowPos,upPos);
311 
312  const value_type lowBound(_stat.mean() - _snr * _stat.stdv());
313  const value_type upBound(_stat.mean() + _snr * _stat.stdv());
314  iterator_t newLowPos(lower_bound(c.begin(), c.end(), lowBound));
315  iterator_t newUpPos(upper_bound (c.begin(), c.end(), upBound));
316 
317  /** outliers have been detected when the low and up iterators have changed */
318  outliersdetected = ( newLowPos != lowPos || newUpPos != upPos);
319 
320  lowPos = newLowPos;
321  upPos = newUpPos;
322  }
323  while (outliersdetected);
324 
325  _mean = _stat.mean();
326  _stdv = _stat.stdv();
327  _nPoints = distance(lowPos,upPos);
328  _nLowerOutliers = distance(c.begin(),lowPos);
329  _nUpperOutliers = distance(upPos,c.end());
330 
331  _containerChanged = false;
332  }
333 
334  /** the statistics calculator */
335  statistics_t _stat;
336 
337  /** container to store the values in */
338  container_type _container;
339 
340  /** the mean without outliers */
341  Type _mean;
342 
343  /** the stdv without outliers */
344  Type _stdv;
345 
346  /** the number of points included in the statistics calculation */
347  count_type _nPoints;
348 
349  /** the number of upper outliers */
350  count_type _nUpperOutliers;
351 
352  /** the number of lower outliers */
353  count_type _nLowerOutliers;
354 
355  /** flag to show whether the statistic values have to be updated */
357 
358  /** the signal to noise ratio that will detectect outliers */
359  Type _snr;
360 };
361 
362 
363 /** statistics calculator for a exponential moving statistics
364  *
365  * The algorithms used by this class are based on a discussion found here:
366  * http://mathforum.org/kb/message.jspa?messageID=1637905
367  *
368  * @tparam Type type of the values for the average, defines the precision
369  *
370  * @author Lutz Foucar
371  */
template <typename Type>
class MovingStatisticsCalculator
{
public:
  /** define the value type */
  typedef Type value_type;

  /** constructor
   *
   * converts the number of averages to the weight of a new datum and resets
   * the statistics.
   *
   * @param nAverages the last how many datums should have highest contribution
   *                  to this.
   */
  MovingStatisticsCalculator(unsigned int nAverages=200)
  {
    nbrAverages(nAverages);
    reset();
  }

  /** add a datum to the distribution
   *
   * The first datum after construction or reset() seeds the mean directly and
   * leaves the stdv at 0, so that the statistics do not start biased towards
   * the artificial initial mean of 0. Every further datum is blended in with
   * the weight alpha:
   * mean = (1 - alpha) * mean + alpha * datum,
   * stdv = sqrt(alpha * (datum - mean)^2 + (1 - alpha) * stdv^2),
   * where the stdv uses the already updated mean.
   *
   * @param datum The datum to be added
   */
  void addDatum(const value_type &datum)
  {
    if (_firstdatum)
    {
      _firstdatum = false;
      _mean = datum;
      _stdv = value_type(0);
      return;
    }
    const value_type keep(value_type(1) - _alpha);
    _mean = keep * _mean + _alpha * datum;
    const value_type deviation(datum - _mean);
    _stdv = std::sqrt(_alpha * deviation * deviation + keep * _stdv * _stdv);
  }

  /** retrieve the mean of the distribution
   *
   * @return mean of the distribution
   */
  value_type mean() const
  {
    return _mean;
  }

  /** retrieve the variance of the distribution
   *
   * @return variance of the distribution, which is the squared stdv
   */
  value_type variance() const
  {
    return (_stdv * _stdv);
  }

  /** retrieve the standard deviation of the distribution
   *
   * @return standard deviation of the distribution
   */
  value_type stdv() const
  {
    return _stdv;
  }

  /** reset the statistics
   *
   * zeroes mean and stdv and marks that the next datum is the first one. The
   * weight set by nbrAverages() is kept.
   */
  void reset()
  {
    _mean = value_type(0);
    _stdv = value_type(0);
    _firstdatum = true;
  }

  /** set the nbr of averages
   *
   * convert the nbr of averages to the alpha for the statistic calculation
   * using alpha = 2 / (1 + nAverages)
   *
   * @param nAverages the last how many datums should have highest contribution
   *                  to this.
   */
  void nbrAverages(unsigned int nAverages)
  {
    _alpha = (2./(1.+ static_cast<value_type>(nAverages)));
  }

private:
  /** the current mean value */
  value_type _mean;

  /** the stdv value */
  value_type _stdv;

  /** how much should the current datum be weighted */
  value_type _alpha;

  /** flag to see whether the next datum is the first one to be added */
  bool _firstdatum;
};
459 
460 
461 /** statistics calculator for a median
462  *
463  * adds the datums to an internal vector, which is sorted using nth_element
464  * when getting the median.
465  *
466  * @tparam Type type of the values for the average, defines the precision
467  *
468  * @author Lutz Foucar
469  */
template <typename Type>
class MedianCalculator
{
public:
  /** define the value type */
  typedef Type value_type;

  /** define the type of container used for the values */
  typedef std::vector<value_type> container_type;

  /** default constructor
   *
   * resets the values.
   */
  MedianCalculator()
  {
    reset();
  }

  /** add a datum to the distribution
   *
   * @param datum The datum to be added
   */
  void addDatum(const value_type &datum)
  {
    _container.push_back(datum);
  }

  /** add a number of dati to the distribution
   *
   * Adds all elements in the half open range [first, last) to the
   * distribution.
   *
   * @param first The first in the range to be added
   * @param last The element after the last to be added
   */
  template <class InputIterator>
  void addDistribution(InputIterator first, InputIterator last)
  {
    for (; first != last; ++first)
      addDatum(*first);
  }

  /** retrieve the median of the distribution
   *
   * uses median(const container_type &container) to calc the median
   *
   * @return median of the distribution
   */
  value_type median() const
  {
    return median(_container);
  }

  /** reset the statistics
   *
   * removes all values that have been added
   */
  void reset()
  {
    _container.clear();
  }

  /** calculate the median of a container_type container
   *
   * a static function so that one can use it without having an object of this
   * class.
   *
   * copies the container, partially orders the copy with nth_element and
   * retrieves the element at position size / 2. For an even number of
   * elements this is the upper one of the two central elements. The passed
   * container is not modified.
   *
   * @return the median of the values inside the container, a value
   *         initialized value_type when the container is empty
   * @param container the container from whose values the median should be
   *                  calculated
   */
  static value_type median(const container_type &container)
  {
    /** an empty container has no central element that could be accessed */
    if (container.empty())
      return value_type();

    container_type cc(container);
    const typename container_type::size_type medianpos(cc.size() / 2);
    std::nth_element(cc.begin(), cc.begin() + medianpos, cc.end());
    return cc[medianpos];
  }

private:
  /** the values that have been added */
  container_type _container;
};
553 
554 }//end namespace cass
555 #endif
size_t counter_type
define the type of the counter
void addDatum(const value_type &datum)
add a datum to the distribution
value_type _tmp
the current intermediate value that one calcs the stdv from
counter_type _N
counter to see how many values have been added to the statistics
void updateStat()
update the statistic values
Type _snr
the signal to noise ratio that will detectect outliers
counter_type count() const
retrieve the number of datum that have been added
count_type nbrUpperOutliers()
retrieve the number of outliers higher than the distribution used
void reset()
reset the statistics
value_type stdv() const
retrieve the standart deviation of the distribution
value_type mean()
retrieve the mean of the distribution without outliers
statistics calculator for a cummulative statistic, removes outliers
bool _firstdatum
flag to see whether first datum is added
count_type _nLowerOutliers
the number of lower outliers
statistics calculator for a cummulative statistic
STL namespace.
MedianCalculator()
default constructor
statistics calculator for a median
container_type _container
the current mean value
value_type mean() const
retrieve the mean of the distribution
void addDatum(const value_type &datum)
add a datum to the distribution
statistics calculator for a exponential moving statistics
count_type nbrPointsUsed()
retrieve the number of points used in the statistics
value_type mean() const
retrieve the mean of the distribution
value_type _mean
the current mean value
value_type variance() const
retrieve the variance of the distribution
count_type _nPoints
the number of points included in the statistics calculation
value_type _mean
the current mean value
bool _containerChanged
flag to show whether the statistic values have to be updated
CummulativeStatisticsNoOutlier(const value_type &snr)
constructor sets the signal to noise ratio
void addDistribution(InputIterator first, InputIterator last)
add a number of dati to the distribution
void addDatum(const value_type &datum)
add a datum to the distribution
value_type sum() const
retrieve the sum of all dati that have been added
void nbrAverages(unsigned int nAverages)
set the nbr of averages
count_type _nUpperOutliers
the number of upper outliers
CummulativeStatisticsCalculator< value_type > statistics_t
define the statistics
void setDistribution(const container_type &distribution)
substitute the internal container with an outside one
std::vector< value_type > container_type
void addDatum(const value_type &datum)
add a datum to the container
container_type::iterator iterator_t
define a const iterator of the container
Type _stdv
the stdv without outliers
void addDistribution(InputIterator first, InputIterator last)
add a number of dati to the distribution
static value_type median(const container_type &container)
calculate the mean of a container_type container
value_type median() const
retrieve the median of the distribution
count_type nbrLowerOutliers()
retrieve the number of outliers lower than the distribution used
Type value_type
define the value type
void addDistribution(InputIterator first, InputIterator last)
add a number of dati to the distribution
Type _mean
the mean without outliers
value_type stdv()
retrieve the standart deviation of the distribution
statistics_t _stat
the statistics calculator
std::vector< value_type > container_type
define the type of container used for the values
value_type stdv() const
retrieve the standart deviation of the distribution
value_type variance() const
retrieve the variance of the distribution
Intensity distribution
MovingStatisticsCalculator(unsigned int nAverages=200)
constructor
Type value_type
define the value type
std::iterator_traits< iterator_t >::difference_type count_type
define number of elements
count_type nbrOutliers()
retrieve the total number of outliers
container_type _container
container to store the values in
value_type _alpha
how much should the current datum be weighted