Bootstrapper.cc
Go to the documentation of this file.
1 
10 #include "Bootstrapper.h"
11 
12 #include <utl/TabulatedFunctionErrors.h>
13 #include <utl/MultiTabulatedFunctionErrors.h>
14 #include <utl/ErrorLogger.h>
15 #include <utl/AugerException.h>
16 
17 #undef NDEBUG
18 //#define NDEBUG
19 #include <assert.h>
20 #include <cmath>
21 #include <string>
22 #include <sstream>
23 #include <iostream>
24 #include <vector>
25 #include <algorithm>
26 
27 using namespace FdLightCollectionEfficiencyKG;
28 using namespace utl;
29 using namespace std;
30 
31 
32 /*************************************************************************/
33 Bootstrapper::Bootstrapper(const std::list<utl::TabulatedFunctionErrors>& data, const int verbosity) :
34  fData(data),
35  fVerbosity(verbosity)
36 {
37  Bootstrap();
38 }
39 
40 /*************************************************************************/
41 void
43 {
44  const unsigned int nSamples = fData.size();
45  assert(nSamples);
46 
47  // for each bin i, calculate the uncertainty on the mean
48 
49  // one vector of efficiencies for each bin i
50  vector<vector<double> > efficiencies;
51  vector<vector<double> > efficiencyUncertainties;
52  const unsigned int nBins = fData.begin()->GetNPoints();
53  efficiencies.resize(nBins);
54  efficiencyUncertainties.resize(nBins);
55 
56  for (unsigned int i = 0; i < nBins; ++i) {
57  efficiencies[i].reserve(nSamples);
58  efficiencyUncertainties[i].reserve(nSamples);
59  }
60 
61  vector<double> xvalues;
62  xvalues.resize(nBins, 0.);
63 
64  // iterate over the efficiency functions
65  for (std::list<utl::TabulatedFunctionErrors>::const_iterator iter = fData.begin();
66  iter != fData.end(); ++iter)
67  {
68  const utl::TabulatedFunctionErrors& lceff = *iter;
69  assert(lceff.GetNPoints() > 0);
70 
71  for (unsigned int i = 0; i < lceff.GetNPoints(); ++i) {
72  const double eff = lceff.GetY(i);
73  const double effUnc = lceff.GetYErr(i);
74  assert(isfinite(eff));
75  efficiencies[i].push_back(eff);
76  efficiencyUncertainties[i].push_back(effUnc);
77  xvalues[i] = lceff.GetX(i); // FIXME what a waste
78  }
79  } // end foreach efficiency function
80 
81  fMean.Clear();
82  for (unsigned int i = 0; i < efficiencies.size(); ++i) {
83  if (efficiencies[i].size() != 0) {
84  //pair<double, double> meanWUncert = MeanWithUncertainty(efficiencies[i], efficiencyUncertainties[i], eSampleVariance);
85  //pair<double, double> meanWUncert = MeanWithUncertainty(efficiencies[i], efficiencyUncertainties[i], eMedianAbsoluteDeviation);
86  pair<double, double> meanWUncert = MeanWithUncertainty(efficiencies[i], efficiencyUncertainties[i], eSampleVarianceWeighted);
87 
88  assert(isfinite(meanWUncert.first));
89  assert(isfinite(meanWUncert.second));
90  //cout << meanWUncert.first << " " << meanWUncert.second << endl;
91  fMean.PushBack( xvalues[i], 0., meanWUncert.first, meanWUncert.second );
92  }
93  else
94  fMean.PushBack( xvalues[i], 0., 0., 1.e3 );
95  }
96 
97  ostringstream msg;
98  msg << "Bootstrapped using " << nSamples << " efficiencies";
99  INFO(msg);
100 }
101 
102 
103 /*************************************************************************/
104 bool
105 Bootstrapper::MaxRelUncertaintyBelowThreshold(const double relUncertaintyThreshold, const double absValueThreshold)
106 {
107  const utl::TabulatedFunctionErrors& lcEff = fMean;
108 
109  for (unsigned int i = 0; i < lcEff.GetNPoints(); ++i) {
110  const double val = lcEff.GetY(i);
111  const double unc = lcEff.GetYErr(i);
112  assert(isfinite(val));
113  assert(isfinite(unc));
114  //cout << val << " +/- " << unc << " " << unc/val*100. << endl;
115  if (val >= absValueThreshold && unc/val > relUncertaintyThreshold) {
116  if (fVerbosity >= 0) {
117  ostringstream msg;
118  msg << "Uncertainty still too large at"
119  << " index " << i << ", value " << unc/val*100. << "%";
120  INFO(msg);
121  }
122  return false;
123  }
124  }
125 
126  return true;
127 }
128 
129 
130 /*************************************************************************/
131 std::pair<double, double>
132 Bootstrapper::MeanWithUncertainty(const std::vector<double>& samples,
133  const std::vector<double>& uncertainties,
134  const VarianceEstimator est)
135 {
136  std::pair<double, double> retval; // mean, unc. of mean
137  if (est == eSampleVarianceWeighted) {
138  const double mean = WeightedMean(samples, uncertainties);
139  retval = std::make_pair( mean, sqrt( WeightedSampleVariance(samples, uncertainties, mean) / (double)samples.size() ) );
140  }
141  else {
142  const double mean = Mean(samples);
143  assert(isfinite(mean));
144  const double kMAD = 1.4826;
145 
146  switch (est) {
147  case eSampleVariance:
148  retval = std::make_pair( mean, sqrt( SampleVariance(samples, mean) / (double)samples.size() ) );
149  break;
151  retval = std::make_pair( mean, kMAD*MedianAbsoluteDeviation(samples, mean) / sqrt((double)samples.size()) );
152  break;
153  default:
154  throw utl::DoesNotComputeException("Bad variance estimator");
155  };
156  }
157 
158  assert(isfinite(retval.first));
159  assert(isfinite(retval.second));
160  return retval;
161 }
162 
163 /*************************************************************************/
164 double
165 Bootstrapper::Mean(const std::vector<double>& samples)
166 {
167  double sum = 0;
168  const unsigned int n = samples.size();
169  assert(n > 0);
170  for (unsigned int i = 0; i < n; ++i) {
171  assert(isfinite(samples[i]));
172  sum += samples[i];
173  }
174  return sum / (double)n;
175 }
176 
177 /*************************************************************************/
178 double
179 Bootstrapper::WeightedMean(const std::vector<double>& samples, const std::vector<double>& uncertainties)
180 {
181  const unsigned int n = samples.size();
182  assert(n > 0);
183  assert(uncertainties.size() == n);
184 
185  double sum = 0, sumW = 0;
186  for (unsigned int i = 0; i < n; ++i) {
187  assert(isfinite(samples[i]));
188  assert(isfinite(uncertainties[i]));
189  const double unc = uncertainties[i];
190  const double w = 1./(unc*unc);
191  sum += samples[i]*w;
192  sumW += w;
193  }
194  return sum / sumW;
195 }
196 
197 /*************************************************************************/
198 double
199 Bootstrapper::SampleVariance(const std::vector<double>& samples, const double mean)
200 {
201  double variance = 0.;
202  const unsigned int n = samples.size();
203  assert(n > 1);
204  for (unsigned int i = 0; i < n; i++) {
205  assert(isfinite(samples[i]));
206  variance += pow(samples[i]-mean, 2.);
207  }
208  return variance / ((double)n - 1.);
209 }
210 
211 /*************************************************************************/
212 double
213 Bootstrapper::WeightedSampleVariance(const std::vector<double>& samples,
214  const std::vector<double>& uncertainties,
215  const double mean)
216 {
217  const unsigned int n = samples.size();
218  assert(n > 1);
219  assert(uncertainties.size() == n);
220 
221  double sumW = 0., sumW2 = 0., wvariance = 0.;
222  for (unsigned int i = 0; i < n; ++i) {
223  const double unc = uncertainties[i];
224  assert(isfinite(samples[i]));
225  assert(isfinite(unc));
226  assert(unc > 0.);
227  const double w = 1./(unc*unc);
228  sumW += w;
229  sumW2 += w*w;
230  wvariance += w*pow(samples[i]-mean, 2.);
231  }
232  return (sumW / (sumW*sumW - sumW2)) * wvariance;
233 }
234 
235 /*************************************************************************/
236 double
237 Bootstrapper::Median(const std::vector<double>& sortedSamples)
238 {
239  const double medianElem = ((double)sortedSamples.size()+1.)/2.;
240  const unsigned int idx = (unsigned int)medianElem;
241  double median;
242  if ( (medianElem-(double)idx > 0.1) )
243  median = 0.5 * (sortedSamples[idx] + sortedSamples[idx+1]);
244  else
245  median = sortedSamples[idx];
246 
247  assert(isfinite(median));
248  return median;
249 }
250 
251 /*************************************************************************/
252 double
253 Bootstrapper::MedianAbsoluteDeviation(const std::vector<double>& samples, const double /*mean*/)
254 {
255  // MAD=median_i(|X_i-median_j(X_j)|)
256  vector<double> sortedSamples = samples; // Note: Consider the non-const case as an optimization?
257  const unsigned int nSamples = sortedSamples.size();
258 
259  std::sort(sortedSamples.begin(), sortedSamples.end());
260  const double sampleMedian = Median(sortedSamples);
261  assert(isfinite(sampleMedian));
262 
263  for (unsigned int i = 0; i < nSamples; ++i) {
264  assert(isfinite(sortedSamples[i]));
265  sortedSamples[i] = fabs(sortedSamples[i] - sampleMedian);
266  }
267 
268  std::sort(sortedSamples.begin(), sortedSamples.end());
269 
270  const double mad = Median(sortedSamples);
271  assert(isfinite(mad));
272  return mad;
273 }
274 
275 // Configure (x)emacs for this file ...
276 // Local Variables:
277 // mode: c++
278 // End:
unsigned int GetNPoints() const
double WeightedSampleVariance(const std::vector< double > &samples, const std::vector< double > &uncertainties, const double mean)
#define INFO(message)
Macro for logging informational messages.
Definition: ErrorLogger.h:161
double pow(const double x, const unsigned int i)
const double & GetYErr(const unsigned int idx) const
utl::TabulatedFunctionErrors fMean
Definition: Bootstrapper.h:67
double WeightedMean(const std::vector< double > &samples, const std::vector< double > &uncertainties)
Bootstrapper(const std::list< utl::TabulatedFunctionErrors > &data, const int verbosity=0)
Definition: Bootstrapper.cc:33
std::pair< double, double > MeanWithUncertainty(const std::vector< double > &samples, const std::vector< double > &uncertainties, const VarianceEstimator est=eSampleVariance)
void PushBack(const double x, const double xErr, const double y, const double yErr)
Base class for inconsistency/illogicality exceptions.
const double & GetY(const unsigned int idx) const
double MedianAbsoluteDeviation(const std::vector< double > &samples, const double mean)
double Median(const std::vector< double > &sortedSamples)
uint16_t * data
Definition: dump1090.h:228
const double & GetX(const unsigned int idx) const
bool MaxRelUncertaintyBelowThreshold(const double relUncertaintyThreshold, const double absValueThreshold=0.)
int fVerbosity
global verbosity flag
Definition: Bootstrapper.h:65
double Mean(const std::vector< double > &samples)
double SampleVariance(const std::vector< double > &samples, const double mean)
const std::list< utl::TabulatedFunctionErrors > & fData
Definition: Bootstrapper.h:63

, generated on Tue Sep 26 2023.