Huber estimator used for robust fitting (A.Kalweit)
[u/mrichter/AliRoot.git] / STAT / AliTMinuitToolkit.cxx
CommitLineData
4b23a517 1
2#include "AliTMinuitToolkit.h"
3#include <TNamed.h>
4#include <TVirtualFitter.h>
5#include <TH1F.h>
6#include <TH2F.h>
7#include <TF1.h>
8#include <TFormula.h>
dc697472 9#include <TVector.h>
10#include <TMatrix.h>
4b23a517 11#include <TMath.h>
12#include <TString.h>
13#include <TROOT.h>
14#include <TCanvas.h>
15#include <TRandom.h>
16
17
dc697472 18//--------------------------------------------------------------------------------------
4b23a517 19//
20// The AliTMinuitToolkit serves as an easy to use interface for the TMinuit
21// package:
22//
23// - It allows to fit a curve to one and two dimensional histograms
24// (TH2F::Fit() only allows to fit a hyperplane).
25// - Or n points can be specified directly via a n x 2 matrix.
dc697472 26// - An option for robust fitting with non-linear functions is implemented.
4b23a517 27//
28//
29// 1. Setting the formula:
30//
31// The formula is simply set via "void SetFitFunction(TFormula * formula)".
32//
33//
34// 2. Adding the data points
35//
36// - In order to fit a histogram, use "void FitHistogram(TH1F * his)" or
37// "void FitHistogram(TH2F * his)". The fitter is started automatically
38// - Alternatively, the direct specification of the points is possible via
39// "void SetPoints(TMatrixD * points)". Note, that the each point
40// corresponds to one row in the matrix. The fitter is then started with
dc697472 41// the command "void Fit()". The weight of each point can be specified
42// with an n-dimensional vector using "void SetWeights(TVectorD * weights)".
4b23a517 43//
44//
45// 3. Accessing the fit results
46//
47// The N parameters of the formula are stored in a N-dimensional vector which
dc697472 48// is returned by "TVectorD * GetParameters()". In a similar way the covariance
4b23a517 49// matrix of the fit is returned via "TMatrixD * GetCovarianceMatrix()" which
50// is of the type N x N.
51//
52//
53// 4. Non-linear robust fitting:
54//
55// Even a few outliers can lead to wrong results of a least-squares fitting
56// procedure. In this case the use of robust(resistant) methods can be
57// helpful, but a stronger dependence on starting values or convergence to
58// local minima can occur.
59//
dc697472 60// The robust option becomes active if EnableRobust(true, sigma) is called. It is
61// very much recommended that a normalization value (scale variable) corresponding
62// to an expected deviation (sigma) is specified via
63// "EnableRobust(Bool_t b, Double_t sigma)".
64//
65// Performing the fit without knowledge of sigma is also possible if only
66// "EnableRobust(true)" is activated, but this is NOT RECOMMENDED.
67//
68// The method is based on another estimator instead of chi^2. For small deviations
69// the function behaves like x^2 and for larger deviations like |x| - the so
70// called Huber estimator:
71//
72// h(x) = x^2 , for x < 2.5*sigma
73// h(x) = 2*(2.5*sigma)*x - (2.5*sigma)^2 , for x > 2.5*sigma
74//
75// If a weighting function is specified in addition, a second run with the ordinary
76// metric is started, but before entering the iteration every point is weighted
77// according to its distance to the outcoming function of the first run. The weighting
78// function w(x) must be defined on the intervall x in [0,1]. w(0) then
79// corresponds to the weight of the closest point and w(1) to the point with the
80// largest distance.
4b23a517 81//
82// Some standard weighting functions are predefined in
83// "SetWeightFunction(Char_t * name, Float_t param1, Float_t param2 = 0)":
84// - "BOX" equals to 1 if x < param1 and to 0 if x > param1.
85// - "EXPONENTIAL" corresponds to "Math::Exp(-TMath::Log(param1)*x)"
86// - "ERRORFUNCTION" corresponds to "TMath::Erfc((x-param1)/param2)"
87//
dc697472 88//
89// REFERENCE for non-linear robust fitting:
90// Ekblom H. and Madsen K. (1988), Alogrithms for non-linear Huber estimation,
91// BIT Numerical Mathematics 29 (1989) 60-76.
92// internet: http://www.springerlink.com/content/m277218542988344/
93//
94//
95// 5. examples:
96//
97// A small example illustrating the working principles of AliTMinuitToolkit is given
98// in the function "AliTMinuitToolkit::Test()".
99//
100//
101//
102// Comments and questions are always welcome: A.Kalweit@gsi.de
103//--------------------------------------------------------------------------------------
4b23a517 104
105
106ClassImp(AliTMinuitToolkit)
107
108AliTMinuitToolkit::AliTMinuitToolkit() :
109 TNamed(),
110 fFormula(0),
111 fWeightFunction(0),
112 fFitAlgorithm(0),
113 fPoints(0),
dc697472 114 fWeights(0),
4b23a517 115 fParam(0),
116 fParamLimits(0),
117 fCovar(0),
da7c9ae3 118 fChi2(0),
119 fMaxCalls(0),
dc697472 120 fPrecision(0),
121 fUseRobust(0),
122 fExpectedSigma(0)
4b23a517 123{
124 //
125 // standard constructor
126 //
127 fMaxCalls = 500;
128 fPrecision = 1;
dc697472 129 fUseRobust = false;
130 fExpectedSigma = 0;
4b23a517 131}
132
133
da7c9ae3 134AliTMinuitToolkit::AliTMinuitToolkit(const AliTMinuitToolkit&) :
135 TNamed(),
136 fFormula(0),
137 fWeightFunction(0),
138 fFitAlgorithm(0),
139 fPoints(0),
dc697472 140 fWeights(0),
da7c9ae3 141 fParam(0),
142 fParamLimits(0),
143 fCovar(0),
144 fChi2(0),
145 fMaxCalls(0),
dc697472 146 fPrecision(0),
147 fUseRobust(0),
148 fExpectedSigma(0)
da7c9ae3 149{
150
151
152}
153
154
155AliTMinuitToolkit& AliTMinuitToolkit::operator=(const AliTMinuitToolkit&) {
156
157 return *this;
158}
159
160
4b23a517 161
162AliTMinuitToolkit::~AliTMinuitToolkit(){
163 //
164 // destructor
165 //
166 delete fPoints;
dc697472 167 delete fWeights;
4b23a517 168 delete fWeightFunction;
169 delete fParamLimits;
170 delete fFormula;
171 delete fParam;
172 delete fCovar;
173 delete fChi2;
174}
175
176void AliTMinuitToolkit::FitHistogram(TH1F * his) {
177 //
178 // Fit a one dimensional histogram
179 //
180 fPoints = new TMatrixD(his->GetNbinsX(), 2);
181
182 for(Int_t ibin=0; ibin < his->GetNbinsX(); ibin++) {
183 Double_t x = his->GetXaxis()->GetBinCenter(ibin+1);
184 Double_t y = his->GetBinContent(ibin+1);
185
186 (*fPoints)(ibin, 0) = x;
187 (*fPoints)(ibin, 1) = y;
188 }
189
190 Fit();
191}
192
193
194void AliTMinuitToolkit::FitHistogram(TH2F * his) {
195 //
dc697472 196 // Fit a curve to a two dimensional histogram
4b23a517 197 //
198 fPoints = new TMatrixD((Long64_t)his->GetEntries(), 2);
199 Long64_t entry = 0;
200
201 for(Int_t ibin=0; ibin < his->GetNbinsX(); ibin++) {
202 Double_t x = his->GetXaxis()->GetBinCenter(ibin);
203 for(Int_t jbin=0; jbin < his->GetNbinsY(); jbin++) {
204 Long64_t n = his->GetBin(ibin, jbin);
205 Double_t y = his->GetYaxis()->GetBinCenter(jbin);
206 for(Int_t ientries=0; ientries < his->GetBinContent(n); ientries++) {
207 (*fPoints)(entry,0) = x;
208 (*fPoints)(entry,1) = y;
209 entry++;
210 }
211
212 }
213 }
214
215 Fit();
216}
217
218
219void AliTMinuitToolkit::SetWeightFunction(Char_t * name, Float_t param1, Float_t param2) {
220 //
221 // Set the weight function which must be defined on the interval [0,1].
222 //
223 TString FuncType(name);
224 FuncType.ToUpper();
225
226 if (FuncType == "EXPONENTIAL") fWeightFunction = new TFormula("exp", Form("TMath::Exp(-TMath::Log(%f)*x)", param1));
227 if (FuncType == "BOX") fWeightFunction = new TFormula("box", Form("TMath::Erfc((x-%f)/0.0001)", param1));
dc697472 228 if (FuncType == "ERRORFUNCTION") fWeightFunction = new TFormula("err", Form("TMath::Erfc((x-%f)/%f)", param1, param2));
4b23a517 229
230}
231
232
233void AliTMinuitToolkit::FitterFCN(int &npar, double *dummy, double &fchisq, double *gin, int iflag){
234 //
235 // internal function which gives the specified function to the TMinuit function
236 //
237
238 // suppress warnings for unused variables:
239 dummy = dummy;
240 iflag = iflag;
241 npar = npar;
242 //
243 AliTMinuitToolkit * fitter = (AliTMinuitToolkit*)TVirtualFitter::GetFitter()->GetObjectFit();
244 fchisq = 0;
245 Int_t nvar = fitter->GetPoints()->GetNcols()-1;
246 Int_t npoints = fitter->GetPoints()->GetNrows();
247
dc697472 248 // calculate mean deviation for normalization or use user-defined sigma
249 Double_t dev = 0.;
250 if (fitter->GetExpectedSigma() == 0 && fitter->GetStatus() == true) {
251 for (Int_t ipoint=0; ipoint<npoints; ipoint++){
4b23a517 252 Double_t x[100];
dc697472 253 for (Int_t ivar=0; ivar<nvar; ivar++){
4b23a517 254 x[ivar] = (*fitter->GetPoints())(ipoint, ivar);
dc697472 255 }
256 Float_t funx = fitter->GetFormula()->EvalPar(x,gin);
257 Double_t delta = (*fitter->GetPoints())(ipoint, nvar) - funx;
258 dev += TMath::Sqrt(TMath::Abs(delta));
259 }
260 dev = dev/npoints;
261 } else {
262 dev = fitter->GetExpectedSigma();
4b23a517 263 }
dc697472 264 // calculate chisquare
4b23a517 265 for (Int_t ipoint=0; ipoint<npoints; ipoint++){
266 Double_t x[100];
267 for (Int_t ivar=0; ivar<nvar; ivar++){
268 x[ivar] = (*fitter->GetPoints())(ipoint, ivar);
269 }
dc697472 270 Float_t funx = fitter->GetFormula()->EvalPar(x,gin);
271 Double_t delta = TMath::Abs((*fitter->GetPoints())(ipoint, nvar) - funx);
272 if (fitter->GetStatus() == true) {
273 delta = delta/dev; // normalization
274 if (delta <= 2.5) fchisq+= delta*delta; // new metric: Huber-k-estimator
275 if (delta > 2.5) fchisq+= 2*(2.5)*delta - (2.5*2.5);
276 } else {
277 Double_t weight = (*fitter->GetWeights())(ipoint);
278 fchisq+= delta*delta*weight; //old metric
279 }
4b23a517 280 }
dc697472 281 }
4b23a517 282
283
284void AliTMinuitToolkit::Fit() {
285 //
286 // internal function that calls the fitter
287 //
dc697472 288 Int_t nparam = fParam->GetNrows();
289 Int_t npoints = fPoints->GetNrows();
290 Int_t nvar = fPoints->GetNcols()-1;
4b23a517 291
292 // set all paramter limits to infinity as default
293 if (fParamLimits == 0) {
294 fParamLimits = new TMatrixD(nparam ,2);
295 for (Int_t iparam=0; iparam<nparam; iparam++){
296 (*fParamLimits)(iparam, 0) = 0;
297 (*fParamLimits)(iparam, 1) = 0;
298 }
299 }
300
301 // set all weights to 1 as default
dc697472 302 Bool_t weightFlag = false;
4b23a517 303 if (fWeightFunction == 0) {
304 fWeightFunction = new TFormula("constant", "1");
dc697472 305 } else {
306 weightFlag = true;
4b23a517 307 }
308
309 // migrad fit algorithm as default
310 if (fFitAlgorithm == 0) {
311 fFitAlgorithm = "migrad";
312 }
313
dc697472 314 // assign weights
315 if (fWeights == 0) {
316 fWeights = new TVectorD(npoints);
317 for (Int_t ipoint=0; ipoint<npoints; ipoint++) (*fWeights)(ipoint) = 1;
318 }
319
4b23a517 320 // set up the fitter
321 TVirtualFitter *minuit = TVirtualFitter::Fitter(0, nparam);
322 minuit->SetObjectFit(this);
dc697472 323 minuit->SetFCN(AliTMinuitToolkit::FitterFCN);
4b23a517 324
325 // initialize paramters (step size???)
326 for (Int_t iparam=0; iparam<nparam; iparam++){
327 minuit->SetParameter(iparam, Form("p[%d]",iparam), (*fParam)(iparam), (*fParam)(iparam)/10, (*fParamLimits)(iparam, 0), (*fParamLimits)(iparam, 1));
328 }
dc697472 329
330 //
4b23a517 331 Double_t argList[2];
332 argList[0] = fMaxCalls; //maximal number of calls
333 argList[1] = fPrecision; //tolerance normalized to 0.001
334 if (fMaxCalls == 500 && fPrecision == 1) minuit->ExecuteCommand(fFitAlgorithm, 0, 0);
335 if (fMaxCalls != 500 || fPrecision != 1) minuit->ExecuteCommand(fFitAlgorithm, argList, 2);
336 // two additional arguments can be specified ExecuteCommand("migrad", argList, 2) - use 0,0 for default
dc697472 337
338 // fill parameter vector
339 for (Int_t ivar=0; ivar<nparam; ivar++){
340 (*fParam)(ivar) = minuit->GetParameter(ivar);
341 fFormula->SetParameter(ivar, minuit->GetParameter(ivar));
342 }
4b23a517 343
dc697472 344 // if a weight function is specified -> enter 2nd run with weights
345 if (weightFlag == true && fUseRobust == true) {
346 // sort points for weighting
347 Double_t *sortList = new Double_t[npoints];
348 Int_t *indexList = new Int_t[npoints];
349 for (Int_t ipoint=0; ipoint<npoints; ipoint++){
350 Double_t funx = fFormula->Eval((*fPoints)(ipoint, 0));
351 Double_t delta = TMath::Abs((*fPoints)[ipoint][nvar] - funx);
352 sortList[ipoint] = delta;
353 }
354 TMath::Sort(npoints, sortList, indexList, false);
355 for (Int_t ip=0; ip<npoints; ip++){
356 Double_t t = ip/(Double_t)npoints;
357 (*fWeights)(indexList[ip]) = fWeightFunction->Eval(t);
358 }
359
360 // set up the fitter
361 fUseRobust = false;
362 for (Int_t iparam=0; iparam<nparam; iparam++){
363 minuit->SetParameter(iparam, Form("p[%d]",iparam), (*fParam)(iparam), (*fParam)(iparam)/10, (*fParamLimits)(iparam, 0), (*fParamLimits)(iparam, 1));
364 }
365 // start fitting
366 if (fMaxCalls == 500 && fPrecision == 1) minuit->ExecuteCommand(fFitAlgorithm, 0, 0);
367 if (fMaxCalls != 500 || fPrecision != 1) minuit->ExecuteCommand(fFitAlgorithm, argList, 2);
368 fUseRobust = true;
369
370 delete sortList;
371 delete indexList;
372 }
373
4b23a517 374 // fill parameter vector
375 for (Int_t ivar=0; ivar<nparam; ivar++){
376 (*fParam)(ivar) = minuit->GetParameter(ivar);
dc697472 377 fFormula->SetParameter(ivar, minuit->GetParameter(ivar));
4b23a517 378 }
379
380 // fill covariance matrix
381 fCovar = new TMatrixD(nparam, nparam);
4b23a517 382 for(Int_t i=0; i < nparam; i++) {
383 for(Int_t j=0; j < nparam; j++) {
384 (*fCovar)(i,j) = minuit->GetCovarianceMatrixElement(i,j);
385 }
386 }
dc697472 387
388 if (weightFlag == false) fWeightFunction = 0;
4b23a517 389}
390
391
392
393void AliTMinuitToolkit::Test() {
394 //
395 // This test function shows the basic working principles of this class
396 // and illustrates how a robust fit can improve the results
397 //
398 TFormula *FormExp = new TFormula("formExp", "[0]*TMath::Exp(-[1]*x)");
399 SetFitFunction(FormExp);
dc697472 400 SetFitAlgorithm("migrad");
4b23a517 401 // Set initial values
402 TVectorD *vec1 = new TVectorD(2);
403 (*vec1)(0) = 1800;
404 (*vec1)(1) = 1;
405 SetInitialParam(vec1);
406 //provide some example histogram
dc697472 407 TH1F * hist = new TH1F("test", "with (red) and without (black) robust option", 20,0,4);
4b23a517 408 TRandom * rand = new TRandom();
409 for (Int_t i = 0; i < 10000; i++) {
410 hist->Fill(rand->Exp(1));
411 if (i < 1000) hist->Fill(3); //"outliers"
412 if (i < 1070) hist->Fill(3.5);
413 if (i < 670) hist->Fill(2);
414 if (i < 770) hist->Fill(1.5);//"outliers"
415 if (i < 740) hist->Fill(1);
416 }
417 TCanvas * canv = new TCanvas();
418 canv->cd(1);
419 hist->Draw();
dc697472 420
421 // 1. fit it with the exponential decay - no robust
4b23a517 422 FitHistogram(hist);
423 // draw fit function
424 TF1 *func = new TF1("test", "[0]*TMath::Exp(-[1]*x)", 0, 6);
425 func->SetParameter(0, (*GetParameters())(0));
426 func->SetParameter(1, (*GetParameters())(1));
427 func->Draw("same");
dc697472 428
429 // 2 . robust fit
4b23a517 430 TVectorD *vec2 = new TVectorD(2);
431 (*vec2)(0) = 1800;
432 (*vec2)(1) = 1;
433 SetInitialParam(vec2);
dc697472 434 EnableRobust(true, 10);
435 SetWeightFunction("box", 0.75);
4b23a517 436 FitHistogram(hist);
437 TF1 *func2 = new TF1("test2", "[0]*TMath::Exp(-[1]*x)", 0, 6);
438 func2->SetParameter(0, (*GetParameters())(0));
439 func2->SetParameter(1, (*GetParameters())(1));
440 func2->SetLineColor(kRed);
441 func2->Draw("same");
442
443}
444