/**************************************************************************
 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
 *                                                                        *
 * Author: The ALICE Off-line Project.                                    *
 * Contributors are mentioned in the code where appropriate.              *
 *                                                                        *
 * Permission to use, copy, modify and distribute this software and its   *
 * documentation strictly for non-commercial purposes is hereby granted   *
 * without fee, provided that the above copyright notice appears in all   *
 * copies and that both the copyright notice and this permission notice   *
 * appear in the supporting documentation. The authors make no claims     *
 * about the suitability of this software for any purpose. It is          *
 * provided "as is" without express or implied warranty.                  *
 **************************************************************************/


///////////////////////////////////////////////////////////////////////////
// Class TStatToolkit
//
// Subset of mathematical functions not included in TMath
//

///////////////////////////////////////////////////////////////////////////
#include "TMath.h"
#include "Riostream.h"
#include "TH1F.h"
#include "TH2F.h"
#include "TH3.h"
#include "TF1.h"
#include "TTree.h"
#include "TChain.h"
#include "TObjString.h"
#include "TLinearFitter.h"
#include "TGraph2D.h"
#include "TGraph.h"
#include "TGraphErrors.h"
#include "TMultiGraph.h"
#include "TCanvas.h"
#include "TLatex.h"
#include "TCut.h"

//
// includes necessary for test functions
//
#include "TSystem.h"
#include "TRandom.h"
#include "TStopwatch.h"
#include "TTreeStream.h"

#include "TStatToolkit.h"


using std::cout;
using std::cerr;
using std::endl;


ClassImp(TStatToolkit) // Class implementation to enable ROOT I/O

TStatToolkit::TStatToolkit() : TObject()
{
  //
  // Default constructor
  //
}
///////////////////////////////////////////////////////////////////////////
TStatToolkit::~TStatToolkit()
{
  //
  // Destructor
  //
}


//_____________________________________________________________________________
void TStatToolkit::EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean
                              , Double_t &sigma, Int_t hh)
{
  //
  // Robust estimator for the 1D case, MI version (faster than the ROOT version)
  //
  // For the univariate case the estimates of location and scatter are returned
  // in the mean and sigma parameters. The algorithm works on the same principle
  // as in the multivariate case - it finds the subset of size hh with the
  // smallest sigma and then returns the mean and sigma of this subset.
  //
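  // Illustrative usage (a sketch; "residuals" and its length "n" are assumed to
  // be provided by the caller, they are not part of this class):
  //   Double_t mean=0, sigma=0;
  //   TStatToolkit::EvaluateUni(n, residuals, mean, sigma, Int_t(0.7*n));
  //   // mean and sigma now describe the most compact 70% subset of the data
  //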

  if (hh==0)
    hh=(nvectors+2)/2;
  Double_t faclts[]={2.6477,2.5092,2.3826,2.2662,2.1587,2.0589,1.9660,1.879,1.7973,1.7203,1.6473};
  Int_t *index=new Int_t[nvectors];
  TMath::Sort(nvectors, data, index, kFALSE);

  // normalization factor to make the truncated sigma consistent with a Gaussian
  Int_t    nquant = TMath::Min(Int_t(Double_t(((hh*1./nvectors)-0.5)*40))+1, 11);
  Double_t factor = faclts[TMath::Max(0,nquant-1)];

  Double_t sumx  =0;
  Double_t sumx2 =0;
  Int_t    bestindex = -1;
  Double_t bestmean  = 0;
  Double_t bestsigma = (data[index[nvectors-1]]-data[index[0]]+1.);   // maximal possible sigma
  bestsigma *=bestsigma;

  for (Int_t i=0; i<hh; i++){
    sumx  += data[index[i]];
    sumx2 += data[index[i]]*data[index[i]];
  }

  // slide a window of hh consecutive (sorted) points and keep the one with the smallest variance
  Double_t norm  = 1./Double_t(hh);
  Double_t norm2 = (hh-1)>0 ? 1./Double_t(hh-1):1;
  for (Int_t i=hh; i<nvectors; i++){
    Double_t cmean  = sumx*norm;
    Double_t csigma = (sumx2 - hh*cmean*cmean)*norm2;
    if (csigma<bestsigma){
      bestmean  = cmean;
      bestsigma = csigma;
      bestindex = i-hh;
    }

    sumx  += data[index[i]]-data[index[i-hh]];
    sumx2 += data[index[i]]*data[index[i]]-data[index[i-hh]]*data[index[i-hh]];
  }

  Double_t bstd=factor*TMath::Sqrt(TMath::Abs(bestsigma));
  mean  = bestmean;
  sigma = bstd;
  delete [] index;

}



void TStatToolkit::EvaluateUniExternal(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh, Float_t externalfactor)
{
  // Modified version of the ROOT robust EvaluateUni
  // robust estimator for the 1D case, MI version
  // an external factor is added to include the precision of an external measurement
  //

  if (hh==0)
    hh=(nvectors+2)/2;
  Double_t faclts[]={2.6477,2.5092,2.3826,2.2662,2.1587,2.0589,1.9660,1.879,1.7973,1.7203,1.6473};
  Int_t *index=new Int_t[nvectors];
  TMath::Sort(nvectors, data, index, kFALSE);
  //
  Int_t nquant = TMath::Min(Int_t(Double_t(((hh*1./nvectors)-0.5)*40))+1, 11);
  Double_t factor = faclts[0];
  if (nquant>0){
    // fix proper normalization - Anja
    factor = faclts[nquant-1];
  }

  //
  //
  Double_t sumx  =0;
  Double_t sumx2 =0;
  Int_t    bestindex = -1;
  Double_t bestmean  = 0;
  Double_t bestsigma = -1;
  for (Int_t i=0; i<hh; i++){
    sumx  += data[index[i]];
    sumx2 += data[index[i]]*data[index[i]];
  }
  //
  Double_t kfactor = 2.*externalfactor - externalfactor*externalfactor;
  Double_t norm = 1./Double_t(hh);
  for (Int_t i=hh; i<nvectors; i++){
    Double_t cmean  = sumx*norm;
    Double_t csigma = (sumx2*norm - cmean*cmean*kfactor);
    if (csigma<bestsigma || bestsigma<0){
      bestmean  = cmean;
      bestsigma = csigma;
      bestindex = i-hh;
    }
    //
    //
    sumx  += data[index[i]]-data[index[i-hh]];
    sumx2 += data[index[i]]*data[index[i]]-data[index[i-hh]]*data[index[i-hh]];
  }

  Double_t bstd=factor*TMath::Sqrt(TMath::Abs(bestsigma));
  mean  = bestmean;
  sigma = bstd;
  delete [] index;
}


//_____________________________________________________________________________
Int_t TStatToolkit::Freq(Int_t n, const Int_t *inlist
                        , Int_t *outlist, Bool_t down)
{
  //
  // Sort elements according to their frequency of occurrence
  // The size of the output array has to be 2*n
  //
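  // Illustrative usage (a sketch; the input values are example numbers only):
  //   Int_t in[5]  = {7, 3, 7, 7, 3};
  //   Int_t out[10];                     // has to hold 2*n entries
  //   Int_t ndiff = TStatToolkit::Freq(5, in, out, kTRUE);
  //   // ndiff==2, out = {7,3, 3,2, ...} - (value, count) pairs, most frequent first
  //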

  Int_t * sindexS = new Int_t[n];     // temp array for sorting
  Int_t * sindexF = new Int_t[2*n];
  for (Int_t i=0;i<n;i++) sindexS[i]=0;
  for (Int_t i=0;i<2*n;i++) sindexF[i]=0;
  //
  TMath::Sort(n,inlist, sindexS, down);
  Int_t last     = inlist[sindexS[0]];
  Int_t val      = last;
  sindexF[0]     = 1;
  sindexF[0+n]   = last;
  Int_t countPos = 0;
  //
  // find frequency
  for(Int_t i=1;i<n; i++){
    val = inlist[sindexS[i]];
    if (last == val) sindexF[countPos]++;
    else{
      countPos++;
      sindexF[countPos+n] = val;
      sindexF[countPos]++;
      last =val;
    }
  }
  if (last==val) countPos++;
  // sort according to frequency
  TMath::Sort(countPos, sindexF, sindexS, kTRUE);
  for (Int_t i=0;i<countPos;i++){
    outlist[2*i  ] = sindexF[sindexS[i]+n];
    outlist[2*i+1] = sindexF[sindexS[i]];
  }
  delete [] sindexS;
  delete [] sindexF;

  return countPos;

}

//___TStatToolkit__________________________________________________________________________
void TStatToolkit::TruncatedMean(const TH1 * his, TVectorD *param, Float_t down, Float_t up, Bool_t verbose){
  //
  // Calculate the truncated mean and sigma of a histogram, using only the bins whose
  // cumulative fraction of entries lies between down and up.
  // If param is given: param[0]=maximum, param[1]=truncated mean, param[2]=truncated sigma.
  //
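  // Illustrative usage (a sketch; "hisDeDx" is an assumed, user-provided TH1F):
  //   TVectorD param(3);
  //   TStatToolkit::TruncatedMean(hisDeDx, &param, 0.0, 0.7, kTRUE);
  //   // param[1] - truncated mean of the lowest 70% of the distribution
  //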
  Int_t nbins      = his->GetNbinsX();
  Float_t nentries = his->GetEntries();
  Float_t sum    = 0;
  Float_t mean   = 0;
  Float_t sigma2 = 0;
  Float_t ncumul = 0;
  for (Int_t ibin=1;ibin<nbins; ibin++){
    ncumul+= his->GetBinContent(ibin);
    Float_t fraction = Float_t(ncumul)/Float_t(nentries);
    if (fraction>down && fraction<up){
      sum   +=his->GetBinContent(ibin);
      mean  +=his->GetBinCenter(ibin)*his->GetBinContent(ibin);
      sigma2+=his->GetBinCenter(ibin)*his->GetBinCenter(ibin)*his->GetBinContent(ibin);
    }
  }
  mean/=sum;
  sigma2= TMath::Sqrt(TMath::Abs(sigma2/sum-mean*mean));
  if (param){
    (*param)[0] = his->GetMaximum();
    (*param)[1] = mean;
    (*param)[2] = sigma2;
  }
  if (verbose) printf("Mean\t%f\t Sigma2\t%f\n", mean,sigma2);
}

void TStatToolkit::LTM(TH1F * his, TVectorD *param , Float_t fraction, Bool_t verbose){
  //
  // LTM - robust mean and sigma of a histogram: the bin contents are expanded into
  // data points and passed to EvaluateUni, keeping the given fraction of entries.
  // If param is given: param[0]=maximum, param[1]=robust mean, param[2]=robust sigma.
  //
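  // Illustrative usage (a sketch; "hisNoise" is an assumed, user-provided TH1F):
  //   TVectorD param(3);
  //   TStatToolkit::LTM(hisNoise, &param, 0.7, kFALSE);
  //   // param[1], param[2] - mean and sigma of the most compact 70% of the entries
  //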
  Int_t nbins    = his->GetNbinsX();
  Int_t nentries = (Int_t)his->GetEntries();
  Double_t *data = new Double_t[nentries];
  Int_t npoints=0;
  for (Int_t ibin=1;ibin<nbins; ibin++){
    Float_t entriesI = his->GetBinContent(ibin);
    Float_t xcenter= his->GetBinCenter(ibin);
    for (Int_t ic=0; ic<entriesI; ic++){
      if (npoints<nentries){
        data[npoints]= xcenter;
        npoints++;
      }
    }
  }
  Double_t mean, sigma;
  Int_t npoints2=TMath::Min(Int_t(fraction*Float_t(npoints)),npoints-1);
  npoints2=TMath::Max(Int_t(0.5*Float_t(npoints)),npoints2);
  TStatToolkit::EvaluateUni(npoints, data, mean,sigma,npoints2);
  delete [] data;
  if (verbose) printf("Mean\t%f\t Sigma2\t%f\n", mean,sigma);
  if (param){
    (*param)[0] = his->GetMaximum();
    (*param)[1] = mean;
    (*param)[2] = sigma;
  }
}

Double_t  TStatToolkit::FitGaus(TH1* his, TVectorD *param, TMatrixD */*matrix*/, Float_t xmin, Float_t xmax, Bool_t verbose){
  //
  //  Fit histogram with a gaussian function
  //
  //  Parameters:
  //     return value - chi2; negative if there are not enough points
  //     his          - input histogram
  //     param        - vector with parameters
  //     xmin, xmax   - fit range; if xmin==xmax==0 the full histogram range is used
  //  Fitting:
  //  1. Step - take the logarithm of the bin contents
  //  2. Linear fit (parabola) - more robust, always converges
  //  3. In case of small statistics, bins are averaged
  //
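  // Illustrative usage (a sketch; "hisSignal" is an assumed, user-provided TH1F):
  //   TVectorD param(3);
  //   TMatrixD dummy(3,3);                     // covariance matrix not implemented yet
  //   Double_t chi2 = TStatToolkit::FitGaus(hisSignal, &param, &dummy, 0, 0, kFALSE);
  //   // param[0]=amplitude, param[1]=mean, param[2]=sigma; chi2<0 signals failure
  //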
  static TLinearFitter fitter(3,"pol2");
  TVectorD  par(3);
  TVectorD  sigma(3);
  TMatrixD mat(3,3);
  if (his->GetMaximum()<4) return -1;
  if (his->GetEntries()<12) return -1;
  if (his->GetRMS()<mat.GetTol()) return -1;
  Float_t maxEstimate = his->GetEntries()*his->GetBinWidth(1)/TMath::Sqrt((TMath::TwoPi()*his->GetRMS()));
  Int_t dsmooth = TMath::Nint(6./TMath::Sqrt(maxEstimate));

  if (maxEstimate<1) return -1;
  Int_t nbins    = his->GetNbinsX();
  Int_t npoints=0;
  //


  if (xmin>=xmax){
    xmin = his->GetXaxis()->GetXmin();
    xmax = his->GetXaxis()->GetXmax();
  }
  for (Int_t iter=0; iter<2; iter++){
    fitter.ClearPoints();
    npoints=0;
    for (Int_t ibin=1;ibin<nbins+1; ibin++){
      Int_t countB=1;
      Float_t entriesI = his->GetBinContent(ibin);
      for (Int_t delta = -dsmooth; delta<=dsmooth; delta++){
        if (ibin+delta>1 &&ibin+delta<nbins-1){
          entriesI += his->GetBinContent(ibin+delta);
          countB++;
        }
      }
      entriesI/=countB;
      Double_t xcenter= his->GetBinCenter(ibin);
      if (xcenter<xmin || xcenter>xmax) continue;
      Double_t error=1./TMath::Sqrt(countB);
      Float_t  cont=2;
      if (iter>0){
        if (par[0]+par[1]*xcenter+par[2]*xcenter*xcenter>20) return 0;
        cont = TMath::Exp(par[0]+par[1]*xcenter+par[2]*xcenter*xcenter);
        if (cont>1.) error = 1./TMath::Sqrt(cont*Float_t(countB));
      }
      if (entriesI>1&&cont>1){
        fitter.AddPoint(&xcenter,TMath::Log(Float_t(entriesI)),error);
        npoints++;
      }
    }
    if (npoints>3){
      fitter.Eval();
      fitter.GetParameters(par);
    }else{
      break;
    }
  }
  if (npoints<=3){
    return -1;
  }
  fitter.GetParameters(par);
  fitter.GetCovarianceMatrix(mat);
  if (TMath::Abs(par[1])<mat.GetTol()) return -1;
  if (TMath::Abs(par[2])<mat.GetTol()) return -1;
  Double_t chi2 = fitter.GetChisquare()/Float_t(npoints);
  //fitter.GetParameters();
  if (!param)  param  = new TVectorD(3);
  // if (!matrix) matrix = new TMatrixD(3,3); // Covariance matrix to be implemented
  (*param)[1] = par[1]/(-2.*par[2]);
  (*param)[2] = 1./TMath::Sqrt(TMath::Abs(-2.*par[2]));
  (*param)[0] = TMath::Exp(par[0]+ par[1]* (*param)[1] + par[2]*(*param)[1]*(*param)[1]);
  if (verbose){
    par.Print();
    mat.Print();
    param->Print();
    printf("Chi2=%f\n",chi2);
    TF1 * f1= new TF1("f1","[0]*exp(-(x-[1])^2/(2*[2]*[2]))",his->GetXaxis()->GetXmin(),his->GetXaxis()->GetXmax());
    f1->SetParameter(0, (*param)[0]);
    f1->SetParameter(1, (*param)[1]);
    f1->SetParameter(2, (*param)[2]);
    f1->Draw("same");
  }
  return chi2;
}

Double_t  TStatToolkit::FitGaus(Float_t *arr, Int_t nBins, Float_t xMin, Float_t xMax, TVectorD *param, TMatrixD */*matrix*/, Bool_t verbose){
  //
  //  Fit histogram with a gaussian function
  //
  //  Parameters:
  //     nBins:      size of the array and number of histogram bins
  //     xMin, xMax: histogram range
  //     param:      parameters of the fit (0-Constant, 1-Mean, 2-Sigma)
  //     matrix:     covariance matrix -- not implemented yet, pass a dummy matrix!!!
  //
  //  Return values:
  //    >0: the chi2 returned by TLinearFitter
  //    -3: only three points have been used for the calculation - no fitter was used
  //    -2: only two points have been used for the calculation - center of gravity was used
  //    -1: only one point has been used for the calculation - center of gravity was used
  //    -4: invalid result!!
  //
  //  Fitting:
  //  1. Step - take the logarithm of the array contents
  //  2. Linear fit (parabola) - more robust, always converges
  //
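  // Illustrative usage (a sketch; the array contents are example numbers only):
  //   Float_t arr[7] = {1, 5, 12, 20, 12, 5, 1};
  //   TVectorD param(3);
  //   TMatrixD dummy(3,3);                     // dummy covariance matrix (see above)
  //   Double_t chi2 = TStatToolkit::FitGaus(arr, 7, -3.5, 3.5, &param, &dummy, kFALSE);
  //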
  static TLinearFitter fitter(3,"pol2");
  static TMatrixD mat(3,3);
  static Double_t kTol = mat.GetTol();
  fitter.StoreData(kFALSE);
  fitter.ClearPoints();
  TVectorD  par(3);
  TVectorD  sigma(3);
  TMatrixD matA(3,3);
  TMatrixD b(3,1);
  Float_t rms = TMath::RMS(nBins,arr);
  Float_t max = TMath::MaxElement(nBins,arr);
  Float_t binWidth = (xMax-xMin)/(Float_t)nBins;

  Float_t meanCOG = 0;
  Float_t rms2COG = 0;
  Float_t sumCOG  = 0;

  Float_t entries = 0;
  Int_t nfilled=0;

  for (Int_t i=0; i<nBins; i++){
    entries+=arr[i];
    if (arr[i]>0) nfilled++;
  }

  if (max<4) return -4;
  if (entries<12) return -4;
  if (rms<kTol) return -4;

  Int_t npoints=0;
  //

  //
  for (Int_t ibin=0;ibin<nBins; ibin++){
    Float_t entriesI = arr[ibin];
    if (entriesI>1){
      Double_t xcenter = xMin+(ibin+0.5)*binWidth;

      Float_t error = 1./TMath::Sqrt(entriesI);
      Float_t val = TMath::Log(Float_t(entriesI));
      fitter.AddPoint(&xcenter,val,error);
      if (npoints<3){
        matA(npoints,0)=1;
        matA(npoints,1)=xcenter;
        matA(npoints,2)=xcenter*xcenter;
        b(npoints,0)=val;
        meanCOG+=xcenter*entriesI;
        rms2COG +=xcenter*entriesI*xcenter;
        sumCOG +=entriesI;
      }
      npoints++;
    }
  }


  Double_t chi2 = 0;
  if (npoints>=3){
    if ( npoints == 3 ){
      //analytic calculation of the parameters for three points
      matA.Invert();
      TMatrixD res(1,3);
      res.Mult(matA,b);
      par[0]=res(0,0);
      par[1]=res(0,1);
      par[2]=res(0,2);
      chi2 = -3.;
    } else {
      // use fitter for more than three points
      fitter.Eval();
      fitter.GetParameters(par);
      fitter.GetCovarianceMatrix(mat);
      chi2 = fitter.GetChisquare()/Float_t(npoints);
    }
    if (TMath::Abs(par[1])<kTol) return -4;
    if (TMath::Abs(par[2])<kTol) return -4;

    if (!param)  param  = new TVectorD(3);
    //if (!matrix) matrix = new TMatrixD(3,3); // !!!!might be a memory leak. use a dummy matrix pointer to call this function! // Covariance matrix to be implemented

    (*param)[1] = par[1]/(-2.*par[2]);
    (*param)[2] = 1./TMath::Sqrt(TMath::Abs(-2.*par[2]));
    Double_t lnparam0 = par[0]+ par[1]* (*param)[1] + par[2]*(*param)[1]*(*param)[1];
    if ( lnparam0>307 ) return -4;
    (*param)[0] = TMath::Exp(lnparam0);
    if (verbose){
      par.Print();
      mat.Print();
      param->Print();
      printf("Chi2=%f\n",chi2);
      TF1 * f1= new TF1("f1","[0]*exp(-(x-[1])^2/(2*[2]*[2]))",xMin,xMax);
      f1->SetParameter(0, (*param)[0]);
      f1->SetParameter(1, (*param)[1]);
      f1->SetParameter(2, (*param)[2]);
      f1->Draw("same");
    }
    return chi2;
  }

  if (!param)  param  = new TVectorD(3);   // guard the low-statistics branches below as well
  if (npoints == 2){
    //use center of gravity for 2 points
    meanCOG/=sumCOG;
    rms2COG /=sumCOG;
    (*param)[0] = max;
    (*param)[1] = meanCOG;
    (*param)[2] = TMath::Sqrt(TMath::Abs(meanCOG*meanCOG-rms2COG));
    chi2=-2.;
  }
  if ( npoints == 1 ){
    meanCOG/=sumCOG;
    (*param)[0] = max;
    (*param)[1] = meanCOG;
    (*param)[2] = binWidth/TMath::Sqrt(12);
    chi2=-1.;
  }
  return chi2;

}


Float_t TStatToolkit::GetCOG(const Short_t *arr, Int_t nBins, Float_t xMin, Float_t xMax, Float_t *rms, Float_t *sum)
{
  //
  //  Calculate the center of gravity, rms and sum for the array 'arr' with nBins and an x range from xMin to xMax
  //  return the COG; in case of failure return xMin
  //
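  // Illustrative usage (a sketch; the array contents are example numbers only):
  //   Short_t adc[4] = {1, 6, 8, 2};
  //   Float_t rms=0, sum=0;
  //   Float_t cog = TStatToolkit::GetCOG(adc, 4, 0., 4., &rms, &sum);
  //   // cog - weighted mean position, rms - spread, sum - total content
  //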
  Float_t meanCOG = 0;
  Float_t rms2COG = 0;
  Float_t sumCOG  = 0;
  Int_t npoints   = 0;

  Float_t binWidth = (xMax-xMin)/(Float_t)nBins;

  for (Int_t ibin=0; ibin<nBins; ibin++){
    Float_t entriesI = (Float_t)arr[ibin];
    Double_t xcenter = xMin+(ibin+0.5)*binWidth;
    if ( entriesI>0 ){
      meanCOG += xcenter*entriesI;
      rms2COG += xcenter*entriesI*xcenter;
      sumCOG  += entriesI;
      npoints++;
    }
  }
  if ( sumCOG == 0 ) return xMin;
  meanCOG/=sumCOG;

  if ( rms ){
    rms2COG /=sumCOG;
    (*rms) = TMath::Sqrt(TMath::Abs(meanCOG*meanCOG-rms2COG));
    if ( npoints == 1 ) (*rms) = binWidth/TMath::Sqrt(12);
  }

  if ( sum )
    (*sum) = sumCOG;

  return meanCOG;
}



///////////////////////////////////////////////////////////////
//////////////       TEST functions      /////////////////////
///////////////////////////////////////////////////////////////




void TStatToolkit::TestGausFit(Int_t nhistos){
  //
  // Test the performance of the parabolic gaussian fit and compare it with
  // the ROOT gaussian fit
  // nhistos - number of histograms to be used for the test
  //
  TTreeSRedirector *pcstream = new TTreeSRedirector("fitdebug.root");

  Float_t  *xTrue = new Float_t[nhistos];
  Float_t  *sTrue = new Float_t[nhistos];
  TVectorD **par1 = new TVectorD*[nhistos];
  TVectorD **par2 = new TVectorD*[nhistos];
  TMatrixD dummy(3,3);


  TH1F **h1f = new TH1F*[nhistos];
  TF1  *myg = new TF1("myg","gaus");
  TF1  *fit = new TF1("fit","gaus");
  gRandom->SetSeed(0);

  //init
  for (Int_t i=0;i<nhistos; i++){
    par1[i] = new TVectorD(3);
    par2[i] = new TVectorD(3);
    h1f[i]  = new TH1F(Form("h1f%d",i),Form("h1f%d",i),20,-10,10);
    xTrue[i]= gRandom->Rndm();
    gSystem->Sleep(2);
    sTrue[i]= .75+gRandom->Rndm()*.5;
    myg->SetParameters(1,xTrue[i],sTrue[i]);
    h1f[i]->FillRandom("myg");
  }

  TStopwatch s;
  s.Start();
  //standard gaus fit
  for (Int_t i=0; i<nhistos; i++){
    h1f[i]->Fit(fit,"0q");
    (*par1[i])(0) = fit->GetParameter(0);
    (*par1[i])(1) = fit->GetParameter(1);
    (*par1[i])(2) = fit->GetParameter(2);
  }
  s.Stop();
  printf("Gaussian fit\t");
  s.Print();

  s.Start();
  //TStatToolkit gaus fit
  for (Int_t i=0; i<nhistos; i++){
    TStatToolkit::FitGaus(h1f[i]->GetArray()+1,h1f[i]->GetNbinsX(),h1f[i]->GetXaxis()->GetXmin(),h1f[i]->GetXaxis()->GetXmax(),par2[i],&dummy);
  }

  s.Stop();
  printf("Parabolic fit\t");
  s.Print();
  //write stream
  for (Int_t i=0;i<nhistos; i++){
    Float_t xt  = xTrue[i];
    Float_t st  = sTrue[i];
    (*pcstream)<<"data"
      <<"xTrue="<<xt
      <<"sTrue="<<st
      <<"pg.="<<(par1[i])
      <<"pa.="<<(par2[i])
      <<"\n";
  }
  //delete pointers
  for (Int_t i=0;i<nhistos; i++){
    delete par1[i];
    delete par2[i];
    delete h1f[i];
  }
  delete pcstream;
  delete []h1f;
  delete []xTrue;
  delete []sTrue;
  //
  delete []par1;
  delete []par2;

}



TGraph2D * TStatToolkit::MakeStat2D(TH3 * his, Int_t delta0, Int_t delta1, Int_t type){
  //
  // Make a 2D graph of a statistic of the z distribution in each (x,y) bin of a TH3
  //
  // delta0, delta1 - number of neighbouring x and y bins to integrate
  // type           - 0 - mean, 1 - RMS, 2 - LTM mean, 3 - LTM maximum (param[0]),
  //                  4 - gaussian fit mean, 5 - gaussian fit sigma
  //
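  // Illustrative usage (a sketch; "his3D" is an assumed, user-provided TH3):
  //   TGraph2D *grMean = TStatToolkit::MakeStat2D(his3D, 1, 1, 0);  // mean of z in each (x,y) bin
  //   grMean->Draw("colz");
  //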

  TAxis * xaxis  = his->GetXaxis();
  TAxis * yaxis  = his->GetYaxis();
  //  TAxis * zaxis  = his->GetZaxis();
  Int_t   nbinx  = xaxis->GetNbins();
  Int_t   nbiny  = yaxis->GetNbins();
  char name[1000];
  Int_t icount=0;
  TGraph2D *graph = new TGraph2D(nbinx*nbiny);
  TF1 f1("f1","gaus");
  for (Int_t ix=0; ix<nbinx;ix++)
    for (Int_t iy=0; iy<nbiny;iy++){
      Float_t xcenter = xaxis->GetBinCenter(ix);
      Float_t ycenter = yaxis->GetBinCenter(iy);
      snprintf(name,1000,"%s_%d_%d",his->GetName(), ix,iy);
      TH1 *projection = his->ProjectionZ(name,ix-delta0,ix+delta0,iy-delta1,iy+delta1);
      Float_t stat= 0;
      if (type==0) stat = projection->GetMean();
      if (type==1) stat = projection->GetRMS();
      if (type==2 || type==3){
        TVectorD vec(3);
        TStatToolkit::LTM((TH1F*)projection,&vec,0.7);
        if (type==2) stat= vec[1];
        if (type==3) stat= vec[0];
      }
      if (type==4|| type==5){
        projection->Fit(&f1);
        if (type==4) stat= f1.GetParameter(1);
        if (type==5) stat= f1.GetParameter(2);
      }
      //printf("%d\t%f\t%f\t%f\n", icount,xcenter, ycenter, stat);
      graph->SetPoint(icount,xcenter, ycenter, stat);
      icount++;
    }
  return graph;
}

TGraph * TStatToolkit::MakeStat1D(TH3 * his, Int_t delta1, Int_t type){
  //
  // Make a 1D graph of a statistic of the z distribution in each x bin of a TH3
  // (integrated over the full y range)
  //
  // delta1 - number of neighbouring x bins to integrate
  // type   - 0 - mean, 1 - RMS, 2 - LTM mean, 3 - LTM maximum (param[0]),
  //          4 - gaussian fit mean, 5 - gaussian fit sigma
  //
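  // Illustrative usage (a sketch; "his3D" is an assumed, user-provided TH3):
  //   TGraph *grRMS = TStatToolkit::MakeStat1D(his3D, 1, 1);  // RMS of z as a function of x
  //   grRMS->Draw("alp");
  //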

  TAxis * xaxis  = his->GetXaxis();
  TAxis * yaxis  = his->GetYaxis();
  //  TAxis * zaxis  = his->GetZaxis();
  Int_t   nbinx  = xaxis->GetNbins();
  Int_t   nbiny  = yaxis->GetNbins();
  char name[1000];
  Int_t icount=0;
  TGraph *graph = new TGraph(nbinx);
  TF1 f1("f1","gaus");
  for (Int_t ix=0; ix<nbinx;ix++){
    Float_t xcenter = xaxis->GetBinCenter(ix);
    //    Float_t ycenter = yaxis->GetBinCenter(iy);
    snprintf(name,1000,"%s_%d",his->GetName(), ix);
    TH1 *projection = his->ProjectionZ(name,ix-delta1,ix+delta1,0,nbiny);
    Float_t stat= 0;
    if (type==0) stat = projection->GetMean();
    if (type==1) stat = projection->GetRMS();
    if (type==2 || type==3){
      TVectorD vec(3);
      TStatToolkit::LTM((TH1F*)projection,&vec,0.7);
      if (type==2) stat= vec[1];
      if (type==3) stat= vec[0];
    }
    if (type==4|| type==5){
      projection->Fit(&f1);
      if (type==4) stat= f1.GetParameter(1);
      if (type==5) stat= f1.GetParameter(2);
    }
    //printf("%d\t%f\t%f\t%f\n", icount,xcenter, ycenter, stat);
    graph->SetPoint(icount,xcenter, stat);
    icount++;
  }
  return graph;
}




TString* TStatToolkit::FitPlane(TTree *tree, const char* drawCommand, const char* formula, const char* cuts, Double_t & chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac, Int_t start, Int_t stop,Bool_t fix0){
  //
  // Fit an arbitrary linear combination of terms, specified by formula, to the data
  // specified by drawCommand and cuts.
  // Returns chi2, fitParam and covMatrix, and a TString with the fitted formula.
  //
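  // Illustrative usage (a sketch; the tree and its branches dY, bz and tanPhi are
  // assumed to exist in the caller, they are not defined here):
  //   Double_t chi2=0; Int_t npoints=0;
  //   TVectorD fitParam; TMatrixD covMatrix;
  //   TString *fitStr = TStatToolkit::FitPlane(tree, "dY", "bz++tanPhi", "abs(dY)<1",
  //                                            chi2, npoints, fitParam, covMatrix,
  //                                            0.9, 0, 10000000, kFALSE);
  //   tree->SetAlias("dYfit", fitStr->Data());   // reuse the returned formula as an alias
  //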

  TString formulaStr(formula);
  TString drawStr(drawCommand);
  TString cutStr(cuts);
  TString ferr("1");

  TString strVal(drawCommand);
  if (strVal.Contains(":")){
    TObjArray* valTokens = strVal.Tokenize(":");
    drawStr = valTokens->At(0)->GetName();
    ferr    = valTokens->At(1)->GetName();
    delete valTokens;
  }


  formulaStr.ReplaceAll("++", "~");
  TObjArray* formulaTokens = formulaStr.Tokenize("~");
  Int_t dim = formulaTokens->GetEntriesFast();

  fitParam.ResizeTo(dim);
  covMatrix.ResizeTo(dim,dim);

  TLinearFitter* fitter = new TLinearFitter(dim+1, Form("hyp%d",dim));
  fitter->StoreData(kTRUE);
  fitter->ClearPoints();

  Int_t entries = tree->Draw(drawStr.Data(), cutStr.Data(), "goff", stop-start, start);
  if (entries == -1) {
    delete formulaTokens;
    return new TString("An ERROR has occurred during fitting!");
  }
  Double_t **values = new Double_t*[dim+1] ;
  for (Int_t i=0; i<dim+1; i++) values[i]=NULL;
  //
  entries = tree->Draw(ferr.Data(), cutStr.Data(), "goff", stop-start, start);
  if (entries == -1) {
    delete formulaTokens;
    delete []values;
    return new TString("An ERROR has occurred during fitting!");
  }
  Double_t *errors = new Double_t[entries];
  memcpy(errors, tree->GetV1(), entries*sizeof(Double_t));

  for (Int_t i = 0; i < dim + 1; i++){
    Int_t centries = 0;
    if (i < dim) centries = tree->Draw(((TObjString*)formulaTokens->At(i))->GetName(), cutStr.Data(), "goff", stop-start,start);
    else  centries = tree->Draw(drawStr.Data(), cutStr.Data(), "goff", stop-start,start);

    if (entries != centries) {
      delete []errors;
      delete []values;
      delete formulaTokens;   // release the token array in this error path as well
      return new TString("An ERROR has occurred during fitting!");
    }
    values[i] = new Double_t[entries];
    memcpy(values[i], tree->GetV1(), entries*sizeof(Double_t));
  }

  // add points to the fitter
  for (Int_t i = 0; i < entries; i++){
    Double_t x[1000];
    for (Int_t j=0; j<dim;j++) x[j]=values[j][i];
    fitter->AddPoint(x, values[dim][i], errors[i]);
  }

  fitter->Eval();
  if (frac>0.5 && frac<1){
    fitter->EvalRobust(frac);
  }else{
    if (fix0) {
      fitter->FixParameter(0,0);
      fitter->Eval();
    }
  }
  fitter->GetParameters(fitParam);
  fitter->GetCovarianceMatrix(covMatrix);
  chi2 = fitter->GetChisquare();
  npoints = entries;
  TString *preturnFormula = new TString(Form("( %f+",fitParam[0])), &returnFormula = *preturnFormula;

  for (Int_t iparam = 0; iparam < dim; iparam++) {
    returnFormula.Append(Form("%s*(%f)",((TObjString*)formulaTokens->At(iparam))->GetName(),fitParam[iparam+1]));
    if (iparam < dim-1) returnFormula.Append("+");
  }
  returnFormula.Append(" )");


  for (Int_t j=0; j<dim+1;j++) delete [] values[j];


  delete formulaTokens;
  delete fitter;
  delete[] values;
  delete[] errors;
  return preturnFormula;
}

TString* TStatToolkit::FitPlaneConstrain(TTree *tree, const char* drawCommand, const char* formula, const char* cuts, Double_t & chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac, Int_t start, Int_t stop,Double_t constrain){
  //
  // Fit an arbitrary linear combination of terms, specified by formula, to the data
  // specified by drawCommand and cuts, with an optional constraint on the parameters.
  // Returns chi2, fitParam and covMatrix, and a TString with the fitted formula.
  //

  TString formulaStr(formula);
  TString drawStr(drawCommand);
  TString cutStr(cuts);
  TString ferr("1");

  TString strVal(drawCommand);
  if (strVal.Contains(":")){
    TObjArray* valTokens = strVal.Tokenize(":");
    drawStr = valTokens->At(0)->GetName();
    ferr    = valTokens->At(1)->GetName();
    delete valTokens;
  }


  formulaStr.ReplaceAll("++", "~");
  TObjArray* formulaTokens = formulaStr.Tokenize("~");
  Int_t dim = formulaTokens->GetEntriesFast();

  fitParam.ResizeTo(dim);
  covMatrix.ResizeTo(dim,dim);

  TLinearFitter* fitter = new TLinearFitter(dim+1, Form("hyp%d",dim));
  fitter->StoreData(kTRUE);
  fitter->ClearPoints();

  Int_t entries = tree->Draw(drawStr.Data(), cutStr.Data(), "goff", stop-start, start);
  if (entries == -1) {
    delete formulaTokens;
    return new TString("An ERROR has occurred during fitting!");
  }
  Double_t **values = new Double_t*[dim+1] ;
  for (Int_t i=0; i<dim+1; i++) values[i]=NULL;
  //
  entries = tree->Draw(ferr.Data(), cutStr.Data(), "goff", stop-start, start);
  if (entries == -1) {
    delete formulaTokens;
    delete [] values;
    return new TString("An ERROR has occurred during fitting!");
  }
  Double_t *errors = new Double_t[entries];
  memcpy(errors, tree->GetV1(), entries*sizeof(Double_t));

  for (Int_t i = 0; i < dim + 1; i++){
    Int_t centries = 0;
    if (i < dim) centries = tree->Draw(((TObjString*)formulaTokens->At(i))->GetName(), cutStr.Data(), "goff", stop-start,start);
    else  centries = tree->Draw(drawStr.Data(), cutStr.Data(), "goff", stop-start,start);

    if (entries != centries) {
      delete []errors;
      delete []values;
      delete formulaTokens;
      return new TString("An ERROR has occurred during fitting!");
    }
    values[i] = new Double_t[entries];
    memcpy(values[i], tree->GetV1(), entries*sizeof(Double_t));
  }

  // add points to the fitter
  for (Int_t i = 0; i < entries; i++){
    Double_t x[1000];
    for (Int_t j=0; j<dim;j++) x[j]=values[j][i];
    fitter->AddPoint(x, values[dim][i], errors[i]);
  }
  if (constrain>0){
    for (Int_t i = 0; i < dim; i++){
      Double_t x[1000];
      for (Int_t j=0; j<dim;j++) if (i!=j) x[j]=0;
      x[i]=1.;
      fitter->AddPoint(x, 0, constrain);
    }
  }


  fitter->Eval();
  if (frac>0.5 && frac<1){
    fitter->EvalRobust(frac);
  }
  fitter->GetParameters(fitParam);
  fitter->GetCovarianceMatrix(covMatrix);
  chi2 = fitter->GetChisquare();
  npoints = entries;

  TString *preturnFormula = new TString(Form("( %f+",fitParam[0])), &returnFormula = *preturnFormula;

  for (Int_t iparam = 0; iparam < dim; iparam++) {
    returnFormula.Append(Form("%s*(%f)",((TObjString*)formulaTokens->At(iparam))->GetName(),fitParam[iparam+1]));
    if (iparam < dim-1) returnFormula.Append("+");
  }
  returnFormula.Append(" )");

  for (Int_t j=0; j<dim+1;j++) delete [] values[j];



  delete formulaTokens;
  delete fitter;
  delete[] values;
  delete[] errors;
  return preturnFormula;
}



TString* TStatToolkit::FitPlaneFixed(TTree *tree, const char* drawCommand, const char* formula, const char* cuts, Double_t & chi2, Int_t &npoints, TVectorD &fitParam, TMatrixD &covMatrix, Float_t frac, Int_t start, Int_t stop){
  //
  // Fit an arbitrary linear combination of terms, specified by formula, to the data
  // specified by drawCommand and cuts. In contrast to FitPlane, the fit has no
  // constant term.
  // Returns chi2, fitParam and covMatrix, and a TString with the fitted formula.
  //

  TString formulaStr(formula);
  TString drawStr(drawCommand);
  TString cutStr(cuts);
  TString ferr("1");

  TString strVal(drawCommand);
  if (strVal.Contains(":")){
    TObjArray* valTokens = strVal.Tokenize(":");
    drawStr = valTokens->At(0)->GetName();
    ferr    = valTokens->At(1)->GetName();
    delete valTokens;
  }


  formulaStr.ReplaceAll("++", "~");
  TObjArray* formulaTokens = formulaStr.Tokenize("~");
  Int_t dim = formulaTokens->GetEntriesFast();

  fitParam.ResizeTo(dim);
  covMatrix.ResizeTo(dim,dim);
  TString fitString="x0";
  for (Int_t i=1; i<dim; i++) fitString+=Form("++x%d",i);
  TLinearFitter* fitter = new TLinearFitter(dim, fitString.Data());
  fitter->StoreData(kTRUE);
  fitter->ClearPoints();

  Int_t entries = tree->Draw(drawStr.Data(), cutStr.Data(), "goff", stop-start, start);
  if (entries == -1) {
    delete formulaTokens;
    return new TString("An ERROR has occurred during fitting!");
  }
  Double_t **values = new Double_t*[dim+1] ;
  for (Int_t i=0; i<dim+1; i++) values[i]=NULL;
  //
  entries = tree->Draw(ferr.Data(), cutStr.Data(), "goff", stop-start, start);
  if (entries == -1) {
    delete []values;
    delete formulaTokens;
    return new TString("An ERROR has occurred during fitting!");
  }
  Double_t *errors = new Double_t[entries];
  memcpy(errors, tree->GetV1(), entries*sizeof(Double_t));

  for (Int_t i = 0; i < dim + 1; i++){
    Int_t centries = 0;
    if (i < dim) centries = tree->Draw(((TObjString*)formulaTokens->At(i))->GetName(), cutStr.Data(), "goff", stop-start,start);
    else  centries = tree->Draw(drawStr.Data(), cutStr.Data(), "goff", stop-start,start);

    if (entries != centries) {
      delete []errors;
      delete []values;
      delete formulaTokens;
      return new TString("An ERROR has occurred during fitting!");
    }
    values[i] = new Double_t[entries];
    memcpy(values[i], tree->GetV1(), entries*sizeof(Double_t));
  }

  // add points to the fitter
  for (Int_t i = 0; i < entries; i++){
    Double_t x[1000];
    for (Int_t j=0; j<dim;j++) x[j]=values[j][i];
    fitter->AddPoint(x, values[dim][i], errors[i]);
  }

  fitter->Eval();
  if (frac>0.5 && frac<1){
    fitter->EvalRobust(frac);
  }
  fitter->GetParameters(fitParam);
  fitter->GetCovarianceMatrix(covMatrix);
  chi2 = fitter->GetChisquare();
  npoints = entries;

  TString *preturnFormula = new TString("("), &returnFormula = *preturnFormula;

  for (Int_t iparam = 0; iparam < dim; iparam++) {
    returnFormula.Append(Form("%s*(%f)",((TObjString*)formulaTokens->At(iparam))->GetName(),fitParam[iparam]));
    if (iparam < dim-1) returnFormula.Append("+");
  }
  returnFormula.Append(" )");


  for (Int_t j=0; j<dim+1;j++) delete [] values[j];

  delete formulaTokens;
  delete fitter;
  delete[] values;
  delete[] errors;
  return preturnFormula;
}




Int_t TStatToolkit::GetFitIndex(const TString fString, const TString subString){
  //
  // fString   - ++ separated list of fits
  // subString - ++ separated list of the required substrings
  //
  // return the index of the last fit which contains all required substrings, -1 if none does
  //
  TObjArray *arrFit = fString.Tokenize("++");
  TObjArray *arrSub = subString.Tokenize("++");
  Int_t index=-1;
  for (Int_t i=0; i<arrFit->GetEntries(); i++){
    Bool_t isOK=kTRUE;
    TString str =arrFit->At(i)->GetName();
    for (Int_t isub=0; isub<arrSub->GetEntries(); isub++){
      if (str.Contains(arrSub->At(isub)->GetName())==0) isOK=kFALSE;
    }
    if (isOK) index=i;
  }
  delete arrFit;
  delete arrSub;
  return index;
}


TString  TStatToolkit::FilterFit(const TString &input, const TString filter, TVectorD &param, TMatrixD & covar){
  //
  // Filter the fit expression and build the formula of the corresponding sub-fit
  // input  - ++ separated list of fit terms
  // filter - ++ separated list of substrings which a term must contain to be kept
  //
  TObjArray *array0= input.Tokenize("++");
  TObjArray *array1= filter.Tokenize("++");
  //TString *presult=new TString("(0");
  TString result="(0.0";
  for (Int_t i=0; i<array0->GetEntries(); i++){
    Bool_t isOK=kTRUE;
    TString str(array0->At(i)->GetName());
    for (Int_t j=0; j<array1->GetEntries(); j++){
      if (str.Contains(array1->At(j)->GetName())==0) isOK=kFALSE;
    }
    if (isOK) {
      result+="+"+str;
      result+=Form("*(%f)",param[i+1]);
      printf("%f\t%f\t%s\n",param[i+1], TMath::Sqrt(covar(i+1,i+1)),str.Data());
    }
  }
  result+="-0.)";
  delete array0;
  delete array1;
  return result;
}

void TStatToolkit::Update1D(Double_t delta, Double_t sigma, Int_t s1, TMatrixD &vecXk, TMatrixD &covXk){
  //
  // Update parameters and covariance - with one measurement
  // Input:
  // vecXk - parameter vector    - updated in the function
  // covXk - covariance matrix   - updated in the function
  // delta, sigma, s1 - new measurement, rms of the new measurement and the index of the measured element
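  //
  // The body below is the standard Kalman filter measurement update; as a sketch
  // of the algebra (same symbols as in the code):
  //   y_k = z_k - H_k x_k            (innovation / measurement residual)
  //   S_k = H_k P_k H_k^T + R        (innovation covariance)
  //   K_k = P_k H_k^T S_k^-1         (optimal Kalman gain)
  //   x_k <- x_k + K_k y_k           (updated parameter vector)
  //   P_k <- (1 - K_k H_k) P_k       (updated covariance)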
  const Int_t knMeas=1;
  Int_t knElem=vecXk.GetNrows();

  TMatrixD mat1(knElem,knElem);    // update covariance matrix
  TMatrixD matHk(1,knElem);        // vector to measurement
  TMatrixD vecYk(knMeas,1);        // Innovation or measurement residual
  TMatrixD matHkT(knElem,knMeas);  // helper matrix Hk transpose
  TMatrixD matSk(knMeas,knMeas);   // Innovation (or residual) covariance
  TMatrixD matKk(knElem,knMeas);   // Optimal Kalman gain
  TMatrixD covXk2(knElem,knElem);  // helper matrix
  TMatrixD covXk3(knElem,knElem);  // helper matrix
  TMatrixD vecZk(1,1);
  TMatrixD measR(1,1);
  vecZk(0,0)=delta;
  measR(0,0)=sigma*sigma;
  //
  // reset matHk
  for (Int_t iel=0;iel<knElem;iel++)
    for (Int_t ip=0;ip<knMeas;ip++) matHk(ip,iel)=0;
  //mat1
  for (Int_t iel=0;iel<knElem;iel++) {
    for (Int_t jel=0;jel<knElem;jel++) mat1(iel,jel)=0;
    mat1(iel,iel)=1;
  }
  //
  matHk(0, s1)=1;
  vecYk = vecZk-matHk*vecXk;               // Innovation or measurement residual
  matHkT=matHk.T(); matHk.T();
  matSk = (matHk*(covXk*matHkT))+measR;    // Innovation (or residual) covariance
  matSk.Invert();
  matKk = (covXk*matHkT)*matSk;            // Optimal Kalman gain
  vecXk += matKk*vecYk;                    // updated vector
  covXk2= (mat1-(matKk*matHk));
  covXk3 = covXk2*covXk;
  covXk = covXk3;
  Int_t nrows=covXk3.GetNrows();

  for (Int_t irow=0; irow<nrows; irow++)
    for (Int_t icol=0; icol<nrows; icol++){
      // rounding problems - make the matrix symmetric again
      covXk(irow,icol)=(covXk3(irow,icol)+covXk3(icol,irow))*0.5;
    }
}



void TStatToolkit::Constrain1D(const TString &input, const TString filter, TVectorD &param, TMatrixD & covar, Double_t mean, Double_t sigma){
  //
  // constrain the linear fit
  // input       - string description of the fit function
  // filter      - string filter to select the sub fits
  // param,covar - parameters and covariance matrix of the fit
  // mean,sigma  - new measurement using which the fit is updated
  //

  TObjArray *array0= input.Tokenize("++");
  TObjArray *array1= filter.Tokenize("++");
  TMatrixD paramM(param.GetNrows(),1);
  for (Int_t i=0; i<=array0->GetEntries(); i++){paramM(i,0)=param(i);}

  if (filter.Length()==0){
    TStatToolkit::Update1D(mean, sigma, 0, paramM, covar);//
  }else{
    for (Int_t i=0; i<array0->GetEntries(); i++){
      Bool_t isOK=kTRUE;
      TString str(array0->At(i)->GetName());
      for (Int_t j=0; j<array1->GetEntries(); j++){
        if (str.Contains(array1->At(j)->GetName())==0) isOK=kFALSE;
      }
      if (isOK) {
        TStatToolkit::Update1D(mean, sigma, i+1, paramM, covar);//
      }
    }
  }
  for (Int_t i=0; i<=array0->GetEntries(); i++){
    param(i)=paramM(i,0);
  }
  delete array0;
  delete array1;
}

TString  TStatToolkit::MakeFitString(const TString &input, const TVectorD &param, const TMatrixD & covar, Bool_t verbose){
  //
  // Compose the formula string corresponding to the fit result
  // input       - ++ separated list of fit terms
  // param,covar - fit parameters and covariance matrix
  //
  TObjArray *array0= input.Tokenize("++");
  TString result=Form("(%f",param[0]);
  printf("%f\t%f\t\n", param[0], TMath::Sqrt(covar(0,0)));
  for (Int_t i=0; i<array0->GetEntries(); i++){
    TString str(array0->At(i)->GetName());
    result+="+"+str;
    result+=Form("*(%f)",param[i+1]);
    if (verbose) printf("%f\t%f\t%s\n", param[i+1], TMath::Sqrt(covar(i+1,i+1)),str.Data());
  }
  result+="-0.)";
  delete array0;
  return result;
}

TGraphErrors * TStatToolkit::MakeGraphErrors(TTree * tree, const char * expr, const char * cut, Int_t mstyle, Int_t mcolor, Float_t msize, Float_t offset){
  //
  // Query the tree and return a TGraphErrors specified by expr and cut
  // Example usage: TStatToolkit::MakeGraphErrors(tree,"Y:X:ErrY","X>0", 25,2,0.4)
  // tree - tree with the variables
  // expr - draw expression in the form Y:X or Y:X:ErrY
  //
  const Int_t entries = tree->Draw(expr,cut,"goff");
  if (entries<=0) {
    TStatToolkit t;
    t.Error("TStatToolkit::MakeGraphErrors",Form("Empty or not valid expression (%s) or cut (%s)", expr, cut));
    return 0;
  }
  if ( tree->GetV2()==0){
    TStatToolkit t;
    t.Error("TStatToolkit::MakeGraphErrors",Form("Not valid expression (%s) ", expr));
    return 0;
  }
  TGraphErrors * graph=0;
  if ( tree->GetV3()!=0){
    graph = new TGraphErrors (entries, tree->GetV2(),tree->GetV1(),0,tree->GetV3());
  }else{
    graph = new TGraphErrors (entries, tree->GetV2(),tree->GetV1(),0,0);
  }
  graph->SetMarkerStyle(mstyle);
  graph->SetMarkerColor(mcolor);
  graph->SetLineColor(mcolor);
  if (msize>0) graph->SetMarkerSize(msize);
  for(Int_t i=0;i<graph->GetN();i++) graph->GetX()[i]+=offset;
  return graph;

}


TGraph * TStatToolkit::MakeGraphSparse(TTree * tree, const char * expr, const char * cut, Int_t mstyle, Int_t mcolor, Float_t msize, Float_t offset){
  //
  // Make a sparse draw of the variables
  // Format of expr: Var:Run or Var:Run:ErrorY or Var:Run:ErrorY:ErrorX
  // offset: points can be slightly shifted in x for better visibility when several graphs are drawn
  //
  // Written by Weilin.Yu
  // updated & merged with QA-code by Patrick Reichelt
  //
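  // Illustrative usage (a sketch; the branches meanTPCncl and run are assumed to
  // exist in the trending tree, they are not defined here):
  //   TGraph *gr = TStatToolkit::MakeGraphSparse(tree, "meanTPCncl:run", "", 20, kBlue, 1.0, 0.);
  //   gr->Draw("ap");   // the x axis shows the run numbers as equidistant labelled bins
  //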
  const Int_t entries = tree->Draw(expr,cut,"goff");
  if (entries<=0) {
    TStatToolkit t;
    t.Error("TStatToolkit::MakeGraphSparse",Form("Empty or Not valid expression (%s) or cut (%s)", expr, cut));
    return 0;
  }
  //  TGraph * graph = (TGraph*)gPad->GetPrimitive("Graph"); // 2D

  Double_t *graphY, *graphX;
  graphY = tree->GetV1();
  graphX = tree->GetV2();

  // sort according to run number
  Int_t *index = new Int_t[entries*4];
  TMath::Sort(entries,graphX,index,kFALSE);

  // define arrays for the new graph
  Double_t *unsortedX = new Double_t[entries];
  Int_t *runNumber = new Int_t[entries];
  Double_t count = 0.5;

  // evaluate arrays for the new graph according to the run-number
  Int_t icount=0;
  //first entry
  unsortedX[index[0]] = count;
  runNumber[0] = graphX[index[0]];
  // loop the rest of entries
  for(Int_t i=1;i<entries;i++)
  {
    if(graphX[index[i]]==graphX[index[i-1]])
      unsortedX[index[i]] = count;
    else if(graphX[index[i]]!=graphX[index[i-1]]){
      count++;
      icount++;
      unsortedX[index[i]] = count;
      runNumber[icount]=graphX[index[i]];
    }
  }

  // count the number of xbins (run-wise) for the new graph
  const Int_t newNbins = int(count+0.5);
  Double_t *newBins = new Double_t[newNbins+1];
  for(Int_t i=0; i<=count+1;i++){
    newBins[i] = i;
  }

  // define and fill the new graph
  TGraph *graphNew = 0;
  if (tree->GetV3()) {
    if (tree->GetV4()) {
      graphNew = new TGraphErrors(entries,unsortedX,graphY,tree->GetV4(),tree->GetV3());
    }
    else { graphNew = new TGraphErrors(entries,unsortedX,graphY,0,tree->GetV3()); }
  }
  else { graphNew = new TGraphErrors(entries,unsortedX,graphY,0,0); }
  // with "Set(...)", the x-axis is being sorted
  graphNew->GetXaxis()->Set(newNbins,newBins);

  // set the bins for the x-axis, apply shifting of points
  Char_t xName[50];
  for(Int_t i=0;i<count;i++){
    snprintf(xName,50,"%d",runNumber[i]);
    graphNew->GetXaxis()->SetBinLabel(i+1,xName);
    graphNew->GetX()[i]+=offset;
  }

  graphNew->GetHistogram()->SetTitle("");
  graphNew->SetMarkerStyle(mstyle);
  graphNew->SetMarkerColor(mcolor);
  if (msize>0) graphNew->SetMarkerSize(msize);
  delete [] unsortedX;
  delete [] runNumber;
  delete [] index;
  delete [] newBins;
  //
  return graphNew;
}




//
// functions used for the trending
//

Int_t  TStatToolkit::MakeStatAlias(TTree * tree, const char * expr, const char * cut, const char * alias)
{
  //
  // Add aliases using statistical values of a given variable.
  // (by MI, Patrick Reichelt)
  //
  // tree   - input tree
  // expr   - variable expression
  // cut    - selection criteria
  // Output - return the number of entries used to define the variable
  // In addition: mean, rms, median, and robust mean and rms (choosing the fraction of data with the smallest RMS)
  //
  /* Example usage:
     1.) create the robust estimators for variable expr="QA.TPC.CPass1.meanTPCncl" and create the corresponding
         aliases with the prefix alias[0]="ncl", calculated using the fraction alias[1]="0.90"

     TStatToolkit::MakeStatAlias(tree,"QA.TPC.CPass1.meanTPCncl","QA.TPC.CPass1.status>0","ncl:0.9");
     root [4] tree->GetListOfAliases().Print()
     OBJ: TNamed    ncl_Median      (130.964333+0)
     OBJ: TNamed    ncl_Mean        (122.120387+0)
     OBJ: TNamed    ncl_RMS         (33.509623+0)
     OBJ: TNamed    ncl_Mean90      (131.503862+0)
     OBJ: TNamed    ncl_RMS90       (3.738260+0)
  */
  //
  Int_t entries = tree->Draw(expr,cut,"goff");
  if (entries<=1){
    printf("Expression or cut not valid:\t%s\t%s\n", expr, cut);
    return 0;
  }
  //
  TObjArray* oaAlias = TString(alias).Tokenize(":");
  if (oaAlias->GetEntries()<2) return 0;
  Float_t entryFraction = atof( oaAlias->At(1)->GetName() );
  //
  Double_t median = TMath::Median(entries,tree->GetV1());
  Double_t mean   = TMath::Mean(entries,tree->GetV1());
  Double_t rms    = TMath::RMS(entries,tree->GetV1());
  Double_t meanEF=0, rmsEF=0;
  TStatToolkit::EvaluateUni(entries, tree->GetV1(), meanEF, rmsEF, entries*entryFraction);
  //
  tree->SetAlias(Form("%s_Median",oaAlias->At(0)->GetName()), Form("(%f+0)",median));
  tree->SetAlias(Form("%s_Mean",oaAlias->At(0)->GetName()),   Form("(%f+0)",mean));
  tree->SetAlias(Form("%s_RMS",oaAlias->At(0)->GetName()),    Form("(%f+0)",rms));
  tree->SetAlias(Form("%s_Mean%d",oaAlias->At(0)->GetName(),Int_t(entryFraction*100)), Form("(%f+0)",meanEF));
  tree->SetAlias(Form("%s_RMS%d",oaAlias->At(0)->GetName(),Int_t(entryFraction*100)),  Form("(%f+0)",rmsEF));
  delete oaAlias;
  return entries;
}

Int_t  TStatToolkit::SetStatusAlias(TTree * tree, const char * expr, const char * cut, const char * alias)
{
  //
  // Add an alias to the trending tree using statistical values of a given variable.
  // (by MI, Patrick Reichelt)
  //
  // format of expr  : varname (e.g. meanTPCncl)
  // format of cut   : char like in TCut
  // format of alias : alias:query:entryFraction(EF) (fraction of entries used for uniformity evaluation)
  //                   e.g.: varname_Out:(abs(varname-meanEF)>6.*rmsEF):0.8
  // available internal variables are: 'varname, Median, Mean, MeanEF, RMS, RMSEF'
  // in the alias, 'varname' will be replaced by its content, and 'EF' by the percentage (e.g. MeanEF -> Mean80)
  //
  /* Example usage:
     1.) Define robust mean (possible, but easier done with TStatToolkit::MakeStatAlias(...))
     TStatToolkit::SetStatusAlias(tree, "meanTPCnclF", "meanTPCnclF>0", "meanTPCnclF_MeanEF:MeanEF:0.80") ;
     root [10] tree->GetListOfAliases()->Print()
     Collection name='TList', class='TList', size=1
     OBJ: TNamed    meanTPCnclF_Mean80      0.899308
     2.) create alias for outliers - 6 sigma cut
     TStatToolkit::SetStatusAlias(tree, "meanTPCnclF", "meanTPCnclF>0", "meanTPCnclF_Out:(abs(meanTPCnclF-MeanEF)>6.*RMSEF):0.8")
     meanTPCnclF_Out ==> (abs(meanTPCnclF-0.899308)>6.*0.016590)
     3.) the same functionality as in 2.)
     TStatToolkit::SetStatusAlias(tree, "meanTPCnclF", "meanTPCnclF>0", "varname_Out2:(abs(varname-MeanEF)>6.*RMSEF):0.8")
     meanTPCnclF_Out2 ==> (abs(meanTPCnclF-0.899308)>6.*0.016590)
  */
  //
  Int_t entries = tree->Draw(expr,cut,"goff");
  if (entries<=1){
    printf("Expression or cut not valid:\t%s\t%s\n", expr, cut);
    return 0;
  }
  //
  TObjArray* oaVar = TString(expr).Tokenize(":");
  char varname[50];
  snprintf(varname,50,"%s", oaVar->At(0)->GetName());
  //
  TObjArray* oaAlias = TString(alias).Tokenize(":");
  if (oaAlias->GetEntries()<3) return 0;
  Float_t entryFraction = atof( oaAlias->At(2)->GetName() );
  //
  Double_t median = TMath::Median(entries,tree->GetV1());
  Double_t mean   = TMath::Mean(entries,tree->GetV1());
  Double_t rms    = TMath::RMS(entries,tree->GetV1());
  Double_t meanEF=0, rmsEF=0;
  TStatToolkit::EvaluateUni(entries, tree->GetV1(), meanEF, rmsEF, entries*entryFraction);
  //
  TString sAlias( oaAlias->At(0)->GetName() );
  sAlias.ReplaceAll("varname",varname);
  sAlias.ReplaceAll("MeanEF", Form("Mean%1.0f",entryFraction*100) );
  sAlias.ReplaceAll("RMSEF",  Form("RMS%1.0f",entryFraction*100) );
  TString sQuery( oaAlias->At(1)->GetName() );
  sQuery.ReplaceAll("varname",varname);
  sQuery.ReplaceAll("MeanEF", Form("%f",meanEF) );
  sQuery.ReplaceAll("RMSEF",  Form("%f",rmsEF) ); //make sure to replace 'RMSEF' before 'RMS'...
  sQuery.ReplaceAll("Median", Form("%f",median) );
  sQuery.ReplaceAll("Mean",   Form("%f",mean) );
  sQuery.ReplaceAll("RMS",    Form("%f",rms) );
  printf("define alias:\t%s = %s\n", sAlias.Data(), sQuery.Data());
  //
  char query[200];
  char aname[200];
  snprintf(query,200,"%s", sQuery.Data());
  snprintf(aname,200,"%s", sAlias.Data());
  tree->SetAlias(aname, query);
  delete oaVar;
  delete oaAlias;
  return entries;
}

TMultiGraph*  TStatToolkit::MakeStatusMultGr(TTree * tree, const char * expr, const char * cut, const char * alias, Int_t igr)
{
  //
  // Compute a trending multigraph that shows for which runs a variable has outliers.
  // (by MI, Patrick Reichelt)
  //
  // format of expr  : varname:xaxis (e.g. meanTPCncl:run)
  // format of cut   : char like in TCut
  // format of alias : (1):(varname_Out==0):(varname_Out)[:(varname_Warning):...]
  // in the alias, 'varname' will be replaced by its content (e.g. varname_Out -> meanTPCncl_Out)
  // note: the aliases 'varname_Out' etc. have to be defined by function TStatToolkit::SetStatusAlias(...)
  // the counter igr is used to shift the multigraph in y when filling a TObjArray.
  //
  TObjArray* oaVar = TString(expr).Tokenize(":");
  if (oaVar->GetEntries()<2) return 0;
  char varname[50];
  char var_x[50];
  snprintf(varname,50,"%s", oaVar->At(0)->GetName());
  snprintf(var_x  ,50,"%s", oaVar->At(1)->GetName());
  //
  TString sAlias(alias);
  sAlias.ReplaceAll("varname",varname);
  TObjArray* oaAlias = TString(sAlias.Data()).Tokenize(":");
  if (oaAlias->GetEntries()<3) return 0;
  //
  char query[200];
  TMultiGraph* multGr = new TMultiGraph();
  Int_t marArr[6]    = {24+igr%2, 20+igr%2, 20+igr%2, 20+igr%2, 22, 23};
  Int_t colArr[6]    = {kBlack, kBlack, kRed, kOrange, kMagenta, kViolet};
  Double_t sizArr[6] = {1.2, 1.1, 1.0, 1.0, 1, 1};
  const Int_t ngr = oaAlias->GetEntriesFast();
  for (Int_t i=0; i<ngr; i++){
    if (i==2) continue; // the Fatal(Out) graph will be added at the end to be plotted on top!
    snprintf(query,200, "%f*(%s-0.5):%s", 1.+igr, oaAlias->At(i)->GetName(), var_x);
    multGr->Add( (TGraphErrors*) TStatToolkit::MakeGraphSparse(tree,query,cut,marArr[i],colArr[i],sizArr[i]) );
  }
  snprintf(query,200, "%f*(%s-0.5):%s", 1.+igr, oaAlias->At(2)->GetName(), var_x);
  multGr->Add( (TGraphErrors*) TStatToolkit::MakeGraphSparse(tree,query,cut,marArr[2],colArr[2],sizArr[2]) );
  //
  multGr->SetName(varname);
  multGr->SetTitle(varname); // used for y-axis labels. // details to be included!
  delete oaVar;
  delete oaAlias;
  return multGr;
}


void  TStatToolkit::AddStatusPad(TCanvas* c1, Float_t padratio, Float_t bottommargin)
{
  //
  // add a pad to the bottom of the canvas for the status graphs (by Patrick Reichelt)
  // call the function "DrawStatusGraphs(...)" afterwards
  //
  TCanvas* c1_clone = (TCanvas*) c1->Clone("c1_clone");
  c1->Clear();
  // produce new pads
  c1->cd();
  TPad* pad1 = new TPad("pad1", "pad1", 0., padratio, 1., 1.);
  pad1->Draw();
  pad1->SetNumber(1); // so it can be called via "c1->cd(1);"
  c1->cd();
  TPad* pad2 = new TPad("pad2", "pad2", 0., 0., 1., padratio);
  pad2->Draw();
  pad2->SetNumber(2);
  // draw original canvas into first pad
  c1->cd(1);
  c1_clone->DrawClonePad();
  pad1->SetBottomMargin(0.001);
  pad1->SetRightMargin(0.01);
  // set up second pad
  c1->cd(2);
  pad2->SetGrid(3);
  pad2->SetTopMargin(0);
  pad2->SetBottomMargin(bottommargin); // for the long x-axis labels (run numbers)
  pad2->SetRightMargin(0.01);
}


void  TStatToolkit::DrawStatusGraphs(TObjArray* oaMultGr)
{
  //
  // draw status graphs into the active pad of the canvas (by MI, Patrick Reichelt)
  // ...into the bottom pad, if called after "AddStatusPad(...)"
  //
  const Int_t nvars = oaMultGr->GetEntriesFast();
  TGraph* grAxis = (TGraph*) ((TMultiGraph*) oaMultGr->At(0))->GetListOfGraphs()->At(0);
  grAxis->SetMaximum(0.5*nvars+0.5);
  grAxis->SetMinimum(0);
  grAxis->GetYaxis()->SetLabelSize(0);
  Int_t entries = grAxis->GetN();
  printf("entries (via GetN()) = %d\n",entries);
  grAxis->GetXaxis()->SetLabelSize(5.7*TMath::Min(TMath::Max(5./entries,0.01),0.03));
  grAxis->GetXaxis()->LabelsOption("v");
  grAxis->Draw("ap");
  //
  // draw multigraphs & names of status variables on the y axis
  for (Int_t i=0; i<nvars; i++){
    ((TMultiGraph*) oaMultGr->At(i))->Draw("p");
    TLatex* ylabel = new TLatex(-0.1, 0.5*i+0.5, ((TMultiGraph*) oaMultGr->At(i))->GetTitle());
    ylabel->SetTextAlign(32); //hor:right & vert:centered
    ylabel->SetTextSize(0.025/gPad->GetHNDC());
    ylabel->Draw();
  }
}


void TStatToolkit::DrawHistogram(TTree * tree, const char* drawCommand, const char* cuts, const char* histoname, const char* histotitle, Int_t nsigma, Float_t fraction )
{
  //
  // Draw a histogram from a TTree with a robust range
  // Only 1D and 2D are supported so far!
  //
  // Parameters:
  // - histoname:  name of the histogram
  // - histotitle: title of the histogram
  // - fraction:   fraction of the data used to define the robust mean
  // - nsigma:     nsigma value for the range
  //
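  // Illustrative usage (a sketch; the branch meanTPCncl is assumed to exist in the tree):
  //   TStatToolkit::DrawHistogram(tree, "meanTPCncl", "meanTPCncl>0",
  //                               "hNcl", "mean number of TPC clusters", 5, 0.8);
  //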

  TString drawStr(drawCommand);
  TString cutStr(cuts);
  Int_t dim = 1;

  if(!tree) {
    cerr<<" Tree pointer is NULL!"<<endl;
    return;
  }

  // get entries
  Int_t entries = tree->Draw(drawStr.Data(), cutStr.Data(), "goff");
  if (entries == -1) {
    cerr<<"TTree draw returns -1"<<endl;
    return;
  }

  // get dimension
  if(tree->GetV1()) dim = 1;
  if(tree->GetV2()) dim = 2;
  if(tree->GetV3()) dim = 3;
  if(dim > 2){
    cerr<<"TTree has more than 2 dimensions (not yet supported)"<<endl;
    return;
  }

  // draw robust
  Double_t meanX, rmsX=0;
  Double_t meanY, rmsY=0;
  TStatToolkit::EvaluateUni(entries, tree->GetV1(),meanX,rmsX, fraction*entries);
  if(dim==2){
    TStatToolkit::EvaluateUni(entries, tree->GetV1(),meanY,rmsY, fraction*entries);
    TStatToolkit::EvaluateUni(entries, tree->GetV2(),meanX,rmsX, fraction*entries);
  }
  TH1* hOut;
  if(dim==1){
    hOut = new TH1F(histoname, histotitle, 200, meanX-nsigma*rmsX, meanX+nsigma*rmsX);
    for (Int_t i=0; i<entries; i++) hOut->Fill(tree->GetV1()[i]);
    hOut->GetXaxis()->SetTitle(tree->GetHistogram()->GetXaxis()->GetTitle());
    hOut->Draw();
  }
  else if(dim==2){
    hOut = new TH2F(histoname, histotitle, 200, meanX-nsigma*rmsX, meanX+nsigma*rmsX,200, meanY-nsigma*rmsY, meanY+nsigma*rmsY);
    for (Int_t i=0; i<entries; i++) hOut->Fill(tree->GetV2()[i],tree->GetV1()[i]);
    hOut->GetXaxis()->SetTitle(tree->GetHistogram()->GetXaxis()->GetTitle());
    hOut->GetYaxis()->SetTitle(tree->GetHistogram()->GetYaxis()->GetTitle());
    hOut->Draw("colz");
  }

}