]> git.uio.no Git - u/mrichter/AliRoot.git/blob - STAT/TKDInterpolator.cxx
CPU and Memory tests, coding violations fixed, library
[u/mrichter/AliRoot.git] / STAT / TKDInterpolator.cxx
1 #include "TKDInterpolator.h"
2
3 #include "TLinearFitter.h"
4 #include "TTree.h"
5 #include "TH2.h"
6 #include "TObjArray.h"
7 #include "TObjString.h"
8 #include "TBox.h"
9 #include "TGraph.h"
10 #include "TMarker.h"
11 #include "TVectorD.h"
12 #include "TMatrixD.h"
13
14 ClassImp(TKDInterpolator)
15 ClassImp(TKDInterpolator::TKDNodeInfo)
16
17 /////////////////////////////////////////////////////////////////////
18 // Memory setup of protected data members
19 // fRefPoints : evaluation point of PDF for each terminal node of underlying KD Tree.
20 // | 1st terminal node (fNDim point coordinates) | 2nd terminal node (fNDim point coordinates) | ...
21 //
22 // fRefValues : evaluation value/error of PDF for each terminal node of underlying KD Tree.
23 // | 1st terminal node (value) | 2nd terminal node (value) | ... | 1st terminal node (error) | 2nd terminal node (error) | ...
24 //
25 // status = |0|0|0|0|0|1(tri-cubic weights)|1(STORE)|1 INT(0 COG )|
26 /////////////////////////////////////////////////////////////////////
27
28 //_________________________________________________________________
29 TKDInterpolator::TKDNodeInfo::TKDNodeInfo(const Int_t dim): 
30         fNDim(dim)
31         ,fRefPoint(0x0)
32         ,fRefValue(0.)
33         ,fCov(0x0)
34         ,fPar(0x0)
35 {
36         if(fNDim) Build(dim);
37 }
38
39 //_________________________________________________________________
40 TKDInterpolator::TKDNodeInfo::~TKDNodeInfo()
41 {
42         if(fRefPoint) delete [] fRefPoint;
43         if(fCov){
44                 delete fPar;
45                 delete fCov;
46         }
47 }
48
49 //_________________________________________________________________
50 void TKDInterpolator::TKDNodeInfo::Build(const Int_t dim)
51 {
52 // Allocate/Reallocate space for this node.
53
54         if(!dim) return;
55
56         fNDim = dim;
57         Int_t lambda = Int_t(1 + fNDim + .5*fNDim*(fNDim+1));
58         if(fRefPoint) delete [] fRefPoint;
59         fRefPoint = new Float_t[fNDim];
60         if(fCov){
61                 fCov->ResizeTo(lambda, lambda);
62                 fPar->ResizeTo(lambda);
63         }
64         return;
65 }
66
67 //_________________________________________________________________
68 void TKDInterpolator::TKDNodeInfo::Store(const TVectorD &par, const TMatrixD &cov)
69 {
70         if(!fCov){
71                 fCov = new TMatrixD(cov.GetNrows(), cov.GetNrows());
72                 fPar = new TVectorD(par.GetNrows());
73         }
74         (*fPar) = par;
75         (*fCov) = cov;
76 }
77
78 //_________________________________________________________________
79 Double_t TKDInterpolator::TKDNodeInfo::CookPDF(const Double_t *point, Double_t &result, Double_t &error)
80 {
81 // Recalculate the PDF for one node from the results of interpolation (parameters and covariance matrix)
82
83         if(fNDim>10) return 0.; // support only up to 10 dimensions
84         
85         Int_t lambda = 1 + fNDim + (fNDim*(fNDim+1)>>1);
86         Double_t fdfdp[66];
87         Int_t ipar = 0;
88         fdfdp[ipar++] = 1.;
89         for(int idim=0; idim<fNDim; idim++){
90                 fdfdp[ipar++] = point[idim];
91                 for(int jdim=idim; jdim<fNDim; jdim++) fdfdp[ipar++] = point[idim]*point[jdim];
92         }
93
94         // calculate estimation
95         result =0.; error = 0.;
96         for(int i=0; i<lambda; i++){
97                 result += fdfdp[i]*(*fPar)(i);
98                 for(int j=0; j<lambda; j++) error += fdfdp[i]*fdfdp[j]*(*fCov)(i,j);
99         }       
100         error = TMath::Sqrt(error);
101         
102         //printf("TKDNodeInfo::CookPDF() : %6.3f +- %6.3f\n", result, error);
103
104         return 0.;
105 }
106
107
108 //_________________________________________________________________
109 TKDInterpolator::TKDInterpolator() : TKDTreeIF()
110         ,fNTNodes(0)
111         ,fTNodes(0x0)
112         ,fStatus(4)
113         ,fLambda(0)
114         ,fDepth(-1)
115         ,fAlpha(.5)
116         ,fRefPoints(0x0)
117         ,fBuffer(0x0)
118         ,fKDhelper(0x0)
119         ,fFitter(0x0)
120 {
121 // Default constructor. To be used with care since in this case building
122 // of data structure is completly left to the user responsability.
123 }
124
125 //_________________________________________________________________
126 TKDInterpolator::TKDInterpolator(Int_t npoints, Int_t ndim, UInt_t bsize, Float_t **data) : TKDTreeIF(npoints, ndim, bsize, data)
127         ,fNTNodes(GetNTNodes())
128         ,fTNodes(0x0)
129         ,fStatus(4)
130         ,fLambda(0)
131         ,fDepth(-1)
132         ,fAlpha(.5)
133         ,fRefPoints(0x0)
134         ,fBuffer(0x0)
135         ,fKDhelper(0x0)
136         ,fFitter(0x0)
137 {
138 // Wrapper constructor for the TKDTree.
139
140         Build();
141 }
142
143
144 //_________________________________________________________________
145 TKDInterpolator::TKDInterpolator(TTree *t, const Char_t *var, const Char_t *cut, UInt_t bsize, Long64_t nentries, Long64_t firstentry) : TKDTreeIF()
146         ,fNTNodes(0)
147         ,fTNodes(0x0)
148         ,fStatus(4)
149         ,fLambda(0)
150         ,fDepth(-1)
151         ,fAlpha(.5)
152         ,fRefPoints(0x0)
153         ,fBuffer(0x0)
154         ,fKDhelper(0x0)
155         ,fFitter(0x0)
156 {
157 // Alocate data from a tree. The variables which have to be analysed are
158 // defined in the "var" parameter as a colon separated list. The format should
159 // be identical to that used by TTree::Draw().
160 //
161 // 
162
163         TObjArray *vars = TString(var).Tokenize(":");
164         fNDim = vars->GetEntriesFast(); fNDimm = 2*fNDim;
165         if(fNDim > 6/*kDimMax*/) Warning("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Variable number exceed maximum dimension %d. Results are unpredictable.", 6/*kDimMax*/));
166         fBucketSize = bsize;
167
168         Int_t np;
169         Double_t *v;
170         for(int idim=0; idim<fNDim; idim++){
171                 if(!(np = t->Draw(((TObjString*)(*vars)[idim])->GetName(), cut, "goff", nentries, firstentry))){
172                         Warning("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Can not access data for keys %s. Key defined on tree :", ((TObjString*)(*vars)[idim])->GetName() ));
173                         TIterator *it = (t->GetListOfLeaves())->MakeIterator();
174                         TObject *o;
175                         while((o = (*it)())) printf("\t%s\n", o->GetName());
176                         continue;
177                 }
178                 if(!fNpoints){
179                         fNpoints = np;
180                         //Info("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Allocating %d data points in %d dimensions.", fNpoints, fNDim));
181                         fData = new Float_t*[fNDim];
182                         for(int idim=0; idim<fNDim; idim++) fData[idim] = new Float_t[fNpoints];
183                         kDataOwner = kTRUE;
184                 }
185                 v = t->GetV1();
186                 for(int ip=0; ip<fNpoints; ip++) fData[idim][ip] = (Float_t)v[ip];
187         }
188         TKDTreeIF::Build();
189         Build();
190 }
191
192 //_________________________________________________________________
193 TKDInterpolator::~TKDInterpolator()
194 {
195         if(fFitter) delete fFitter;
196         if(fKDhelper) delete fKDhelper;
197         if(fBuffer) delete [] fBuffer;
198         
199         if(fRefPoints){
200                 for(int idim=0; idim<fNDim; idim++) delete [] fRefPoints[idim] ;
201                 delete [] fRefPoints;
202         }
203         if(fTNodes) delete [] fTNodes;
204 }
205
206 //_________________________________________________________________
207 void TKDInterpolator::Build()
208 {
209 // Fill interpolator's data array i.e.
210 //  - estimation points 
211 //  - corresponding PDF values
212
213         fNTNodes = TKDTreeIF::GetNTNodes();
214         if(!fBoundaries) MakeBoundaries();
215         fLambda = 1 + fNDim + (fNDim*(fNDim+1)>>1);
216         //printf("after MakeBoundaries() %d\n", memory());
217         
218         // allocate memory for data
219         fTNodes = new TKDNodeInfo[fNTNodes];
220         for(int in=0; in<fNTNodes; in++) fTNodes[in].Build(fNDim);
221         //printf("after BuildNodes() %d\n", memory());
222
223         Float_t *bounds = 0x0;
224         Int_t *indexPoints;
225         for(int inode=0, tnode = fNnodes; inode<fNTNodes-1; inode++, tnode++){
226                 fTNodes[inode].fRefValue =  Float_t(fBucketSize)/fNpoints;
227                 bounds = GetBoundary(tnode);
228                 for(int idim=0; idim<fNDim; idim++) fTNodes[inode].fRefValue /= (bounds[2*idim+1] - bounds[2*idim]);
229                 
230                 indexPoints = GetPointsIndexes(tnode);
231                 // loop points in this terminal node
232                 for(int idim=0; idim<fNDim; idim++){
233                         fTNodes[inode].fRefPoint[idim] = 0.;
234                         for(int ip = 0; ip<fBucketSize; ip++){
235 /*                              printf("\t\tindex[%d] = %d %f\n", ip, indexPoints[ip], fData[idim][indexPoints[ip]]);*/
236                                 fTNodes[inode].fRefPoint[idim] += fData[idim][indexPoints[ip]];
237                         }
238                         fTNodes[inode].fRefPoint[idim] /= fBucketSize;
239                 }
240         }
241
242         // analyze last (incomplete) terminal node
243         Int_t counts = fNpoints%fBucketSize;
244         counts = counts ? counts : fBucketSize;
245         Int_t inode = fNTNodes - 1, tnode = inode + fNnodes;
246         fTNodes[inode].fRefValue =  Float_t(counts)/fNpoints;
247         bounds = GetBoundary(tnode);
248         for(int idim=0; idim<fNDim; idim++) fTNodes[inode].fRefValue /= (bounds[2*idim+1] - bounds[2*idim]);
249
250         // loop points in this terminal node
251         indexPoints = GetPointsIndexes(tnode);
252         for(int idim=0; idim<fNDim; idim++){
253                 fTNodes[inode].fRefPoint[idim] = 0.;
254                 for(int ip = 0; ip<counts; ip++) fTNodes[inode].fRefPoint[idim] += fData[idim][indexPoints[ip]];
255                 fTNodes[inode].fRefPoint[idim] /= counts;
256         }
257 }
258
259 //__________________________________________________________________
260 void TKDInterpolator::GetStatus()
261 {
262 // Prints the status of the interpolator
263
264         printf("Interpolator Status :\n");
265         printf("  Method : %s\n", fStatus&1 ? "INT" : "COG");
266         printf("  Store  : %s\n", fStatus&2 ? "YES" : "NO");
267         printf("  Weights: %s\n", fStatus&4 ? "YES" : "NO");
268         return;
269         
270         printf("fNTNodes %d\n", fNTNodes);        //Number of evaluation data points
271         for(int i=0; i<fNTNodes; i++){
272                 printf("%d ", i);
273                 for(int idim=0; idim<fNDim; idim++) printf("%f ", fTNodes[i].fRefPoint[idim]);
274                 printf("[%f] %s\n", fTNodes[i].fRefValue, fTNodes[i].fCov ? "true" : "false");
275                 printf("Fit parameters : ");
276                 if(!fTNodes[i].fPar){
277                         printf("Not defined.\n");
278                         continue;
279                 }
280                 for(int ip=0; ip<3; ip++) printf("p%d[%f] ", ip, (*fTNodes[i].fPar)(ip));
281                 printf("\n");
282         }
283 }
284
285 //_________________________________________________________________
286 Double_t TKDInterpolator::Eval(const Double_t *point, Double_t &result, Double_t &error, Bool_t force)
287 {
288 // Evaluate PDF for "point". The result is returned in "result" and error in "error". The function returns the chi2 of the fit.
289 //
290 // Observations:
291 //
292 // 1. The default method used for interpolation is kCOG.
293 // 2. The initial number of neighbors used for the estimation is set to Int(alpha*fLambda) (alpha = 1.5)
294                         
295         Float_t pointF[50]; // local Float_t conversion for "point"
296         for(int idim=0; idim<fNDim; idim++) pointF[idim] = (Float_t)point[idim];
297         Int_t node = FindNode(pointF) - fNnodes;
298         if((fStatus&1) && fTNodes[node].fCov && !force) return fTNodes[node].CookPDF(point, result, error);
299
300         // Allocate memory
301         if(!fBuffer) fBuffer = new Double_t[2*fLambda];
302         if(!fKDhelper){ 
303                 fRefPoints = new Float_t*[fNDim];
304                 for(int id=0; id<fNDim; id++){ 
305                         fRefPoints[id] = new Float_t[fNTNodes];
306                         for(int in=0; in<fNTNodes; in++) fRefPoints[id][in] = fTNodes[in].fRefPoint[id];
307                 }
308 //              for(int in=0; in<fNTNodes; in++){
309 //                      printf("%3d ", in);
310 //                      for(int id=0; id<fNDim; id++) printf("%6.3f ", fTNodes[in].fRefPoint[id]/*fRefPoints[id][in]*/);
311 //                      printf("\n");
312 //              }
313                 fKDhelper = new TKDTreeIF(fNTNodes, fNDim, 30, fRefPoints);
314                 fKDhelper->MakeBoundaries();
315         }
316         if(!fFitter) fFitter = new TLinearFitter(fLambda, Form("hyp%d", fLambda-1));
317         
318         // generate parabolic for nD
319         //Float_t alpha = Float_t(2*lambda + 1) / fNTNodes; // the bandwidth or smoothing parameter
320         //Int_t npoints = Int_t(alpha * fNTNodes);
321         //printf("Params : %d NPoints %d\n", lambda, npoints);
322         // prepare workers
323
324         Int_t *index,  // indexes of NN 
325               ipar,    // local looping variable
326                                 npoints = Int_t((1.+fAlpha)*fLambda); // number of data points used for interpolation
327         Float_t *dist, // distances of NN
328                                         d,     // NN normalized distance
329                                         w0,    // work
330                                         w;     // tri-cubic weight function
331         Double_t sig   // bucket error 
332                 = TMath::Sqrt(1./fBucketSize);
333
334         do{
335                 // find nearest neighbors
336                 for(int idim=0; idim<fNDim; idim++) pointF[idim] = (Float_t)point[idim];
337                 if(!fKDhelper->FindNearestNeighbors(pointF, npoints+1, index, dist)){
338                         Error("Eval()", Form("Failed retriving %d neighbours for point:", npoints));
339                         for(int idim=0; idim<fNDim; idim++) printf("%f ", point[idim]);
340                         printf("\n");
341                         return -1;
342                 }
343                 // add points to fitter
344                 fFitter->ClearPoints();
345                 TKDNodeInfo *node = 0x0;
346                 for(int in=0; in<npoints; in++){
347                         node = &fTNodes[index[in]];
348                         if(fStatus&1){ // INT
349                                 Float_t *bounds = GetBoundary(FindNode(node->fRefPoint));
350                                 ipar = 0;
351                                 for(int idim=0; idim<fNDim; idim++){
352                                         fBuffer[ipar++] = .5*(bounds[2*idim] + bounds[2*idim+1]);
353                                         fBuffer[ipar++] = (bounds[2*idim]*bounds[2*idim] + bounds[2*idim] * bounds[2*idim+1] + bounds[2*idim+1] * bounds[2*idim+1])/3.;
354                                         for(int jdim=idim+1; jdim<fNDim; jdim++) fBuffer[ipar++] = (bounds[2*idim] + bounds[2*idim+1]) * (bounds[2*jdim] + bounds[2*jdim+1]) * .25;
355                                 }
356                         } else { // COG
357                                 Float_t *p = node->fRefPoint;
358                                 ipar = 0;
359                                 for(int idim=0; idim<fNDim; idim++){
360                                         fBuffer[ipar++] = p[idim];
361                                         for(int jdim=idim; jdim<fNDim; jdim++) fBuffer[ipar++] = p[idim]*p[jdim];
362                                 }
363                         }
364
365                         // calculate tri-cubic weighting function
366                         if(fStatus&4){
367                                 d = dist[in]/ dist[npoints];
368                                 w0 = (1. - d*d*d); w = w0*w0*w0;
369                         } else w = 1.;
370                          
371                         //for(int idim=0; idim<fNDim; idim++) printf("%f ", fBuffer[idim]);
372                         //printf("\nd[%f] w[%f] sig[%f]\n", d, w, sig);
373                         fFitter->AddPoint(fBuffer, node->fRefValue, node->fRefValue*sig/w);
374                 }
375                 npoints += 4;
376         } while(fFitter->Eval());
377
378         // retrive fitter results
379         TMatrixD cov(fLambda, fLambda);
380         TVectorD par(fLambda);
381         fFitter->GetCovarianceMatrix(cov);
382         fFitter->GetParameters(par);
383         Double_t chi2 = fFitter->GetChisquare()/(npoints - 4 - fLambda);
384
385         // store results
386         if(fStatus&2 && fStatus&1) fTNodes[node].Store(par, cov);
387                 
388         // Build df/dpi|x values
389         Double_t *fdfdp = &fBuffer[fLambda];
390         ipar = 0;
391         fdfdp[ipar++] = 1.;
392         for(int idim=0; idim<fNDim; idim++){
393                 fdfdp[ipar++] = point[idim];
394                 for(int jdim=idim; jdim<fNDim; jdim++) fdfdp[ipar++] = point[idim]*point[jdim];
395         }
396
397         // calculate estimation
398         result =0.; error = 0.;
399         for(int i=0; i<fLambda; i++){
400                 result += fdfdp[i]*par(i);
401                 for(int j=0; j<fLambda; j++) error += fdfdp[i]*fdfdp[j]*cov(i,j);
402         }       
403         error = TMath::Sqrt(error);
404
405         return chi2;
406 }
407
408 //_________________________________________________________________
409 void TKDInterpolator::DrawNodes(UInt_t ax1, UInt_t ax2, Int_t depth)
410 {
411 // Draw nodes structure projected on plane "ax1:ax2". The parameter
412 // "depth" specifies the bucket size per node. If depth == -1 draw only
413 // terminal nodes and evaluation points (default -1 i.e. bucket size per node equal bucket size specified by the user)
414 //
415 // Observation:
416 // This function creates the nodes (TBox) array for the specified depth
417 // but don't delete it. Abusing this function may cause memory leaks !
418
419
420         if(!fBoundaries) MakeBoundaries();
421
422         // Count nodes in specific view
423         Int_t nnodes = 0;
424         for(int inode = 0; inode <= 2*fNnodes; inode++){
425                 if(depth == -1){
426                         if(!IsTerminal(inode)) continue;
427                 } else if((inode+1) >> depth != 1) continue;
428                 nnodes++;
429         }
430
431         //printf("depth %d nodes %d\n", depth, nnodes);
432         
433         TH2 *h2 = new TH2S("hNodes", "", 100, fRange[2*ax1], fRange[2*ax1+1], 100, fRange[2*ax2], fRange[2*ax2+1]);
434         h2->GetXaxis()->SetTitle(Form("x_{%d}", ax1));
435         h2->GetYaxis()->SetTitle(Form("x_{%d}", ax2));
436         h2->Draw();
437         
438         const Float_t kBorder = 0.;//1.E-4;
439         TBox *nodeArray = new TBox[nnodes], *node;
440         Float_t *bounds = 0x0;
441         nnodes = 0;
442         for(int inode = 0; inode <= 2*fNnodes; inode++){
443                 if(depth == -1){
444                         if(!IsTerminal(inode)) continue;
445                 } else if((inode+1) >> depth != 1) continue;
446
447                 node = &nodeArray[nnodes++];
448                 //node = new TBox(bounds[2*ax1]+border, bounds[2*ax2]+border, bounds[2*ax1+1]-border, bounds[2*ax2+1]-border);
449                 node->SetFillStyle(3002);       
450                 node->SetFillColor(50+inode/*Int_t(gRandom->Uniform()*50.)*/);
451                 bounds = GetBoundary(inode);
452                 node->DrawBox(bounds[2*ax1]+kBorder, bounds[2*ax2]+kBorder, bounds[2*ax1+1]-kBorder, bounds[2*ax2+1]-kBorder);
453         }
454         if(depth != -1) return;
455
456         // Draw reference points
457         TGraph *ref = new TGraph(fNTNodes);
458         ref->SetMarkerStyle(3);
459         ref->SetMarkerSize(.7);
460         ref->SetMarkerColor(2);
461         for(int inode = 0; inode < fNTNodes; inode++) ref->SetPoint(inode, fTNodes[inode].fRefPoint[ax1], fTNodes[inode].fRefPoint[ax2]);
462         ref->Draw("p");
463         return;
464 }
465
466 //_________________________________________________________________
467 void TKDInterpolator::DrawNode(Int_t tnode, UInt_t ax1, UInt_t ax2)
468 {
469 // Draw node "node" and the data points within.
470 //
471 // Observation:
472 // This function creates some graphical objects
473 // but don't delete it. Abusing this function may cause memory leaks !
474
475         if(tnode < 0 || tnode >= fNTNodes){
476                 Warning("DrawNode()", Form("Terminal node %d outside defined range.", tnode));
477                 return;
478         }
479
480         Int_t inode = tnode;
481         tnode += fNnodes;
482         // select zone of interest in the indexes array
483         Int_t *index = GetPointsIndexes(tnode);
484         Int_t nPoints = (tnode == 2*fNnodes) ? fNpoints%fBucketSize : fBucketSize;
485
486         // draw data points
487         TGraph *g = new TGraph(nPoints);
488         g->SetMarkerStyle(7);
489         for(int ip = 0; ip<nPoints; ip++) g->SetPoint(ip, fData[ax1][index[ip]], fData[ax2][index[ip]]);
490
491         // draw estimation point
492         TMarker *m=new TMarker(fTNodes[inode].fRefPoint[ax1], fTNodes[inode].fRefPoint[ax2], 20);
493         m->SetMarkerColor(2);
494         m->SetMarkerSize(1.7);
495         
496         // draw node contour
497         Float_t *bounds = GetBoundary(tnode);
498         TBox *n = new TBox(bounds[2*ax1], bounds[2*ax2], bounds[2*ax1+1], bounds[2*ax2+1]);
499         n->SetFillStyle(0);
500
501         g->Draw("ap");
502         m->Draw();
503         n->Draw();
504         
505         return;
506 }
507
508
509 //__________________________________________________________________
510 void TKDInterpolator::SetInterpolationMethod(const Bool_t on)
511 {
512 // Set interpolation bit to "on".
513         
514         if(on) fStatus += fStatus&1 ? 0 : 1;
515         else fStatus += fStatus&1 ? -1 : 0;
516 }
517
518
519 //_________________________________________________________________
520 void TKDInterpolator::SetStore(const Bool_t on)
521 {
522 // Set store bit to "on"
523         
524         if(on) fStatus += fStatus&2 ? 0 : 2;
525         else fStatus += fStatus&2 ? -2 : 0;
526 }
527
528 //_________________________________________________________________
529 void TKDInterpolator::SetWeights(const Bool_t on)
530 {
531 // Set weights bit to "on"
532         
533         if(on) fStatus += fStatus&4 ? 0 : 4;
534         else fStatus += fStatus&4 ? -4 : 0;
535 }