Implementation of local interpolation based on COG points
[u/mrichter/AliRoot.git] / STAT / TKDInterpolator.cxx
CommitLineData
f2040a8f 1#include "TKDInterpolator.h"
2
3#include "TLinearFitter.h"
4#include "TVector.h"
5#include "TTree.h"
6#include "TH2.h"
7#include "TObjArray.h"
8#include "TObjString.h"
df84bc73 9#include "TPad.h"
f2040a8f 10#include "TBox.h"
11#include "TGraph.h"
12#include "TMarker.h"
df84bc73 13#include "TRandom.h"
14#include "TROOT.h"
f2040a8f 15
16
17
18ClassImp(TKDInterpolator)
19
20/////////////////////////////////////////////////////////////////////
21// Memory setup of protected data memebers
22// fRefPoints : evaluation point of PDF for each terminal node of underlying KD Tree.
23// | 1st terminal node (fNDim point coordinates) | 2nd terminal node (fNDim point coordinates) | ...
24//
25// fRefValues : evaluation value/error of PDF for each terminal node of underlying KD Tree.
26// | 1st terminal node (value) | 2nd terminal node (value) | ... | 1st terminal node (error) | 2nd terminal node (error) | ...
27/////////////////////////////////////////////////////////////////////
28
29//_________________________________________________________________
30TKDInterpolator::TKDInterpolator() : TKDTreeIF()
31 ,fNTNodes(0)
32 ,fRefPoints(0x0)
33 ,fRefValues(0x0)
34 ,fDepth(-1)
35 ,fTmpPoint(0x0)
36 ,fKDhelper(0x0)
37 ,fFitter(0x0)
38{
df84bc73 39// Default constructor. To be used with care since in this case building
40// of data structure is completly left to the user responsability.
f2040a8f 41}
42
43//_________________________________________________________________
44TKDInterpolator::TKDInterpolator(Int_t npoints, Int_t ndim, UInt_t bsize, Float_t **data) : TKDTreeIF(npoints, ndim, bsize, data)
45 ,fNTNodes(GetNTerminalNodes())
46 ,fRefPoints(0x0)
47 ,fRefValues(0x0)
48 ,fDepth(-1)
49 ,fTmpPoint(0x0)
50 ,fKDhelper(0x0)
51 ,fFitter(0x0)
52{
df84bc73 53// Wrapper constructor for the similar TKDTree one.
54
f2040a8f 55 Build();
56}
57
58
59//_________________________________________________________________
60TKDInterpolator::TKDInterpolator(TTree *t, const Char_t *var, const Char_t *cut, UInt_t bsize) : TKDTreeIF()
61 ,fNTNodes(0)
62 ,fRefPoints(0x0)
63 ,fRefValues(0x0)
64 ,fDepth(-1)
65 ,fTmpPoint(0x0)
66 ,fKDhelper(0x0)
67 ,fFitter(0x0)
68{
69// Alocate data from a tree. The variables which have to be analysed are
70// defined in the "var" parameter as a colon separated list. The format should
71// be identical to that used by TTree::Draw().
72//
73//
74
f2040a8f 75 TObjArray *vars = TString(var).Tokenize(":");
76 fNDim = vars->GetEntriesFast();
df84bc73 77 if(fNDim > 6/*kDimMax*/) Warning("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Variable number exceed maximum dimension %d. Results are unpredictable.", 6/*kDimMax*/));
f2040a8f 78 fBucketSize = bsize;
79
df84bc73 80 Int_t np;
f2040a8f 81 Double_t *v;
82 for(int idim=0; idim<fNDim; idim++){
df84bc73 83 if(!(np = t->Draw(((TObjString*)(*vars)[idim])->GetName(), cut, "goff"))){
84 Warning("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Can not access data for %s", ((TObjString*)(*vars)[idim])->GetName() ));
f2040a8f 85 continue;
86 }
df84bc73 87 if(!fNpoints){
88 fNpoints = np;
89 Info("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Allocating %d data points in %d dimensions.", fNpoints, fNDim));
90 //Float_t *mem = new Float_t[fNDim*fNpoints];
91 fData = new Float_t*[fNDim];
92 for(int idim=0; idim<fNDim; idim++) fData[idim] = new Float_t[fNpoints]; //&mem[idim*fNpoints];
93 kDataOwner = kTRUE;
94 }
f2040a8f 95 v = t->GetV1();
96 for(int ip=0; ip<fNpoints; ip++) fData[idim][ip] = (Float_t)v[ip];
97 }
98 TKDTreeIF::Build();
99 fNTNodes = GetNTerminalNodes();
100 Build();
101}
102
103//_________________________________________________________________
104TKDInterpolator::~TKDInterpolator()
105{
106 if(fFitter) delete fFitter;
107 if(fKDhelper) delete fKDhelper;
108 if(fTmpPoint) delete [] fTmpPoint;
109
110 if(fRefPoints){
111 for(int idim=0; idim<fNDim; idim++) delete [] fRefPoints[idim] ;
112 delete [] fRefPoints;
113 }
114 if(fRefValues) delete [] fRefValues;
115}
116
117//_________________________________________________________________
118void TKDInterpolator::Build()
119{
df84bc73 120// Fill interpolator's data array i.e.
121// - estimation points
122// - corresponding PDF values
123
f2040a8f 124 if(!fBoundaries) MakeBoundaries();
125
126 // allocate memory for data
127 fRefValues = new Float_t[fNTNodes];
128 fRefPoints = new Float_t*[fNDim];
129 for(int id=0; id<fNDim; id++){
130 fRefPoints[id] = new Float_t[fNTNodes];
131 for(int in=0; in<fNTNodes; in++) fRefPoints[id][in] = 0.;
132 }
133
134 Float_t *bounds = 0x0;
135 Int_t *indexPoints;
136 for(int inode=0, tnode = fNnodes; inode<fNTNodes-1; inode++, tnode++){
137 fRefValues[inode] = Float_t(fBucketSize)/fNpoints;
138 bounds = GetBoundary(tnode);
139 for(int idim=0; idim<fNDim; idim++) fRefValues[inode] /= (bounds[2*idim+1] - bounds[2*idim]);
140
141 indexPoints = GetPointsIndexes(tnode);
142 // loop points in this terminal node
143 for(int idim=0; idim<fNDim; idim++){
144 for(int ip = 0; ip<fBucketSize; ip++) fRefPoints[idim][inode] += fData[idim][indexPoints[ip]];
145 fRefPoints[idim][inode] /= fBucketSize;
146 }
147 }
148
149 // analyze last (incomplete) terminal node
150 Int_t counts = fNpoints%fBucketSize;
151 counts = counts ? counts : fBucketSize;
152 Int_t inode = fNTNodes - 1, tnode = inode + fNnodes;
153 fRefValues[inode] = Float_t(counts)/fNpoints;
154 bounds = GetBoundary(tnode);
155 for(int idim=0; idim<fNDim; idim++) fRefValues[inode] /= (bounds[2*idim+1] - bounds[2*idim]);
156
157 indexPoints = GetPointsIndexes(tnode);
158 // loop points in this terminal node
159 for(int idim=0; idim<fNDim; idim++){
160 for(int ip = 0; ip<counts; ip++) fRefPoints[idim][inode] += fData[idim][indexPoints[ip]];
161 fRefPoints[idim][inode] /= counts;
162 }
163}
164
165//_________________________________________________________________
df84bc73 166Double_t TKDInterpolator::Eval(const Double_t *point, Int_t npoints)
f2040a8f 167{
df84bc73 168// Evaluate PDF at k-dimensional position "point". The initial number of
169// neighbour estimation points is set to "npoints"
170
171 //Int_t npoints = Int_t(alpha * fNTNodes);
172 //printf("Params : %d NPoints %d\n", lambda, npoints);
f2040a8f 173 // prepare workers
174 if(!fTmpPoint) fTmpPoint = new Double_t[fNDim];
df84bc73 175 if(!fKDhelper) fKDhelper = new TKDTreeIF(GetNTerminalNodes(), fNDim, npoints, fRefPoints);
f2040a8f 176 if(!fFitter){
df84bc73 177 // generate parabolic for nD
178
179 // calculate number of parameters in the parabolic expresion
180 Int_t lambda = 1 + fNDim + fNDim*(fNDim+1)/2;
181 //Float_t alpha = Float_t(2*lambda + 1) / fNTNodes; // the bandwidth or smoothing parameter
f2040a8f 182 TString formula("1");
183 for(int idim=0; idim<fNDim; idim++){
184 formula += Form("++x[%d]", idim);
185 for(int jdim=idim; jdim<fNDim; jdim++) formula += Form("++x[%d]*x[%d]", idim, jdim);
186 }
df84bc73 187 fFitter = new TLinearFitter(lambda, formula.Data());
188 Info("Eval(const Double_t*, Int_t)", Form("Using %s for local interpolation.", formula.Data()));
f2040a8f 189 }
df84bc73 190
191 Float_t pointF[50];
192 for(int idim=0; idim<fNDim; idim++) pointF[idim] = point[idim];
193 Int_t istart = 0;
f2040a8f 194 Int_t *index;
df84bc73 195 Float_t dist, d0, w0, w;
196 Double_t uncertainty = TMath::Sqrt(1./fBucketSize);
f2040a8f 197 fFitter->ClearPoints();
198 do{
df84bc73 199 if(!fKDhelper->FindNearestNeighbors(pointF, npoints+1, index, dist)){
200 Error("Eval()", Form("Failed retriving %d neighbours for point:", npoints));
f2040a8f 201 for(int idim=0; idim<fNDim; idim++) printf("%f ", point[idim]);
202 printf("\n");
203 return -1;
204 }
df84bc73 205 for(int in=istart; in<npoints; in++){
206 //printf("%d index[%2d] x(", in, index[in]);
207 d0 = 0.;
208 for(int idim=0; idim<fNDim; idim++){
209 fTmpPoint[idim] = fRefPoints[idim][index[in]];
210 //printf("%6.4f ", fTmpPoint[idim]);
211 d0 += TMath::Abs(fTmpPoint[idim] - point[idim]);
212 }
213 d0 /= dist;
214 w0 = (1. - d0*d0*d0);
215 w = w0*w0*w0;
216
217 //printf(") f = %f [%f] d0 = %6.4f w = %6.4f \n", fRefValues[index[in]], TMath::Log(fRefValues[index[in]]), d0, w);
218 fFitter->AddPoint(fTmpPoint, TMath::Log(fRefValues[index[in]]), uncertainty/w);
f2040a8f 219 }
df84bc73 220 istart = npoints;
221 npoints += 4;
f2040a8f 222 } while(fFitter->Eval());
223
224 // calculate evaluation
f2040a8f 225 Int_t ipar = 0;
df84bc73 226 Double_t result = fFitter->GetParameter(ipar++);
f2040a8f 227 for(int idim=0; idim<fNDim; idim++){
df84bc73 228 result += fFitter->GetParameter(ipar++)*point[idim];
229 for(int jdim=idim; jdim<fNDim; jdim++) result += fFitter->GetParameter(ipar++)*point[idim]*point[jdim];
f2040a8f 230 }
df84bc73 231 //printf("\tResult : %f [%f]\n", TMath::Exp(result), result);
f2040a8f 232 return TMath::Exp(result);
233}
234
235
236//_________________________________________________________________
df84bc73 237void TKDInterpolator::DrawNodes(UInt_t ax1, UInt_t ax2, Int_t depth)
f2040a8f 238{
239// Draw nodes structure projected on plane "ax1:ax2". The parameter
240// "depth" specifies the bucket size per node. If depth == -1 draw only
241// terminal nodes and evaluation points (default -1 i.e. bucket size per node equal bucket size specified by the user)
df84bc73 242//
243// Observation:
244// This function creates the nodes (TBox) array for the specified depth
245// but don't delete it. Abusing this function may cause memory leaks !
246
f2040a8f 247
248 if(!fBoundaries) MakeBoundaries();
249
250 // Count nodes in specific view
251 Int_t nnodes = 0;
252 for(int inode = 0; inode <= 2*fNnodes; inode++){
253 if(depth == -1){
254 if(!IsTerminal(inode)) continue;
255 } else if((inode+1) >> depth != 1) continue;
256 nnodes++;
257 }
258
259 //printf("depth %d nodes %d\n", depth, nnodes);
260
df84bc73 261 TH2 *h2 = 0x0;
262 if(!(h2 = (TH2S*)gROOT->FindObject("hNodes"))) h2 = new TH2S("hNodes", "", 100, fRange[2*ax1], fRange[2*ax1+1], 100, fRange[2*ax2], fRange[2*ax2+1]);
263 h2->GetXaxis()->SetTitle(Form("x_{%d}", ax1));
264 h2->GetYaxis()->SetTitle(Form("x_{%d}", ax2));
f2040a8f 265 h2->Draw();
266
267 const Float_t border = 0.;//1.E-4;
df84bc73 268 TBox *node_array = new TBox[nnodes], *node;
f2040a8f 269 Float_t *bounds = 0x0;
270 nnodes = 0;
271 for(int inode = 0; inode <= 2*fNnodes; inode++){
272 if(depth == -1){
273 if(!IsTerminal(inode)) continue;
274 } else if((inode+1) >> depth != 1) continue;
275
df84bc73 276 node = &node_array[nnodes++];
277 //node = new TBox(bounds[2*ax1]+border, bounds[2*ax2]+border, bounds[2*ax1+1]-border, bounds[2*ax2+1]-border);
278 node->SetFillStyle(3002);
279 node->SetFillColor(50+Int_t(gRandom->Uniform()*50.));
f2040a8f 280 bounds = GetBoundary(inode);
df84bc73 281 node->DrawBox(bounds[2*ax1]+border, bounds[2*ax2]+border, bounds[2*ax1+1]-border, bounds[2*ax2+1]-border);
f2040a8f 282 }
283 if(depth != -1) return;
284
285 // Draw reference points
286 TGraph *ref = new TGraph(GetNTerminalNodes());
df84bc73 287 ref->SetMarkerStyle(3);
288 ref->SetMarkerSize(.7);
f2040a8f 289 ref->SetMarkerColor(2);
f2040a8f 290 for(int inode = 0; inode < GetNTerminalNodes(); inode++) ref->SetPoint(inode, fRefPoints[ax1][inode], fRefPoints[ax2][inode]);
291 ref->Draw("p");
292 return;
293}
294
295//_________________________________________________________________
df84bc73 296void TKDInterpolator::DrawNode(Int_t tnode, UInt_t ax1, UInt_t ax2)
f2040a8f 297{
298// Draw node "node" and the data points within.
df84bc73 299//
300// Observation:
301// This function creates some graphical objects
302// but don't delete it. Abusing this function may cause memory leaks !
f2040a8f 303
304 if(tnode < 0 || tnode >= GetNTerminalNodes()){
305 Warning("DrawNode()", Form("Terminal node %d outside defined range.", tnode));
306 return;
307 }
308
f2040a8f 309 Int_t inode = tnode;
310 tnode += fNnodes;
311 // select zone of interest in the indexes array
312 Int_t *index = GetPointsIndexes(tnode);
313 Int_t nPoints = (tnode == 2*fNnodes) ? fNpoints%fBucketSize : fBucketSize;
314
f2040a8f 315 // draw data points
316 TGraph *g = new TGraph(nPoints);
df84bc73 317 g->SetMarkerStyle(7);
f2040a8f 318 for(int ip = 0; ip<nPoints; ip++) g->SetPoint(ip, fData[ax1][index[ip]], fData[ax2][index[ip]]);
f2040a8f 319
320 // draw estimation point
df84bc73 321 TMarker *m=new TMarker(fRefPoints[ax1][inode], fRefPoints[ax2][inode], 20);
f2040a8f 322 m->SetMarkerColor(2);
df84bc73 323 m->SetMarkerSize(1.7);
f2040a8f 324
325 // draw node contour
326 Float_t *bounds = GetBoundary(tnode);
327 TBox *n = new TBox(bounds[2*ax1], bounds[2*ax2], bounds[2*ax1+1], bounds[2*ax2+1]);
328 n->SetFillStyle(0);
df84bc73 329
330 if(gPad) gPad->Clear();
331 g->Draw("ap");
332 m->Draw();
f2040a8f 333 n->Draw();
334
335 return;
336}
337