kNN algorithm improved. IO Defined
[u/mrichter/AliRoot.git] / STAT / TKDInterpolator.cxx
index 9be1a2f69633d00c13ed86306437d3b5497994c1..ad7f08d991fa0638ceb49a656e554410b24cceff 100644 (file)
@@ -13,9 +13,8 @@
 #include "TRandom.h"
 #include "TROOT.h"
 
-
-
 ClassImp(TKDInterpolator)
+ClassImp(TKDInterpolator::TKDNodeInfo)
 
 /////////////////////////////////////////////////////////////////////
 // Memory setup of protected data memebers
@@ -24,6 +23,8 @@ ClassImp(TKDInterpolator)
 //
 // fRefValues : evaluation value/error of PDF for each terminal node of underlying KD Tree.
 // | 1st terminal node (value) | 2nd terminal node (value) | ... | 1st terminal node (error) | 2nd terminal node (error) | ...
+//
+// status = |0|0|0|0|0|1(tri-cubic weights)|1(STORE)|1 INT(0 COG )|
 /////////////////////////////////////////////////////////////////////
 
 //_________________________________________________________________
@@ -31,8 +32,13 @@ TKDInterpolator::TKDInterpolator() : TKDTreeIF()
        ,fNTNodes(0)
        ,fRefPoints(0x0)
        ,fRefValues(0x0)
+       ,fCov(0x0)
+       ,fPar(0x0)
+       ,fPDFstatus(0x0)
+       ,fStatus(4)
+       ,fLambda(0)
        ,fDepth(-1)
-       ,fTmpPoint(0x0)
+       ,fBuffer(0x0)
        ,fKDhelper(0x0)
        ,fFitter(0x0)
 {
@@ -45,8 +51,13 @@ TKDInterpolator::TKDInterpolator(Int_t npoints, Int_t ndim, UInt_t bsize, Float_
        ,fNTNodes(GetNTerminalNodes())
        ,fRefPoints(0x0)
        ,fRefValues(0x0)
+       ,fCov(0x0)
+       ,fPar(0x0)
+       ,fPDFstatus(0x0)
+       ,fStatus(4)
+       ,fLambda(0)
        ,fDepth(-1)
-       ,fTmpPoint(0x0)
+       ,fBuffer(0x0)
        ,fKDhelper(0x0)
        ,fFitter(0x0)
 {
@@ -57,12 +68,17 @@ TKDInterpolator::TKDInterpolator(Int_t npoints, Int_t ndim, UInt_t bsize, Float_
 
 
 //_________________________________________________________________
-TKDInterpolator::TKDInterpolator(TTree *t, const Char_t *var, const Char_t *cut, UInt_t bsize) : TKDTreeIF()
+TKDInterpolator::TKDInterpolator(TTree *t, const Char_t *var, const Char_t *cut, UInt_t bsize, Long64_t nentries, Long64_t firstentry) : TKDTreeIF()
        ,fNTNodes(0)
        ,fRefPoints(0x0)
        ,fRefValues(0x0)
+       ,fCov(0x0)
+       ,fPar(0x0)
+       ,fPDFstatus(0x0)
+       ,fStatus(4)
+       ,fLambda(0)
        ,fDepth(-1)
-       ,fTmpPoint(0x0)
+       ,fBuffer(0x0)
        ,fKDhelper(0x0)
        ,fFitter(0x0)
 {
@@ -73,23 +89,25 @@ TKDInterpolator::TKDInterpolator(TTree *t, const Char_t *var, const Char_t *cut,
 // 
 
        TObjArray *vars = TString(var).Tokenize(":");
-       fNDim = vars->GetEntriesFast();
+       fNDim = vars->GetEntriesFast(); fNDimm = 2*fNDim;
        if(fNDim > 6/*kDimMax*/) Warning("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Variable number exceed maximum dimension %d. Results are unpredictable.", 6/*kDimMax*/));
        fBucketSize = bsize;
 
        Int_t np;
        Double_t *v;
        for(int idim=0; idim<fNDim; idim++){
-               if(!(np = t->Draw(((TObjString*)(*vars)[idim])->GetName(), cut, "goff"))){
-                       Warning("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Can not access data for %s", ((TObjString*)(*vars)[idim])->GetName() ));
+               if(!(np = t->Draw(((TObjString*)(*vars)[idim])->GetName(), cut, "goff", nentries, firstentry))){
+                       Warning("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Can not access data for keys %s. Key defined on tree :", ((TObjString*)(*vars)[idim])->GetName() ));
+                       TIterator *it = (t->GetListOfLeaves())->MakeIterator();
+                       TObject *o;
+                       while(o = (*it)()) printf("\t%s\n", o->GetName());
                        continue;
                }
                if(!fNpoints){
                        fNpoints = np;
                        Info("TKDInterpolator(TTree*, const Char_t, const Char_t, UInt_t)", Form("Allocating %d data points in %d dimensions.", fNpoints, fNDim));
-                       //Float_t *mem = new Float_t[fNDim*fNpoints];
                        fData = new Float_t*[fNDim];
-                       for(int idim=0; idim<fNDim; idim++) fData[idim] = new Float_t[fNpoints]; //&mem[idim*fNpoints];
+                       for(int idim=0; idim<fNDim; idim++) fData[idim] = new Float_t[fNpoints];
                        kDataOwner = kTRUE;
                }
                v = t->GetV1();
@@ -103,9 +121,15 @@ TKDInterpolator::TKDInterpolator(TTree *t, const Char_t *var, const Char_t *cut,
 //_________________________________________________________________
 TKDInterpolator::~TKDInterpolator()
 {
+       if(fCov){
+               delete [] fPar;
+               delete [] fCov;
+               delete [] fPDFstatus;
+       }
+       
        if(fFitter) delete fFitter;
        if(fKDhelper) delete fKDhelper;
-       if(fTmpPoint) delete [] fTmpPoint;
+       if(fBuffer) delete [] fBuffer;
        
        if(fRefPoints){
                for(int idim=0; idim<fNDim; idim++) delete [] fRefPoints[idim] ;
@@ -122,7 +146,8 @@ void TKDInterpolator::Build()
 //  - corresponding PDF values
 
        if(!fBoundaries) MakeBoundaries();
-       
+       fLambda = 1 + fNDim + fNDim*(fNDim+1)/2;
+
        // allocate memory for data
        fRefValues = new Float_t[fNTNodes];
        fRefPoints = new Float_t*[fNDim];
@@ -162,76 +187,217 @@ void TKDInterpolator::Build()
        }
 }
 
+//__________________________________________________________________
+void TKDInterpolator::GetStatus()
+{
+       printf("Interpolator Status :\n");
+       printf("  Method : %s\n", fStatus&1 ? "INT" : "COG");
+       printf("  Store  : %s\n", fStatus&2 ? "YES" : "NO");
+       printf("  Weights: %s\n", fStatus&4 ? "YES" : "NO");
+
+       printf("nodes %d\n", fNTNodes);        //Number of evaluation data points
+       printf("refs 0x%x\n", fRefPoints);    //[fNDim][fNTNodes]
+       printf("vals 0x%x\n", fRefValues);     //[fNTNodes]
+       for(int i=0; i<fNTNodes; i++){
+               for(int idim=0; idim<fNDim; idim++) printf("%f ", fRefPoints[idim][i]);
+               printf("[%f]\n", fRefValues[i]);
+       }
+
+       printf("cov 0x%x\n", fCov);           //[fNTNodes] cov matrix array for nodes
+       printf("pars 0x%x\n", fPar);           //[fNTNodes] parameters array for nodes
+       for(int i=0; i<fNTNodes; i++){
+               printf("%d ", i);
+               for(int ip=0; ip<3; ip++) printf("p%d[%f] ", ip, fPar[i](ip));
+               printf("\n");
+       }
+       printf("pdf 0x%x\n", fPDFstatus);     //[fNTNodes] status bit for node's PDF
+}
+
 //_________________________________________________________________
-Double_t TKDInterpolator::Eval(const Double_t *point, Int_t npoints)
+Double_t TKDInterpolator::Eval(const Double_t *point, Double_t &result, Double_t &error)
 {
-// Evaluate PDF at k-dimensional position "point". The initial number of
-// neighbour estimation points is set to "npoints"
+// Evaluate PDF for "point". The result is returned in "result" and error in "error". The function returns the chi2 of the fit.
+//
+// Observations:
+//
+// 1. The default method used for interpolation is kCOG.
+// 2. The initial number of neighbors used for the estimation is set to Int(alpha*fLambda) (alpha = 1.5)
+                       
+       Float_t pointF[50]; // local Float_t conversion for "point"
+       for(int idim=0; idim<fNDim; idim++) pointF[idim] = (Float_t)point[idim];
+       Int_t node = FindNode(pointF) - fNnodes;
+       if(fPDFstatus && (fStatus&1) && fPDFstatus[node]) return CookPDF(point, node, result, error);
+
+       // Allocate memory
+       if(!fBuffer) fBuffer = new Double_t[2*fLambda];
+       if(!fKDhelper) fKDhelper = new TKDTreeIF(fNTNodes, fNDim, 30, fRefPoints);
+       if(!fFitter) SetIntInterpolation(kFALSE);
        
+       // generate parabolic for nD
+       //Float_t alpha = Float_t(2*lambda + 1) / fNTNodes; // the bandwidth or smoothing parameter
        //Int_t npoints = Int_t(alpha * fNTNodes);
        //printf("Params : %d NPoints %d\n", lambda, npoints);
        // prepare workers
-       if(!fTmpPoint) fTmpPoint = new Double_t[fNDim];
-       if(!fKDhelper) fKDhelper = new TKDTreeIF(GetNTerminalNodes(), fNDim, npoints, fRefPoints);
-       if(!fFitter){
-               // generate parabolic for nD
-               
-               // calculate number of parameters in the parabolic expresion
-               Int_t lambda = 1 + fNDim + fNDim*(fNDim+1)/2;
-               //Float_t alpha = Float_t(2*lambda + 1) / fNTNodes; // the bandwidth or smoothing parameter
-               TString formula("1");
-               for(int idim=0; idim<fNDim; idim++){
-                       formula += Form("++x[%d]", idim);
-                       for(int jdim=idim; jdim<fNDim; jdim++) formula += Form("++x[%d]*x[%d]", idim, jdim);
-               }
-               fFitter = new TLinearFitter(lambda, formula.Data());
-               Info("Eval(const Double_t*, Int_t)", Form("Using %s for local interpolation.", formula.Data()));
-       }
 
-       Float_t pointF[50];
-       for(int idim=0; idim<fNDim; idim++) pointF[idim] = point[idim];
-       Int_t istart = 0;
-       Int_t *index;
-       Float_t dist, d0, w0, w;
-       Double_t uncertainty = TMath::Sqrt(1./fBucketSize); 
-       fFitter->ClearPoints();
+       Int_t *index,  // indexes of NN 
+             ipar,    // local looping variable
+                               npoints = Int_t(1.5*fLambda); // number of data points used for interpolation
+       Float_t *dist, // distances of NN
+                                       d,     // NN normalized distance
+                                       w0,    // work
+                                       w;     // tri-cubic weight function
+       Double_t sig   // bucket error 
+               = TMath::Sqrt(1./fBucketSize);
        do{
+               // find nearest neighbors
+               for(int idim=0; idim<fNDim; idim++) pointF[idim] = (Float_t)point[idim];
                if(!fKDhelper->FindNearestNeighbors(pointF, npoints+1, index, dist)){
                        Error("Eval()", Form("Failed retriving %d neighbours for point:", npoints));
                        for(int idim=0; idim<fNDim; idim++) printf("%f ", point[idim]);
                        printf("\n");
                        return -1;
                }
-               for(int in=istart; in<npoints; in++){
-                       //printf("%d index[%2d] x(", in, index[in]);
-                       d0 = 0.;
-                       for(int idim=0; idim<fNDim; idim++){
-                               fTmpPoint[idim] = fRefPoints[idim][index[in]];
-                               //printf("%6.4f ", fTmpPoint[idim]);
-                               d0 += TMath::Abs(fTmpPoint[idim] - point[idim]);
+               // add points to fitter
+               fFitter->ClearPoints();
+               for(int in=0; in<npoints; in++){
+                       if(fStatus&1){ // INT
+                               for(int idim=0; idim<fNDim; idim++) pointF[idim] = fRefPoints[idim][index[in]];
+                               Float_t *bounds = GetBoundary(FindNode(pointF));
+                               
+                               ipar = 0;
+                               for(int idim=0; idim<fNDim; idim++){
+                                       fBuffer[ipar++] = .5*(bounds[2*idim] + bounds[2*idim+1]);
+                                       fBuffer[ipar++] = (bounds[2*idim]*bounds[2*idim] + bounds[2*idim] * bounds[2*idim+1] + bounds[2*idim+1] * bounds[2*idim+1])/3.;
+                                       for(int jdim=idim+1; jdim<fNDim; jdim++) fBuffer[ipar++] = (bounds[2*idim] + bounds[2*idim+1]) * (bounds[2*jdim] + bounds[2*jdim+1]) * .25;
+                               }
+                       } else { // COG
+                               for(int idim=0; idim<fNDim; idim++) fBuffer[idim] = fRefPoints[idim][index[in]];
                        }
-                       d0 /= dist;
-                       w0 = (1. - d0*d0*d0);
-                       w = w0*w0*w0;
 
-                       //printf(") f = %f [%f] d0 = %6.4f   w = %6.4f  \n", fRefValues[index[in]], TMath::Log(fRefValues[index[in]]), d0, w);
-                       fFitter->AddPoint(fTmpPoint, TMath::Log(fRefValues[index[in]]), uncertainty/w);
+                       // calculate tri-cubic weighting function
+                       if(fStatus&4){
+                               d = dist[in]/ dist[npoints];
+                               w0 = (1. - d*d*d); w = w0*w0*w0;
+                       } else w = 1.;
+                        
+                       //for(int idim=0; idim<fNDim; idim++) printf("%f ", fBuffer[idim]);
+                       //printf("\nd[%f] w[%f] sig[%f]\n", d, w, sig);
+                       fFitter->AddPoint(fBuffer, fRefValues[index[in]], fRefValues[index[in]]*sig/w);
                }
-               istart = npoints;
                npoints += 4;
        } while(fFitter->Eval());
 
-       // calculate evaluation
-       Int_t ipar = 0;
-       Double_t result = fFitter->GetParameter(ipar++);
+       // retrive fitter results
+       TMatrixD cov(fLambda, fLambda);
+       TVectorD par(fLambda);
+       fFitter->GetCovarianceMatrix(cov);
+       fFitter->GetParameters(par);
+       Double_t chi2 = fFitter->GetChisquare()/(npoints - 4 - fLambda);
+
+       // store results
+       if(fStatus&2 && fStatus&1){
+               fPar[node] = par;
+               fCov[node] = cov;
+               fPDFstatus[node] = 1;
+       }
+               
+       // Build df/dpi|x values
+       Double_t *fdfdp = &fBuffer[fLambda];
+       ipar = 0;
+       fdfdp[ipar++] = 1.;
        for(int idim=0; idim<fNDim; idim++){
-               result += fFitter->GetParameter(ipar++)*point[idim];
-               for(int jdim=idim; jdim<fNDim; jdim++) result += fFitter->GetParameter(ipar++)*point[idim]*point[jdim];
+               fdfdp[ipar++] = point[idim];
+               for(int jdim=idim; jdim<fNDim; jdim++) fdfdp[ipar++] = point[idim]*point[jdim];
        }
-       //printf("\tResult : %f [%f]\n", TMath::Exp(result), result);
-       return TMath::Exp(result);
+
+       // calculate estimation
+       result =0.; error = 0.;
+       for(int i=0; i<fLambda; i++){
+               result += fdfdp[i]*par(i);
+               for(int j=0; j<fLambda; j++) error += fdfdp[i]*fdfdp[j]*cov(i,j);
+       }       
+       error = TMath::Sqrt(error);
+
+       return chi2;
 }
 
+// //_________________________________________________________________
+// Double_t TKDInterpolator::Eval1(const Double_t *point, Int_t npoints, Double_t &result, Double_t &error)
+// {
+// // Evaluate PDF at k-dimensional position "point". The initial number of
+// // neighbour estimation points is set to "npoints". The default method
+// // used for interpolation is kCOG.
+// 
+//     // calculate number of parameters in the parabolic expresion
+//     Int_t lambda = 1 + fNDim + fNDim*(fNDim+1)/2;
+// 
+//     if(!fBuffer) fBuffer = new Double_t[lambda-1];
+//     if(!fKDhelper) fKDhelper = new TKDTreeIF(GetNTerminalNodes(), fNDim, npoints, fRefPoints);
+// 
+//     if(!fFitter) fFitter = new TLinearFitter(lambda, Form("hyp%d", fNDim+1));
+//     else fFitter->SetFormula(Form("hyp%d", fNDim+1));
+// 
+// 
+//     Float_t pointF[50];
+//     for(int idim=0; idim<fNDim; idim++) pointF[idim] = point[idim];
+//     Int_t istart = 0;
+//     Int_t *index, ipar;
+//     Float_t *bounds, *dist, *w = new Float_t[fNDim];
+//     Double_t uncertainty = TMath::Sqrt(1./fBucketSize);
+//     fFitter->ClearPoints();
+//     do{
+//             if(!fKDhelper->FindNearestNeighbors(pointF, npoints+1, index, dist)){
+//                     Error("Eval()", Form("Failed retriving %d neighbours for point:", npoints));
+//                     for(int idim=0; idim<fNDim; idim++) printf("%f ", point[idim]);
+//                     printf("\n");
+//                     return -1;
+//             }
+//             for(int in=istart; in<npoints; in++){
+//                     for(int idim=0; idim<fNDim; idim++) w[idim] = fRefPoints[idim][index[in]];
+//                     bounds = GetBoundary(FindNode(w));
+// 
+//                     ipar = 0;
+//                     for(int idim=0; idim<fNDim; idim++){
+//                             fBuffer[ipar++] = .5*(bounds[2*idim] + bounds[2*idim+1]);
+//                             fBuffer[ipar++] = (bounds[2*idim]*bounds[2*idim] + bounds[2*idim] * bounds[2*idim+1] + bounds[2*idim+1] * bounds[2*idim+1])/3.;
+//                             for(int jdim=idim+1; jdim<fNDim; jdim++) fBuffer[ipar++] = (bounds[2*idim] + bounds[2*idim+1]) * (bounds[2*jdim] + bounds[2*jdim+1]) * .25;
+//                     }
+// 
+//                     fFitter->AddPoint(fBuffer, fRefValues[index[in]], fRefValues[index[in]]*uncertainty);
+//             }
+//             istart = npoints;
+//             npoints += 4;
+//     } while(fFitter->Eval());
+//     delete [] w;
+// 
+//     // calculate evaluation
+// //  fFitter->PrintResults(3);
+//     TMatrixD cov(lambda, lambda);
+//     TVectorD par(lambda);
+//     fFitter->GetCovarianceMatrix(cov);
+//     fFitter->GetParameters(par);
+// 
+//     // Build temporary array to keep values df/dpi|x
+//     Double_t f[100];
+//     ipar = 0;
+//     f[ipar++] = 1.;
+//     for(int idim=0; idim<fNDim; idim++){
+//             f[ipar++] = point[idim];
+//             for(int jdim=idim; jdim<fNDim; jdim++) f[ipar++] = point[idim]*point[jdim];
+//     }
+//     result =0.; error = 0.;
+//     for(int i=0; i<lambda; i++){
+//             result += f[i]*par[i];
+//             for(int j=0; j<lambda; j++) error += f[i]*f[j]*cov(i,j);
+//     }
+//     error = TMath::Sqrt(error);
+//     Double_t chi2 = fFitter->GetChisquare()/(npoints - 4 - lambda);
+// 
+//     for(int ipar=0; ipar<lambda; ipar++) printf("%d %8.6e %8.6e\n", ipar, par[ipar], TMath::Sqrt(cov(ipar, ipar)));
+//     printf("result %6.3f +- %6.3f [%f]\n", result, error, chi2);
+//     return chi2;
+// }
+
 
 //_________________________________________________________________
 void TKDInterpolator::DrawNodes(UInt_t ax1, UInt_t ax2, Int_t depth)
@@ -335,3 +501,88 @@ void TKDInterpolator::DrawNode(Int_t tnode, UInt_t ax1, UInt_t ax2)
        return;
 }
 
+
+//__________________________________________________________________
+void TKDInterpolator::SetIntInterpolation(const Bool_t on)
+{
+// Set interpolation bit to "on" and build/delete memory
+       
+       if(on) fStatus += fStatus&1 ? 0 : 1;
+       else fStatus += fStatus&1 ? -1 : 0;
+       TString formula;
+       if(on) formula = Form("hyp%d", fLambda-1);
+       else {
+               formula = "1";
+               for(int idim=0; idim<fNDim; idim++){
+                       formula += Form("++x[%d]", idim);
+                       for(int jdim=idim; jdim<fNDim; jdim++) formula += Form("++x[%d]*x[%d]", idim, jdim);
+               }
+       }
+       if(!fFitter) fFitter = new TLinearFitter(fLambda, formula.Data());
+       else fFitter->SetFormula(formula.Data());
+}
+
+
+//_________________________________________________________________
+void TKDInterpolator::SetSetStore(const Bool_t on)
+{
+// Set store bit to "on" and build/delete memory
+       
+       if(on){
+               fStatus += fStatus&2 ? 0 : 2;
+               if(!fCov){
+                       fPDFstatus = new Bool_t[fNTNodes];
+                       fCov = new TMatrixD[fNTNodes];
+                       fPar = new TVectorD[fNTNodes];
+                       for(int i=0; i<fNTNodes; i++){
+                               fPDFstatus[i] = kFALSE;
+                               fCov[i].ResizeTo(fLambda, fLambda);
+                               fPar[i].ResizeTo(fLambda);
+                       }
+               }
+       } else {
+               fStatus += fStatus&2 ? -2 : 0;
+               if(fCov){
+                       delete [] fPar;
+                       delete [] fCov;
+                       delete [] fPDFstatus;
+               }
+       }
+}
+
+//_________________________________________________________________
+void TKDInterpolator::SetUseWeights(const Bool_t on)
+{
+       if(on) fStatus += fStatus&4 ? 0 : 4;
+       else fStatus += fStatus&4 ? -4 : 0;
+}
+
+
+//_________________________________________________________________
+Double_t TKDInterpolator::CookPDF(const Double_t *point, const Int_t node, Double_t &result, Double_t &error)
+{
+// Recalculate the PDF for one node from the results of interpolation (parameters and covariance matrix)
+
+       Info("CookPDF()", Form("Called for node %d", node));
+
+       if(!fBuffer) fBuffer = new Double_t[2*fLambda];
+       Double_t *fdfdp = &fBuffer[fLambda];
+       Int_t ipar = 0;
+       fdfdp[ipar++] = 1.;
+       for(int idim=0; idim<fNDim; idim++){
+               fdfdp[ipar++] = point[idim];
+               for(int jdim=idim; jdim<fNDim; jdim++) fdfdp[ipar++] = point[idim]*point[jdim];
+       }
+
+       // calculate estimation
+       result =0.; error = 0.;
+       for(int i=0; i<fLambda; i++){
+               result += fdfdp[i]*fPar[node](i);
+               for(int j=0; j<fLambda; j++) error += fdfdp[i]*fdfdp[j]*fCov[node](i,j);
+       }       
+       error = TMath::Sqrt(error);
+       printf("result[CookPDF] %6.3f +- %6.3f\n", result, error);
+
+       return 0.;
+}
+