[u/mrichter/AliRoot.git] / JETAN / AliKMeansClustering.cxx

/**************************************************************************
 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
 *                                                                        *
 * Author: The ALICE Off-line Project.                                    *
 * Contributors are mentioned in the code where appropriate.              *
 *                                                                        *
 * Permission to use, copy, modify and distribute this software and its   *
 * documentation strictly for non-commercial purposes is hereby granted   *
 * without fee, provided that the above copyright notice appears in all   *
 * copies and that both the copyright notice and this permission notice   *
 * appear in the supporting documentation. The authors make no claims     *
 * about the suitability of this software for any purpose. It is          *
 * provided "as is" without express or implied warranty.                  *
 **************************************************************************/

// Implemenatation of the K-Means Clustering Algorithm
// See http://en.wikipedia.org/wiki/K-means_clustering and references therein.
//
// This particular implementation is the so called Soft K-means algorithm.
// It has been modified to work on the cylindrical topology in eta-phi space.
//
// Author: Andreas Morsch (CERN)
// andreas.morsch@cern.ch
 
#include "AliKMeansClustering.h"
#include <TMath.h>
#include <TRandom.h>
#include <TH1F.h>

ClassImp(AliKMeansClustering)

Double_t AliKMeansClustering::fBeta = 10.;

 
Int_t AliKMeansClustering::SoftKMeans(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* rk )
{
    //
    // The soft K-means algorithm
    //
    Int_t i,j;
    //
    // (1) Initialisation of the k means

    for (i = 0; i < k; i++) {
	mx[i] = 2. * TMath::Pi() * gRandom->Rndm();
	my[i] = -1. + 2. * gRandom->Rndm();
    }

    //
    // (2a) The responsibilities
    Double_t** r = new Double_t*[n]; // responsibilities
    for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
    //
    // (2b) Normalisation
    Double_t* nr   = new Double_t[n];
    // (3) Iterations
    Int_t nit = 0;
    
    while(1) {
	nit++;
      //
      // Assignment step
      //
      for (j = 0; j < n; j++) {
	nr[j] = 0.;
	for (i = 0; i < k; i++) {
	  r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
	  nr[j] += r[j][i];
	} // mean i
      } // data point j
	
      for (j = 0; j < n; j++) {
	for (i = 0; i < k; i++) {
	  r[j][i] /=  nr[j];
	} // mean i
      } // data point j
      
	//
	// Update step
      Double_t di = 0;
      
      for (i = 0; i < k; i++) {
	  Double_t oldx = mx[i];
	  Double_t oldy = my[i];
	  
	  mx[i] = x[0];
	  my[i] = y[0];
	  rk[i] = r[0][i];
	
	for (j = 1; j < n; j++) {
	    Double_t xx =  x[j];
//
// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi

	    Double_t dx = mx[i] - x[j];
	    if (dx >  TMath::Pi()) xx += 2. * TMath::Pi();
	    if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
	    mx[i] = mx[i] * rk[i] + r[j][i] * xx;
	    my[i] = my[i] * rk[i] + r[j][i] * y[j];
	    rk[i] += r[j][i];
	    mx[i] /= rk[i];
	    my[i] /= rk[i];	    
	    if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
	    if (mx[i] < 0.              ) mx[i] += 2. * TMath::Pi();
	} // Data
	di += d(mx[i], my[i], oldx, oldy);
      } // means 
	//
	// ending condition
      if (di < 1.e-8 || nit > 1000) break;
    } // while

// Clean-up    
    delete[] nr;
    for (j = 0; j < n; j++) delete[] r[j];
    delete[] r;
// 
    return (nit < 1000);
    
}

Int_t AliKMeansClustering::SoftKMeans2(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* sigma2, Double_t* rk )
{
    //
    // The soft K-means algorithm
    //
    Int_t i,j;
    //
    // (1) Initialisation of the k means using k-means++ recipe
    // 
     OptimalInit(k, n, x, y, mx, my);
    //
    // (2a) The responsibilities
    Double_t** r = new Double_t*[n]; // responsibilities
    for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
    //
    // (2b) Normalisation
    Double_t* nr = new Double_t[n];
    //
    // (2c) Weights 
    Double_t* pi = new Double_t[k];
    //
    //
    // (2d) Initialise the responsibilties and weights
    for (j = 0; j < n; j++) {
      nr[j] = 0.;
      for (i = 0; i < k; i++) {
	r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
	nr[j] += r[j][i];
      } // mean i
    } // data point j
    
    for (i = 0; i < k; i++) {
      rk[i]    = 0.;
      sigma2[i] = 1./fBeta;
 
      for (j = 0; j < n; j++) {
	r[j][i] /=  nr[j];
	rk[i] += r[j][i];
      } // mean i
      pi[i] = rk[i] / Double_t(n);
    } // data point j
    // (3) Iterations
    Int_t nit = 0;
    Bool_t rmovalStep = kFALSE;

    while(1) {
	nit++;
      //
      // Assignment step
      //
      for (j = 0; j < n; j++) {
	nr[j] = 0.;
	for (i = 0; i < k; i++) {
	  r[j][i] = pi[i] * TMath::Exp(- d(mx[i], my[i], x[j], y[j]) / sigma2[i] ) 
	    / (2. * sigma2[i] * TMath::Pi() * TMath::Pi());
	  nr[j] += r[j][i];
	} // mean i
      } // data point j
	
      for (i = 0; i < k; i++) {
	for (j = 0; j < n; j++) {
	  r[j][i] /=  nr[j];
	} // mean i
      } // data point j
      
	//
	// Update step
      Double_t di = 0;
      
      for (i = 0; i < k; i++) {
	  Double_t oldx = mx[i];
	  Double_t oldy = my[i];
	  
	  mx[i] = x[0];
	  my[i] = y[0];
	  rk[i] = r[0][i];
	for (j = 1; j < n; j++) {
	    Double_t xx =  x[j];
//
// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi

	    Double_t dx = mx[i] - x[j];
	    if (dx >  TMath::Pi()) xx += 2. * TMath::Pi();
	    if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
	    if (r[j][i] > 1.e-15) {
	      mx[i] = mx[i] * rk[i] + r[j][i] * xx;
	      my[i] = my[i] * rk[i] + r[j][i] * y[j];
	      rk[i] += r[j][i];
	      mx[i] /= rk[i];
	      my[i] /= rk[i];	
	    }    
	    if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
	    if (mx[i] < 0.              ) mx[i] += 2. * TMath::Pi();
	} // Data
	di += d(mx[i], my[i], oldx, oldy);

      } // means 
      //
      // Sigma
      for (i = 0; i < k; i++) {
	sigma2[i] = 0.;
	for (j = 1; j < n; j++) {
	  sigma2[i] += r[j][i] * d(mx[i], my[i], x[j], y[j]);
	} // Data
	sigma2[i] /= rk[i];
	if (sigma2[i] < 0.0025) sigma2[i] = 0.0025;
      } // Clusters    
      //
      // Fractions
      for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n);
      //
// ending condition
      if (di < 1.e-8 || nit > 1000) break;
    } // while

// Clean-up    
    delete[] nr;
    delete[] pi;
    for (j = 0; j < n; j++) delete[] r[j];
    delete[] r;
// 
    return (nit < 1000);
}

Int_t AliKMeansClustering::SoftKMeans3(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , 
				       Double_t* sigmax2, Double_t* sigmay2, Double_t* rk )
{
    //
    // The soft K-means algorithm
    //
    Int_t i,j;
    //
    // (1) Initialisation of the k means using k-means++ recipe
    // 
     OptimalInit(k, n, x, y, mx, my);
    //
    // (2a) The responsibilities
    Double_t** r = new Double_t*[n]; // responsibilities
    for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
    //
    // (2b) Normalisation
    Double_t* nr = new Double_t[n];
    //
    // (2c) Weights 
    Double_t* pi = new Double_t[k];
    //
    //
    // (2d) Initialise the responsibilties and weights
    for (j = 0; j < n; j++) {
      nr[j] = 0.;
      for (i = 0; i < k; i++) {

	r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
	nr[j] += r[j][i];
      } // mean i
    } // data point j
    
    for (i = 0; i < k; i++) {
      rk[i]    = 0.;
      sigmax2[i] = 1./fBeta;
      sigmay2[i] = 1./fBeta;
 
      for (j = 0; j < n; j++) {
	r[j][i] /=  nr[j];
	rk[i] += r[j][i];
      } // mean i
      pi[i] = rk[i] / Double_t(n);
    } // data point j
    // (3) Iterations
    Int_t nit = 0;
    Bool_t rmovalStep = kFALSE;

    while(1) {
	nit++;
      //
      // Assignment step
      //
      for (j = 0; j < n; j++) {
	nr[j] = 0.;
	for (i = 0; i < k; i++) {

	  Double_t dx = TMath::Abs(mx[i]-x[j]);
	  if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
	  Double_t dy = TMath::Abs(my[i]-y[j]);
	  r[j][i] = pi[i] * TMath::Exp(-0.5 *  (dx * dx / sigmax2[i] + dy * dy / sigmay2[i])) 
	    / (2. * TMath::Sqrt(sigmax2[i] * sigmay2[i]) * TMath::Pi() * TMath::Pi());
	  nr[j] += r[j][i];
	} // mean i
      } // data point j
	
      for (i = 0; i < k; i++) {
	for (j = 0; j < n; j++) {
	  r[j][i] /=  nr[j];
	} // mean i
      } // data point j
      
	//
	// Update step
      Double_t di = 0;
      
      for (i = 0; i < k; i++) {
	  Double_t oldx = mx[i];
	  Double_t oldy = my[i];
	  
	  mx[i] = x[0];
	  my[i] = y[0];
	  rk[i] = r[0][i];
	for (j = 1; j < n; j++) {
	    Double_t xx =  x[j];
//
// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi

	    Double_t dx = mx[i] - x[j];
	    if (dx >  TMath::Pi()) xx += 2. * TMath::Pi();
	    if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
	    if (r[j][i] > 1.e-15) {
	      mx[i] = mx[i] * rk[i] + r[j][i] * xx;
	      my[i] = my[i] * rk[i] + r[j][i] * y[j];
	      rk[i] += r[j][i];
	      mx[i] /= rk[i];
	      my[i] /= rk[i];	
	    }    
	    if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
	    if (mx[i] < 0.              ) mx[i] += 2. * TMath::Pi();
	} // Data
	di += d(mx[i], my[i], oldx, oldy);

      } // means 
      //
      // Sigma
      for (i = 0; i < k; i++) {
	sigmax2[i] = 0.;
	sigmay2[i] = 0.;

	for (j = 1; j < n; j++) {
	  Double_t dx = TMath::Abs(mx[i]-x[j]);
	  if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
	  Double_t dy = TMath::Abs(my[i]-y[j]);
	  sigmax2[i] += r[j][i] * dx * dx;
	  sigmay2[i] += r[j][i] * dy * dy;
	} // Data
	sigmax2[i] /= rk[i];
	sigmay2[i] /= rk[i];
	if (sigmax2[i] < 0.0025) sigmax2[i] = 0.0025;
	if (sigmay2[i] < 0.0025) sigmay2[i] = 0.0025;
      } // Clusters    
      //
      // Fractions
      for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n);
      //
// ending condition
      if (di < 1.e-8 || nit > 1000) break;
    } // while

// Clean-up    
    delete[] nr;
    delete[] pi;
    for (j = 0; j < n; j++) delete[] r[j];
    delete[] r;
// 
    return (nit < 1000);
}

Double_t AliKMeansClustering::d(Double_t mx, Double_t my, Double_t x, Double_t y)
{
    //
    // Distance definition 
    // Quasi - Euclidian on the eta-phi cylinder
    
    Double_t dx = TMath::Abs(mx-x);
    if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
    
    return (0.5*(dx * dx + (my - y) * (my - y)));
}


void AliKMeansClustering::OptimalInit(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my)
{
  //  
  // Optimal initialisation using the k-means++ algorithm
  // http://en.wikipedia.org/wiki/K-means%2B%2B
  //
  // k-means++ is an algorithm for choosing the initial values for k-means clustering in statistics and machine learning. 
  // It was proposed in 2007 by David Arthur and Sergei Vassilvitskii as an approximation algorithm for the NP-hard k-means problem---
  // a way of avoiding the sometimes poor clusterings found by the standard k-means algorithm.
  //
  //
  TH1F d2("d2", "", n, -0.5, Float_t(n)-0.5);
  d2.Reset();

  // (1) Chose first center as a random point among the input data.
  Int_t ir = Int_t(Float_t(n) * gRandom->Rndm());
  mx[0] = x[ir];
  my[0] = y[ir];

  // (2) Iterate
  Int_t icl = 1;
  while(icl < k)
    {
      // find min distance to existing clusters
      for (Int_t j = 0; j < n; j++) {
	Double_t dmin = 1.e10;
	for (Int_t i = 0; i < icl; i++) {
	  Double_t dij = d(mx[i], my[i], x[j], y[j]);
	  if (dij < dmin) dmin = dij;
	} // clusters
	d2.Fill(Float_t(j), dmin);
      } // data points
      // select a new cluster from data points with probability ~d2
      ir = Int_t(d2.GetRandom() + 0.5);
      mx[icl] = x[ir];
      my[icl] = y[ir];
      icl++;
    } // icl
}


Int_t AliKMeansClustering::NTwoSigma(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my, 
					   Double_t* sigma2x, Double_t* sigma2y)
{
  //  
  // Counting the number of data points within 2sigma of a cluster
  //
  Int_t n2sigma = 0;
  for (Int_t j = 0; j < n; j++) {
    Double_t nassign = 0;
    for (Int_t i = 0; i < k; i++) {
      Double_t dx = TMath::Abs(mx[i]-x[j]);
      if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx; 
      Double_t dy = TMath::Abs(my[i]-y[j]);
      if ((dx * dx / sigma2x[i] + dy * dy / sigma2y[i]) < 4.) nassign++;
    } // clusters
    if (nassign > 0) n2sigma++;
  } // data points
  return (n2sigma);
}
Commit	Line	Data
	1	/**************************************************************************
	2	* Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
	3	* *
	4	* Author: The ALICE Off-line Project. *
	5	* Contributors are mentioned in the code where appropriate. *
	6	* *
	7	* Permission to use, copy, modify and distribute this software and its *
	8	* documentation strictly for non-commercial purposes is hereby granted *
	9	* without fee, provided that the above copyright notice appears in all *
	10	* copies and that both the copyright notice and this permission notice *
	11	* appear in the supporting documentation. The authors make no claims *
	12	* about the suitability of this software for any purpose. It is *
	13	* provided "as is" without express or implied warranty. *
	14	**************************************************************************/
	15
	16	// Implemenatation of the K-Means Clustering Algorithm
	17	// See http://en.wikipedia.org/wiki/K-means_clustering and references therein.
	18	//
	19	// This particular implementation is the so called Soft K-means algorithm.
	20	// It has been modified to work on the cylindrical topology in eta-phi space.
	21	//
	22	// Author: Andreas Morsch (CERN)
	23	// andreas.morsch@cern.ch
	24
	25	#include "AliKMeansClustering.h"
	26	#include <TMath.h>
	27	#include <TRandom.h>
	28	#include <TH1F.h>
	29
	30	ClassImp(AliKMeansClustering)
	31
	32	Double_t AliKMeansClustering::fBeta = 10.;
	33
	34
	35	Int_t AliKMeansClustering::SoftKMeans(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* rk )
	36	{
	37	//
	38	// The soft K-means algorithm
	39	//
	40	Int_t i,j;
	41	//
	42	// (1) Initialisation of the k means
	43
	44	for (i = 0; i < k; i++) {
	45	mx[i] = 2. * TMath::Pi() * gRandom->Rndm();
	46	my[i] = -1. + 2. * gRandom->Rndm();
	47	}
	48
	49	//
	50	// (2a) The responsibilities
	51	Double_t** r = new Double_t*[n]; // responsibilities
	52	for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
	53	//
	54	// (2b) Normalisation
	55	Double_t* nr = new Double_t[n];
	56	// (3) Iterations
	57	Int_t nit = 0;
	58
	59	while(1) {
	60	nit++;
	61	//
	62	// Assignment step
	63	//
	64	for (j = 0; j < n; j++) {
	65	nr[j] = 0.;
	66	for (i = 0; i < k; i++) {
	67	r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
	68	nr[j] += r[j][i];
	69	} // mean i
	70	} // data point j
	71
	72	for (j = 0; j < n; j++) {
	73	for (i = 0; i < k; i++) {
	74	r[j][i] /= nr[j];
	75	} // mean i
	76	} // data point j
	77
	78	//
	79	// Update step
	80	Double_t di = 0;
	81
	82	for (i = 0; i < k; i++) {
	83	Double_t oldx = mx[i];
	84	Double_t oldy = my[i];
	85
	86	mx[i] = x[0];
	87	my[i] = y[0];
	88	rk[i] = r[0][i];
	89
	90	for (j = 1; j < n; j++) {
	91	Double_t xx = x[j];
	92	//
	93	// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
	94	// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
	95
	96	Double_t dx = mx[i] - x[j];
	97	if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
	98	if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
	99	mx[i] = mx[i] * rk[i] + r[j][i] * xx;
	100	my[i] = my[i] * rk[i] + r[j][i] * y[j];
	101	rk[i] += r[j][i];
	102	mx[i] /= rk[i];
	103	my[i] /= rk[i];
	104	if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
	105	if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
	106	} // Data
	107	di += d(mx[i], my[i], oldx, oldy);
	108	} // means
	109	//
	110	// ending condition
	111	if (di < 1.e-8 \|\| nit > 1000) break;
	112	} // while
	113
	114	// Clean-up
	115	delete[] nr;
	116	for (j = 0; j < n; j++) delete[] r[j];
	117	delete[] r;
	118	//
	119	return (nit < 1000);
	120
	121	}
	122
	123	Int_t AliKMeansClustering::SoftKMeans2(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* sigma2, Double_t* rk )
	124	{
	125	//
	126	// The soft K-means algorithm
	127	//
	128	Int_t i,j;
	129	//
	130	// (1) Initialisation of the k means using k-means++ recipe
	131	//
	132	OptimalInit(k, n, x, y, mx, my);
	133	//
	134	// (2a) The responsibilities
	135	Double_t** r = new Double_t*[n]; // responsibilities
	136	for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
	137	//
	138	// (2b) Normalisation
	139	Double_t* nr = new Double_t[n];
	140	//
	141	// (2c) Weights
	142	Double_t* pi = new Double_t[k];
	143	//
	144	//
	145	// (2d) Initialise the responsibilties and weights
	146	for (j = 0; j < n; j++) {
	147	nr[j] = 0.;
	148	for (i = 0; i < k; i++) {
	149	r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
	150	nr[j] += r[j][i];
	151	} // mean i
	152	} // data point j
	153
	154	for (i = 0; i < k; i++) {
	155	rk[i] = 0.;
	156	sigma2[i] = 1./fBeta;
	157
	158	for (j = 0; j < n; j++) {
	159	r[j][i] /= nr[j];
	160	rk[i] += r[j][i];
	161	} // mean i
	162	pi[i] = rk[i] / Double_t(n);
	163	} // data point j
	164	// (3) Iterations
	165	Int_t nit = 0;
	166	Bool_t rmovalStep = kFALSE;
	167
	168	while(1) {
	169	nit++;
	170	//
	171	// Assignment step
	172	//
	173	for (j = 0; j < n; j++) {
	174	nr[j] = 0.;
	175	for (i = 0; i < k; i++) {
	176	r[j][i] = pi[i] * TMath::Exp(- d(mx[i], my[i], x[j], y[j]) / sigma2[i] )
	177	/ (2. * sigma2[i] * TMath::Pi() * TMath::Pi());
	178	nr[j] += r[j][i];
	179	} // mean i
	180	} // data point j
	181
	182	for (i = 0; i < k; i++) {
	183	for (j = 0; j < n; j++) {
	184	r[j][i] /= nr[j];
	185	} // mean i
	186	} // data point j
	187
	188	//
	189	// Update step
	190	Double_t di = 0;
	191
	192	for (i = 0; i < k; i++) {
	193	Double_t oldx = mx[i];
	194	Double_t oldy = my[i];
	195
	196	mx[i] = x[0];
	197	my[i] = y[0];
	198	rk[i] = r[0][i];
	199	for (j = 1; j < n; j++) {
	200	Double_t xx = x[j];
	201	//
	202	// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
	203	// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
	204
	205	Double_t dx = mx[i] - x[j];
	206	if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
	207	if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
	208	if (r[j][i] > 1.e-15) {
	209	mx[i] = mx[i] * rk[i] + r[j][i] * xx;
	210	my[i] = my[i] * rk[i] + r[j][i] * y[j];
	211	rk[i] += r[j][i];
	212	mx[i] /= rk[i];
	213	my[i] /= rk[i];
	214	}
	215	if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
	216	if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
	217	} // Data
	218	di += d(mx[i], my[i], oldx, oldy);
	219
	220	} // means
	221	//
	222	// Sigma
	223	for (i = 0; i < k; i++) {
	224	sigma2[i] = 0.;
	225	for (j = 1; j < n; j++) {
	226	sigma2[i] += r[j][i] * d(mx[i], my[i], x[j], y[j]);
	227	} // Data
	228	sigma2[i] /= rk[i];
	229	if (sigma2[i] < 0.0025) sigma2[i] = 0.0025;
	230	} // Clusters
	231	//
	232	// Fractions
	233	for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n);
	234	//
	235	// ending condition
	236	if (di < 1.e-8 \|\| nit > 1000) break;
	237	} // while
	238
	239	// Clean-up
	240	delete[] nr;
	241	delete[] pi;
	242	for (j = 0; j < n; j++) delete[] r[j];
	243	delete[] r;
	244	//
	245	return (nit < 1000);
	246	}
	247
	248	Int_t AliKMeansClustering::SoftKMeans3(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my ,
	249	Double_t* sigmax2, Double_t* sigmay2, Double_t* rk )
	250	{
	251	//
	252	// The soft K-means algorithm
	253	//
	254	Int_t i,j;
	255	//
	256	// (1) Initialisation of the k means using k-means++ recipe
	257	//
	258	OptimalInit(k, n, x, y, mx, my);
	259	//
	260	// (2a) The responsibilities
	261	Double_t** r = new Double_t*[n]; // responsibilities
	262	for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
	263	//
	264	// (2b) Normalisation
	265	Double_t* nr = new Double_t[n];
	266	//
	267	// (2c) Weights
	268	Double_t* pi = new Double_t[k];
	269	//
	270	//
	271	// (2d) Initialise the responsibilties and weights
	272	for (j = 0; j < n; j++) {
	273	nr[j] = 0.;
	274	for (i = 0; i < k; i++) {
	275
	276	r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
	277	nr[j] += r[j][i];
	278	} // mean i
	279	} // data point j
	280
	281	for (i = 0; i < k; i++) {
	282	rk[i] = 0.;
	283	sigmax2[i] = 1./fBeta;
	284	sigmay2[i] = 1./fBeta;
	285
	286	for (j = 0; j < n; j++) {
	287	r[j][i] /= nr[j];
	288	rk[i] += r[j][i];
	289	} // mean i
	290	pi[i] = rk[i] / Double_t(n);
	291	} // data point j
	292	// (3) Iterations
	293	Int_t nit = 0;
	294	Bool_t rmovalStep = kFALSE;
	295
	296	while(1) {
	297	nit++;
	298	//
	299	// Assignment step
	300	//
	301	for (j = 0; j < n; j++) {
	302	nr[j] = 0.;
	303	for (i = 0; i < k; i++) {
	304
	305	Double_t dx = TMath::Abs(mx[i]-x[j]);
	306	if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
	307	Double_t dy = TMath::Abs(my[i]-y[j]);
	308	r[j][i] = pi[i] * TMath::Exp(-0.5 * (dx * dx / sigmax2[i] + dy * dy / sigmay2[i]))
	309	/ (2. * TMath::Sqrt(sigmax2[i] * sigmay2[i]) * TMath::Pi() * TMath::Pi());
	310	nr[j] += r[j][i];
	311	} // mean i
	312	} // data point j
	313
	314	for (i = 0; i < k; i++) {
	315	for (j = 0; j < n; j++) {
	316	r[j][i] /= nr[j];
	317	} // mean i
	318	} // data point j
	319
	320	//
	321	// Update step
	322	Double_t di = 0;
	323
	324	for (i = 0; i < k; i++) {
	325	Double_t oldx = mx[i];
	326	Double_t oldy = my[i];
	327
	328	mx[i] = x[0];
	329	my[i] = y[0];
	330	rk[i] = r[0][i];
	331	for (j = 1; j < n; j++) {
	332	Double_t xx = x[j];
	333	//
	334	// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
	335	// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
	336
	337	Double_t dx = mx[i] - x[j];
	338	if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
	339	if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
	340	if (r[j][i] > 1.e-15) {
	341	mx[i] = mx[i] * rk[i] + r[j][i] * xx;
	342	my[i] = my[i] * rk[i] + r[j][i] * y[j];
	343	rk[i] += r[j][i];
	344	mx[i] /= rk[i];
	345	my[i] /= rk[i];
	346	}
	347	if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
	348	if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
	349	} // Data
	350	di += d(mx[i], my[i], oldx, oldy);
	351
	352	} // means
	353	//
	354	// Sigma
	355	for (i = 0; i < k; i++) {
	356	sigmax2[i] = 0.;
	357	sigmay2[i] = 0.;
	358
	359	for (j = 1; j < n; j++) {
	360	Double_t dx = TMath::Abs(mx[i]-x[j]);
	361	if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
	362	Double_t dy = TMath::Abs(my[i]-y[j]);
	363	sigmax2[i] += r[j][i] * dx * dx;
	364	sigmay2[i] += r[j][i] * dy * dy;
	365	} // Data
	366	sigmax2[i] /= rk[i];
	367	sigmay2[i] /= rk[i];
	368	if (sigmax2[i] < 0.0025) sigmax2[i] = 0.0025;
	369	if (sigmay2[i] < 0.0025) sigmay2[i] = 0.0025;
	370	} // Clusters
	371	//
	372	// Fractions
	373	for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n);
	374	//
	375	// ending condition
	376	if (di < 1.e-8 \|\| nit > 1000) break;
	377	} // while
	378
	379	// Clean-up
	380	delete[] nr;
	381	delete[] pi;
	382	for (j = 0; j < n; j++) delete[] r[j];
	383	delete[] r;
	384	//
	385	return (nit < 1000);
	386	}
	387
	388	Double_t AliKMeansClustering::d(Double_t mx, Double_t my, Double_t x, Double_t y)
	389	{
	390	//
	391	// Distance definition
	392	// Quasi - Euclidian on the eta-phi cylinder
	393
	394	Double_t dx = TMath::Abs(mx-x);
	395	if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
	396
	397	return (0.5(dx dx + (my - y) * (my - y)));
	398	}
	399
	400
	401
	402	void AliKMeansClustering::OptimalInit(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my)
	403	{
	404	//
	405	// Optimal initialisation using the k-means++ algorithm
	406	// http://en.wikipedia.org/wiki/K-means%2B%2B
	407	//
	408	// k-means++ is an algorithm for choosing the initial values for k-means clustering in statistics and machine learning.
	409	// It was proposed in 2007 by David Arthur and Sergei Vassilvitskii as an approximation algorithm for the NP-hard k-means problem---
	410	// a way of avoiding the sometimes poor clusterings found by the standard k-means algorithm.
	411	//
	412	//
	413	TH1F d2("d2", "", n, -0.5, Float_t(n)-0.5);
	414	d2.Reset();
	415
	416	// (1) Chose first center as a random point among the input data.
	417	Int_t ir = Int_t(Float_t(n) * gRandom->Rndm());
	418	mx[0] = x[ir];
	419	my[0] = y[ir];
	420
	421	// (2) Iterate
	422	Int_t icl = 1;
	423	while(icl < k)
	424	{
	425	// find min distance to existing clusters
	426	for (Int_t j = 0; j < n; j++) {
	427	Double_t dmin = 1.e10;
	428	for (Int_t i = 0; i < icl; i++) {
	429	Double_t dij = d(mx[i], my[i], x[j], y[j]);
	430	if (dij < dmin) dmin = dij;
	431	} // clusters
	432	d2.Fill(Float_t(j), dmin);
	433	} // data points
	434	// select a new cluster from data points with probability ~d2
	435	ir = Int_t(d2.GetRandom() + 0.5);
	436	mx[icl] = x[ir];
	437	my[icl] = y[ir];
	438	icl++;
	439	} // icl
	440	}
	441
	442
	443	Int_t AliKMeansClustering::NTwoSigma(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my,
	444	Double_t* sigma2x, Double_t* sigma2y)
	445	{
	446	//
	447	// Counting the number of data points within 2sigma of a cluster
	448	//
	449	Int_t n2sigma = 0;
	450	for (Int_t j = 0; j < n; j++) {
	451	Double_t nassign = 0;
	452	for (Int_t i = 0; i < k; i++) {
	453	Double_t dx = TMath::Abs(mx[i]-x[j]);
	454	if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
	455	Double_t dy = TMath::Abs(my[i]-y[j]);
	456	if ((dx * dx / sigma2x[i] + dy * dy / sigma2y[i]) < 4.) nassign++;
	457	} // clusters
	458	if (nassign > 0) n2sigma++;
	459	} // data points
	460	return (n2sigma);
	461	}
	462
	463