update from Per Thomas
[u/mrichter/AliRoot.git] / JETAN / AliKMeansClustering.cxx
CommitLineData
70f2ce9d 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16// Implemenatation of the K-Means Clustering Algorithm
17// See http://en.wikipedia.org/wiki/K-means_clustering and references therein.
18//
19// This particular implementation is the so called Soft K-means algorithm.
20// It has been modified to work on the cylindrical topology in eta-phi space.
21//
22// Author: Andreas Morsch (CERN)
23// andreas.morsch@cern.ch
24
25#include "AliKMeansClustering.h"
26#include <TMath.h>
27#include <TRandom.h>
28
29ClassImp(AliKMeansClustering)
30
31Double_t AliKMeansClustering::fBeta = 10.;
32
33void AliKMeansClustering::SoftKMeans(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* rk )
34{
35 //
36 // The soft K-means algorithm
37 //
38 Int_t i,j;
39 //
40 // (1) Initialisation of the k means
41
42 for (i = 0; i < k; i++) {
43 mx[i] = 2. * TMath::Pi() * gRandom->Rndm();
44 my[i] = -1. + 2. * gRandom->Rndm();
45 }
46
47 //
48 // (2a) The responsibilities
49 Double_t** r = new Double_t*[n]; // responsibilities
50 for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
51 //
52 // (2b) Normalisation
53 Double_t* nr = new Double_t[n];
54
55 // (3) Iterations
56 Int_t nit = 0;
57
58 while(1) {
59 nit++;
60 //
61 // Assignment step
62 //
63 for (j = 0; j < n; j++) {
64 nr[j] = 0.;
65 for (i = 0; i < k; i++) {
66 r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
67 nr[j] += r[j][i];
68 } // mean i
69 } // data point j
70
71 for (j = 0; j < n; j++) {
72 for (i = 0; i < k; i++) {
73 r[j][i] /= nr[j];
74 } // mean i
75 } // data point j
76
77 //
78 // Update step
79 Double_t di = 0;
80
81 for (i = 0; i < k; i++) {
82 Double_t oldx = mx[i];
83 Double_t oldy = my[i];
84
85 mx[i] = x[0];
86 my[i] = y[0];
87 rk[i] = r[0][i];
88
89 for (j = 1; j < n; j++) {
90 Double_t xx = x[j];
91//
92// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
93// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
94
95 Double_t dx = mx[i] - x[j];
96 if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
97 if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
98 mx[i] = mx[i] * rk[i] + r[j][i] * xx;
99 my[i] = my[i] * rk[i] + r[j][i] * y[j];
100 rk[i] += r[j][i];
101 mx[i] /= rk[i];
102 my[i] /= rk[i];
103 if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
104 if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
105 } // Data
106 di += d(mx[i], my[i], oldx, oldy);
107 } // means
108 //
109 // ending condition
110 if (di < 1.e-8 || nit > 1000) break;
111 } // while
112
113// Clean-up
114 delete[] nr;
115 for (j = 0; j < n; j++) delete[] r[j];
116 delete[] r;
117}
118
77f42a25 119void AliKMeansClustering::SoftKMeans2(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* sigma2, Double_t* rk )
120{
121 //
122 // The soft K-means algorithm
123 //
124 Int_t i,j;
125 //
126 // (1) Initialisation of the k means
127 //
128 // (there is an optimized version for initialisation called kmeans++)
129 for (i = 0; i < k; i++) {
130 mx[i] = 2. * TMath::Pi() * gRandom->Rndm();
131 my[i] = -1. + 2. * gRandom->Rndm();
132 }
133
134 //
135 // (2a) The responsibilities
136 Double_t** r = new Double_t*[n]; // responsibilities
137 for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
138 //
139 // (2b) Normalisation
140 Double_t* nr = new Double_t[n];
141 //
142 // (2c) Weights
143 Double_t* pi = new Double_t[k];
144 //
145 // (2d) Initialise the responsibilties and weights
146 for (j = 0; j < n; j++) {
147 nr[j] = 0.;
148 for (i = 0; i < k; i++) {
149 r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
150 nr[j] += r[j][i];
151 } // mean i
152 } // data point j
153
154 for (i = 0; i < k; i++) {
155 rk[i] = 0.;
156 sigma2[i] = 1./fBeta;
157
158 for (j = 0; j < n; j++) {
159 r[j][i] /= nr[j];
160 rk[i] += r[j][i];
161 } // mean i
162 pi[i] = rk[i] / Double_t(n);
163 } // data point j
77f42a25 164 // (3) Iterations
165 Int_t nit = 0;
166
167 while(1) {
168 nit++;
169 //
170 // Assignment step
171 //
172 for (j = 0; j < n; j++) {
173 nr[j] = 0.;
174 for (i = 0; i < k; i++) {
5b3f00b2 175 r[j][i] = pi[i] * TMath::Exp(- d(mx[i], my[i], x[j], y[j]) / sigma2[i] )
176 / (2. * sigma2[i] * TMath::Pi() * TMath::Pi());
77f42a25 177 nr[j] += r[j][i];
178 } // mean i
179 } // data point j
180
181 for (i = 0; i < k; i++) {
182 for (j = 0; j < n; j++) {
183 r[j][i] /= nr[j];
184 } // mean i
185 } // data point j
186
187 //
188 // Update step
189 Double_t di = 0;
190
191 for (i = 0; i < k; i++) {
192 Double_t oldx = mx[i];
193 Double_t oldy = my[i];
194
195 mx[i] = x[0];
196 my[i] = y[0];
197 rk[i] = r[0][i];
198
199 for (j = 1; j < n; j++) {
200 Double_t xx = x[j];
201//
202// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
203// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
204
205 Double_t dx = mx[i] - x[j];
206 if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
207 if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
208 mx[i] = mx[i] * rk[i] + r[j][i] * xx;
209 my[i] = my[i] * rk[i] + r[j][i] * y[j];
210 rk[i] += r[j][i];
211 mx[i] /= rk[i];
212 my[i] /= rk[i];
f2b222ea 213
77f42a25 214 if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
215 if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
216 } // Data
217 di += d(mx[i], my[i], oldx, oldy);
f2b222ea 218
77f42a25 219 } // means
220 //
221 // Sigma
222 for (i = 0; i < k; i++) {
223 sigma2[i] = 0.;
224 for (j = 1; j < n; j++) {
5b3f00b2 225 sigma2[i] += r[j][i] * d(mx[i], my[i], x[j], y[j]);
77f42a25 226 } // Data
227 sigma2[i] /= rk[i];
f2b222ea 228 if (sigma2[i] < 0.025) sigma2[i] = 0.025;
77f42a25 229 } // Clusters
230 //
231 // Fractions
232 for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n);
233 //
234// ending condition
235 if (di < 1.e-8 || nit > 1000) break;
236 } // while
237
238// Clean-up
239 delete[] nr;
240 delete[] pi;
241 for (j = 0; j < n; j++) delete[] r[j];
242 delete[] r;
243}
244
70f2ce9d 245Double_t AliKMeansClustering::d(Double_t mx, Double_t my, Double_t x, Double_t y)
246{
247 //
248 // Distance definition
249 // Quasi - Euclidian on the eta-phi cylinder
250
251 Double_t dx = TMath::Abs(mx-x);
252 if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
253
8422e0a8 254 return (0.5*(dx * dx + (my - y) * (my - y)));
70f2ce9d 255}