output of the slice tracker is compressed in order to minimize the network traffic
[u/mrichter/AliRoot.git] / JETAN / AliKMeansClustering.cxx
CommitLineData
70f2ce9d 1/**************************************************************************
2 * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. *
3 * *
4 * Author: The ALICE Off-line Project. *
5 * Contributors are mentioned in the code where appropriate. *
6 * *
7 * Permission to use, copy, modify and distribute this software and its *
8 * documentation strictly for non-commercial purposes is hereby granted *
9 * without fee, provided that the above copyright notice appears in all *
10 * copies and that both the copyright notice and this permission notice *
11 * appear in the supporting documentation. The authors make no claims *
12 * about the suitability of this software for any purpose. It is *
13 * provided "as is" without express or implied warranty. *
14 **************************************************************************/
15
16// Implemenatation of the K-Means Clustering Algorithm
17// See http://en.wikipedia.org/wiki/K-means_clustering and references therein.
18//
19// This particular implementation is the so called Soft K-means algorithm.
20// It has been modified to work on the cylindrical topology in eta-phi space.
21//
22// Author: Andreas Morsch (CERN)
23// andreas.morsch@cern.ch
24
25#include "AliKMeansClustering.h"
26#include <TMath.h>
27#include <TRandom.h>
5145d1b5 28#include <TH1F.h>
70f2ce9d 29
30ClassImp(AliKMeansClustering)
31
32Double_t AliKMeansClustering::fBeta = 10.;
5145d1b5 33
70f2ce9d 34
b7aa0494 35Int_t AliKMeansClustering::SoftKMeans(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* rk )
70f2ce9d 36{
37 //
38 // The soft K-means algorithm
39 //
40 Int_t i,j;
41 //
42 // (1) Initialisation of the k means
43
44 for (i = 0; i < k; i++) {
45 mx[i] = 2. * TMath::Pi() * gRandom->Rndm();
46 my[i] = -1. + 2. * gRandom->Rndm();
47 }
48
49 //
50 // (2a) The responsibilities
51 Double_t** r = new Double_t*[n]; // responsibilities
52 for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
53 //
54 // (2b) Normalisation
5145d1b5 55 Double_t* nr = new Double_t[n];
70f2ce9d 56 // (3) Iterations
57 Int_t nit = 0;
58
59 while(1) {
60 nit++;
61 //
62 // Assignment step
63 //
64 for (j = 0; j < n; j++) {
65 nr[j] = 0.;
66 for (i = 0; i < k; i++) {
67 r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
68 nr[j] += r[j][i];
69 } // mean i
70 } // data point j
71
72 for (j = 0; j < n; j++) {
73 for (i = 0; i < k; i++) {
74 r[j][i] /= nr[j];
75 } // mean i
76 } // data point j
77
78 //
79 // Update step
80 Double_t di = 0;
81
82 for (i = 0; i < k; i++) {
83 Double_t oldx = mx[i];
84 Double_t oldy = my[i];
85
86 mx[i] = x[0];
87 my[i] = y[0];
88 rk[i] = r[0][i];
89
90 for (j = 1; j < n; j++) {
91 Double_t xx = x[j];
92//
93// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
94// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
95
96 Double_t dx = mx[i] - x[j];
97 if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
98 if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
99 mx[i] = mx[i] * rk[i] + r[j][i] * xx;
100 my[i] = my[i] * rk[i] + r[j][i] * y[j];
101 rk[i] += r[j][i];
102 mx[i] /= rk[i];
103 my[i] /= rk[i];
104 if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
105 if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
106 } // Data
107 di += d(mx[i], my[i], oldx, oldy);
108 } // means
109 //
110 // ending condition
111 if (di < 1.e-8 || nit > 1000) break;
112 } // while
113
114// Clean-up
115 delete[] nr;
116 for (j = 0; j < n; j++) delete[] r[j];
117 delete[] r;
b7aa0494 118//
119 return (nit < 1000);
120
70f2ce9d 121}
122
b7aa0494 123Int_t AliKMeansClustering::SoftKMeans2(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* sigma2, Double_t* rk )
77f42a25 124{
125 //
126 // The soft K-means algorithm
127 //
128 Int_t i,j;
129 //
5145d1b5 130 // (1) Initialisation of the k means using k-means++ recipe
77f42a25 131 //
5145d1b5 132 OptimalInit(k, n, x, y, mx, my);
77f42a25 133 //
134 // (2a) The responsibilities
135 Double_t** r = new Double_t*[n]; // responsibilities
136 for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
137 //
138 // (2b) Normalisation
139 Double_t* nr = new Double_t[n];
140 //
141 // (2c) Weights
142 Double_t* pi = new Double_t[k];
143 //
5145d1b5 144 //
77f42a25 145 // (2d) Initialise the responsibilties and weights
146 for (j = 0; j < n; j++) {
147 nr[j] = 0.;
148 for (i = 0; i < k; i++) {
149 r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
150 nr[j] += r[j][i];
151 } // mean i
152 } // data point j
153
154 for (i = 0; i < k; i++) {
155 rk[i] = 0.;
156 sigma2[i] = 1./fBeta;
157
158 for (j = 0; j < n; j++) {
159 r[j][i] /= nr[j];
160 rk[i] += r[j][i];
161 } // mean i
162 pi[i] = rk[i] / Double_t(n);
163 } // data point j
77f42a25 164 // (3) Iterations
165 Int_t nit = 0;
5145d1b5 166 Bool_t rmovalStep = kFALSE;
167
77f42a25 168 while(1) {
169 nit++;
170 //
171 // Assignment step
172 //
173 for (j = 0; j < n; j++) {
174 nr[j] = 0.;
175 for (i = 0; i < k; i++) {
5b3f00b2 176 r[j][i] = pi[i] * TMath::Exp(- d(mx[i], my[i], x[j], y[j]) / sigma2[i] )
177 / (2. * sigma2[i] * TMath::Pi() * TMath::Pi());
77f42a25 178 nr[j] += r[j][i];
179 } // mean i
180 } // data point j
181
182 for (i = 0; i < k; i++) {
183 for (j = 0; j < n; j++) {
184 r[j][i] /= nr[j];
185 } // mean i
186 } // data point j
187
188 //
189 // Update step
190 Double_t di = 0;
191
192 for (i = 0; i < k; i++) {
193 Double_t oldx = mx[i];
194 Double_t oldy = my[i];
195
196 mx[i] = x[0];
197 my[i] = y[0];
198 rk[i] = r[0][i];
77f42a25 199 for (j = 1; j < n; j++) {
200 Double_t xx = x[j];
201//
202// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
203// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
204
205 Double_t dx = mx[i] - x[j];
206 if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
207 if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
987ff6ef 208 if (r[j][i] > 1.e-15) {
209 mx[i] = mx[i] * rk[i] + r[j][i] * xx;
210 my[i] = my[i] * rk[i] + r[j][i] * y[j];
211 rk[i] += r[j][i];
212 mx[i] /= rk[i];
213 my[i] /= rk[i];
214 }
77f42a25 215 if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
216 if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
217 } // Data
218 di += d(mx[i], my[i], oldx, oldy);
f2b222ea 219
77f42a25 220 } // means
221 //
222 // Sigma
223 for (i = 0; i < k; i++) {
224 sigma2[i] = 0.;
225 for (j = 1; j < n; j++) {
5b3f00b2 226 sigma2[i] += r[j][i] * d(mx[i], my[i], x[j], y[j]);
77f42a25 227 } // Data
228 sigma2[i] /= rk[i];
b7aa0494 229 if (sigma2[i] < 0.0025) sigma2[i] = 0.0025;
77f42a25 230 } // Clusters
231 //
232 // Fractions
233 for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n);
234 //
235// ending condition
236 if (di < 1.e-8 || nit > 1000) break;
237 } // while
238
239// Clean-up
240 delete[] nr;
241 delete[] pi;
242 for (j = 0; j < n; j++) delete[] r[j];
243 delete[] r;
b7aa0494 244//
245 return (nit < 1000);
77f42a25 246}
247
d31bcb3b 248Int_t AliKMeansClustering::SoftKMeans3(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my ,
249 Double_t* sigmax2, Double_t* sigmay2, Double_t* rk )
250{
251 //
252 // The soft K-means algorithm
253 //
254 Int_t i,j;
255 //
256 // (1) Initialisation of the k means using k-means++ recipe
257 //
258 OptimalInit(k, n, x, y, mx, my);
259 //
260 // (2a) The responsibilities
261 Double_t** r = new Double_t*[n]; // responsibilities
262 for (j = 0; j < n; j++) {r[j] = new Double_t[k];}
263 //
264 // (2b) Normalisation
265 Double_t* nr = new Double_t[n];
266 //
267 // (2c) Weights
268 Double_t* pi = new Double_t[k];
269 //
270 //
271 // (2d) Initialise the responsibilties and weights
272 for (j = 0; j < n; j++) {
273 nr[j] = 0.;
274 for (i = 0; i < k; i++) {
275
276 r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j]));
277 nr[j] += r[j][i];
278 } // mean i
279 } // data point j
280
281 for (i = 0; i < k; i++) {
282 rk[i] = 0.;
283 sigmax2[i] = 1./fBeta;
284 sigmay2[i] = 1./fBeta;
285
286 for (j = 0; j < n; j++) {
287 r[j][i] /= nr[j];
288 rk[i] += r[j][i];
289 } // mean i
290 pi[i] = rk[i] / Double_t(n);
291 } // data point j
292 // (3) Iterations
293 Int_t nit = 0;
294 Bool_t rmovalStep = kFALSE;
295
296 while(1) {
297 nit++;
298 //
299 // Assignment step
300 //
301 for (j = 0; j < n; j++) {
302 nr[j] = 0.;
303 for (i = 0; i < k; i++) {
304
305 Double_t dx = TMath::Abs(mx[i]-x[j]);
306 if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
307 Double_t dy = TMath::Abs(my[i]-y[j]);
308 r[j][i] = pi[i] * TMath::Exp(-0.5 * (dx * dx / sigmax2[i] + dy * dy / sigmay2[i]))
309 / (2. * TMath::Sqrt(sigmax2[i] * sigmay2[i]) * TMath::Pi() * TMath::Pi());
310 nr[j] += r[j][i];
311 } // mean i
312 } // data point j
313
314 for (i = 0; i < k; i++) {
315 for (j = 0; j < n; j++) {
316 r[j][i] /= nr[j];
317 } // mean i
318 } // data point j
319
320 //
321 // Update step
322 Double_t di = 0;
323
324 for (i = 0; i < k; i++) {
325 Double_t oldx = mx[i];
326 Double_t oldy = my[i];
327
328 mx[i] = x[0];
329 my[i] = y[0];
330 rk[i] = r[0][i];
331 for (j = 1; j < n; j++) {
332 Double_t xx = x[j];
333//
334// Here we have to take into acount the cylinder topology where phi is defined mod 2xpi
335// If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi
336
337 Double_t dx = mx[i] - x[j];
338 if (dx > TMath::Pi()) xx += 2. * TMath::Pi();
339 if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi();
340 if (r[j][i] > 1.e-15) {
341 mx[i] = mx[i] * rk[i] + r[j][i] * xx;
342 my[i] = my[i] * rk[i] + r[j][i] * y[j];
343 rk[i] += r[j][i];
344 mx[i] /= rk[i];
345 my[i] /= rk[i];
346 }
347 if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi();
348 if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi();
349 } // Data
350 di += d(mx[i], my[i], oldx, oldy);
351
352 } // means
353 //
354 // Sigma
355 for (i = 0; i < k; i++) {
356 sigmax2[i] = 0.;
357 sigmay2[i] = 0.;
358
359 for (j = 1; j < n; j++) {
360 Double_t dx = TMath::Abs(mx[i]-x[j]);
361 if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
362 Double_t dy = TMath::Abs(my[i]-y[j]);
363 sigmax2[i] += r[j][i] * dx * dx;
364 sigmay2[i] += r[j][i] * dy * dy;
365 } // Data
366 sigmax2[i] /= rk[i];
367 sigmay2[i] /= rk[i];
368 if (sigmax2[i] < 0.0025) sigmax2[i] = 0.0025;
369 if (sigmay2[i] < 0.0025) sigmay2[i] = 0.0025;
370 } // Clusters
371 //
372 // Fractions
373 for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n);
374 //
375// ending condition
376 if (di < 1.e-8 || nit > 1000) break;
377 } // while
378
379// Clean-up
380 delete[] nr;
381 delete[] pi;
382 for (j = 0; j < n; j++) delete[] r[j];
383 delete[] r;
384//
385 return (nit < 1000);
386}
387
70f2ce9d 388Double_t AliKMeansClustering::d(Double_t mx, Double_t my, Double_t x, Double_t y)
389{
390 //
391 // Distance definition
392 // Quasi - Euclidian on the eta-phi cylinder
393
394 Double_t dx = TMath::Abs(mx-x);
395 if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx;
396
8422e0a8 397 return (0.5*(dx * dx + (my - y) * (my - y)));
70f2ce9d 398}
5145d1b5 399
400
401
402void AliKMeansClustering::OptimalInit(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my)
403{
404 //
405 // Optimal initialisation using the k-means++ algorithm
406 // http://en.wikipedia.org/wiki/K-means%2B%2B
407 //
408 // k-means++ is an algorithm for choosing the initial values for k-means clustering in statistics and machine learning.
409 // It was proposed in 2007 by David Arthur and Sergei Vassilvitskii as an approximation algorithm for the NP-hard k-means problem---
410 // a way of avoiding the sometimes poor clusterings found by the standard k-means algorithm.
411 //
412 //
413 TH1F d2("d2", "", n, -0.5, Float_t(n)-0.5);
414 d2.Reset();
415
416 // (1) Chose first center as a random point among the input data.
417 Int_t ir = Int_t(Float_t(n) * gRandom->Rndm());
418 mx[0] = x[ir];
419 my[0] = y[ir];
420
421 // (2) Iterate
422 Int_t icl = 1;
423 while(icl < k)
424 {
425 // find min distance to existing clusters
426 for (Int_t j = 0; j < n; j++) {
427 Double_t dmin = 1.e10;
428 for (Int_t i = 0; i < icl; i++) {
429 Double_t dij = d(mx[i], my[i], x[j], y[j]);
430 if (dij < dmin) dmin = dij;
431 } // clusters
432 d2.Fill(Float_t(j), dmin);
433 } // data points
434 // select a new cluster from data points with probability ~d2
987ff6ef 435 ir = Int_t(d2.GetRandom() + 0.5);
5145d1b5 436 mx[icl] = x[ir];
437 my[icl] = y[ir];
438 icl++;
439 } // icl
440}
441
442
19c5a36a 443ClassImp(AliKMeansResult)
444
445
446
447AliKMeansResult::AliKMeansResult(Int_t k):
448 TObject(),
449 fK(k),
450 fMx (new Double_t[k]),
451 fMy (new Double_t[k]),
452 fSigma2(new Double_t[k]),
453 fRk (new Double_t[k]),
454 fTarget(new Double_t[k]),
455 fInd (new Int_t[k])
fe366828 456{
19c5a36a 457// Constructor
458}
459
460AliKMeansResult::~AliKMeansResult()
461{
462// Destructor
463 delete[] fMx;
464 delete[] fMy;
465 delete[] fSigma2;
466 delete[] fRk;
467 delete[] fInd;
468 delete[] fTarget;
469}
470
471void AliKMeansResult::Sort()
472{
473// Sort clusters
474 for (Int_t i = 0; i < fK; i++) {
475 if (fRk[i] > 1.) fRk[i] /= fSigma2[i];
476 else fRk[i] = 0.;
477 }
478
479 TMath::Sort(fK, fRk, fInd);
480
fe366828 481}
482
19c5a36a 483void AliKMeansResult::Sort(Int_t n, Double_t* x, Double_t* y)
484{
485 // Build target array and sort
486 for (Int_t i = 0; i < fK; i++)
487 {
488 Int_t nc = 0;
489 for (Int_t j = 0; j < n; j++)
490 {
491 if (2. * AliKMeansClustering::d(fMx[i], fMy[i], x[j], y[j]) < fSigma2[i]) nc++;
492 }
493 if (nc > 1) {
494 fTarget[i] = Double_t(nc) / fSigma2[i];
495 } else {
496 fTarget[i] = 0.;
497 }
498 }
fe366828 499
19c5a36a 500 TMath::Sort(fK, fTarget, fInd);
501}