70f2ce9d |
1 | /************************************************************************** |
2 | * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. * |
3 | * * |
4 | * Author: The ALICE Off-line Project. * |
5 | * Contributors are mentioned in the code where appropriate. * |
6 | * * |
7 | * Permission to use, copy, modify and distribute this software and its * |
8 | * documentation strictly for non-commercial purposes is hereby granted * |
9 | * without fee, provided that the above copyright notice appears in all * |
10 | * copies and that both the copyright notice and this permission notice * |
11 | * appear in the supporting documentation. The authors make no claims * |
12 | * about the suitability of this software for any purpose. It is * |
13 | * provided "as is" without express or implied warranty. * |
14 | **************************************************************************/ |
15 | |
16 | // Implemenatation of the K-Means Clustering Algorithm |
17 | // See http://en.wikipedia.org/wiki/K-means_clustering and references therein. |
18 | // |
19 | // This particular implementation is the so called Soft K-means algorithm. |
20 | // It has been modified to work on the cylindrical topology in eta-phi space. |
21 | // |
22 | // Author: Andreas Morsch (CERN) |
23 | // andreas.morsch@cern.ch |
24 | |
25 | #include "AliKMeansClustering.h" |
26 | #include <TMath.h> |
27 | #include <TRandom.h> |
5145d1b5 |
28 | #include <TH1F.h> |
70f2ce9d |
29 | |
30 | ClassImp(AliKMeansClustering) |
31 | |
32 | Double_t AliKMeansClustering::fBeta = 10.; |
5145d1b5 |
33 | |
70f2ce9d |
34 | |
b7aa0494 |
35 | Int_t AliKMeansClustering::SoftKMeans(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* rk ) |
70f2ce9d |
36 | { |
37 | // |
38 | // The soft K-means algorithm |
39 | // |
40 | Int_t i,j; |
41 | // |
42 | // (1) Initialisation of the k means |
43 | |
44 | for (i = 0; i < k; i++) { |
45 | mx[i] = 2. * TMath::Pi() * gRandom->Rndm(); |
46 | my[i] = -1. + 2. * gRandom->Rndm(); |
47 | } |
48 | |
49 | // |
50 | // (2a) The responsibilities |
51 | Double_t** r = new Double_t*[n]; // responsibilities |
52 | for (j = 0; j < n; j++) {r[j] = new Double_t[k];} |
53 | // |
54 | // (2b) Normalisation |
5145d1b5 |
55 | Double_t* nr = new Double_t[n]; |
70f2ce9d |
56 | // (3) Iterations |
57 | Int_t nit = 0; |
58 | |
59 | while(1) { |
60 | nit++; |
61 | // |
62 | // Assignment step |
63 | // |
64 | for (j = 0; j < n; j++) { |
65 | nr[j] = 0.; |
66 | for (i = 0; i < k; i++) { |
67 | r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j])); |
68 | nr[j] += r[j][i]; |
69 | } // mean i |
70 | } // data point j |
71 | |
72 | for (j = 0; j < n; j++) { |
73 | for (i = 0; i < k; i++) { |
74 | r[j][i] /= nr[j]; |
75 | } // mean i |
76 | } // data point j |
77 | |
78 | // |
79 | // Update step |
80 | Double_t di = 0; |
81 | |
82 | for (i = 0; i < k; i++) { |
83 | Double_t oldx = mx[i]; |
84 | Double_t oldy = my[i]; |
85 | |
86 | mx[i] = x[0]; |
87 | my[i] = y[0]; |
88 | rk[i] = r[0][i]; |
89 | |
90 | for (j = 1; j < n; j++) { |
91 | Double_t xx = x[j]; |
92 | // |
93 | // Here we have to take into acount the cylinder topology where phi is defined mod 2xpi |
94 | // If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi |
95 | |
96 | Double_t dx = mx[i] - x[j]; |
97 | if (dx > TMath::Pi()) xx += 2. * TMath::Pi(); |
98 | if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi(); |
99 | mx[i] = mx[i] * rk[i] + r[j][i] * xx; |
100 | my[i] = my[i] * rk[i] + r[j][i] * y[j]; |
101 | rk[i] += r[j][i]; |
102 | mx[i] /= rk[i]; |
103 | my[i] /= rk[i]; |
104 | if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi(); |
105 | if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi(); |
106 | } // Data |
107 | di += d(mx[i], my[i], oldx, oldy); |
108 | } // means |
109 | // |
110 | // ending condition |
111 | if (di < 1.e-8 || nit > 1000) break; |
112 | } // while |
113 | |
114 | // Clean-up |
115 | delete[] nr; |
116 | for (j = 0; j < n; j++) delete[] r[j]; |
117 | delete[] r; |
b7aa0494 |
118 | // |
119 | return (nit < 1000); |
120 | |
70f2ce9d |
121 | } |
122 | |
b7aa0494 |
123 | Int_t AliKMeansClustering::SoftKMeans2(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* sigma2, Double_t* rk ) |
77f42a25 |
124 | { |
125 | // |
126 | // The soft K-means algorithm |
127 | // |
128 | Int_t i,j; |
129 | // |
5145d1b5 |
130 | // (1) Initialisation of the k means using k-means++ recipe |
77f42a25 |
131 | // |
5145d1b5 |
132 | OptimalInit(k, n, x, y, mx, my); |
77f42a25 |
133 | // |
134 | // (2a) The responsibilities |
135 | Double_t** r = new Double_t*[n]; // responsibilities |
136 | for (j = 0; j < n; j++) {r[j] = new Double_t[k];} |
137 | // |
138 | // (2b) Normalisation |
139 | Double_t* nr = new Double_t[n]; |
140 | // |
141 | // (2c) Weights |
142 | Double_t* pi = new Double_t[k]; |
143 | // |
5145d1b5 |
144 | // |
77f42a25 |
145 | // (2d) Initialise the responsibilties and weights |
146 | for (j = 0; j < n; j++) { |
147 | nr[j] = 0.; |
148 | for (i = 0; i < k; i++) { |
149 | r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j])); |
150 | nr[j] += r[j][i]; |
151 | } // mean i |
152 | } // data point j |
153 | |
154 | for (i = 0; i < k; i++) { |
155 | rk[i] = 0.; |
156 | sigma2[i] = 1./fBeta; |
157 | |
158 | for (j = 0; j < n; j++) { |
159 | r[j][i] /= nr[j]; |
160 | rk[i] += r[j][i]; |
161 | } // mean i |
162 | pi[i] = rk[i] / Double_t(n); |
163 | } // data point j |
77f42a25 |
164 | // (3) Iterations |
165 | Int_t nit = 0; |
5145d1b5 |
166 | Bool_t rmovalStep = kFALSE; |
167 | |
77f42a25 |
168 | while(1) { |
169 | nit++; |
170 | // |
171 | // Assignment step |
172 | // |
173 | for (j = 0; j < n; j++) { |
174 | nr[j] = 0.; |
175 | for (i = 0; i < k; i++) { |
5b3f00b2 |
176 | r[j][i] = pi[i] * TMath::Exp(- d(mx[i], my[i], x[j], y[j]) / sigma2[i] ) |
177 | / (2. * sigma2[i] * TMath::Pi() * TMath::Pi()); |
77f42a25 |
178 | nr[j] += r[j][i]; |
179 | } // mean i |
180 | } // data point j |
181 | |
182 | for (i = 0; i < k; i++) { |
183 | for (j = 0; j < n; j++) { |
184 | r[j][i] /= nr[j]; |
185 | } // mean i |
186 | } // data point j |
187 | |
188 | // |
189 | // Update step |
190 | Double_t di = 0; |
191 | |
192 | for (i = 0; i < k; i++) { |
193 | Double_t oldx = mx[i]; |
194 | Double_t oldy = my[i]; |
195 | |
196 | mx[i] = x[0]; |
197 | my[i] = y[0]; |
198 | rk[i] = r[0][i]; |
77f42a25 |
199 | for (j = 1; j < n; j++) { |
200 | Double_t xx = x[j]; |
201 | // |
202 | // Here we have to take into acount the cylinder topology where phi is defined mod 2xpi |
203 | // If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi |
204 | |
205 | Double_t dx = mx[i] - x[j]; |
206 | if (dx > TMath::Pi()) xx += 2. * TMath::Pi(); |
207 | if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi(); |
987ff6ef |
208 | if (r[j][i] > 1.e-15) { |
209 | mx[i] = mx[i] * rk[i] + r[j][i] * xx; |
210 | my[i] = my[i] * rk[i] + r[j][i] * y[j]; |
211 | rk[i] += r[j][i]; |
212 | mx[i] /= rk[i]; |
213 | my[i] /= rk[i]; |
214 | } |
77f42a25 |
215 | if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi(); |
216 | if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi(); |
217 | } // Data |
218 | di += d(mx[i], my[i], oldx, oldy); |
f2b222ea |
219 | |
77f42a25 |
220 | } // means |
221 | // |
222 | // Sigma |
223 | for (i = 0; i < k; i++) { |
224 | sigma2[i] = 0.; |
225 | for (j = 1; j < n; j++) { |
5b3f00b2 |
226 | sigma2[i] += r[j][i] * d(mx[i], my[i], x[j], y[j]); |
77f42a25 |
227 | } // Data |
228 | sigma2[i] /= rk[i]; |
b7aa0494 |
229 | if (sigma2[i] < 0.0025) sigma2[i] = 0.0025; |
77f42a25 |
230 | } // Clusters |
231 | // |
232 | // Fractions |
233 | for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n); |
234 | // |
235 | // ending condition |
236 | if (di < 1.e-8 || nit > 1000) break; |
237 | } // while |
238 | |
239 | // Clean-up |
240 | delete[] nr; |
241 | delete[] pi; |
242 | for (j = 0; j < n; j++) delete[] r[j]; |
243 | delete[] r; |
b7aa0494 |
244 | // |
245 | return (nit < 1000); |
77f42a25 |
246 | } |
247 | |
d31bcb3b |
248 | Int_t AliKMeansClustering::SoftKMeans3(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , |
249 | Double_t* sigmax2, Double_t* sigmay2, Double_t* rk ) |
250 | { |
251 | // |
252 | // The soft K-means algorithm |
253 | // |
254 | Int_t i,j; |
255 | // |
256 | // (1) Initialisation of the k means using k-means++ recipe |
257 | // |
258 | OptimalInit(k, n, x, y, mx, my); |
259 | // |
260 | // (2a) The responsibilities |
261 | Double_t** r = new Double_t*[n]; // responsibilities |
262 | for (j = 0; j < n; j++) {r[j] = new Double_t[k];} |
263 | // |
264 | // (2b) Normalisation |
265 | Double_t* nr = new Double_t[n]; |
266 | // |
267 | // (2c) Weights |
268 | Double_t* pi = new Double_t[k]; |
269 | // |
270 | // |
271 | // (2d) Initialise the responsibilties and weights |
272 | for (j = 0; j < n; j++) { |
273 | nr[j] = 0.; |
274 | for (i = 0; i < k; i++) { |
275 | |
276 | r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j])); |
277 | nr[j] += r[j][i]; |
278 | } // mean i |
279 | } // data point j |
280 | |
281 | for (i = 0; i < k; i++) { |
282 | rk[i] = 0.; |
283 | sigmax2[i] = 1./fBeta; |
284 | sigmay2[i] = 1./fBeta; |
285 | |
286 | for (j = 0; j < n; j++) { |
287 | r[j][i] /= nr[j]; |
288 | rk[i] += r[j][i]; |
289 | } // mean i |
290 | pi[i] = rk[i] / Double_t(n); |
291 | } // data point j |
292 | // (3) Iterations |
293 | Int_t nit = 0; |
294 | Bool_t rmovalStep = kFALSE; |
295 | |
296 | while(1) { |
297 | nit++; |
298 | // |
299 | // Assignment step |
300 | // |
301 | for (j = 0; j < n; j++) { |
302 | nr[j] = 0.; |
303 | for (i = 0; i < k; i++) { |
304 | |
305 | Double_t dx = TMath::Abs(mx[i]-x[j]); |
306 | if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx; |
307 | Double_t dy = TMath::Abs(my[i]-y[j]); |
308 | r[j][i] = pi[i] * TMath::Exp(-0.5 * (dx * dx / sigmax2[i] + dy * dy / sigmay2[i])) |
309 | / (2. * TMath::Sqrt(sigmax2[i] * sigmay2[i]) * TMath::Pi() * TMath::Pi()); |
310 | nr[j] += r[j][i]; |
311 | } // mean i |
312 | } // data point j |
313 | |
314 | for (i = 0; i < k; i++) { |
315 | for (j = 0; j < n; j++) { |
316 | r[j][i] /= nr[j]; |
317 | } // mean i |
318 | } // data point j |
319 | |
320 | // |
321 | // Update step |
322 | Double_t di = 0; |
323 | |
324 | for (i = 0; i < k; i++) { |
325 | Double_t oldx = mx[i]; |
326 | Double_t oldy = my[i]; |
327 | |
328 | mx[i] = x[0]; |
329 | my[i] = y[0]; |
330 | rk[i] = r[0][i]; |
331 | for (j = 1; j < n; j++) { |
332 | Double_t xx = x[j]; |
333 | // |
334 | // Here we have to take into acount the cylinder topology where phi is defined mod 2xpi |
335 | // If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi |
336 | |
337 | Double_t dx = mx[i] - x[j]; |
338 | if (dx > TMath::Pi()) xx += 2. * TMath::Pi(); |
339 | if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi(); |
340 | if (r[j][i] > 1.e-15) { |
341 | mx[i] = mx[i] * rk[i] + r[j][i] * xx; |
342 | my[i] = my[i] * rk[i] + r[j][i] * y[j]; |
343 | rk[i] += r[j][i]; |
344 | mx[i] /= rk[i]; |
345 | my[i] /= rk[i]; |
346 | } |
347 | if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi(); |
348 | if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi(); |
349 | } // Data |
350 | di += d(mx[i], my[i], oldx, oldy); |
351 | |
352 | } // means |
353 | // |
354 | // Sigma |
355 | for (i = 0; i < k; i++) { |
356 | sigmax2[i] = 0.; |
357 | sigmay2[i] = 0.; |
358 | |
359 | for (j = 1; j < n; j++) { |
360 | Double_t dx = TMath::Abs(mx[i]-x[j]); |
361 | if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx; |
362 | Double_t dy = TMath::Abs(my[i]-y[j]); |
363 | sigmax2[i] += r[j][i] * dx * dx; |
364 | sigmay2[i] += r[j][i] * dy * dy; |
365 | } // Data |
366 | sigmax2[i] /= rk[i]; |
367 | sigmay2[i] /= rk[i]; |
368 | if (sigmax2[i] < 0.0025) sigmax2[i] = 0.0025; |
369 | if (sigmay2[i] < 0.0025) sigmay2[i] = 0.0025; |
370 | } // Clusters |
371 | // |
372 | // Fractions |
373 | for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n); |
374 | // |
375 | // ending condition |
376 | if (di < 1.e-8 || nit > 1000) break; |
377 | } // while |
378 | |
379 | // Clean-up |
380 | delete[] nr; |
381 | delete[] pi; |
382 | for (j = 0; j < n; j++) delete[] r[j]; |
383 | delete[] r; |
384 | // |
385 | return (nit < 1000); |
386 | } |
387 | |
70f2ce9d |
388 | Double_t AliKMeansClustering::d(Double_t mx, Double_t my, Double_t x, Double_t y) |
389 | { |
390 | // |
391 | // Distance definition |
392 | // Quasi - Euclidian on the eta-phi cylinder |
393 | |
394 | Double_t dx = TMath::Abs(mx-x); |
395 | if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx; |
396 | |
8422e0a8 |
397 | return (0.5*(dx * dx + (my - y) * (my - y))); |
70f2ce9d |
398 | } |
5145d1b5 |
399 | |
400 | |
401 | |
402 | void AliKMeansClustering::OptimalInit(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my) |
403 | { |
404 | // |
405 | // Optimal initialisation using the k-means++ algorithm |
406 | // http://en.wikipedia.org/wiki/K-means%2B%2B |
407 | // |
408 | // k-means++ is an algorithm for choosing the initial values for k-means clustering in statistics and machine learning. |
409 | // It was proposed in 2007 by David Arthur and Sergei Vassilvitskii as an approximation algorithm for the NP-hard k-means problem--- |
410 | // a way of avoiding the sometimes poor clusterings found by the standard k-means algorithm. |
411 | // |
412 | // |
413 | TH1F d2("d2", "", n, -0.5, Float_t(n)-0.5); |
414 | d2.Reset(); |
415 | |
416 | // (1) Chose first center as a random point among the input data. |
417 | Int_t ir = Int_t(Float_t(n) * gRandom->Rndm()); |
418 | mx[0] = x[ir]; |
419 | my[0] = y[ir]; |
420 | |
421 | // (2) Iterate |
422 | Int_t icl = 1; |
423 | while(icl < k) |
424 | { |
425 | // find min distance to existing clusters |
426 | for (Int_t j = 0; j < n; j++) { |
427 | Double_t dmin = 1.e10; |
428 | for (Int_t i = 0; i < icl; i++) { |
429 | Double_t dij = d(mx[i], my[i], x[j], y[j]); |
430 | if (dij < dmin) dmin = dij; |
431 | } // clusters |
432 | d2.Fill(Float_t(j), dmin); |
433 | } // data points |
434 | // select a new cluster from data points with probability ~d2 |
987ff6ef |
435 | ir = Int_t(d2.GetRandom() + 0.5); |
5145d1b5 |
436 | mx[icl] = x[ir]; |
437 | my[icl] = y[ir]; |
438 | icl++; |
439 | } // icl |
440 | } |
441 | |
442 | |
19c5a36a |
443 | ClassImp(AliKMeansResult) |
444 | |
445 | |
446 | |
447 | AliKMeansResult::AliKMeansResult(Int_t k): |
448 | TObject(), |
449 | fK(k), |
450 | fMx (new Double_t[k]), |
451 | fMy (new Double_t[k]), |
452 | fSigma2(new Double_t[k]), |
453 | fRk (new Double_t[k]), |
454 | fTarget(new Double_t[k]), |
455 | fInd (new Int_t[k]) |
fe366828 |
456 | { |
19c5a36a |
457 | // Constructor |
458 | } |
459 | |
62c971ea |
460 | AliKMeansResult::AliKMeansResult(const AliKMeansResult &res): |
461 | TObject(res), |
462 | fK(res.GetK()), |
463 | fMx(0), |
464 | fMy(0), |
465 | fSigma2(0), |
466 | fRk(0), |
467 | fTarget(0), |
468 | fInd(0) |
469 | { |
470 | // Copy constructor |
471 | for (Int_t i = 0; i <fK; i++) { |
472 | fMx[i] = (res.GetMx()) [i]; |
473 | fMy[i] = (res.GetMy()) [i]; |
474 | fSigma2[i] = (res.GetSigma2())[i]; |
475 | fRk[i] = (res.GetRk()) [i]; |
476 | fTarget[i] = (res.GetTarget())[i]; |
477 | fInd[i] = (res.GetInd()) [i]; |
478 | } |
479 | } |
480 | |
481 | AliKMeansResult& AliKMeansResult::operator=(const AliKMeansResult& res) |
482 | { |
483 | // |
484 | // Assignment operator |
485 | if (this != &res) { |
486 | fK = res.GetK(); |
487 | for (Int_t i = 0; i <fK; i++) { |
488 | fMx[i] = (res.GetMx()) [i]; |
489 | fMy[i] = (res.GetMy()) [i]; |
490 | fSigma2[i] = (res.GetSigma2())[i]; |
491 | fRk[i] = (res.GetRk()) [i]; |
492 | fTarget[i] = (res.GetTarget())[i]; |
493 | fInd[i] = (res.GetInd()) [i]; |
494 | } |
495 | return *this; |
496 | } |
497 | } |
498 | |
499 | |
19c5a36a |
500 | AliKMeansResult::~AliKMeansResult() |
501 | { |
502 | // Destructor |
503 | delete[] fMx; |
504 | delete[] fMy; |
505 | delete[] fSigma2; |
506 | delete[] fRk; |
507 | delete[] fInd; |
508 | delete[] fTarget; |
509 | } |
510 | |
511 | void AliKMeansResult::Sort() |
512 | { |
513 | // Sort clusters |
514 | for (Int_t i = 0; i < fK; i++) { |
515 | if (fRk[i] > 1.) fRk[i] /= fSigma2[i]; |
516 | else fRk[i] = 0.; |
517 | } |
518 | |
519 | TMath::Sort(fK, fRk, fInd); |
520 | |
fe366828 |
521 | } |
522 | |
19c5a36a |
523 | void AliKMeansResult::Sort(Int_t n, Double_t* x, Double_t* y) |
524 | { |
525 | // Build target array and sort |
526 | for (Int_t i = 0; i < fK; i++) |
527 | { |
528 | Int_t nc = 0; |
529 | for (Int_t j = 0; j < n; j++) |
530 | { |
531 | if (2. * AliKMeansClustering::d(fMx[i], fMy[i], x[j], y[j]) < fSigma2[i]) nc++; |
532 | } |
533 | if (nc > 1) { |
534 | fTarget[i] = Double_t(nc) / fSigma2[i]; |
535 | } else { |
536 | fTarget[i] = 0.; |
537 | } |
538 | } |
fe366828 |
539 | |
19c5a36a |
540 | TMath::Sort(fK, fTarget, fInd); |
541 | } |
62c971ea |
542 | |
543 | void AliKMeansResult::CopyResults(AliKMeansResult* res) |
544 | { |
545 | fK = res->GetK(); |
546 | for (Int_t i = 0; i <fK; i++) { |
547 | fMx[i] = (res->GetMx()) [i]; |
548 | fMy[i] = (res->GetMy()) [i]; |
549 | fSigma2[i] = (res->GetSigma2())[i]; |
550 | fRk[i] = (res->GetRk()) [i]; |
551 | fTarget[i] = (res->GetTarget())[i]; |
552 | fInd[i] = (res->GetInd()) [i]; |
553 | } |
554 | } |