]>
Commit | Line | Data |
---|---|---|
70f2ce9d | 1 | /************************************************************************** |
2 | * Copyright(c) 1998-1999, ALICE Experiment at CERN, All rights reserved. * | |
3 | * * | |
4 | * Author: The ALICE Off-line Project. * | |
5 | * Contributors are mentioned in the code where appropriate. * | |
6 | * * | |
7 | * Permission to use, copy, modify and distribute this software and its * | |
8 | * documentation strictly for non-commercial purposes is hereby granted * | |
9 | * without fee, provided that the above copyright notice appears in all * | |
10 | * copies and that both the copyright notice and this permission notice * | |
11 | * appear in the supporting documentation. The authors make no claims * | |
12 | * about the suitability of this software for any purpose. It is * | |
13 | * provided "as is" without express or implied warranty. * | |
14 | **************************************************************************/ | |
15 | ||
16 | // Implemenatation of the K-Means Clustering Algorithm | |
17 | // See http://en.wikipedia.org/wiki/K-means_clustering and references therein. | |
18 | // | |
19 | // This particular implementation is the so called Soft K-means algorithm. | |
20 | // It has been modified to work on the cylindrical topology in eta-phi space. | |
21 | // | |
22 | // Author: Andreas Morsch (CERN) | |
23 | // andreas.morsch@cern.ch | |
24 | ||
25 | #include "AliKMeansClustering.h" | |
26 | #include <TMath.h> | |
27 | #include <TRandom.h> | |
28 | ||
29 | ClassImp(AliKMeansClustering) | |
30 | ||
31 | Double_t AliKMeansClustering::fBeta = 10.; | |
32 | ||
33 | void AliKMeansClustering::SoftKMeans(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* rk ) | |
34 | { | |
35 | // | |
36 | // The soft K-means algorithm | |
37 | // | |
38 | Int_t i,j; | |
39 | // | |
40 | // (1) Initialisation of the k means | |
41 | ||
42 | for (i = 0; i < k; i++) { | |
43 | mx[i] = 2. * TMath::Pi() * gRandom->Rndm(); | |
44 | my[i] = -1. + 2. * gRandom->Rndm(); | |
45 | } | |
46 | ||
47 | // | |
48 | // (2a) The responsibilities | |
49 | Double_t** r = new Double_t*[n]; // responsibilities | |
50 | for (j = 0; j < n; j++) {r[j] = new Double_t[k];} | |
51 | // | |
52 | // (2b) Normalisation | |
53 | Double_t* nr = new Double_t[n]; | |
54 | ||
55 | // (3) Iterations | |
56 | Int_t nit = 0; | |
57 | ||
58 | while(1) { | |
59 | nit++; | |
60 | // | |
61 | // Assignment step | |
62 | // | |
63 | for (j = 0; j < n; j++) { | |
64 | nr[j] = 0.; | |
65 | for (i = 0; i < k; i++) { | |
66 | r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j])); | |
67 | nr[j] += r[j][i]; | |
68 | } // mean i | |
69 | } // data point j | |
70 | ||
71 | for (j = 0; j < n; j++) { | |
72 | for (i = 0; i < k; i++) { | |
73 | r[j][i] /= nr[j]; | |
74 | } // mean i | |
75 | } // data point j | |
76 | ||
77 | // | |
78 | // Update step | |
79 | Double_t di = 0; | |
80 | ||
81 | for (i = 0; i < k; i++) { | |
82 | Double_t oldx = mx[i]; | |
83 | Double_t oldy = my[i]; | |
84 | ||
85 | mx[i] = x[0]; | |
86 | my[i] = y[0]; | |
87 | rk[i] = r[0][i]; | |
88 | ||
89 | for (j = 1; j < n; j++) { | |
90 | Double_t xx = x[j]; | |
91 | // | |
92 | // Here we have to take into acount the cylinder topology where phi is defined mod 2xpi | |
93 | // If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi | |
94 | ||
95 | Double_t dx = mx[i] - x[j]; | |
96 | if (dx > TMath::Pi()) xx += 2. * TMath::Pi(); | |
97 | if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi(); | |
98 | mx[i] = mx[i] * rk[i] + r[j][i] * xx; | |
99 | my[i] = my[i] * rk[i] + r[j][i] * y[j]; | |
100 | rk[i] += r[j][i]; | |
101 | mx[i] /= rk[i]; | |
102 | my[i] /= rk[i]; | |
103 | if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi(); | |
104 | if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi(); | |
105 | } // Data | |
106 | di += d(mx[i], my[i], oldx, oldy); | |
107 | } // means | |
108 | // | |
109 | // ending condition | |
110 | if (di < 1.e-8 || nit > 1000) break; | |
111 | } // while | |
112 | ||
113 | // Clean-up | |
114 | delete[] nr; | |
115 | for (j = 0; j < n; j++) delete[] r[j]; | |
116 | delete[] r; | |
117 | } | |
118 | ||
77f42a25 | 119 | void AliKMeansClustering::SoftKMeans2(Int_t k, Int_t n, Double_t* x, Double_t* y, Double_t* mx, Double_t* my , Double_t* sigma2, Double_t* rk ) |
120 | { | |
121 | // | |
122 | // The soft K-means algorithm | |
123 | // | |
124 | Int_t i,j; | |
125 | // | |
126 | // (1) Initialisation of the k means | |
127 | // | |
128 | // (there is an optimized version for initialisation called kmeans++) | |
129 | for (i = 0; i < k; i++) { | |
130 | mx[i] = 2. * TMath::Pi() * gRandom->Rndm(); | |
131 | my[i] = -1. + 2. * gRandom->Rndm(); | |
132 | } | |
133 | ||
134 | // | |
135 | // (2a) The responsibilities | |
136 | Double_t** r = new Double_t*[n]; // responsibilities | |
137 | for (j = 0; j < n; j++) {r[j] = new Double_t[k];} | |
138 | // | |
139 | // (2b) Normalisation | |
140 | Double_t* nr = new Double_t[n]; | |
141 | // | |
142 | // (2c) Weights | |
143 | Double_t* pi = new Double_t[k]; | |
144 | // | |
145 | // (2d) Initialise the responsibilties and weights | |
146 | for (j = 0; j < n; j++) { | |
147 | nr[j] = 0.; | |
148 | for (i = 0; i < k; i++) { | |
149 | r[j][i] = TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j])); | |
150 | nr[j] += r[j][i]; | |
151 | } // mean i | |
152 | } // data point j | |
153 | ||
154 | for (i = 0; i < k; i++) { | |
155 | rk[i] = 0.; | |
156 | sigma2[i] = 1./fBeta; | |
157 | ||
158 | for (j = 0; j < n; j++) { | |
159 | r[j][i] /= nr[j]; | |
160 | rk[i] += r[j][i]; | |
161 | } // mean i | |
162 | pi[i] = rk[i] / Double_t(n); | |
163 | } // data point j | |
164 | ||
165 | ||
166 | // (3) Iterations | |
167 | Int_t nit = 0; | |
168 | ||
169 | while(1) { | |
170 | nit++; | |
171 | // | |
172 | // Assignment step | |
173 | // | |
174 | for (j = 0; j < n; j++) { | |
175 | nr[j] = 0.; | |
176 | for (i = 0; i < k; i++) { | |
177 | r[j][i] = pi[i] * TMath::Exp(- fBeta * d(mx[i], my[i], x[j], y[j])) | |
178 | / (TMath::Sqrt(2. * sigma2[i]) * TMath::Pi()); | |
179 | nr[j] += r[j][i]; | |
180 | } // mean i | |
181 | } // data point j | |
182 | ||
183 | for (i = 0; i < k; i++) { | |
184 | for (j = 0; j < n; j++) { | |
185 | r[j][i] /= nr[j]; | |
186 | } // mean i | |
187 | } // data point j | |
188 | ||
189 | // | |
190 | // Update step | |
191 | Double_t di = 0; | |
192 | ||
193 | for (i = 0; i < k; i++) { | |
194 | Double_t oldx = mx[i]; | |
195 | Double_t oldy = my[i]; | |
196 | ||
197 | mx[i] = x[0]; | |
198 | my[i] = y[0]; | |
199 | rk[i] = r[0][i]; | |
200 | ||
201 | for (j = 1; j < n; j++) { | |
202 | Double_t xx = x[j]; | |
203 | // | |
204 | // Here we have to take into acount the cylinder topology where phi is defined mod 2xpi | |
205 | // If two coordinates are separated by more than pi in phi one has to be shifted by +/- 2 pi | |
206 | ||
207 | Double_t dx = mx[i] - x[j]; | |
208 | if (dx > TMath::Pi()) xx += 2. * TMath::Pi(); | |
209 | if (dx < -TMath::Pi()) xx -= 2. * TMath::Pi(); | |
210 | mx[i] = mx[i] * rk[i] + r[j][i] * xx; | |
211 | my[i] = my[i] * rk[i] + r[j][i] * y[j]; | |
212 | rk[i] += r[j][i]; | |
213 | mx[i] /= rk[i]; | |
214 | my[i] /= rk[i]; | |
215 | if (mx[i] > 2. * TMath::Pi()) mx[i] -= 2. * TMath::Pi(); | |
216 | if (mx[i] < 0. ) mx[i] += 2. * TMath::Pi(); | |
217 | } // Data | |
218 | di += d(mx[i], my[i], oldx, oldy); | |
219 | } // means | |
220 | // | |
221 | // Sigma | |
222 | for (i = 0; i < k; i++) { | |
223 | sigma2[i] = 0.; | |
224 | for (j = 1; j < n; j++) { | |
225 | sigma2[i] += 2. * r[j][i] * d(mx[i], my[i], x[j], y[j]); | |
226 | } // Data | |
227 | sigma2[i] /= rk[i]; | |
228 | } // Clusters | |
229 | // | |
230 | // Fractions | |
231 | for (i = 0; i < k; i++) pi[i] = rk[i] / Double_t(n); | |
232 | // | |
233 | // ending condition | |
234 | if (di < 1.e-8 || nit > 1000) break; | |
235 | } // while | |
236 | ||
237 | // Clean-up | |
238 | delete[] nr; | |
239 | delete[] pi; | |
240 | for (j = 0; j < n; j++) delete[] r[j]; | |
241 | delete[] r; | |
242 | } | |
243 | ||
70f2ce9d | 244 | Double_t AliKMeansClustering::d(Double_t mx, Double_t my, Double_t x, Double_t y) |
245 | { | |
246 | // | |
247 | // Distance definition | |
248 | // Quasi - Euclidian on the eta-phi cylinder | |
249 | ||
250 | Double_t dx = TMath::Abs(mx-x); | |
251 | if (dx > TMath::Pi()) dx = 2. * TMath::Pi() - dx; | |
252 | ||
8422e0a8 | 253 | return (0.5*(dx * dx + (my - y) * (my - y))); |
70f2ce9d | 254 | } |