]> git.uio.no Git - u/mrichter/AliRoot.git/blame - Vc/src/cpuid.cpp
- support compilation with GCC 4.1 and 4.2
[u/mrichter/AliRoot.git] / Vc / src / cpuid.cpp
CommitLineData
/*  This file is part of the Vc library.

    Copyright (C) 2011-2012 Matthias Kretz <kretz@kde.org>

    Vc is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as
    published by the Free Software Foundation, either version 3 of
    the License, or (at your option) any later version.

    Vc is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with Vc.  If not, see <http://www.gnu.org/licenses/>.

*/
19
20#include <Vc/cpuid.h>
21
22namespace Vc
23{
24CpuId::uint CpuId::s_ecx0 = 0;
25CpuId::uint CpuId::s_logicalProcessors = 0;
26CpuId::uint CpuId::s_processorFeaturesC = 0;
27CpuId::uint CpuId::s_processorFeaturesD = 0;
28CpuId::uint CpuId::s_processorFeatures8C = 0;
29CpuId::uint CpuId::s_processorFeatures8D = 0;
30CpuId::uint CpuId::s_L1Instruction = 0;
31CpuId::uint CpuId::s_L1Data = 0;
32CpuId::uint CpuId::s_L2Data = 0;
33CpuId::uint CpuId::s_L3Data = 0;
34CpuId::ushort CpuId::s_L1InstructionLineSize = 0;
35CpuId::ushort CpuId::s_L1DataLineSize = 0;
36CpuId::ushort CpuId::s_L2DataLineSize = 0;
37CpuId::ushort CpuId::s_L3DataLineSize = 0;
38CpuId::uint CpuId::s_L1Associativity = 0;
39CpuId::uint CpuId::s_L2Associativity = 0;
40CpuId::uint CpuId::s_L3Associativity = 0;
41CpuId::ushort CpuId::s_prefetch = 32; // The Intel ORM says that if CPUID(2) doesn't set the prefetch size it is 32
42CpuId::uchar CpuId::s_brandIndex = 0;
43CpuId::uchar CpuId::s_cacheLineSize = 0;
44CpuId::uchar CpuId::s_processorModel = 0;
45CpuId::uchar CpuId::s_processorFamily = 0;
46CpuId::ProcessorType CpuId::s_processorType = CpuId::IntelReserved;
47bool CpuId::s_noL2orL3 = false;
48
// CPUID(leaf) / CPUID_C(leaf, subleaf) execute the cpuid instruction and store
// the result in the local variables `eax`, `ebx`, `ecx` and `edx`, which must
// be declared (as unsigned int) in the scope where the macro is used.
// Every variant expands to exactly one statement, so the macros are safe to
// use as the body of an unbraced if/else.
#ifdef _MSC_VER
} // better not include intrin.h inside the Vc namespace :)
#include <intrin.h>
namespace Vc {
#define CPUID(leaf) \
    do { \
        int out[4]; \
        __cpuid(out, (leaf)); \
        eax = out[0]; \
        ebx = out[1]; \
        ecx = out[2]; \
        edx = out[3]; \
    } while (false)
#define CPUID_C(leaf, _ecx_) \
    do { \
        int out[4]; \
        __cpuidex(out, (leaf), (_ecx_)); \
        eax = out[0]; \
        ebx = out[1]; \
        ecx = out[2]; \
        edx = out[3]; \
    } while (false)
#elif defined(__i386__) && defined(__PIC__)
// %ebx may be the PIC register. Therefore %ebx is saved to memory before the
// cpuid instruction and restored afterwards; the value cpuid left in %ebx is
// copied out to memory in between.
static inline void _Vc_cpuid(int leaf, unsigned int &eax, unsigned int &ebx, unsigned int &ecx, unsigned int &edx)
{
    int tmpb;
    asm("mov %%ebx, %[tmpb]\n\t"
        "cpuid\n\t"
        "mov %%ebx, %[ebx]\n\t"
        "mov %[tmpb], %%ebx\n\t"
        : [tmpb]"=m"(tmpb), "=a"(eax), [ebx] "=m"(ebx), "+c"(ecx), "=d"(edx)
        : [leaf] "a"(leaf)
       );
}
#define CPUID(leaf) \
    do { \
        ecx = 0; \
        _Vc_cpuid((leaf), eax, ebx, ecx, edx); \
    } while (false)
#define CPUID_C(leaf, _ecx_) \
    do { \
        ecx = (_ecx_); \
        _Vc_cpuid((leaf), eax, ebx, ecx, edx); \
    } while (false)
#else
#define CPUID(leaf) \
    __asm__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(leaf))
#define CPUID_C(leaf, _ecx_) \
    __asm__("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "a"(leaf), "c"(_ecx_))
#endif
/**
 * Translates the 4-bit associativity encoding used by the AMD cache leaf
 * (CPUID 0x80000006) into a number of ways.
 *
 * \param bits the encoded associativity field (callers mask it with 0xf).
 * \return the number of ways; 0 for encoding 0x0, 0xff for encoding 0xF, and
 *         0xffffffff for every encoding that has no entry in the table
 *         (0x3, 0x5, 0x7, 0x9 and anything outside 0x0..0xF).
 */
static unsigned int CpuIdAmdAssociativityTable(int bits)
{
    static const unsigned int unknown = 0xffffffffu;
    static const unsigned int ways[16] = {
        /* 0x0 */ 0u,
        /* 0x1 */ 1u,
        /* 0x2 */ 2u,
        /* 0x3 */ 0xffffffffu,
        /* 0x4 */ 4u,
        /* 0x5 */ 0xffffffffu,
        /* 0x6 */ 8u,
        /* 0x7 */ 0xffffffffu,
        /* 0x8 */ 16u,
        /* 0x9 */ 0xffffffffu,
        /* 0xA */ 32u,
        /* 0xB */ 48u,
        /* 0xC */ 64u,
        /* 0xD */ 96u,
        /* 0xE */ 128u,
        /* 0xF */ 0xffu
    };
    if (bits < 0 || bits > 0xF) {
        return unknown;
    }
    return ways[bits];
}
114
115void CpuId::init()
116{
117 {
118 static bool done = false;
119 if (done) return;
120 done = true;
121 }
122 uint eax, ebx, ecx, edx;
123
124 CPUID(0);
125 s_ecx0 = ecx;
126
127 CPUID(1);
128 s_processorFeaturesC = ecx;
129 s_processorFeaturesD = edx;
130 s_processorModel = (eax & 0x000000f0) >> 4;
131 s_processorFamily = (eax & 0x00000f00) >> 8;
132 if (isAmd()) {
133 if (s_processorFamily >= 0xf) {
134 const uchar processorFamilyExt = (eax & 0x0ff00000) >> 20;
135 s_processorFamily += processorFamilyExt;
136 const uchar processorModelExt = (eax & 0x000f0000) >> 12;
137 s_processorModel += processorModelExt;
138 }
139 } else if (s_processorFamily == 0xf) {
140 const uchar processorFamilyExt = (eax & 0x0ff00000) >> 20;
141 s_processorFamily += processorFamilyExt;
142 const uchar processorModelExt = (eax & 0x000f0000) >> 12;
143 s_processorModel += processorModelExt;
144 } else if (s_processorFamily == 0x6) {
145 const uchar processorModelExt = (eax & 0x000f0000) >> 12;
146 s_processorModel += processorModelExt;
147 }
148 s_processorType = static_cast<ProcessorType>((eax & 0x00003000) >> 12);
149
150 s_brandIndex = ebx & 0xff;
151 ebx >>= 8;
152 s_cacheLineSize = ebx & 0xff;
153 ebx >>= 8;
154 s_logicalProcessors = ebx & 0xff;
155
156 CPUID(0x80000001);
157 s_processorFeatures8C = ecx;
158 s_processorFeatures8D = edx;
159
160 if (isAmd()) {
161 s_prefetch = cacheLineSize();
162
163 CPUID(0x80000005);
164 s_L1DataLineSize = ecx & 0xff;
165 s_L1Data = (ecx >> 24) * 1024;
166 s_L1Associativity = (ecx >> 16) & 0xff;
167 s_L1InstructionLineSize = edx & 0xff;
168 s_L1Instruction = (edx >> 24) * 1024;
169
170 CPUID(0x80000006);
171 s_L2DataLineSize = ecx & 0xff;
172 s_L2Data = (ecx >> 16) * 1024;
173 s_L2Associativity = CpuIdAmdAssociativityTable((ecx >> 12) & 0xf);
174 s_L3DataLineSize = edx & 0xff;
175 s_L3Data = (edx >> 18) * 512 * 1024;
176 s_L3Associativity = CpuIdAmdAssociativityTable((ecx >> 12) & 0xf);
177 return;
178 }
179
180 // Intel only
181 int repeat = 0;
182 bool checkLeaf4 = false;
183 do {
184 CPUID(2);
185 if (repeat == 0) {
186 repeat = eax & 0xff;
187 }
188 if (0 == (0x80000000u & eax)) {
189 for (int i = 0; i < 3; ++i) {
190 eax >>= 8;
191 interpret(eax & 0xff, &checkLeaf4);
192 }
193 }
194 if (0 == (0x80000000u & ebx)) {
195 for (int i = 0; i < 4; ++i) {
196 interpret(ebx & 0xff, &checkLeaf4);
197 ebx >>= 8;
198 }
199 }
200 if (0 == (0x80000000u & ecx)) {
201 for (int i = 0; i < 4; ++i) {
202 interpret(ecx & 0xff, &checkLeaf4);
203 ecx >>= 8;
204 }
205 }
206 if (0 == (0x80000000u & edx)) {
207 for (int i = 0; i < 4; ++i) {
208 interpret(edx & 0xff, &checkLeaf4);
209 edx >>= 8;
210 }
211 }
212 } while (--repeat > 0);
213 if (checkLeaf4) {
214 s_prefetch = cacheLineSize();
215 if (s_prefetch == 0) {
216 s_prefetch = 64;
217 }
218 eax = 1;
219 for (int i = 0; eax & 0x1f; ++i) {
220 CPUID_C(4, i);
221 const int cacheLevel = (eax >> 5) & 7;
222 //const int sharedBy = 1 + ((eax >> 14) & 0xfff);
223 const int linesize = 1 + (ebx & 0xfff); ebx >>= 12;
224 const int partitions = 1 + (ebx & 0x3ff); ebx >>= 10;
225 const int ways = 1 + (ebx & 0x3ff);
226 const int sets = 1 + ecx;
227 const int size = ways * partitions * linesize * sets;
228 switch (eax & 0x1f) {
229 case 1: // data cache
230 switch (cacheLevel) {
231 case 1:
232 s_L1Data = size;
233 s_L1DataLineSize = linesize;
234 s_L1Associativity = ways;
235 break;
236 case 2:
237 s_L2Data = size;
238 s_L2DataLineSize = linesize;
239 s_L2Associativity = ways;
240 break;
241 case 3:
242 s_L3Data = size;
243 s_L3DataLineSize = linesize;
244 s_L3Associativity = ways;
245 break;
246 }
247 break;
248 case 2: // instruction cache
249 switch (cacheLevel) {
250 case 1:
251 s_L1Instruction = size;
252 s_L1InstructionLineSize = linesize;
253 break;
254 }
255 break;
256 case 3: // unified cache
257 switch (cacheLevel) {
258 case 1:
259 s_L1Data = size;// / sharedBy;
260 s_L1DataLineSize = linesize;
261 s_L1Associativity = ways;
262 break;
263 case 2:
264 s_L2Data = size;// / sharedBy;
265 s_L2DataLineSize = linesize;
266 s_L2Associativity = ways;
267 break;
268 case 3:
269 s_L3Data = size;// / sharedBy;
270 s_L3DataLineSize = linesize;
271 s_L3Associativity = ways;
272 break;
273 }
274 break;
275 case 0: // no more caches
276 break;
277 default: // reserved
278 break;
279 }
280 }
281 }
282}
283
284void CpuId::interpret(uchar byte, bool *checkLeaf4)
285{
286 switch (byte) {
287 case 0x06:
288 s_L1Instruction = 8 * 1024;
289 s_L1InstructionLineSize = 32;
290 s_L1Associativity = 4;
291 break;
292 case 0x08:
293 s_L1Instruction = 16 * 1024;
294 s_L1InstructionLineSize = 32;
295 s_L1Associativity = 4;
296 break;
297 case 0x09:
298 s_L1Instruction = 32 * 1024;
299 s_L1InstructionLineSize = 64;
300 s_L1Associativity = 4;
301 break;
302 case 0x0A:
303 s_L1Data = 8 * 1024;
304 s_L1DataLineSize = 32;
305 s_L1Associativity = 2;
306 break;
307 case 0x0C:
308 s_L1Data = 16 * 1024;
309 s_L1DataLineSize = 32;
310 s_L1Associativity = 4;
311 break;
312 case 0x0D:
313 s_L1Data = 16 * 1024;
314 s_L1DataLineSize = 64;
315 s_L1Associativity = 4;
316 break;
317 case 0x0E:
318 s_L1Data = 24 * 1024;
319 s_L1DataLineSize = 64;
320 s_L1Associativity = 6;
321 break;
322 case 0x21:
323 s_L2Data = 256 * 1024;
324 s_L2DataLineSize = 64;
325 s_L2Associativity = 8;
326 break;
327 case 0x22:
328 s_L3Data = 512 * 1024;
329 s_L3DataLineSize = 64;
330 s_L3Associativity = 4;
331 break;
332 case 0x23:
333 s_L3Data = 1024 * 1024;
334 s_L3DataLineSize = 64;
335 s_L3Associativity = 8;
336 break;
337 case 0x25:
338 s_L3Data = 2 * 1024 * 1024;
339 s_L3DataLineSize = 64;
340 s_L3Associativity = 8;
341 break;
342 case 0x29:
343 s_L3Data = 4 * 1024 * 1024;
344 s_L3DataLineSize = 64;
345 s_L3Associativity = 8;
346 break;
347 case 0x2C:
348 s_L1Data = 32 * 1024;
349 s_L1DataLineSize = 64;
350 s_L1Associativity = 8;
351 break;
352 case 0x30:
353 s_L1Data = 32 * 1024;
354 s_L1DataLineSize = 64;
355 s_L1Associativity = 8;
356 break;
357 case 0x40:
358 s_noL2orL3 = true;
359 break;
360 case 0x41:
361 s_L2Data = 128 * 1024;
362 s_L2DataLineSize = 32;
363 s_L2Associativity = 4;
364 break;
365 case 0x42:
366 s_L2Data = 256 * 1024;
367 s_L2DataLineSize = 32;
368 s_L2Associativity = 4;
369 break;
370 case 0x43:
371 s_L2Data = 512 * 1024;
372 s_L2DataLineSize = 32;
373 s_L2Associativity = 4;
374 break;
375 case 0x44:
376 s_L2Data = 1024 * 1024;
377 s_L2DataLineSize = 32;
378 s_L2Associativity = 4;
379 break;
380 case 0x45:
381 s_L2Data = 2 * 1024 * 1024;
382 s_L2DataLineSize = 32;
383 s_L2Associativity = 4;
384 break;
385 case 0x46:
386 s_L3Data = 4 * 1024 * 1024;
387 s_L3DataLineSize = 64;
388 s_L3Associativity = 4;
389 break;
390 case 0x47:
391 s_L3Data = 8 * 1024 * 1024;
392 s_L3DataLineSize = 64;
393 s_L3Associativity = 8;
394 break;
395 case 0x48:
396 s_L2Data = 3 * 1024 * 1024;
397 s_L2DataLineSize = 64;
398 s_L2Associativity = 12;
399 break;
400 case 0x49:
401 if (s_processorFamily == 0xf && s_processorModel == 0x6) {
402 s_L3Data = 4 * 1024 * 1024;
403 s_L3DataLineSize = 64;
404 s_L3Associativity = 16;
405 } else {
406 s_L2Data = 4 * 1024 * 1024;
407 s_L2DataLineSize = 64;
408 s_L2Associativity = 16;
409 }
410 break;
411 case 0x4A:
412 s_L3Data = 6 * 1024 * 1024;
413 s_L3DataLineSize = 64;
414 s_L3Associativity = 12;
415 break;
416 case 0x4B:
417 s_L3Data = 8 * 1024 * 1024;
418 s_L3DataLineSize = 64;
419 s_L3Associativity = 16;
420 break;
421 case 0x4C:
422 s_L3Data = 12 * 1024 * 1024;
423 s_L3DataLineSize = 64;
424 s_L3Associativity = 12;
425 break;
426 case 0x4D:
427 s_L3Data = 16 * 1024 * 1024;
428 s_L3DataLineSize = 64;
429 s_L3Associativity = 16;
430 break;
431 case 0x4E:
432 s_L2Data = 6 * 1024 * 1024;
433 s_L2DataLineSize = 64;
434 s_L2Associativity = 24;
435 break;
436 case 0x60:
437 s_L1Data = 16 * 1024;
438 s_L1DataLineSize = 64;
439 s_L1Associativity = 8;
440 break;
441 case 0x66:
442 s_L1Data = 8 * 1024;
443 s_L1DataLineSize = 64;
444 s_L1Associativity = 4;
445 break;
446 case 0x67:
447 s_L1Data = 16 * 1024;
448 s_L1DataLineSize = 64;
449 s_L1Associativity = 4;
450 break;
451 case 0x68:
452 s_L1Data = 32 * 1024;
453 s_L1DataLineSize = 64;
454 s_L1Associativity = 4;
455 break;
456 case 0x78:
457 s_L2Data = 1024 * 1024;
458 s_L2DataLineSize = 64;
459 s_L2Associativity = 4;
460 break;
461 case 0x79:
462 s_L2Data = 128 * 1024;
463 s_L2DataLineSize = 64;
464 s_L2Associativity = 8;
465 break;
466 case 0x7A:
467 s_L2Data = 256 * 1024;
468 s_L2DataLineSize = 64;
469 s_L2Associativity = 8;
470 break;
471 case 0x7B:
472 s_L2Data = 512 * 1024;
473 s_L2DataLineSize = 64;
474 s_L2Associativity = 8;
475 break;
476 case 0x7C:
477 s_L2Data = 1024 * 1024;
478 s_L2DataLineSize = 64;
479 s_L2Associativity = 8;
480 break;
481 case 0x7D:
482 s_L2Data = 2 * 1024 * 1024;
483 s_L2DataLineSize = 64;
484 s_L2Associativity = 8;
485 break;
486 case 0x7F:
487 s_L2Data = 512 * 1024;
488 s_L2DataLineSize = 64;
489 s_L2Associativity = 2;
490 break;
491 case 0x80:
492 s_L2Data = 512 * 1024;
493 s_L2DataLineSize = 64;
494 s_L2Associativity = 8;
495 break;
496 case 0x82:
497 s_L2Data = 256 * 1024;
498 s_L2DataLineSize = 32;
499 s_L2Associativity = 8;
500 break;
501 case 0x83:
502 s_L2Data = 512 * 1024;
503 s_L2DataLineSize = 32;
504 s_L2Associativity = 8;
505 break;
506 case 0x84:
507 s_L2Data = 1024 * 1024;
508 s_L2DataLineSize = 32;
509 s_L2Associativity = 8;
510 break;
511 case 0x85:
512 s_L2Data = 2 * 1024 * 1024;
513 s_L2DataLineSize = 32;
514 s_L2Associativity = 8;
515 break;
516 case 0x86:
517 s_L2Data = 512 * 1024;
518 s_L2DataLineSize = 64;
519 s_L2Associativity = 4;
520 break;
521 case 0x87:
522 s_L2Data = 1024 * 1024;
523 s_L2DataLineSize = 64;
524 s_L2Associativity = 8;
525 break;
526 case 0xD0:
527 s_L3Data = 512 * 1024;
528 s_L3DataLineSize = 64;
529 s_L3Associativity = 4;
530 break;
531 case 0xD1:
532 s_L3Data = 1024 * 1024;
533 s_L3DataLineSize = 64;
534 s_L3Associativity = 4;
535 break;
536 case 0xD2:
537 s_L3Data = 2 * 1024 * 1024;
538 s_L3DataLineSize = 64;
539 s_L3Associativity = 4;
540 break;
541 case 0xD6:
542 s_L3Data = 1024 * 1024;
543 s_L3DataLineSize = 64;
544 s_L3Associativity = 8;
545 break;
546 case 0xD7:
547 s_L3Data = 2 * 1024 * 1024;
548 s_L3DataLineSize = 64;
549 s_L3Associativity = 8;
550 break;
551 case 0xD8:
552 s_L3Data = 4 * 1024 * 1024;
553 s_L3DataLineSize = 64;
554 s_L3Associativity = 8;
555 break;
556 case 0xDC:
557 s_L3Data = 3 * 512 * 1024;
558 s_L3DataLineSize = 64;
559 s_L3Associativity = 12;
560 break;
561 case 0xDD:
562 s_L3Data = 3 * 1024 * 1024;
563 s_L3DataLineSize = 64;
564 s_L3Associativity = 12;
565 break;
566 case 0xDE:
567 s_L3Data = 6 * 1024 * 1024;
568 s_L3DataLineSize = 64;
569 s_L3Associativity = 12;
570 break;
571 case 0xE2:
572 s_L3Data = 2 * 1024 * 1024;
573 s_L3DataLineSize = 64;
574 s_L3Associativity = 16;
575 break;
576 case 0xE3:
577 s_L3Data = 4 * 1024 * 1024;
578 s_L3DataLineSize = 64;
579 s_L3Associativity = 16;
580 break;
581 case 0xE4:
582 s_L3Data = 8 * 1024 * 1024;
583 s_L3DataLineSize = 64;
584 s_L3Associativity = 16;
585 break;
586 case 0xEA:
587 s_L3Data = 12 * 1024 * 1024;
588 s_L3DataLineSize = 64;
589 s_L3Associativity = 24;
590 break;
591 case 0xEB:
592 s_L3Data = 18 * 1024 * 1024;
593 s_L3DataLineSize = 64;
594 s_L3Associativity = 24;
595 break;
596 case 0xEC:
597 s_L3Data = 24 * 1024 * 1024;
598 s_L3DataLineSize = 64;
599 s_L3Associativity = 24;
600 break;
601 case 0xF0:
602 s_prefetch = 64;
603 break;
604 case 0xF1:
605 s_prefetch = 128;
606 break;
607 case 0xFF:
608 // we have to use CPUID(4) to find out
609 *checkLeaf4 = true;
610 break;
611 default:
612 break;
613 }
614}
615} // namespace Vc
616
617// vim: sw=4 sts=4 et tw=100