1 | #include <stdio.h> |
---|
2 | #include <string.h> |
---|
3 | #include <stdint.h> |
---|
4 | |
---|
/*
 * Target selection.
 *
 * ARCH_X86_64 may be supplied on the command line; when it is not, it is
 * derived from the compiler's own __x86_64__ macro so that a 64-bit build
 * never falls through to the 32-bit register names and instructions.
 * Exactly one of the two register sets below is then chosen:
 *   REG_b / REG_S - names of the base and source-index registers as they
 *                   must be spelled inside inline assembly;
 *   x86_reg       - signed integer type with the width of those registers.
 */
#if !defined(ARCH_X86_64) && defined(__x86_64__)
#define ARCH_X86_64
#endif

#ifndef ARCH_X86_64
#define ARCH_X86_32
#endif

#ifdef ARCH_X86_64
#  define REG_b "rbx"
#  define REG_S "rsi"
typedef int64_t x86_reg;
#else
#  define REG_b "ebx"
#  define REG_S "esi"
typedef int32_t x86_reg;
#endif
---|
18 | |
---|
/*
 * CPU capability flags.
 *
 * Every feature owns a distinct bit so that a caller can test for it
 * individually. MM_MMX2 and MM_MMXEXT are two names for the same bit.
 */
#define MM_MMX      0x0001 /* standard MMX */
#define MM_MMX2     0x0002 /* SSE integer functions or AMD MMX ext */
#define MM_MMXEXT   0x0002 /* SSE integer functions or AMD MMX ext */
#define MM_3DNOW    0x0004 /* AMD 3DNOW */
#define MM_SSE      0x0008 /* SSE functions */
#define MM_SSE2     0x0010 /* PIV SSE2 functions */
#define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
#define MM_SSE3     0x0040 /* SSE3 functions */
#define MM_SSSE3    0x0080 /* SSSE3 functions */
#define MM_SSE4     0x0100 /* SSE4.1 functions */
#define MM_SSE42    0x0200 /* SSE4.2 functions */

/*
 * "Slow" markers: the instruction set is supported, but usually not faster
 * than regular MMX/SSE (e.g. Core1).
 */
#define MM_SSE3SLOW 0x20000000
#define MM_SSE2SLOW 0x40000000
---|
34 | |
---|
/*
 * Execute the CPUID instruction for leaf `index` and store the results in
 * the lvalues passed as eax, ebx, ecx and edx.
 *
 * ebx saving is necessary for PIC; gcc seems unable to handle it on its own.
 * The base register is therefore copied into REG_S before CPUID and swapped
 * back afterwards, which restores the original value and leaves the CPUID
 * result for EBX in REG_S, from where it is returned through the "S"
 * constraint.
 *
 * The expansion has no trailing semicolon: write `cpuid(...);` at the call
 * site, exactly like a function call, so it also works in an unbraced
 * if/else.
 */
#define cpuid(index, eax, ebx, ecx, edx)                \
    __asm__ __volatile__ (                              \
        "mov %%" REG_b ", %%" REG_S "\n\t"              \
        "cpuid\n\t"                                     \
        "xchg %%" REG_b ", %%" REG_S                    \
        : "=a" (eax), "=S" (ebx),                       \
          "=c" (ecx), "=d" (edx)                        \
        : "0" (index))
---|
44 | |
---|
45 | /* Function to test if multimedia instructions are supported... */ |
---|
46 | int mm_support1(void) |
---|
47 | { |
---|
48 | int rval = 0; |
---|
49 | int eax, ebx, ecx, edx; |
---|
50 | int max_std_level, max_ext_level, std_caps=0, ext_caps=0; |
---|
51 | int family=0, model=0; |
---|
52 | union { int i[3]; char c[12]; } vendor; |
---|
53 | |
---|
54 | #ifdef ARCH_X86_32 |
---|
55 | x86_reg a, c; |
---|
56 | __asm__ volatile ( |
---|
57 | /* See if CPUID instruction is supported ... */ |
---|
58 | /* ... Get copies of EFLAGS into eax and ecx */ |
---|
59 | "pushfl\n\t" |
---|
60 | "pop %0\n\t" |
---|
61 | "mov %0, %1\n\t" |
---|
62 | |
---|
63 | /* ... Toggle the ID bit in one copy and store */ |
---|
64 | /* to the EFLAGS reg */ |
---|
65 | "xor $0x200000, %0\n\t" |
---|
66 | "push %0\n\t" |
---|
67 | "popfl\n\t" |
---|
68 | |
---|
69 | /* ... Get the (hopefully modified) EFLAGS */ |
---|
70 | "pushfl\n\t" |
---|
71 | "pop %0\n\t" |
---|
72 | : "=a" (a), "=c" (c) |
---|
73 | : |
---|
74 | : "cc" |
---|
75 | ); |
---|
76 | |
---|
77 | if (a == c) |
---|
78 | return 0; /* CPUID not supported */ |
---|
79 | #endif |
---|
80 | |
---|
81 | cpuid(0, max_std_level, vendor.i[0], vendor.i[2], vendor.i[1]); |
---|
82 | |
---|
83 | if(max_std_level >= 1){ |
---|
84 | cpuid(1, eax, ebx, ecx, std_caps); |
---|
85 | |
---|
86 | family = ((eax>>8)&0xf) + ((eax>>20)&0xff); |
---|
87 | model = ((eax>>4)&0xf) + ((eax>>12)&0xf0); |
---|
88 | if (std_caps & (1<<23)) |
---|
89 | rval |= MM_MMX; |
---|
90 | if (std_caps & (1<<25)) |
---|
91 | rval |= MM_MMX2 |
---|
92 | | MM_SSE; |
---|
93 | if (std_caps & (1<<26)) |
---|
94 | rval |= MM_SSE2; |
---|
95 | if (ecx & 1) |
---|
96 | rval |= MM_SSE3; |
---|
97 | if (ecx & 0x00000200 ) |
---|
98 | rval |= MM_SSSE3; |
---|
99 | if (ecx & 0x00080000 ) |
---|
100 | rval |= MM_SSE4; |
---|
101 | if (ecx & 0x00100000 ) |
---|
102 | rval |= MM_SSE42; |
---|
103 | } |
---|
104 | |
---|
105 | cpuid(0x80000000, max_ext_level, ebx, ecx, edx); |
---|
106 | |
---|
107 | if(max_ext_level >= 0x80000001){ |
---|
108 | cpuid(0x80000001, eax, ebx, ecx, ext_caps); |
---|
109 | if (ext_caps & (1<<31)) |
---|
110 | rval |= MM_3DNOW; |
---|
111 | if (ext_caps & (1<<30)) |
---|
112 | rval |= MM_3DNOWEXT; |
---|
113 | if (ext_caps & (1<<23)) |
---|
114 | rval |= MM_MMX; |
---|
115 | if (ext_caps & (1<<22)) |
---|
116 | rval |= MM_MMX2; |
---|
117 | } |
---|
118 | |
---|
119 | if (!strncmp(vendor.c, "GenuineIntel", 12) && |
---|
120 | family == 6 && (model == 9 || model == 13 || model == 14)) { |
---|
121 | /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah") |
---|
122 | * theoretically support sse2, but it's usually slower than mmx, |
---|
123 | * so let's just pretend they don't. */ |
---|
124 | if (rval & MM_SSE2) rval ^= MM_SSE2SLOW|MM_SSE2; |
---|
125 | if (rval & MM_SSE3) rval ^= MM_SSE3SLOW|MM_SSE3; |
---|
126 | } |
---|
127 | |
---|
128 | return rval; |
---|
129 | } |
---|
130 | |
---|
131 | |
---|
132 | /* Function to test if multimedia instructions are supported... */ |
---|
133 | static int mm_support2(void) |
---|
134 | { |
---|
135 | int rval = 0; |
---|
136 | int eax, ebx, ecx, edx; |
---|
137 | int max_std_level, max_ext_level, std_caps=0, ext_caps=0; |
---|
138 | long a, c; |
---|
139 | |
---|
140 | __asm__ __volatile__ ( |
---|
141 | /* See if CPUID instruction is supported ... */ |
---|
142 | /* ... Get copies of EFLAGS into eax and ecx */ |
---|
143 | "pushf\n\t" |
---|
144 | "pop %0\n\t" |
---|
145 | "mov %0, %1\n\t" |
---|
146 | |
---|
147 | /* ... Toggle the ID bit in one copy and store */ |
---|
148 | /* to the EFLAGS reg */ |
---|
149 | "xor $0x200000, %0\n\t" |
---|
150 | "push %0\n\t" |
---|
151 | "popf\n\t" |
---|
152 | |
---|
153 | /* ... Get the (hopefully modified) EFLAGS */ |
---|
154 | "pushf\n\t" |
---|
155 | "pop %0\n\t" |
---|
156 | : "=a" (a), "=c" (c) |
---|
157 | : |
---|
158 | : "cc" |
---|
159 | ); |
---|
160 | |
---|
161 | if (a == c) |
---|
162 | return 0; /* CPUID not supported */ |
---|
163 | |
---|
164 | cpuid(0, max_std_level, ebx, ecx, edx); |
---|
165 | |
---|
166 | if(max_std_level >= 1){ |
---|
167 | cpuid(1, eax, ebx, ecx, std_caps); |
---|
168 | if (std_caps & (1<<23)) |
---|
169 | rval |= MM_MMX; |
---|
170 | if (std_caps & (1<<25)) |
---|
171 | rval |= MM_MMXEXT | MM_SSE; |
---|
172 | if (std_caps & (1<<26)) |
---|
173 | rval |= MM_SSE2; |
---|
174 | if (ecx & 1) |
---|
175 | rval |= MM_SSE3; |
---|
176 | if (ecx & 0x00000200 ) |
---|
177 | rval |= MM_SSSE3; |
---|
178 | if (ecx & 0x00080000 ) |
---|
179 | rval |= MM_SSE4; |
---|
180 | if (ecx & 0x00100000 ) |
---|
181 | rval |= MM_SSE42; |
---|
182 | } |
---|
183 | |
---|
184 | cpuid(0x80000000, max_ext_level, ebx, ecx, edx); |
---|
185 | |
---|
186 | if(max_ext_level >= 0x80000001){ |
---|
187 | cpuid(0x80000001, eax, ebx, ecx, ext_caps); |
---|
188 | if (ext_caps & (1<<31)) |
---|
189 | rval |= MM_3DNOW; |
---|
190 | if (ext_caps & (1<<30)) |
---|
191 | rval |= MM_3DNOWEXT; |
---|
192 | if (ext_caps & (1<<23)) |
---|
193 | rval |= MM_MMX; |
---|
194 | } |
---|
195 | |
---|
196 | cpuid(0, eax, ebx, ecx, edx); |
---|
197 | if ( ebx == 0x68747541 && |
---|
198 | edx == 0x69746e65 && |
---|
199 | ecx == 0x444d4163) { |
---|
200 | /* AMD */ |
---|
201 | if(ext_caps & (1<<22)) |
---|
202 | rval |= MM_MMXEXT; |
---|
203 | } else if (ebx == 0x746e6543 && |
---|
204 | edx == 0x48727561 && |
---|
205 | ecx == 0x736c7561) { /* "CentaurHauls" */ |
---|
206 | /* VIA C3 */ |
---|
207 | if(ext_caps & (1<<24)) |
---|
208 | rval |= MM_MMXEXT; |
---|
209 | } else if (ebx == 0x69727943 && |
---|
210 | edx == 0x736e4978 && |
---|
211 | ecx == 0x64616574) { |
---|
212 | /* Cyrix Section */ |
---|
213 | /* See if extended CPUID level 80000001 is supported */ |
---|
214 | /* The value of CPUID/80000001 for the 6x86MX is undefined |
---|
215 | according to the Cyrix CPU Detection Guide (Preliminary |
---|
216 | Rev. 1.01 table 1), so we'll check the value of eax for |
---|
217 | CPUID/0 to see if standard CPUID level 2 is supported. |
---|
218 | According to the table, the only CPU which supports level |
---|
219 | 2 is also the only one which supports extended CPUID levels. |
---|
220 | */ |
---|
221 | if (eax < 2) |
---|
222 | return rval; |
---|
223 | if (ext_caps & (1<<24)) |
---|
224 | rval |= MM_MMXEXT; |
---|
225 | } |
---|
226 | return rval; |
---|
227 | } |
---|
228 | |
---|
/*
 * Run both detection routines, first mm_support1() and then mm_support2(),
 * and print each resulting flag word as eight upper-case hex digits.
 * Always exits with status 0.
 */
int main(void)
{
    const int flags_v1 = mm_support1();
    const int flags_v2 = mm_support2();

    printf("mm_support1 = 0x%08X\n", flags_v1);
    printf("mm_support2 = 0x%08X\n", flags_v2);

    return 0;
}
---|