/home/arjun/llvm-project/llvm/lib/Support/Host.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the operating system Host concept. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "llvm/Support/Host.h" |
14 | | #include "llvm/ADT/SmallSet.h" |
15 | | #include "llvm/ADT/SmallVector.h" |
16 | | #include "llvm/ADT/StringMap.h" |
17 | | #include "llvm/ADT/StringRef.h" |
18 | | #include "llvm/ADT/StringSwitch.h" |
19 | | #include "llvm/ADT/Triple.h" |
20 | | #include "llvm/Config/llvm-config.h" |
21 | | #include "llvm/Support/Debug.h" |
22 | | #include "llvm/Support/FileSystem.h" |
23 | | #include "llvm/Support/MemoryBuffer.h" |
24 | | #include "llvm/Support/TargetParser.h" |
25 | | #include "llvm/Support/raw_ostream.h" |
26 | | #include <assert.h> |
27 | | #include <string.h> |
28 | | |
29 | | // Include the platform-specific parts of this class. |
30 | | #ifdef LLVM_ON_UNIX |
31 | | #include "Unix/Host.inc" |
32 | | #include <sched.h> |
33 | | #endif |
34 | | #ifdef _WIN32 |
35 | | #include "Windows/Host.inc" |
36 | | #endif |
37 | | #ifdef _MSC_VER |
38 | | #include <intrin.h> |
39 | | #endif |
40 | | #if defined(__APPLE__) && (!defined(__x86_64__)) |
41 | | #include <mach/host_info.h> |
42 | | #include <mach/mach.h> |
43 | | #include <mach/mach_host.h> |
44 | | #include <mach/machine.h> |
45 | | #endif |
46 | | |
47 | | #define DEBUG_TYPE "host-detection" |
48 | | |
49 | | //===----------------------------------------------------------------------===// |
50 | | // |
51 | | // Implementations of the CPU detection routines |
52 | | // |
53 | | //===----------------------------------------------------------------------===// |
54 | | |
55 | | using namespace llvm; |
56 | | |
57 | | static std::unique_ptr<llvm::MemoryBuffer> |
58 | 0 | LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { |
59 | 0 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = |
60 | 0 | llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); |
61 | 0 | if (std::error_code EC = Text.getError()) { |
62 | 0 | llvm::errs() << "Can't read " |
63 | 0 | << "/proc/cpuinfo: " << EC.message() << "\n"; |
64 | 0 | return nullptr; |
65 | 0 | } |
66 | 0 | return std::move(*Text); |
67 | 0 | } |
68 | | |
69 | 0 | StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { |
70 | 0 | // Access to the Processor Version Register (PVR) on PowerPC is privileged, |
71 | 0 | // and so we must use an operating-system interface to determine the current |
72 | 0 | // processor type. On Linux, this is exposed through the /proc/cpuinfo file. |
73 | 0 | const char *generic = "generic"; |
74 | 0 |
|
75 | 0 | // The cpu line is second (after the 'processor: 0' line), so if this |
76 | 0 | // buffer is too small then something has changed (or is wrong). |
77 | 0 | StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); |
78 | 0 | StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); |
79 | 0 |
|
80 | 0 | StringRef::const_iterator CIP = CPUInfoStart; |
81 | 0 |
|
82 | 0 | StringRef::const_iterator CPUStart = 0; |
83 | 0 | size_t CPULen = 0; |
84 | 0 |
|
85 | 0 | // We need to find the first line which starts with cpu, spaces, and a colon. |
86 | 0 | // After the colon, there may be some additional spaces and then the cpu type. |
87 | 0 | while (CIP < CPUInfoEnd && CPUStart == 0) { |
88 | 0 | if (CIP < CPUInfoEnd && *CIP == '\n') |
89 | 0 | ++CIP; |
90 | 0 |
|
91 | 0 | if (CIP < CPUInfoEnd && *CIP == 'c') { |
92 | 0 | ++CIP; |
93 | 0 | if (CIP < CPUInfoEnd && *CIP == 'p') { |
94 | 0 | ++CIP; |
95 | 0 | if (CIP < CPUInfoEnd && *CIP == 'u') { |
96 | 0 | ++CIP; |
97 | 0 | while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) |
98 | 0 | ++CIP; |
99 | 0 |
|
100 | 0 | if (CIP < CPUInfoEnd && *CIP == ':') { |
101 | 0 | ++CIP; |
102 | 0 | while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) |
103 | 0 | ++CIP; |
104 | 0 |
|
105 | 0 | if (CIP < CPUInfoEnd) { |
106 | 0 | CPUStart = CIP; |
107 | 0 | while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && |
108 | 0 | *CIP != ',' && *CIP != '\n')) |
109 | 0 | ++CIP; |
110 | 0 | CPULen = CIP - CPUStart; |
111 | 0 | } |
112 | 0 | } |
113 | 0 | } |
114 | 0 | } |
115 | 0 | } |
116 | 0 |
|
117 | 0 | if (CPUStart == 0) |
118 | 0 | while (CIP < CPUInfoEnd && *CIP != '\n') |
119 | 0 | ++CIP; |
120 | 0 | } |
121 | 0 |
|
122 | 0 | if (CPUStart == 0) |
123 | 0 | return generic; |
124 | 0 | |
125 | 0 | return StringSwitch<const char *>(StringRef(CPUStart, CPULen)) |
126 | 0 | .Case("604e", "604e") |
127 | 0 | .Case("604", "604") |
128 | 0 | .Case("7400", "7400") |
129 | 0 | .Case("7410", "7400") |
130 | 0 | .Case("7447", "7400") |
131 | 0 | .Case("7455", "7450") |
132 | 0 | .Case("G4", "g4") |
133 | 0 | .Case("POWER4", "970") |
134 | 0 | .Case("PPC970FX", "970") |
135 | 0 | .Case("PPC970MP", "970") |
136 | 0 | .Case("G5", "g5") |
137 | 0 | .Case("POWER5", "g5") |
138 | 0 | .Case("A2", "a2") |
139 | 0 | .Case("POWER6", "pwr6") |
140 | 0 | .Case("POWER7", "pwr7") |
141 | 0 | .Case("POWER8", "pwr8") |
142 | 0 | .Case("POWER8E", "pwr8") |
143 | 0 | .Case("POWER8NVL", "pwr8") |
144 | 0 | .Case("POWER9", "pwr9") |
145 | 0 | .Case("POWER10", "pwr10") |
146 | 0 | // FIXME: If we get a simulator or machine with the capabilities of |
147 | 0 | // mcpu=future, we should revisit this and add the name reported by the |
148 | 0 | // simulator/machine. |
149 | 0 | .Default(generic); |
150 | 0 | } |
151 | | |
152 | 0 | StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { |
153 | 0 | // The cpuid register on arm is not accessible from user space. On Linux, |
154 | 0 | // it is exposed through the /proc/cpuinfo file. |
155 | 0 |
|
156 | 0 | // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line |
157 | 0 | // in all cases. |
158 | 0 | SmallVector<StringRef, 32> Lines; |
159 | 0 | ProcCpuinfoContent.split(Lines, "\n"); |
160 | 0 |
|
161 | 0 | // Look for the CPU implementer line. |
162 | 0 | StringRef Implementer; |
163 | 0 | StringRef Hardware; |
164 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { |
165 | 0 | if (Lines[I].startswith("CPU implementer")) |
166 | 0 | Implementer = Lines[I].substr(15).ltrim("\t :"); |
167 | 0 | if (Lines[I].startswith("Hardware")) |
168 | 0 | Hardware = Lines[I].substr(8).ltrim("\t :"); |
169 | 0 | } |
170 | 0 |
|
171 | 0 | if (Implementer == "0x41") { // ARM Ltd. |
172 | 0 | // MSM8992/8994 may give cpu part for the core that the kernel is running on, |
173 | 0 | // which is undeterministic and wrong. Always return cortex-a53 for these SoC. |
174 | 0 | if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) |
175 | 0 | return "cortex-a53"; |
176 | 0 | |
177 | 0 | |
178 | 0 | // Look for the CPU part line. |
179 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) |
180 | 0 | if (Lines[I].startswith("CPU part")) |
181 | 0 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The |
182 | 0 | // values correspond to the "Part number" in the CP15/c0 register. The |
183 | 0 | // contents are specified in the various processor manuals. |
184 | 0 | // This corresponds to the Main ID Register in Technical Reference Manuals. |
185 | 0 | // and is used in programs like sys-utils |
186 | 0 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) |
187 | 0 | .Case("0x926", "arm926ej-s") |
188 | 0 | .Case("0xb02", "mpcore") |
189 | 0 | .Case("0xb36", "arm1136j-s") |
190 | 0 | .Case("0xb56", "arm1156t2-s") |
191 | 0 | .Case("0xb76", "arm1176jz-s") |
192 | 0 | .Case("0xc08", "cortex-a8") |
193 | 0 | .Case("0xc09", "cortex-a9") |
194 | 0 | .Case("0xc0f", "cortex-a15") |
195 | 0 | .Case("0xc20", "cortex-m0") |
196 | 0 | .Case("0xc23", "cortex-m3") |
197 | 0 | .Case("0xc24", "cortex-m4") |
198 | 0 | .Case("0xd22", "cortex-m55") |
199 | 0 | .Case("0xd02", "cortex-a34") |
200 | 0 | .Case("0xd04", "cortex-a35") |
201 | 0 | .Case("0xd03", "cortex-a53") |
202 | 0 | .Case("0xd07", "cortex-a57") |
203 | 0 | .Case("0xd08", "cortex-a72") |
204 | 0 | .Case("0xd09", "cortex-a73") |
205 | 0 | .Case("0xd0a", "cortex-a75") |
206 | 0 | .Case("0xd0b", "cortex-a76") |
207 | 0 | .Case("0xd0c", "neoverse-n1") |
208 | 0 | .Default("generic"); |
209 | 0 | } |
210 | 0 |
|
211 | 0 | if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. |
212 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { |
213 | 0 | if (Lines[I].startswith("CPU part")) { |
214 | 0 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) |
215 | 0 | .Case("0x516", "thunderx2t99") |
216 | 0 | .Case("0x0516", "thunderx2t99") |
217 | 0 | .Case("0xaf", "thunderx2t99") |
218 | 0 | .Case("0x0af", "thunderx2t99") |
219 | 0 | .Case("0xa1", "thunderxt88") |
220 | 0 | .Case("0x0a1", "thunderxt88") |
221 | 0 | .Default("generic"); |
222 | 0 | } |
223 | 0 | } |
224 | 0 | } |
225 | 0 |
|
226 | 0 | if (Implementer == "0x46") { // Fujitsu Ltd. |
227 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { |
228 | 0 | if (Lines[I].startswith("CPU part")) { |
229 | 0 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) |
230 | 0 | .Case("0x001", "a64fx") |
231 | 0 | .Default("generic"); |
232 | 0 | } |
233 | 0 | } |
234 | 0 | } |
235 | 0 |
|
236 | 0 | if (Implementer == "0x4e") { // NVIDIA Corporation |
237 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { |
238 | 0 | if (Lines[I].startswith("CPU part")) { |
239 | 0 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) |
240 | 0 | .Case("0x004", "carmel") |
241 | 0 | .Default("generic"); |
242 | 0 | } |
243 | 0 | } |
244 | 0 | } |
245 | 0 |
|
246 | 0 | if (Implementer == "0x48") // HiSilicon Technologies, Inc. |
247 | 0 | // Look for the CPU part line. |
248 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) |
249 | 0 | if (Lines[I].startswith("CPU part")) |
250 | 0 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The |
251 | 0 | // values correspond to the "Part number" in the CP15/c0 register. The |
252 | 0 | // contents are specified in the various processor manuals. |
253 | 0 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) |
254 | 0 | .Case("0xd01", "tsv110") |
255 | 0 | .Default("generic"); |
256 | 0 |
|
257 | 0 | if (Implementer == "0x51") // Qualcomm Technologies, Inc. |
258 | 0 | // Look for the CPU part line. |
259 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) |
260 | 0 | if (Lines[I].startswith("CPU part")) |
261 | 0 | // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The |
262 | 0 | // values correspond to the "Part number" in the CP15/c0 register. The |
263 | 0 | // contents are specified in the various processor manuals. |
264 | 0 | return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :")) |
265 | 0 | .Case("0x06f", "krait") // APQ8064 |
266 | 0 | .Case("0x201", "kryo") |
267 | 0 | .Case("0x205", "kryo") |
268 | 0 | .Case("0x211", "kryo") |
269 | 0 | .Case("0x800", "cortex-a73") |
270 | 0 | .Case("0x801", "cortex-a73") |
271 | 0 | .Case("0x802", "cortex-a73") |
272 | 0 | .Case("0x803", "cortex-a73") |
273 | 0 | .Case("0x804", "cortex-a73") |
274 | 0 | .Case("0x805", "cortex-a73") |
275 | 0 | .Case("0xc00", "falkor") |
276 | 0 | .Case("0xc01", "saphira") |
277 | 0 | .Default("generic"); |
278 | 0 |
|
279 | 0 | if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. |
280 | 0 | // The Exynos chips have a convoluted ID scheme that doesn't seem to follow |
281 | 0 | // any predictive pattern across variants and parts. |
282 | 0 | unsigned Variant = 0, Part = 0; |
283 | 0 |
|
284 | 0 | // Look for the CPU variant line, whose value is a 1 digit hexadecimal |
285 | 0 | // number, corresponding to the Variant bits in the CP15/C0 register. |
286 | 0 | for (auto I : Lines) |
287 | 0 | if (I.consume_front("CPU variant")) |
288 | 0 | I.ltrim("\t :").getAsInteger(0, Variant); |
289 | 0 |
|
290 | 0 | // Look for the CPU part line, whose value is a 3 digit hexadecimal |
291 | 0 | // number, corresponding to the PartNum bits in the CP15/C0 register. |
292 | 0 | for (auto I : Lines) |
293 | 0 | if (I.consume_front("CPU part")) |
294 | 0 | I.ltrim("\t :").getAsInteger(0, Part); |
295 | 0 |
|
296 | 0 | unsigned Exynos = (Variant << 12) | Part; |
297 | 0 | switch (Exynos) { |
298 | 0 | default: |
299 | 0 | // Default by falling through to Exynos M3. |
300 | 0 | LLVM_FALLTHROUGH; |
301 | 0 | case 0x1002: |
302 | 0 | return "exynos-m3"; |
303 | 0 | case 0x1003: |
304 | 0 | return "exynos-m4"; |
305 | 0 | } |
306 | 0 | } |
307 | 0 | |
308 | 0 | return "generic"; |
309 | 0 | } |
310 | | |
311 | 0 | StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { |
312 | 0 | // STIDP is a privileged operation, so use /proc/cpuinfo instead. |
313 | 0 |
|
314 | 0 | // The "processor 0:" line comes after a fair amount of other information, |
315 | 0 | // including a cache breakdown, but this should be plenty. |
316 | 0 | SmallVector<StringRef, 32> Lines; |
317 | 0 | ProcCpuinfoContent.split(Lines, "\n"); |
318 | 0 |
|
319 | 0 | // Look for the CPU features. |
320 | 0 | SmallVector<StringRef, 32> CPUFeatures; |
321 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) |
322 | 0 | if (Lines[I].startswith("features")) { |
323 | 0 | size_t Pos = Lines[I].find(":"); |
324 | 0 | if (Pos != StringRef::npos) { |
325 | 0 | Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); |
326 | 0 | break; |
327 | 0 | } |
328 | 0 | } |
329 | 0 |
|
330 | 0 | // We need to check for the presence of vector support independently of |
331 | 0 | // the machine type, since we may only use the vector register set when |
332 | 0 | // supported by the kernel (and hypervisor). |
333 | 0 | bool HaveVectorSupport = false; |
334 | 0 | for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { |
335 | 0 | if (CPUFeatures[I] == "vx") |
336 | 0 | HaveVectorSupport = true; |
337 | 0 | } |
338 | 0 |
|
339 | 0 | // Now check the processor machine type. |
340 | 0 | for (unsigned I = 0, E = Lines.size(); I != E; ++I) { |
341 | 0 | if (Lines[I].startswith("processor ")) { |
342 | 0 | size_t Pos = Lines[I].find("machine = "); |
343 | 0 | if (Pos != StringRef::npos) { |
344 | 0 | Pos += sizeof("machine = ") - 1; |
345 | 0 | unsigned int Id; |
346 | 0 | if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { |
347 | 0 | if (Id >= 8561 && HaveVectorSupport) |
348 | 0 | return "z15"; |
349 | 0 | if (Id >= 3906 && HaveVectorSupport) |
350 | 0 | return "z14"; |
351 | 0 | if (Id >= 2964 && HaveVectorSupport) |
352 | 0 | return "z13"; |
353 | 0 | if (Id >= 2827) |
354 | 0 | return "zEC12"; |
355 | 0 | if (Id >= 2817) |
356 | 0 | return "z196"; |
357 | 0 | } |
358 | 0 | } |
359 | 0 | break; |
360 | 0 | } |
361 | 0 | } |
362 | 0 |
|
363 | 0 | return "generic"; |
364 | 0 | } |
365 | | |
366 | 0 | StringRef sys::detail::getHostCPUNameForBPF() { |
367 | | #if !defined(__linux__) || !defined(__x86_64__) |
368 | | return "generic"; |
369 | | #else |
370 | | uint8_t v3_insns[40] __attribute__ ((aligned (8))) = |
371 | 0 | /* BPF_MOV64_IMM(BPF_REG_0, 0) */ |
372 | 0 | { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
373 | 0 | /* BPF_MOV64_IMM(BPF_REG_2, 1) */ |
374 | 0 | 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
375 | 0 | /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ |
376 | 0 | 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, |
377 | 0 | /* BPF_MOV64_IMM(BPF_REG_0, 1) */ |
378 | 0 | 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
379 | 0 | /* BPF_EXIT_INSN() */ |
380 | 0 | 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; |
381 | 0 |
|
382 | 0 | uint8_t v2_insns[40] __attribute__ ((aligned (8))) = |
383 | 0 | /* BPF_MOV64_IMM(BPF_REG_0, 0) */ |
384 | 0 | { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, |
385 | 0 | /* BPF_MOV64_IMM(BPF_REG_2, 1) */ |
386 | 0 | 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
387 | 0 | /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ |
388 | 0 | 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, |
389 | 0 | /* BPF_MOV64_IMM(BPF_REG_0, 1) */ |
390 | 0 | 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, |
391 | 0 | /* BPF_EXIT_INSN() */ |
392 | 0 | 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; |
393 | 0 |
|
394 | 0 | struct bpf_prog_load_attr { |
395 | 0 | uint32_t prog_type; |
396 | 0 | uint32_t insn_cnt; |
397 | 0 | uint64_t insns; |
398 | 0 | uint64_t license; |
399 | 0 | uint32_t log_level; |
400 | 0 | uint32_t log_size; |
401 | 0 | uint64_t log_buf; |
402 | 0 | uint32_t kern_version; |
403 | 0 | uint32_t prog_flags; |
404 | 0 | } attr = {}; |
405 | 0 | attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ |
406 | 0 | attr.insn_cnt = 5; |
407 | 0 | attr.insns = (uint64_t)v3_insns; |
408 | 0 | attr.license = (uint64_t)"DUMMY"; |
409 | 0 |
|
410 | 0 | int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, |
411 | 0 | sizeof(attr)); |
412 | 0 | if (fd >= 0) { |
413 | 0 | close(fd); |
414 | 0 | return "v3"; |
415 | 0 | } |
416 | 0 | |
417 | 0 | /* Clear the whole attr in case its content changed by syscall. */ |
418 | 0 | memset(&attr, 0, sizeof(attr)); |
419 | 0 | attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ |
420 | 0 | attr.insn_cnt = 5; |
421 | 0 | attr.insns = (uint64_t)v2_insns; |
422 | 0 | attr.license = (uint64_t)"DUMMY"; |
423 | 0 | fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); |
424 | 0 | if (fd >= 0) { |
425 | 0 | close(fd); |
426 | 0 | return "v2"; |
427 | 0 | } |
428 | 0 | return "v1"; |
429 | 0 | #endif |
430 | 0 | } |
431 | | |
432 | | #if defined(__i386__) || defined(_M_IX86) || \ |
433 | | defined(__x86_64__) || defined(_M_X64) |
434 | | |
// Vendor signature words. Each value is four ASCII characters packed with the
// first character in the least significant byte.
enum VendorSignatures {
  SIG_INTEL = 0x756e6547, // "Genu"
  SIG_AMD = 0x68747541    // "Auth"
};
439 | | |
// Reports whether the CPUID instruction may be executed.
//
// Only 32-bit x86 builds with a GNU-compatible compiler perform a real check:
// the code tries to flip bit 0x00200000 of EFLAGS and looks at whether the
// change sticks. Every other configuration simply returns true.
//
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
// support. Consequently, for i386, the presence of CPUID is checked first
// via the corresponding eflags bit.
// Removal of cpuid.h header motivated by PR30384
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
// or test-suite, but are used in external projects e.g. libstdcxx
static bool isCpuIdSupported() {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  int CpuidSupported;
  // Read EFLAGS, toggle the bit, write it back, read it again; the output is
  // 0 when both reads are equal (the bit could not be changed) and 1
  // otherwise.
  __asm__("  pushfl\n"
          "  popl   %%eax\n"
          "  movl   %%eax,%%ecx\n"
          "  xorl   $0x00200000,%%eax\n"
          "  pushl  %%eax\n"
          "  popfl\n"
          "  pushfl\n"
          "  popl   %%eax\n"
          "  movl   $0,%0\n"
          "  cmpl   %%eax,%%ecx\n"
          "  je     1f\n"
          "  movl   $1,%0\n"
          "1:"
          : "=r"(CpuidSupported)
          :
          : "eax", "ecx");
  if (!CpuidSupported)
    return false;
#endif
  return true;
}
474 | | |
/// getX86CpuIDAndInfo - Run the cpuid instruction with \p value in EAX and
/// store the resulting EAX/EBX/ECX/EDX values through the four out-pointers.
/// Returns false once cpuid has been executed, and true when this build has
/// no way of issuing it (in which case the out-pointers are left untouched).
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
                               unsigned *rECX, unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the instruction; the final exchange restores
  // rbx and leaves cpuid's EBX result in rsi, which is the "=S" output.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  // The MSVC intrinsic is portable across x86 and x64.
  int Regs[4];
  __cpuid(Regs, value);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // No known way to execute cpuid with this compiler/target combination.
  return true;
#endif
}
512 | | |
/// getX86CpuIDAndInfoEx - Run the cpuid instruction with \p value in EAX and
/// \p subleaf in ECX, storing the resulting EAX/EBX/ECX/EDX values through
/// the four out-pointers. Returns false once cpuid has been executed, and
/// true when this build has no way of issuing it (the out-pointers are then
/// left untouched).
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
                                 unsigned *rEDX) {
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
  // FIXME: should we save this for Clang?
  // rbx is parked in rsi around the instruction; the final exchange restores
  // rbx and leaves cpuid's EBX result in rsi, which is the "=S" output.
  __asm__("movq\t%%rbx, %%rsi\n\t"
          "cpuid\n\t"
          "xchgq\t%%rbx, %%rsi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__i386__)
  // Same ebx-preserving sequence as above, using the 32-bit registers.
  __asm__("movl\t%%ebx, %%esi\n\t"
          "cpuid\n\t"
          "xchgl\t%%ebx, %%esi\n\t"
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
          : "a"(value), "c"(subleaf));
  return false;
#elif defined(_MSC_VER) && !defined(__GNUC__) && !defined(__clang__)
  int Regs[4];
  __cpuidex(Regs, value, subleaf);
  *rEAX = Regs[0];
  *rEBX = Regs[1];
  *rECX = Regs[2];
  *rEDX = Regs[3];
  return false;
#else
  // No known way to execute cpuid with this compiler/target combination.
  return true;
#endif
}
551 | | |
// Read control register 0 (XCR0). Used to detect features such as AVX.
// The low half of the register is stored through rEAX and the high half
// through rEDX. Returns false when the register was read and true when this
// build has no way of reading it.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
  // Check xgetbv; this uses a .byte sequence instead of the instruction
  // directly because older assemblers do not include support for xgetbv and
  // there is no easy way to conditionally compile based on the assembler used.
  unsigned Low, High;
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(Low), "=d"(High) : "c"(0));
  *rEAX = Low;
  *rEDX = High;
  return false;
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
  const unsigned long long XCR0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
  *rEAX = static_cast<unsigned>(XCR0);
  *rEDX = static_cast<unsigned>(XCR0 >> 32);
  return false;
#else
  return true;
#endif
}
569 | | |
/// Decodes the family and model numbers from a CPUID signature word.
///
/// \param EAX    the signature value to decode.
/// \param Family receives bits 8-11, plus the extended family (bits 20-27)
///               when bits 8-11 are 0xF.
/// \param Model  receives bits 4-7, plus the extended model (bits 16-19)
///               shifted left by four when bits 8-11 are 6 or 0xF.
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
                                 unsigned *Model) {
  const unsigned BaseFamily = (EAX >> 8) & 0xf;  // Bits 8 - 11
  const unsigned BaseModel = (EAX >> 4) & 0xf;   // Bits 4 - 7
  const unsigned ExtModel = (EAX >> 16) & 0xf;   // Bits 16 - 19
  const unsigned ExtFamily = (EAX >> 20) & 0xff; // Bits 20 - 27

  // The extended family only applies to family F; the extended model applies
  // to families 6 and F.
  const bool IsFamilyF = BaseFamily == 0xf;
  const bool UsesExtModel = IsFamilyF || BaseFamily == 6;

  *Family = IsFamilyF ? BaseFamily + ExtFamily : BaseFamily;
  *Model = UsesExtModel ? BaseModel + (ExtModel << 4) : BaseModel;
}
582 | | |
583 | | static void |
584 | | getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, |
585 | | unsigned Brand_id, unsigned Features, |
586 | | unsigned Features2, unsigned Features3, |
587 | 0 | unsigned *Type, unsigned *Subtype) { |
588 | 0 | if (Brand_id != 0) |
589 | 0 | return; |
590 | 0 | switch (Family) { |
591 | 0 | case 3: |
592 | 0 | *Type = X86::INTEL_i386; |
593 | 0 | break; |
594 | 0 | case 4: |
595 | 0 | *Type = X86::INTEL_i486; |
596 | 0 | break; |
597 | 0 | case 5: |
598 | 0 | if (Features & (1 << X86::FEATURE_MMX)) { |
599 | 0 | *Type = X86::INTEL_PENTIUM_MMX; |
600 | 0 | break; |
601 | 0 | } |
602 | 0 | *Type = X86::INTEL_PENTIUM; |
603 | 0 | break; |
604 | 0 | case 6: |
605 | 0 | switch (Model) { |
606 | 0 | case 0x01: // Pentium Pro processor |
607 | 0 | *Type = X86::INTEL_PENTIUM_PRO; |
608 | 0 | break; |
609 | 0 | case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, |
610 | 0 | // model 03 |
611 | 0 | case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, |
612 | 0 | // model 05, and Intel Celeron processor, model 05 |
613 | 0 | case 0x06: // Celeron processor, model 06 |
614 | 0 | *Type = X86::INTEL_PENTIUM_II; |
615 | 0 | break; |
616 | 0 | case 0x07: // Pentium III processor, model 07, and Pentium III Xeon |
617 | 0 | // processor, model 07 |
618 | 0 | case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, |
619 | 0 | // model 08, and Celeron processor, model 08 |
620 | 0 | case 0x0a: // Pentium III Xeon processor, model 0Ah |
621 | 0 | case 0x0b: // Pentium III processor, model 0Bh |
622 | 0 | *Type = X86::INTEL_PENTIUM_III; |
623 | 0 | break; |
624 | 0 | case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. |
625 | 0 | case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model |
626 | 0 | // 0Dh. All processors are manufactured using the 90 nm process. |
627 | 0 | case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 |
628 | 0 | // Integrated Processor with Intel QuickAssist Technology |
629 | 0 | *Type = X86::INTEL_PENTIUM_M; |
630 | 0 | break; |
631 | 0 | case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model |
632 | 0 | // 0Eh. All processors are manufactured using the 65 nm process. |
633 | 0 | *Type = X86::INTEL_CORE_DUO; |
634 | 0 | break; // yonah |
635 | 0 | case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile |
636 | 0 | // processor, Intel Core 2 Quad processor, Intel Core 2 Quad |
637 | 0 | // mobile processor, Intel Core 2 Extreme processor, Intel |
638 | 0 | // Pentium Dual-Core processor, Intel Xeon processor, model |
639 | 0 | // 0Fh. All processors are manufactured using the 65 nm process. |
640 | 0 | case 0x16: // Intel Celeron processor model 16h. All processors are |
641 | 0 | // manufactured using the 65 nm process |
642 | 0 | *Type = X86::INTEL_CORE2; // "core2" |
643 | 0 | *Subtype = X86::INTEL_CORE2_65; |
644 | 0 | break; |
645 | 0 | case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model |
646 | 0 | // 17h. All processors are manufactured using the 45 nm process. |
647 | 0 | // |
648 | 0 | // 45nm: Penryn , Wolfdale, Yorkfield (XE) |
649 | 0 | case 0x1d: // Intel Xeon processor MP. All processors are manufactured using |
650 | 0 | // the 45 nm process. |
651 | 0 | *Type = X86::INTEL_CORE2; // "penryn" |
652 | 0 | *Subtype = X86::INTEL_CORE2_45; |
653 | 0 | break; |
654 | 0 | case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All |
655 | 0 | // processors are manufactured using the 45 nm process. |
656 | 0 | case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. |
657 | 0 | // As found in a Summer 2010 model iMac. |
658 | 0 | case 0x1f: |
659 | 0 | case 0x2e: // Nehalem EX |
660 | 0 | *Type = X86::INTEL_COREI7; // "nehalem" |
661 | 0 | *Subtype = X86::INTEL_COREI7_NEHALEM; |
662 | 0 | break; |
663 | 0 | case 0x25: // Intel Core i7, laptop version. |
664 | 0 | case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All |
665 | 0 | // processors are manufactured using the 32 nm process. |
666 | 0 | case 0x2f: // Westmere EX |
667 | 0 | *Type = X86::INTEL_COREI7; // "westmere" |
668 | 0 | *Subtype = X86::INTEL_COREI7_WESTMERE; |
669 | 0 | break; |
670 | 0 | case 0x2a: // Intel Core i7 processor. All processors are manufactured |
671 | 0 | // using the 32 nm process. |
672 | 0 | case 0x2d: |
673 | 0 | *Type = X86::INTEL_COREI7; //"sandybridge" |
674 | 0 | *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; |
675 | 0 | break; |
676 | 0 | case 0x3a: |
677 | 0 | case 0x3e: // Ivy Bridge EP |
678 | 0 | *Type = X86::INTEL_COREI7; // "ivybridge" |
679 | 0 | *Subtype = X86::INTEL_COREI7_IVYBRIDGE; |
680 | 0 | break; |
681 | 0 |
|
682 | 0 | // Haswell: |
683 | 0 | case 0x3c: |
684 | 0 | case 0x3f: |
685 | 0 | case 0x45: |
686 | 0 | case 0x46: |
687 | 0 | *Type = X86::INTEL_COREI7; // "haswell" |
688 | 0 | *Subtype = X86::INTEL_COREI7_HASWELL; |
689 | 0 | break; |
690 | 0 |
|
691 | 0 | // Broadwell: |
692 | 0 | case 0x3d: |
693 | 0 | case 0x47: |
694 | 0 | case 0x4f: |
695 | 0 | case 0x56: |
696 | 0 | *Type = X86::INTEL_COREI7; // "broadwell" |
697 | 0 | *Subtype = X86::INTEL_COREI7_BROADWELL; |
698 | 0 | break; |
699 | 0 |
|
700 | 0 | // Skylake: |
701 | 0 | case 0x4e: // Skylake mobile |
702 | 0 | case 0x5e: // Skylake desktop |
703 | 0 | case 0x8e: // Kaby Lake mobile |
704 | 0 | case 0x9e: // Kaby Lake desktop |
705 | 0 | case 0xa5: // Comet Lake-H/S |
706 | 0 | case 0xa6: // Comet Lake-U |
707 | 0 | *Type = X86::INTEL_COREI7; // "skylake" |
708 | 0 | *Subtype = X86::INTEL_COREI7_SKYLAKE; |
709 | 0 | break; |
710 | 0 |
|
711 | 0 | // Skylake Xeon: |
712 | 0 | case 0x55: |
713 | 0 | *Type = X86::INTEL_COREI7; |
714 | 0 | if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) |
715 | 0 | *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake" |
716 | 0 | else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) |
717 | 0 | *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake" |
718 | 0 | else |
719 | 0 | *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" |
720 | 0 | break; |
721 | 0 |
|
722 | 0 | // Cannonlake: |
723 | 0 | case 0x66: |
724 | 0 | *Type = X86::INTEL_COREI7; |
725 | 0 | *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake" |
726 | 0 | break; |
727 | 0 |
|
728 | 0 | // Icelake: |
729 | 0 | case 0x7d: |
730 | 0 | case 0x7e: |
731 | 0 | *Type = X86::INTEL_COREI7; |
732 | 0 | *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" |
733 | 0 | break; |
734 | 0 |
|
735 | 0 | // Icelake Xeon: |
736 | 0 | case 0x6a: |
737 | 0 | case 0x6c: |
738 | 0 | *Type = X86::INTEL_COREI7; |
739 | 0 | *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" |
740 | 0 | break; |
741 | 0 |
|
742 | 0 | case 0x1c: // Most 45 nm Intel Atom processors |
743 | 0 | case 0x26: // 45 nm Atom Lincroft |
744 | 0 | case 0x27: // 32 nm Atom Medfield |
745 | 0 | case 0x35: // 32 nm Atom Midview |
746 | 0 | case 0x36: // 32 nm Atom Midview |
747 | 0 | *Type = X86::INTEL_BONNELL; |
748 | 0 | break; // "bonnell" |
749 | 0 |
|
750 | 0 | // Atom Silvermont codes from the Intel software optimization guide. |
751 | 0 | case 0x37: |
752 | 0 | case 0x4a: |
753 | 0 | case 0x4d: |
754 | 0 | case 0x5a: |
755 | 0 | case 0x5d: |
756 | 0 | case 0x4c: // really airmont |
757 | 0 | *Type = X86::INTEL_SILVERMONT; |
758 | 0 | break; // "silvermont" |
759 | 0 | // Goldmont: |
760 | 0 | case 0x5c: // Apollo Lake |
761 | 0 | case 0x5f: // Denverton |
762 | 0 | *Type = X86::INTEL_GOLDMONT; |
763 | 0 | break; // "goldmont" |
764 | 0 | case 0x7a: |
765 | 0 | *Type = X86::INTEL_GOLDMONT_PLUS; |
766 | 0 | break; |
767 | 0 | case 0x86: |
768 | 0 | *Type = X86::INTEL_TREMONT; |
769 | 0 | break; |
770 | 0 |
|
771 | 0 | case 0x57: |
772 | 0 | *Type = X86::INTEL_KNL; // knl |
773 | 0 | break; |
774 | 0 |
|
775 | 0 | case 0x85: |
776 | 0 | *Type = X86::INTEL_KNM; // knm |
777 | 0 | break; |
778 | 0 |
|
779 | 0 | default: // Unknown family 6 CPU, try to guess. |
780 | 0 | // TODO detect tigerlake host |
781 | 0 | if (Features2 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 32))) { |
782 | 0 | *Type = X86::INTEL_COREI7; |
783 | 0 | *Subtype = X86::INTEL_COREI7_TIGERLAKE; |
784 | 0 | break; |
785 | 0 | } |
786 | 0 | |
787 | 0 | if (Features & (1 << X86::FEATURE_AVX512VBMI2)) { |
788 | 0 | *Type = X86::INTEL_COREI7; |
789 | 0 | *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; |
790 | 0 | break; |
791 | 0 | } |
792 | 0 | |
793 | 0 | if (Features & (1 << X86::FEATURE_AVX512VBMI)) { |
794 | 0 | *Type = X86::INTEL_COREI7; |
795 | 0 | *Subtype = X86::INTEL_COREI7_CANNONLAKE; |
796 | 0 | break; |
797 | 0 | } |
798 | 0 | |
799 | 0 | if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) { |
800 | 0 | *Type = X86::INTEL_COREI7; |
801 | 0 | *Subtype = X86::INTEL_COREI7_COOPERLAKE; |
802 | 0 | break; |
803 | 0 | } |
804 | 0 | |
805 | 0 | if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) { |
806 | 0 | *Type = X86::INTEL_COREI7; |
807 | 0 | *Subtype = X86::INTEL_COREI7_CASCADELAKE; |
808 | 0 | break; |
809 | 0 | } |
810 | 0 | |
811 | 0 | if (Features & (1 << X86::FEATURE_AVX512VL)) { |
812 | 0 | *Type = X86::INTEL_COREI7; |
813 | 0 | *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; |
814 | 0 | break; |
815 | 0 | } |
816 | 0 | |
817 | 0 | if (Features & (1 << X86::FEATURE_AVX512ER)) { |
818 | 0 | *Type = X86::INTEL_KNL; // knl |
819 | 0 | break; |
820 | 0 | } |
821 | 0 | |
822 | 0 | if (Features3 & (1 << (X86::FEATURE_CLFLUSHOPT - 64))) { |
823 | 0 | if (Features3 & (1 << (X86::FEATURE_SHA - 64))) { |
824 | 0 | *Type = X86::INTEL_GOLDMONT; |
825 | 0 | } else { |
826 | 0 | *Type = X86::INTEL_COREI7; |
827 | 0 | *Subtype = X86::INTEL_COREI7_SKYLAKE; |
828 | 0 | } |
829 | 0 | break; |
830 | 0 | } |
831 | 0 | if (Features3 & (1 << (X86::FEATURE_ADX - 64))) { |
832 | 0 | *Type = X86::INTEL_COREI7; |
833 | 0 | *Subtype = X86::INTEL_COREI7_BROADWELL; |
834 | 0 | break; |
835 | 0 | } |
836 | 0 | if (Features & (1 << X86::FEATURE_AVX2)) { |
837 | 0 | *Type = X86::INTEL_COREI7; |
838 | 0 | *Subtype = X86::INTEL_COREI7_HASWELL; |
839 | 0 | break; |
840 | 0 | } |
841 | 0 | if (Features & (1 << X86::FEATURE_AVX)) { |
842 | 0 | *Type = X86::INTEL_COREI7; |
843 | 0 | *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; |
844 | 0 | break; |
845 | 0 | } |
846 | 0 | if (Features & (1 << X86::FEATURE_SSE4_2)) { |
847 | 0 | if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) { |
848 | 0 | *Type = X86::INTEL_SILVERMONT; |
849 | 0 | } else { |
850 | 0 | *Type = X86::INTEL_COREI7; |
851 | 0 | *Subtype = X86::INTEL_COREI7_NEHALEM; |
852 | 0 | } |
853 | 0 | break; |
854 | 0 | } |
855 | 0 | if (Features & (1 << X86::FEATURE_SSE4_1)) { |
856 | 0 | *Type = X86::INTEL_CORE2; // "penryn" |
857 | 0 | *Subtype = X86::INTEL_CORE2_45; |
858 | 0 | break; |
859 | 0 | } |
860 | 0 | if (Features & (1 << X86::FEATURE_SSSE3)) { |
861 | 0 | if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) { |
862 | 0 | *Type = X86::INTEL_BONNELL; // "bonnell" |
863 | 0 | } else { |
864 | 0 | *Type = X86::INTEL_CORE2; // "core2" |
865 | 0 | *Subtype = X86::INTEL_CORE2_65; |
866 | 0 | } |
867 | 0 | break; |
868 | 0 | } |
869 | 0 | if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) { |
870 | 0 | *Type = X86::INTEL_CORE2; // "core2" |
871 | 0 | *Subtype = X86::INTEL_CORE2_65; |
872 | 0 | break; |
873 | 0 | } |
874 | 0 | if (Features & (1 << X86::FEATURE_SSE3)) { |
875 | 0 | *Type = X86::INTEL_CORE_DUO; |
876 | 0 | break; |
877 | 0 | } |
878 | 0 | if (Features & (1 << X86::FEATURE_SSE2)) { |
879 | 0 | *Type = X86::INTEL_PENTIUM_M; |
880 | 0 | break; |
881 | 0 | } |
882 | 0 | if (Features & (1 << X86::FEATURE_SSE)) { |
883 | 0 | *Type = X86::INTEL_PENTIUM_III; |
884 | 0 | break; |
885 | 0 | } |
886 | 0 | if (Features & (1 << X86::FEATURE_MMX)) { |
887 | 0 | *Type = X86::INTEL_PENTIUM_II; |
888 | 0 | break; |
889 | 0 | } |
890 | 0 | *Type = X86::INTEL_PENTIUM_PRO; |
891 | 0 | break; |
892 | 0 | } |
893 | 0 | break; |
894 | 0 | case 15: { |
895 | 0 | if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) { |
896 | 0 | *Type = X86::INTEL_NOCONA; |
897 | 0 | break; |
898 | 0 | } |
899 | 0 | if (Features & (1 << X86::FEATURE_SSE3)) { |
900 | 0 | *Type = X86::INTEL_PRESCOTT; |
901 | 0 | break; |
902 | 0 | } |
903 | 0 | *Type = X86::INTEL_PENTIUM_IV; |
904 | 0 | break; |
905 | 0 | } |
906 | 0 | default: |
907 | 0 | break; /*"generic"*/ |
908 | 0 | } |
909 | 0 | } |
910 | | |
911 | | static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, |
912 | | unsigned Features, unsigned *Type, |
913 | 0 | unsigned *Subtype) { |
914 | 0 | // FIXME: this poorly matches the generated SubtargetFeatureKV table. There |
915 | 0 | // appears to be no way to generate the wide variety of AMD-specific targets |
916 | 0 | // from the information returned from CPUID. |
917 | 0 | switch (Family) { |
918 | 0 | case 4: |
919 | 0 | *Type = X86::AMD_i486; |
920 | 0 | break; |
921 | 0 | case 5: |
922 | 0 | *Type = X86::AMDPENTIUM; |
923 | 0 | switch (Model) { |
924 | 0 | case 6: |
925 | 0 | case 7: |
926 | 0 | *Subtype = X86::AMDPENTIUM_K6; |
927 | 0 | break; // "k6" |
928 | 0 | case 8: |
929 | 0 | *Subtype = X86::AMDPENTIUM_K62; |
930 | 0 | break; // "k6-2" |
931 | 0 | case 9: |
932 | 0 | case 13: |
933 | 0 | *Subtype = X86::AMDPENTIUM_K63; |
934 | 0 | break; // "k6-3" |
935 | 0 | case 10: |
936 | 0 | *Subtype = X86::AMDPENTIUM_GEODE; |
937 | 0 | break; // "geode" |
938 | 0 | } |
939 | 0 | break; |
940 | 0 | case 6: |
941 | 0 | if (Features & (1 << X86::FEATURE_SSE)) { |
942 | 0 | *Type = X86::AMD_ATHLON_XP; |
943 | 0 | break; // "athlon-xp" |
944 | 0 | } |
945 | 0 | *Type = X86::AMD_ATHLON; |
946 | 0 | break; // "athlon" |
947 | 0 | case 15: |
948 | 0 | if (Features & (1 << X86::FEATURE_SSE3)) { |
949 | 0 | *Type = X86::AMD_K8SSE3; |
950 | 0 | break; // "k8-sse3" |
951 | 0 | } |
952 | 0 | *Type = X86::AMD_K8; |
953 | 0 | break; // "k8" |
954 | 0 | case 16: |
955 | 0 | *Type = X86::AMDFAM10H; // "amdfam10" |
956 | 0 | switch (Model) { |
957 | 0 | case 2: |
958 | 0 | *Subtype = X86::AMDFAM10H_BARCELONA; |
959 | 0 | break; |
960 | 0 | case 4: |
961 | 0 | *Subtype = X86::AMDFAM10H_SHANGHAI; |
962 | 0 | break; |
963 | 0 | case 8: |
964 | 0 | *Subtype = X86::AMDFAM10H_ISTANBUL; |
965 | 0 | break; |
966 | 0 | } |
967 | 0 | break; |
968 | 0 | case 20: |
969 | 0 | *Type = X86::AMD_BTVER1; |
970 | 0 | break; // "btver1"; |
971 | 0 | case 21: |
972 | 0 | *Type = X86::AMDFAM15H; |
973 | 0 | if (Model >= 0x60 && Model <= 0x7f) { |
974 | 0 | *Subtype = X86::AMDFAM15H_BDVER4; |
975 | 0 | break; // "bdver4"; 60h-7Fh: Excavator |
976 | 0 | } |
977 | 0 | if (Model >= 0x30 && Model <= 0x3f) { |
978 | 0 | *Subtype = X86::AMDFAM15H_BDVER3; |
979 | 0 | break; // "bdver3"; 30h-3Fh: Steamroller |
980 | 0 | } |
981 | 0 | if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { |
982 | 0 | *Subtype = X86::AMDFAM15H_BDVER2; |
983 | 0 | break; // "bdver2"; 02h, 10h-1Fh: Piledriver |
984 | 0 | } |
985 | 0 | if (Model <= 0x0f) { |
986 | 0 | *Subtype = X86::AMDFAM15H_BDVER1; |
987 | 0 | break; // "bdver1"; 00h-0Fh: Bulldozer |
988 | 0 | } |
989 | 0 | break; |
990 | 0 | case 22: |
991 | 0 | *Type = X86::AMD_BTVER2; |
992 | 0 | break; // "btver2" |
993 | 0 | case 23: |
994 | 0 | *Type = X86::AMDFAM17H; |
995 | 0 | if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { |
996 | 0 | *Subtype = X86::AMDFAM17H_ZNVER2; |
997 | 0 | break; // "znver2"; 30h-3fh, 71h: Zen2 |
998 | 0 | } |
999 | 0 | if (Model <= 0x0f) { |
1000 | 0 | *Subtype = X86::AMDFAM17H_ZNVER1; |
1001 | 0 | break; // "znver1"; 00h-0Fh: Zen1 |
1002 | 0 | } |
1003 | 0 | break; |
1004 | 0 | default: |
1005 | 0 | break; // "generic" |
1006 | 0 | } |
1007 | 0 | } |
1008 | | |
1009 | | static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, |
1010 | | unsigned *FeaturesOut, unsigned *Features2Out, |
1011 | 0 | unsigned *Features3Out) { |
1012 | 0 | unsigned Features = 0; |
1013 | 0 | unsigned Features2 = 0; |
1014 | 0 | unsigned Features3 = 0; |
1015 | 0 | unsigned EAX, EBX; |
1016 | 0 |
|
1017 | 0 | auto setFeature = [&](unsigned F) { |
1018 | 0 | if (F < 32) |
1019 | 0 | Features |= 1U << (F & 0x1f); |
1020 | 0 | else if (F < 64) |
1021 | 0 | Features2 |= 1U << ((F - 32) & 0x1f); |
1022 | 0 | else if (F < 96) |
1023 | 0 | Features3 |= 1U << ((F - 64) & 0x1f); |
1024 | 0 | else |
1025 | 0 | llvm_unreachable("Unexpected FeatureBit"); |
1026 | 0 | }; |
1027 | 0 |
|
1028 | 0 | if ((EDX >> 15) & 1) |
1029 | 0 | setFeature(X86::FEATURE_CMOV); |
1030 | 0 | if ((EDX >> 23) & 1) |
1031 | 0 | setFeature(X86::FEATURE_MMX); |
1032 | 0 | if ((EDX >> 25) & 1) |
1033 | 0 | setFeature(X86::FEATURE_SSE); |
1034 | 0 | if ((EDX >> 26) & 1) |
1035 | 0 | setFeature(X86::FEATURE_SSE2); |
1036 | 0 |
|
1037 | 0 | if ((ECX >> 0) & 1) |
1038 | 0 | setFeature(X86::FEATURE_SSE3); |
1039 | 0 | if ((ECX >> 1) & 1) |
1040 | 0 | setFeature(X86::FEATURE_PCLMUL); |
1041 | 0 | if ((ECX >> 9) & 1) |
1042 | 0 | setFeature(X86::FEATURE_SSSE3); |
1043 | 0 | if ((ECX >> 12) & 1) |
1044 | 0 | setFeature(X86::FEATURE_FMA); |
1045 | 0 | if ((ECX >> 19) & 1) |
1046 | 0 | setFeature(X86::FEATURE_SSE4_1); |
1047 | 0 | if ((ECX >> 20) & 1) |
1048 | 0 | setFeature(X86::FEATURE_SSE4_2); |
1049 | 0 | if ((ECX >> 23) & 1) |
1050 | 0 | setFeature(X86::FEATURE_POPCNT); |
1051 | 0 | if ((ECX >> 25) & 1) |
1052 | 0 | setFeature(X86::FEATURE_AES); |
1053 | 0 |
|
1054 | 0 | if ((ECX >> 22) & 1) |
1055 | 0 | setFeature(X86::FEATURE_MOVBE); |
1056 | 0 |
|
1057 | 0 | // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV |
1058 | 0 | // indicates that the AVX registers will be saved and restored on context |
1059 | 0 | // switch, then we have full AVX support. |
1060 | 0 | const unsigned AVXBits = (1 << 27) | (1 << 28); |
1061 | 0 | bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && |
1062 | 0 | ((EAX & 0x6) == 0x6); |
1063 | | #if defined(__APPLE__) |
1064 | | // Darwin lazily saves the AVX512 context on first use: trust that the OS will |
1065 | | // save the AVX512 context if we use AVX512 instructions, even the bit is not |
1066 | | // set right now. |
1067 | | bool HasAVX512Save = true; |
1068 | | #else |
1069 | | // AVX512 requires additional context to be saved by the OS. |
1070 | 0 | bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); |
1071 | 0 | #endif |
1072 | 0 |
|
1073 | 0 | if (HasAVX) |
1074 | 0 | setFeature(X86::FEATURE_AVX); |
1075 | 0 |
|
1076 | 0 | bool HasLeaf7 = |
1077 | 0 | MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); |
1078 | 0 |
|
1079 | 0 | if (HasLeaf7 && ((EBX >> 3) & 1)) |
1080 | 0 | setFeature(X86::FEATURE_BMI); |
1081 | 0 | if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) |
1082 | 0 | setFeature(X86::FEATURE_AVX2); |
1083 | 0 | if (HasLeaf7 && ((EBX >> 8) & 1)) |
1084 | 0 | setFeature(X86::FEATURE_BMI2); |
1085 | 0 | if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) |
1086 | 0 | setFeature(X86::FEATURE_AVX512F); |
1087 | 0 | if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) |
1088 | 0 | setFeature(X86::FEATURE_AVX512DQ); |
1089 | 0 | if (HasLeaf7 && ((EBX >> 19) & 1)) |
1090 | 0 | setFeature(X86::FEATURE_ADX); |
1091 | 0 | if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) |
1092 | 0 | setFeature(X86::FEATURE_AVX512IFMA); |
1093 | 0 | if (HasLeaf7 && ((EBX >> 23) & 1)) |
1094 | 0 | setFeature(X86::FEATURE_CLFLUSHOPT); |
1095 | 0 | if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) |
1096 | 0 | setFeature(X86::FEATURE_AVX512PF); |
1097 | 0 | if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) |
1098 | 0 | setFeature(X86::FEATURE_AVX512ER); |
1099 | 0 | if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) |
1100 | 0 | setFeature(X86::FEATURE_AVX512CD); |
1101 | 0 | if (HasLeaf7 && ((EBX >> 29) & 1)) |
1102 | 0 | setFeature(X86::FEATURE_SHA); |
1103 | 0 | if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) |
1104 | 0 | setFeature(X86::FEATURE_AVX512BW); |
1105 | 0 | if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) |
1106 | 0 | setFeature(X86::FEATURE_AVX512VL); |
1107 | 0 |
|
1108 | 0 | if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) |
1109 | 0 | setFeature(X86::FEATURE_AVX512VBMI); |
1110 | 0 | if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) |
1111 | 0 | setFeature(X86::FEATURE_AVX512VBMI2); |
1112 | 0 | if (HasLeaf7 && ((ECX >> 8) & 1)) |
1113 | 0 | setFeature(X86::FEATURE_GFNI); |
1114 | 0 | if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) |
1115 | 0 | setFeature(X86::FEATURE_VPCLMULQDQ); |
1116 | 0 | if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) |
1117 | 0 | setFeature(X86::FEATURE_AVX512VNNI); |
1118 | 0 | if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) |
1119 | 0 | setFeature(X86::FEATURE_AVX512BITALG); |
1120 | 0 | if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) |
1121 | 0 | setFeature(X86::FEATURE_AVX512VPOPCNTDQ); |
1122 | 0 |
|
1123 | 0 | if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) |
1124 | 0 | setFeature(X86::FEATURE_AVX5124VNNIW); |
1125 | 0 | if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) |
1126 | 0 | setFeature(X86::FEATURE_AVX5124FMAPS); |
1127 | 0 | if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) |
1128 | 0 | setFeature(X86::FEATURE_AVX512VP2INTERSECT); |
1129 | 0 |
|
1130 | 0 | bool HasLeaf7Subleaf1 = |
1131 | 0 | MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); |
1132 | 0 | if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) |
1133 | 0 | setFeature(X86::FEATURE_AVX512BF16); |
1134 | 0 |
|
1135 | 0 | unsigned MaxExtLevel; |
1136 | 0 | getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); |
1137 | 0 |
|
1138 | 0 | bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && |
1139 | 0 | !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); |
1140 | 0 | if (HasExtLeaf1 && ((ECX >> 6) & 1)) |
1141 | 0 | setFeature(X86::FEATURE_SSE4_A); |
1142 | 0 | if (HasExtLeaf1 && ((ECX >> 11) & 1)) |
1143 | 0 | setFeature(X86::FEATURE_XOP); |
1144 | 0 | if (HasExtLeaf1 && ((ECX >> 16) & 1)) |
1145 | 0 | setFeature(X86::FEATURE_FMA4); |
1146 | 0 |
|
1147 | 0 | if (HasExtLeaf1 && ((EDX >> 29) & 1)) |
1148 | 0 | setFeature(X86::FEATURE_EM64T); |
1149 | 0 |
|
1150 | 0 | *FeaturesOut = Features; |
1151 | 0 | *Features2Out = Features2; |
1152 | 0 | *Features3Out = Features3; |
1153 | 0 | } |
1154 | | |
1155 | 0 | StringRef sys::getHostCPUName() { |
1156 | 0 | unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; |
1157 | 0 | unsigned MaxLeaf, Vendor; |
1158 | 0 |
|
1159 | 0 | #if defined(__GNUC__) || defined(__clang__) |
1160 | 0 | //FIXME: include cpuid.h from clang or copy __get_cpuid_max here |
1161 | 0 | // and simplify it to not invoke __cpuid (like cpu_model.c in |
1162 | 0 | // compiler-rt/lib/builtins/cpu_model.c? |
1163 | 0 | // Opting for the second option. |
1164 | 0 | if(!isCpuIdSupported()) |
1165 | 0 | return "generic"; |
1166 | 0 | #endif |
1167 | 0 | if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) |
1168 | 0 | return "generic"; |
1169 | 0 | getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); |
1170 | 0 |
|
1171 | 0 | unsigned Brand_id = EBX & 0xff; |
1172 | 0 | unsigned Family = 0, Model = 0; |
1173 | 0 | unsigned Features = 0, Features2 = 0, Features3 = 0; |
1174 | 0 | detectX86FamilyModel(EAX, &Family, &Model); |
1175 | 0 | getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2, &Features3); |
1176 | 0 |
|
1177 | 0 | unsigned Type = 0; |
1178 | 0 | unsigned Subtype = 0; |
1179 | 0 |
|
1180 | 0 | if (Vendor == SIG_INTEL) { |
1181 | 0 | getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, |
1182 | 0 | Features2, Features3, &Type, &Subtype); |
1183 | 0 | } else if (Vendor == SIG_AMD) { |
1184 | 0 | getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); |
1185 | 0 | } |
1186 | 0 |
|
1187 | 0 | // Check subtypes first since those are more specific. |
1188 | 0 | #define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ |
1189 | 0 | if (Subtype == X86::ENUM) \ |
1190 | 0 | return ARCHNAME; |
1191 | 0 | #include "llvm/Support/X86TargetParser.def" |
1192 | 0 |
|
1193 | 0 | // Now check types. |
1194 | 0 | #define X86_CPU_TYPE(ARCHNAME, ENUM) \ |
1195 | 0 | if (Type == X86::ENUM) \ |
1196 | 0 | return ARCHNAME; |
1197 | 0 | #include "llvm/Support/X86TargetParser.def" |
1198 | 0 |
|
1199 | 0 | return "generic"; |
1200 | 0 | } |
1201 | | |
1202 | | #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) |
1203 | | StringRef sys::getHostCPUName() { |
1204 | | host_basic_info_data_t hostInfo; |
1205 | | mach_msg_type_number_t infoCount; |
1206 | | |
1207 | | infoCount = HOST_BASIC_INFO_COUNT; |
1208 | | mach_port_t hostPort = mach_host_self(); |
1209 | | host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, |
1210 | | &infoCount); |
1211 | | mach_port_deallocate(mach_task_self(), hostPort); |
1212 | | |
1213 | | if (hostInfo.cpu_type != CPU_TYPE_POWERPC) |
1214 | | return "generic"; |
1215 | | |
1216 | | switch (hostInfo.cpu_subtype) { |
1217 | | case CPU_SUBTYPE_POWERPC_601: |
1218 | | return "601"; |
1219 | | case CPU_SUBTYPE_POWERPC_602: |
1220 | | return "602"; |
1221 | | case CPU_SUBTYPE_POWERPC_603: |
1222 | | return "603"; |
1223 | | case CPU_SUBTYPE_POWERPC_603e: |
1224 | | return "603e"; |
1225 | | case CPU_SUBTYPE_POWERPC_603ev: |
1226 | | return "603ev"; |
1227 | | case CPU_SUBTYPE_POWERPC_604: |
1228 | | return "604"; |
1229 | | case CPU_SUBTYPE_POWERPC_604e: |
1230 | | return "604e"; |
1231 | | case CPU_SUBTYPE_POWERPC_620: |
1232 | | return "620"; |
1233 | | case CPU_SUBTYPE_POWERPC_750: |
1234 | | return "750"; |
1235 | | case CPU_SUBTYPE_POWERPC_7400: |
1236 | | return "7400"; |
1237 | | case CPU_SUBTYPE_POWERPC_7450: |
1238 | | return "7450"; |
1239 | | case CPU_SUBTYPE_POWERPC_970: |
1240 | | return "970"; |
1241 | | default:; |
1242 | | } |
1243 | | |
1244 | | return "generic"; |
1245 | | } |
1246 | | #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) |
1247 | | StringRef sys::getHostCPUName() { |
1248 | | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
1249 | | StringRef Content = P ? P->getBuffer() : ""; |
1250 | | return detail::getHostCPUNameForPowerPC(Content); |
1251 | | } |
1252 | | #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) |
1253 | | StringRef sys::getHostCPUName() { |
1254 | | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
1255 | | StringRef Content = P ? P->getBuffer() : ""; |
1256 | | return detail::getHostCPUNameForARM(Content); |
1257 | | } |
1258 | | #elif defined(__linux__) && defined(__s390x__) |
1259 | | StringRef sys::getHostCPUName() { |
1260 | | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
1261 | | StringRef Content = P ? P->getBuffer() : ""; |
1262 | | return detail::getHostCPUNameForS390x(Content); |
1263 | | } |
1264 | | #elif defined(__APPLE__) && defined(__aarch64__) |
1265 | | StringRef sys::getHostCPUName() { |
1266 | | return "cyclone"; |
1267 | | } |
1268 | | #elif defined(__APPLE__) && defined(__arm__) |
1269 | | StringRef sys::getHostCPUName() { |
1270 | | host_basic_info_data_t hostInfo; |
1271 | | mach_msg_type_number_t infoCount; |
1272 | | |
1273 | | infoCount = HOST_BASIC_INFO_COUNT; |
1274 | | mach_port_t hostPort = mach_host_self(); |
1275 | | host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, |
1276 | | &infoCount); |
1277 | | mach_port_deallocate(mach_task_self(), hostPort); |
1278 | | |
1279 | | if (hostInfo.cpu_type != CPU_TYPE_ARM) { |
1280 | | assert(false && "CPUType not equal to ARM should not be possible on ARM"); |
1281 | | return "generic"; |
1282 | | } |
1283 | | switch (hostInfo.cpu_subtype) { |
1284 | | case CPU_SUBTYPE_ARM_V7S: |
1285 | | return "swift"; |
1286 | | default:; |
1287 | | } |
1288 | | |
1289 | | return "generic"; |
1290 | | } |
1291 | | #else |
1292 | | StringRef sys::getHostCPUName() { return "generic"; } |
1293 | | #endif |
1294 | | |
1295 | | #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) |
1296 | | // On Linux, the number of physical cores can be computed from /proc/cpuinfo, |
1297 | | // using the number of unique physical/core id pairs. The following |
1298 | | // implementation reads the /proc/cpuinfo format on an x86_64 system. |
1299 | 0 | int computeHostNumPhysicalCores() { |
1300 | 0 | // Enabled represents the number of physical id/core id pairs with at least |
1301 | 0 | // one processor id enabled by the CPU affinity mask. |
1302 | 0 | cpu_set_t Affinity, Enabled; |
1303 | 0 | if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) |
1304 | 0 | return -1; |
1305 | 0 | CPU_ZERO(&Enabled); |
1306 | 0 |
|
1307 | 0 | // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be |
1308 | 0 | // mmapped because it appears to have 0 size. |
1309 | 0 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = |
1310 | 0 | llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); |
1311 | 0 | if (std::error_code EC = Text.getError()) { |
1312 | 0 | llvm::errs() << "Can't read " |
1313 | 0 | << "/proc/cpuinfo: " << EC.message() << "\n"; |
1314 | 0 | return -1; |
1315 | 0 | } |
1316 | 0 | SmallVector<StringRef, 8> strs; |
1317 | 0 | (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, |
1318 | 0 | /*KeepEmpty=*/false); |
1319 | 0 | int CurProcessor = -1; |
1320 | 0 | int CurPhysicalId = -1; |
1321 | 0 | int CurSiblings = -1; |
1322 | 0 | int CurCoreId = -1; |
1323 | 0 | for (StringRef Line : strs) { |
1324 | 0 | std::pair<StringRef, StringRef> Data = Line.split(':'); |
1325 | 0 | auto Name = Data.first.trim(); |
1326 | 0 | auto Val = Data.second.trim(); |
1327 | 0 | // These fields are available if the kernel is configured with CONFIG_SMP. |
1328 | 0 | if (Name == "processor") |
1329 | 0 | Val.getAsInteger(10, CurProcessor); |
1330 | 0 | else if (Name == "physical id") |
1331 | 0 | Val.getAsInteger(10, CurPhysicalId); |
1332 | 0 | else if (Name == "siblings") |
1333 | 0 | Val.getAsInteger(10, CurSiblings); |
1334 | 0 | else if (Name == "core id") { |
1335 | 0 | Val.getAsInteger(10, CurCoreId); |
1336 | 0 | // The processor id corresponds to an index into cpu_set_t. |
1337 | 0 | if (CPU_ISSET(CurProcessor, &Affinity)) |
1338 | 0 | CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); |
1339 | 0 | } |
1340 | 0 | } |
1341 | 0 | return CPU_COUNT(&Enabled); |
1342 | 0 | } |
1343 | | #elif defined(__APPLE__) && defined(__x86_64__) |
1344 | | #include <sys/param.h> |
1345 | | #include <sys/sysctl.h> |
1346 | | |
1347 | | // Gets the number of *physical cores* on the machine. |
1348 | | int computeHostNumPhysicalCores() { |
1349 | | uint32_t count; |
1350 | | size_t len = sizeof(count); |
1351 | | sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); |
1352 | | if (count < 1) { |
1353 | | int nm[2]; |
1354 | | nm[0] = CTL_HW; |
1355 | | nm[1] = HW_AVAILCPU; |
1356 | | sysctl(nm, 2, &count, &len, NULL, 0); |
1357 | | if (count < 1) |
1358 | | return -1; |
1359 | | } |
1360 | | return count; |
1361 | | } |
1362 | | #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0 |
1363 | | // Defined in llvm/lib/Support/Windows/Threading.inc |
1364 | | int computeHostNumPhysicalCores(); |
1365 | | #else |
// Fallback for hosts without a dedicated implementation: the number of
// physical cores is unknown, which is reported as -1.
static int computeHostNumPhysicalCores() {
  const int UnknownCoreCount = -1;
  return UnknownCoreCount;
}
1368 | | #endif |
1369 | | |
1370 | 0 | int sys::getHostNumPhysicalCores() { |
1371 | 0 | static int NumCores = computeHostNumPhysicalCores(); |
1372 | 0 | return NumCores; |
1373 | 0 | } |
1374 | | |
1375 | | #if defined(__i386__) || defined(_M_IX86) || \ |
1376 | | defined(__x86_64__) || defined(_M_X64) |
1377 | 0 | bool sys::getHostCPUFeatures(StringMap<bool> &Features) { |
1378 | 0 | unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; |
1379 | 0 | unsigned MaxLevel; |
1380 | 0 | union { |
1381 | 0 | unsigned u[3]; |
1382 | 0 | char c[12]; |
1383 | 0 | } text; |
1384 | 0 |
|
1385 | 0 | if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) || |
1386 | 0 | MaxLevel < 1) |
1387 | 0 | return false; |
1388 | 0 | |
1389 | 0 | getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); |
1390 | 0 |
|
1391 | 0 | Features["cx8"] = (EDX >> 8) & 1; |
1392 | 0 | Features["cmov"] = (EDX >> 15) & 1; |
1393 | 0 | Features["mmx"] = (EDX >> 23) & 1; |
1394 | 0 | Features["fxsr"] = (EDX >> 24) & 1; |
1395 | 0 | Features["sse"] = (EDX >> 25) & 1; |
1396 | 0 | Features["sse2"] = (EDX >> 26) & 1; |
1397 | 0 |
|
1398 | 0 | Features["sse3"] = (ECX >> 0) & 1; |
1399 | 0 | Features["pclmul"] = (ECX >> 1) & 1; |
1400 | 0 | Features["ssse3"] = (ECX >> 9) & 1; |
1401 | 0 | Features["cx16"] = (ECX >> 13) & 1; |
1402 | 0 | Features["sse4.1"] = (ECX >> 19) & 1; |
1403 | 0 | Features["sse4.2"] = (ECX >> 20) & 1; |
1404 | 0 | Features["movbe"] = (ECX >> 22) & 1; |
1405 | 0 | Features["popcnt"] = (ECX >> 23) & 1; |
1406 | 0 | Features["aes"] = (ECX >> 25) & 1; |
1407 | 0 | Features["rdrnd"] = (ECX >> 30) & 1; |
1408 | 0 |
|
1409 | 0 | // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV |
1410 | 0 | // indicates that the AVX registers will be saved and restored on context |
1411 | 0 | // switch, then we have full AVX support. |
1412 | 0 | bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && |
1413 | 0 | !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); |
1414 | | #if defined(__APPLE__) |
1415 | | // Darwin lazily saves the AVX512 context on first use: trust that the OS will |
1416 | | // save the AVX512 context if we use AVX512 instructions, even the bit is not |
1417 | | // set right now. |
1418 | | bool HasAVX512Save = true; |
1419 | | #else |
1420 | | // AVX512 requires additional context to be saved by the OS. |
1421 | 0 | bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); |
1422 | 0 | #endif |
1423 | 0 |
|
1424 | 0 | Features["avx"] = HasAVXSave; |
1425 | 0 | Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; |
1426 | 0 | // Only enable XSAVE if OS has enabled support for saving YMM state. |
1427 | 0 | Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; |
1428 | 0 | Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; |
1429 | 0 |
|
1430 | 0 | unsigned MaxExtLevel; |
1431 | 0 | getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); |
1432 | 0 |
|
1433 | 0 | bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && |
1434 | 0 | !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); |
1435 | 0 | Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); |
1436 | 0 | Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); |
1437 | 0 | Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); |
1438 | 0 | Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); |
1439 | 0 | Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; |
1440 | 0 | Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); |
1441 | 0 | Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; |
1442 | 0 | Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); |
1443 | 0 | Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); |
1444 | 0 |
|
1445 | 0 | Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); |
1446 | 0 |
|
1447 | 0 | // Miscellaneous memory related features, detected by |
1448 | 0 | // using the 0x80000008 leaf of the CPUID instruction |
1449 | 0 | bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && |
1450 | 0 | !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); |
1451 | 0 | Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); |
1452 | 0 | Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); |
1453 | 0 |
|
1454 | 0 | bool HasLeaf7 = |
1455 | 0 | MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); |
1456 | 0 |
|
1457 | 0 | Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); |
1458 | 0 | Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); |
1459 | 0 | Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); |
1460 | 0 | // AVX2 is only supported if we have the OS save support from AVX. |
1461 | 0 | Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; |
1462 | 0 | Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); |
1463 | 0 | Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); |
1464 | 0 | Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); |
1465 | 0 | // AVX512 is only supported if the OS supports the context save for it. |
1466 | 0 | Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; |
1467 | 0 | Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; |
1468 | 0 | Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); |
1469 | 0 | Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); |
1470 | 0 | Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; |
1471 | 0 | Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); |
1472 | 0 | Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); |
1473 | 0 | Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; |
1474 | 0 | Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; |
1475 | 0 | Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; |
1476 | 0 | Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); |
1477 | 0 | Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; |
1478 | 0 | Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; |
1479 | 0 |
|
1480 | 0 | Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); |
1481 | 0 | Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; |
1482 | 0 | Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); |
1483 | 0 | Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); |
1484 | 0 | Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; |
1485 | 0 | Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); |
1486 | 0 | Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); |
1487 | 0 | Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; |
1488 | 0 | Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; |
1489 | 0 | Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; |
1490 | 0 | Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; |
1491 | 0 | Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; |
1492 | 0 | Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); |
1493 | 0 | Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); |
1494 | 0 | Features["movdiri"] = HasLeaf7 && ((ECX >> 27) & 1); |
1495 | 0 | Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); |
1496 | 0 | Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); |
1497 | 0 |
|
1498 | 0 | Features["avx512vp2intersect"] = |
1499 | 0 | HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; |
1500 | 0 | Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); |
1501 | 0 | Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); |
1502 | 0 | // There are two CPUID leafs which information associated with the pconfig |
1503 | 0 | // instruction: |
1504 | 0 | // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th |
1505 | 0 | // bit of EDX), while the EAX=0x1b leaf returns information on the |
1506 | 0 | // availability of specific pconfig leafs. |
1507 | 0 | // The target feature here only refers to the the first of these two. |
1508 | 0 | // Users might need to check for the availability of specific pconfig |
1509 | 0 | // leaves using cpuid, since that information is ignored while |
1510 | 0 | // detecting features using the "-march=native" flag. |
1511 | 0 | // For more info, see X86 ISA docs. |
1512 | 0 | Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); |
1513 | 0 | bool HasLeaf7Subleaf1 = |
1514 | 0 | MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); |
1515 | 0 | Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; |
1516 | 0 |
|
1517 | 0 | bool HasLeafD = MaxLevel >= 0xd && |
1518 | 0 | !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); |
1519 | 0 |
|
1520 | 0 | // Only enable XSAVE if OS has enabled support for saving YMM state. |
1521 | 0 | Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; |
1522 | 0 | Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; |
1523 | 0 | Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; |
1524 | 0 |
|
1525 | 0 | bool HasLeaf14 = MaxLevel >= 0x14 && |
1526 | 0 | !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); |
1527 | 0 |
|
1528 | 0 | Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); |
1529 | 0 |
|
1530 | 0 | return true; |
1531 | 0 | } |
1532 | | #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) |
1533 | | bool sys::getHostCPUFeatures(StringMap<bool> &Features) { /* Linux ARM/AArch64 variant: maps the flags on the "Features" line of the buffer from getProcCpuinfoContent() (presumably /proc/cpuinfo; confirm against that helper) to LLVM feature names in Features. Returns false only when that buffer is missing. */ |
1534 | | std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent(); |
1535 | | if (!P) |
1536 | | return false; /* no buffer: Features is left untouched */ |
1537 | | |
1538 | | SmallVector<StringRef, 32> Lines; |
1539 | | P->getBuffer().split(Lines, "\n"); |
1540 | | |
1541 | | SmallVector<StringRef, 32> CPUFeatures; |
1542 | | |
1543 | | // Look for the CPU features: only the first line that starts with "Features" is used, split on spaces. |
1544 | | for (unsigned I = 0, E = Lines.size(); I != E; ++I) |
1545 | | if (Lines[I].startswith("Features")) { |
1546 | | Lines[I].split(CPUFeatures, ' '); |
1547 | | break; |
1548 | | } |
1549 | | |
1550 | | #if defined(__aarch64__) |
1551 | | // Keep track of which crypto features we have seen (one bit each for aes, pmull, sha1 and sha2). |
1552 | | enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; |
1553 | | uint32_t crypto = 0; |
1554 | | #endif |
1555 | | |
1556 | | for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { |
1557 | | StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I]) /* cpuinfo flag name -> LLVM feature name; "" when no case matches */ |
1558 | | #if defined(__aarch64__) |
1559 | | .Case("asimd", "neon") |
1560 | | .Case("fp", "fp-armv8") |
1561 | | .Case("crc32", "crc") |
1562 | | #else |
1563 | | .Case("half", "fp16") |
1564 | | .Case("neon", "neon") |
1565 | | .Case("vfpv3", "vfp3") |
1566 | | .Case("vfpv3d16", "d16") |
1567 | | .Case("vfpv4", "vfp4") |
1568 | | .Case("idiva", "hwdiv-arm") |
1569 | | .Case("idivt", "hwdiv") |
1570 | | #endif |
1571 | | .Default(""); |
1572 | | |
1573 | | #if defined(__aarch64__) |
1574 | | // We need to check crypto separately since we need all of the crypto |
1575 | | // extensions to enable the subtarget feature; here we only record which ones were listed. |
1576 | | if (CPUFeatures[I] == "aes") |
1577 | | crypto |= CAP_AES; |
1578 | | else if (CPUFeatures[I] == "pmull") |
1579 | | crypto |= CAP_PMULL; |
1580 | | else if (CPUFeatures[I] == "sha1") |
1581 | | crypto |= CAP_SHA1; |
1582 | | else if (CPUFeatures[I] == "sha2") |
1583 | | crypto |= CAP_SHA2; |
1584 | | #endif |
1585 | | |
1586 | | if (LLVMFeatureStr != "") /* pieces with no mapping (including the "Features" label itself) are skipped */ |
1587 | | Features[LLVMFeatureStr] = true; |
1588 | | } |
1589 | | |
1590 | | #if defined(__aarch64__) |
1591 | | // Only when all four crypto bits (aes, pmull, sha1, sha2) were seen do we add the "crypto" feature. |
1592 | | if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) |
1593 | | Features["crypto"] = true; |
1594 | | #endif |
1595 | | |
1596 | | return true; /* reached whenever the buffer was read, even if no "Features" line or no known flag was found */ |
1597 | | } |
1598 | | #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) |
1599 | | bool sys::getHostCPUFeatures(StringMap<bool> &Features) { /* Windows ARM64 variant: sets "neon", "crc" and "crypto" according to IsProcessorFeaturePresent(); features that are absent are not written at all. */ |
1600 | | if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) |
1601 | | Features["neon"] = true; |
1602 | | if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) |
1603 | | Features["crc"] = true; |
1604 | | if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) |
1605 | | Features["crypto"] = true; |
1606 | | |
1607 | | return true; /* unconditional: this variant has no failure path */ |
1608 | | } |
1609 | | #else |
1610 | | bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } /* fallback for hosts matched by none of the preceding #if/#elif branches: Features is left untouched and false is returned */ |
1611 | | #endif |
1612 | | |
1613 | 2 | std::string sys::getProcessTriple() { /* Returns the normalized form of LLVM_HOST_TRIPLE (after updateTripleOSVersion), with the architecture switched to the 32- or 64-bit variant when it disagrees with this process's pointer size. */ |
1614 | 2 | std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); |
1615 | 2 | Triple PT(Triple::normalize(TargetTripleString)); |
1616 | 2 | |
1617 | 2 | if (sizeof(void *) == 8 && PT.isArch32Bit()) /* 8-byte pointers but a 32-bit triple */ |
1618 | 0 | PT = PT.get64BitArchVariant(); |
1619 | 2 | if (sizeof(void *) == 4 && PT.isArch64Bit()) /* 4-byte pointers but a 64-bit triple */ |
1620 | 0 | PT = PT.get32BitArchVariant(); |
1621 | 2 | |
1622 | 2 | return PT.str(); |
1623 | 2 | } |