Coverage Report

Created: 2020-06-26 05:44

/home/arjun/llvm-project/llvm/lib/Support/Host.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This file implements the operating system Host concept.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/Support/Host.h"
14
#include "llvm/ADT/SmallSet.h"
15
#include "llvm/ADT/SmallVector.h"
16
#include "llvm/ADT/StringMap.h"
17
#include "llvm/ADT/StringRef.h"
18
#include "llvm/ADT/StringSwitch.h"
19
#include "llvm/ADT/Triple.h"
20
#include "llvm/Config/llvm-config.h"
21
#include "llvm/Support/Debug.h"
22
#include "llvm/Support/FileSystem.h"
23
#include "llvm/Support/MemoryBuffer.h"
24
#include "llvm/Support/TargetParser.h"
25
#include "llvm/Support/raw_ostream.h"
26
#include <assert.h>
27
#include <string.h>
28
29
// Include the platform-specific parts of this class.
30
#ifdef LLVM_ON_UNIX
31
#include "Unix/Host.inc"
32
#include <sched.h>
33
#endif
34
#ifdef _WIN32
35
#include "Windows/Host.inc"
36
#endif
37
#ifdef _MSC_VER
38
#include <intrin.h>
39
#endif
40
#if defined(__APPLE__) && (!defined(__x86_64__))
41
#include <mach/host_info.h>
42
#include <mach/mach.h>
43
#include <mach/mach_host.h>
44
#include <mach/machine.h>
45
#endif
46
47
#define DEBUG_TYPE "host-detection"
48
49
//===----------------------------------------------------------------------===//
50
//
51
//  Implementations of the CPU detection routines
52
//
53
//===----------------------------------------------------------------------===//
54
55
using namespace llvm;
56
57
static std::unique_ptr<llvm::MemoryBuffer>
58
0
    LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
59
0
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
60
0
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
61
0
  if (std::error_code EC = Text.getError()) {
62
0
    llvm::errs() << "Can't read "
63
0
                 << "/proc/cpuinfo: " << EC.message() << "\n";
64
0
    return nullptr;
65
0
  }
66
0
  return std::move(*Text);
67
0
}
68
69
0
StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
70
0
  // Access to the Processor Version Register (PVR) on PowerPC is privileged,
71
0
  // and so we must use an operating-system interface to determine the current
72
0
  // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
73
0
  const char *generic = "generic";
74
0
75
0
  // The cpu line is second (after the 'processor: 0' line), so if this
76
0
  // buffer is too small then something has changed (or is wrong).
77
0
  StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
78
0
  StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
79
0
80
0
  StringRef::const_iterator CIP = CPUInfoStart;
81
0
82
0
  StringRef::const_iterator CPUStart = 0;
83
0
  size_t CPULen = 0;
84
0
85
0
  // We need to find the first line which starts with cpu, spaces, and a colon.
86
0
  // After the colon, there may be some additional spaces and then the cpu type.
87
0
  while (CIP < CPUInfoEnd && CPUStart == 0) {
88
0
    if (CIP < CPUInfoEnd && *CIP == '\n')
89
0
      ++CIP;
90
0
91
0
    if (CIP < CPUInfoEnd && *CIP == 'c') {
92
0
      ++CIP;
93
0
      if (CIP < CPUInfoEnd && *CIP == 'p') {
94
0
        ++CIP;
95
0
        if (CIP < CPUInfoEnd && *CIP == 'u') {
96
0
          ++CIP;
97
0
          while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
98
0
            ++CIP;
99
0
100
0
          if (CIP < CPUInfoEnd && *CIP == ':') {
101
0
            ++CIP;
102
0
            while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
103
0
              ++CIP;
104
0
105
0
            if (CIP < CPUInfoEnd) {
106
0
              CPUStart = CIP;
107
0
              while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
108
0
                                          *CIP != ',' && *CIP != '\n'))
109
0
                ++CIP;
110
0
              CPULen = CIP - CPUStart;
111
0
            }
112
0
          }
113
0
        }
114
0
      }
115
0
    }
116
0
117
0
    if (CPUStart == 0)
118
0
      while (CIP < CPUInfoEnd && *CIP != '\n')
119
0
        ++CIP;
120
0
  }
121
0
122
0
  if (CPUStart == 0)
123
0
    return generic;
124
0
125
0
  return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
126
0
      .Case("604e", "604e")
127
0
      .Case("604", "604")
128
0
      .Case("7400", "7400")
129
0
      .Case("7410", "7400")
130
0
      .Case("7447", "7400")
131
0
      .Case("7455", "7450")
132
0
      .Case("G4", "g4")
133
0
      .Case("POWER4", "970")
134
0
      .Case("PPC970FX", "970")
135
0
      .Case("PPC970MP", "970")
136
0
      .Case("G5", "g5")
137
0
      .Case("POWER5", "g5")
138
0
      .Case("A2", "a2")
139
0
      .Case("POWER6", "pwr6")
140
0
      .Case("POWER7", "pwr7")
141
0
      .Case("POWER8", "pwr8")
142
0
      .Case("POWER8E", "pwr8")
143
0
      .Case("POWER8NVL", "pwr8")
144
0
      .Case("POWER9", "pwr9")
145
0
      .Case("POWER10", "pwr10")
146
0
      // FIXME: If we get a simulator or machine with the capabilities of
147
0
      // mcpu=future, we should revisit this and add the name reported by the
148
0
      // simulator/machine.
149
0
      .Default(generic);
150
0
}
151
152
0
StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
153
0
  // The cpuid register on arm is not accessible from user space. On Linux,
154
0
  // it is exposed through the /proc/cpuinfo file.
155
0
156
0
  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
157
0
  // in all cases.
158
0
  SmallVector<StringRef, 32> Lines;
159
0
  ProcCpuinfoContent.split(Lines, "\n");
160
0
161
0
  // Look for the CPU implementer line.
162
0
  StringRef Implementer;
163
0
  StringRef Hardware;
164
0
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
165
0
    if (Lines[I].startswith("CPU implementer"))
166
0
      Implementer = Lines[I].substr(15).ltrim("\t :");
167
0
    if (Lines[I].startswith("Hardware"))
168
0
      Hardware = Lines[I].substr(8).ltrim("\t :");
169
0
  }
170
0
171
0
  if (Implementer == "0x41") { // ARM Ltd.
172
0
    // MSM8992/8994 may give cpu part for the core that the kernel is running on,
173
0
    // which is undeterministic and wrong. Always return cortex-a53 for these SoC.
174
0
    if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
175
0
      return "cortex-a53";
176
0
177
0
178
0
    // Look for the CPU part line.
179
0
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
180
0
      if (Lines[I].startswith("CPU part"))
181
0
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
182
0
        // values correspond to the "Part number" in the CP15/c0 register. The
183
0
        // contents are specified in the various processor manuals.
184
0
        // This corresponds to the Main ID Register in Technical Reference Manuals.
185
0
        // and is used in programs like sys-utils
186
0
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
187
0
            .Case("0x926", "arm926ej-s")
188
0
            .Case("0xb02", "mpcore")
189
0
            .Case("0xb36", "arm1136j-s")
190
0
            .Case("0xb56", "arm1156t2-s")
191
0
            .Case("0xb76", "arm1176jz-s")
192
0
            .Case("0xc08", "cortex-a8")
193
0
            .Case("0xc09", "cortex-a9")
194
0
            .Case("0xc0f", "cortex-a15")
195
0
            .Case("0xc20", "cortex-m0")
196
0
            .Case("0xc23", "cortex-m3")
197
0
            .Case("0xc24", "cortex-m4")
198
0
            .Case("0xd22", "cortex-m55")
199
0
            .Case("0xd02", "cortex-a34")
200
0
            .Case("0xd04", "cortex-a35")
201
0
            .Case("0xd03", "cortex-a53")
202
0
            .Case("0xd07", "cortex-a57")
203
0
            .Case("0xd08", "cortex-a72")
204
0
            .Case("0xd09", "cortex-a73")
205
0
            .Case("0xd0a", "cortex-a75")
206
0
            .Case("0xd0b", "cortex-a76")
207
0
            .Case("0xd0c", "neoverse-n1")
208
0
            .Default("generic");
209
0
  }
210
0
211
0
  if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium.
212
0
    for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
213
0
      if (Lines[I].startswith("CPU part")) {
214
0
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
215
0
          .Case("0x516", "thunderx2t99")
216
0
          .Case("0x0516", "thunderx2t99")
217
0
          .Case("0xaf", "thunderx2t99")
218
0
          .Case("0x0af", "thunderx2t99")
219
0
          .Case("0xa1", "thunderxt88")
220
0
          .Case("0x0a1", "thunderxt88")
221
0
          .Default("generic");
222
0
      }
223
0
    }
224
0
  }
225
0
226
0
  if (Implementer == "0x46") { // Fujitsu Ltd.
227
0
    for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
228
0
      if (Lines[I].startswith("CPU part")) {
229
0
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
230
0
          .Case("0x001", "a64fx")
231
0
          .Default("generic");
232
0
      }
233
0
    }
234
0
  }
235
0
236
0
  if (Implementer == "0x4e") { // NVIDIA Corporation
237
0
    for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
238
0
      if (Lines[I].startswith("CPU part")) {
239
0
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
240
0
            .Case("0x004", "carmel")
241
0
            .Default("generic");
242
0
      }
243
0
    }
244
0
  }
245
0
246
0
  if (Implementer == "0x48") // HiSilicon Technologies, Inc.
247
0
    // Look for the CPU part line.
248
0
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
249
0
      if (Lines[I].startswith("CPU part"))
250
0
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
251
0
        // values correspond to the "Part number" in the CP15/c0 register. The
252
0
        // contents are specified in the various processor manuals.
253
0
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
254
0
          .Case("0xd01", "tsv110")
255
0
          .Default("generic");
256
0
257
0
  if (Implementer == "0x51") // Qualcomm Technologies, Inc.
258
0
    // Look for the CPU part line.
259
0
    for (unsigned I = 0, E = Lines.size(); I != E; ++I)
260
0
      if (Lines[I].startswith("CPU part"))
261
0
        // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
262
0
        // values correspond to the "Part number" in the CP15/c0 register. The
263
0
        // contents are specified in the various processor manuals.
264
0
        return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
265
0
            .Case("0x06f", "krait") // APQ8064
266
0
            .Case("0x201", "kryo")
267
0
            .Case("0x205", "kryo")
268
0
            .Case("0x211", "kryo")
269
0
            .Case("0x800", "cortex-a73")
270
0
            .Case("0x801", "cortex-a73")
271
0
            .Case("0x802", "cortex-a73")
272
0
            .Case("0x803", "cortex-a73")
273
0
            .Case("0x804", "cortex-a73")
274
0
            .Case("0x805", "cortex-a73")
275
0
            .Case("0xc00", "falkor")
276
0
            .Case("0xc01", "saphira")
277
0
            .Default("generic");
278
0
279
0
  if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
280
0
    // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
281
0
    // any predictive pattern across variants and parts.
282
0
    unsigned Variant = 0, Part = 0;
283
0
284
0
    // Look for the CPU variant line, whose value is a 1 digit hexadecimal
285
0
    // number, corresponding to the Variant bits in the CP15/C0 register.
286
0
    for (auto I : Lines)
287
0
      if (I.consume_front("CPU variant"))
288
0
        I.ltrim("\t :").getAsInteger(0, Variant);
289
0
290
0
    // Look for the CPU part line, whose value is a 3 digit hexadecimal
291
0
    // number, corresponding to the PartNum bits in the CP15/C0 register.
292
0
    for (auto I : Lines)
293
0
      if (I.consume_front("CPU part"))
294
0
        I.ltrim("\t :").getAsInteger(0, Part);
295
0
296
0
    unsigned Exynos = (Variant << 12) | Part;
297
0
    switch (Exynos) {
298
0
    default:
299
0
      // Default by falling through to Exynos M3.
300
0
      LLVM_FALLTHROUGH;
301
0
    case 0x1002:
302
0
      return "exynos-m3";
303
0
    case 0x1003:
304
0
      return "exynos-m4";
305
0
    }
306
0
  }
307
0
308
0
  return "generic";
309
0
}
310
311
0
StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) {
312
0
  // STIDP is a privileged operation, so use /proc/cpuinfo instead.
313
0
314
0
  // The "processor 0:" line comes after a fair amount of other information,
315
0
  // including a cache breakdown, but this should be plenty.
316
0
  SmallVector<StringRef, 32> Lines;
317
0
  ProcCpuinfoContent.split(Lines, "\n");
318
0
319
0
  // Look for the CPU features.
320
0
  SmallVector<StringRef, 32> CPUFeatures;
321
0
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
322
0
    if (Lines[I].startswith("features")) {
323
0
      size_t Pos = Lines[I].find(":");
324
0
      if (Pos != StringRef::npos) {
325
0
        Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
326
0
        break;
327
0
      }
328
0
    }
329
0
330
0
  // We need to check for the presence of vector support independently of
331
0
  // the machine type, since we may only use the vector register set when
332
0
  // supported by the kernel (and hypervisor).
333
0
  bool HaveVectorSupport = false;
334
0
  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
335
0
    if (CPUFeatures[I] == "vx")
336
0
      HaveVectorSupport = true;
337
0
  }
338
0
339
0
  // Now check the processor machine type.
340
0
  for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
341
0
    if (Lines[I].startswith("processor ")) {
342
0
      size_t Pos = Lines[I].find("machine = ");
343
0
      if (Pos != StringRef::npos) {
344
0
        Pos += sizeof("machine = ") - 1;
345
0
        unsigned int Id;
346
0
        if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
347
0
          if (Id >= 8561 && HaveVectorSupport)
348
0
            return "z15";
349
0
          if (Id >= 3906 && HaveVectorSupport)
350
0
            return "z14";
351
0
          if (Id >= 2964 && HaveVectorSupport)
352
0
            return "z13";
353
0
          if (Id >= 2827)
354
0
            return "zEC12";
355
0
          if (Id >= 2817)
356
0
            return "z196";
357
0
        }
358
0
      }
359
0
      break;
360
0
    }
361
0
  }
362
0
363
0
  return "generic";
364
0
}
365
366
0
StringRef sys::detail::getHostCPUNameForBPF() {
367
#if !defined(__linux__) || !defined(__x86_64__)
368
  return "generic";
369
#else
370
  uint8_t v3_insns[40] __attribute__ ((aligned (8))) =
371
0
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
372
0
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
373
0
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
374
0
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
375
0
      /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
376
0
      0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
377
0
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
378
0
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
379
0
      /* BPF_EXIT_INSN() */
380
0
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
381
0
382
0
  uint8_t v2_insns[40] __attribute__ ((aligned (8))) =
383
0
      /* BPF_MOV64_IMM(BPF_REG_0, 0) */
384
0
    { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
385
0
      /* BPF_MOV64_IMM(BPF_REG_2, 1) */
386
0
      0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
387
0
      /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */
388
0
      0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0,
389
0
      /* BPF_MOV64_IMM(BPF_REG_0, 1) */
390
0
      0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0,
391
0
      /* BPF_EXIT_INSN() */
392
0
      0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
393
0
394
0
  struct bpf_prog_load_attr {
395
0
    uint32_t prog_type;
396
0
    uint32_t insn_cnt;
397
0
    uint64_t insns;
398
0
    uint64_t license;
399
0
    uint32_t log_level;
400
0
    uint32_t log_size;
401
0
    uint64_t log_buf;
402
0
    uint32_t kern_version;
403
0
    uint32_t prog_flags;
404
0
  } attr = {};
405
0
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
406
0
  attr.insn_cnt = 5;
407
0
  attr.insns = (uint64_t)v3_insns;
408
0
  attr.license = (uint64_t)"DUMMY";
409
0
410
0
  int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr,
411
0
                   sizeof(attr));
412
0
  if (fd >= 0) {
413
0
    close(fd);
414
0
    return "v3";
415
0
  }
416
0
417
0
  /* Clear the whole attr in case its content changed by syscall. */
418
0
  memset(&attr, 0, sizeof(attr));
419
0
  attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */
420
0
  attr.insn_cnt = 5;
421
0
  attr.insns = (uint64_t)v2_insns;
422
0
  attr.license = (uint64_t)"DUMMY";
423
0
  fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr));
424
0
  if (fd >= 0) {
425
0
    close(fd);
426
0
    return "v2";
427
0
  }
428
0
  return "v1";
429
0
#endif
430
0
}
431
432
#if defined(__i386__) || defined(_M_IX86) || \
433
    defined(__x86_64__) || defined(_M_X64)
434
435
enum VendorSignatures {
436
  SIG_INTEL = 0x756e6547 /* Genu */,
437
  SIG_AMD = 0x68747541 /* Auth */
438
};
439
440
// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
441
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
442
// support. Consequently, for i386, the presence of CPUID is checked first
443
// via the corresponding eflags bit.
444
// Removal of cpuid.h header motivated by PR30384
445
// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
446
// or test-suite, but are used in external projects e.g. libstdcxx
447
0
static bool isCpuIdSupported() {
448
0
#if defined(__GNUC__) || defined(__clang__)
449
#if defined(__i386__)
450
  int __cpuid_supported;
451
  __asm__("  pushfl\n"
452
          "  popl   %%eax\n"
453
          "  movl   %%eax,%%ecx\n"
454
          "  xorl   $0x00200000,%%eax\n"
455
          "  pushl  %%eax\n"
456
          "  popfl\n"
457
          "  pushfl\n"
458
          "  popl   %%eax\n"
459
          "  movl   $0,%0\n"
460
          "  cmpl   %%eax,%%ecx\n"
461
          "  je     1f\n"
462
          "  movl   $1,%0\n"
463
          "1:"
464
          : "=r"(__cpuid_supported)
465
          :
466
          : "eax", "ecx");
467
  if (!__cpuid_supported)
468
    return false;
469
#endif
470
  return true;
471
0
#endif
472
0
  return true;
473
0
}
474
475
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
476
/// the specified arguments.  If we can't run cpuid on the host, return true.
477
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
478
0
                               unsigned *rECX, unsigned *rEDX) {
479
0
#if defined(__GNUC__) || defined(__clang__)
480
0
#if defined(__x86_64__)
481
0
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
482
0
  // FIXME: should we save this for Clang?
483
0
  __asm__("movq\t%%rbx, %%rsi\n\t"
484
0
          "cpuid\n\t"
485
0
          "xchgq\t%%rbx, %%rsi\n\t"
486
0
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
487
0
          : "a"(value));
488
0
  return false;
489
#elif defined(__i386__)
490
  __asm__("movl\t%%ebx, %%esi\n\t"
491
          "cpuid\n\t"
492
          "xchgl\t%%ebx, %%esi\n\t"
493
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
494
          : "a"(value));
495
  return false;
496
#else
497
  return true;
498
#endif
499
#elif defined(_MSC_VER)
500
  // The MSVC intrinsic is portable across x86 and x64.
501
  int registers[4];
502
  __cpuid(registers, value);
503
  *rEAX = registers[0];
504
  *rEBX = registers[1];
505
  *rECX = registers[2];
506
  *rEDX = registers[3];
507
  return false;
508
#else
509
  return true;
510
#endif
511
}
512
513
/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
514
/// the 4 values in the specified arguments.  If we can't run cpuid on the host,
515
/// return true.
516
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
517
                                 unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
518
0
                                 unsigned *rEDX) {
519
0
#if defined(__GNUC__) || defined(__clang__)
520
0
#if defined(__x86_64__)
521
0
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
522
0
  // FIXME: should we save this for Clang?
523
0
  __asm__("movq\t%%rbx, %%rsi\n\t"
524
0
          "cpuid\n\t"
525
0
          "xchgq\t%%rbx, %%rsi\n\t"
526
0
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
527
0
          : "a"(value), "c"(subleaf));
528
0
  return false;
529
#elif defined(__i386__)
530
  __asm__("movl\t%%ebx, %%esi\n\t"
531
          "cpuid\n\t"
532
          "xchgl\t%%ebx, %%esi\n\t"
533
          : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
534
          : "a"(value), "c"(subleaf));
535
  return false;
536
#else
537
  return true;
538
#endif
539
#elif defined(_MSC_VER)
540
  int registers[4];
541
  __cpuidex(registers, value, subleaf);
542
  *rEAX = registers[0];
543
  *rEBX = registers[1];
544
  *rECX = registers[2];
545
  *rEDX = registers[3];
546
  return false;
547
#else
548
  return true;
549
#endif
550
}
551
552
// Read control register 0 (XCR0). Used to detect features such as AVX.
553
0
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
554
0
#if defined(__GNUC__) || defined(__clang__)
555
0
  // Check xgetbv; this uses a .byte sequence instead of the instruction
556
0
  // directly because older assemblers do not include support for xgetbv and
557
0
  // there is no easy way to conditionally compile based on the assembler used.
558
0
  __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0));
559
0
  return false;
560
#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
561
  unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
562
  *rEAX = Result;
563
  *rEDX = Result >> 32;
564
  return false;
565
#else
566
  return true;
567
#endif
568
}
569
570
static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
571
0
                                 unsigned *Model) {
572
0
  *Family = (EAX >> 8) & 0xf; // Bits 8 - 11
573
0
  *Model = (EAX >> 4) & 0xf;  // Bits 4 - 7
574
0
  if (*Family == 6 || *Family == 0xf) {
575
0
    if (*Family == 0xf)
576
0
      // Examine extended family ID if family ID is F.
577
0
      *Family += (EAX >> 20) & 0xff; // Bits 20 - 27
578
0
    // Examine extended model ID if family ID is 6 or F.
579
0
    *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
580
0
  }
581
0
}
582
583
static void
584
getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
585
                                unsigned Brand_id, unsigned Features,
586
                                unsigned Features2, unsigned Features3,
587
0
                                unsigned *Type, unsigned *Subtype) {
588
0
  if (Brand_id != 0)
589
0
    return;
590
0
  switch (Family) {
591
0
  case 3:
592
0
    *Type = X86::INTEL_i386;
593
0
    break;
594
0
  case 4:
595
0
    *Type = X86::INTEL_i486;
596
0
    break;
597
0
  case 5:
598
0
    if (Features & (1 << X86::FEATURE_MMX)) {
599
0
      *Type = X86::INTEL_PENTIUM_MMX;
600
0
      break;
601
0
    }
602
0
    *Type = X86::INTEL_PENTIUM;
603
0
    break;
604
0
  case 6:
605
0
    switch (Model) {
606
0
    case 0x01: // Pentium Pro processor
607
0
      *Type = X86::INTEL_PENTIUM_PRO;
608
0
      break;
609
0
    case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
610
0
               // model 03
611
0
    case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
612
0
               // model 05, and Intel Celeron processor, model 05
613
0
    case 0x06: // Celeron processor, model 06
614
0
      *Type = X86::INTEL_PENTIUM_II;
615
0
      break;
616
0
    case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
617
0
               // processor, model 07
618
0
    case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
619
0
               // model 08, and Celeron processor, model 08
620
0
    case 0x0a: // Pentium III Xeon processor, model 0Ah
621
0
    case 0x0b: // Pentium III processor, model 0Bh
622
0
      *Type = X86::INTEL_PENTIUM_III;
623
0
      break;
624
0
    case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
625
0
    case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
626
0
               // 0Dh. All processors are manufactured using the 90 nm process.
627
0
    case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
628
0
               // Integrated Processor with Intel QuickAssist Technology
629
0
      *Type = X86::INTEL_PENTIUM_M;
630
0
      break;
631
0
    case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
632
0
               // 0Eh. All processors are manufactured using the 65 nm process.
633
0
      *Type = X86::INTEL_CORE_DUO;
634
0
      break;   // yonah
635
0
    case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
636
0
               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
637
0
               // mobile processor, Intel Core 2 Extreme processor, Intel
638
0
               // Pentium Dual-Core processor, Intel Xeon processor, model
639
0
               // 0Fh. All processors are manufactured using the 65 nm process.
640
0
    case 0x16: // Intel Celeron processor model 16h. All processors are
641
0
               // manufactured using the 65 nm process
642
0
      *Type = X86::INTEL_CORE2; // "core2"
643
0
      *Subtype = X86::INTEL_CORE2_65;
644
0
      break;
645
0
    case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
646
0
               // 17h. All processors are manufactured using the 45 nm process.
647
0
               //
648
0
               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
649
0
    case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
650
0
               // the 45 nm process.
651
0
      *Type = X86::INTEL_CORE2; // "penryn"
652
0
      *Subtype = X86::INTEL_CORE2_45;
653
0
      break;
654
0
    case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
655
0
               // processors are manufactured using the 45 nm process.
656
0
    case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
657
0
               // As found in a Summer 2010 model iMac.
658
0
    case 0x1f:
659
0
    case 0x2e:             // Nehalem EX
660
0
      *Type = X86::INTEL_COREI7; // "nehalem"
661
0
      *Subtype = X86::INTEL_COREI7_NEHALEM;
662
0
      break;
663
0
    case 0x25: // Intel Core i7, laptop version.
664
0
    case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
665
0
               // processors are manufactured using the 32 nm process.
666
0
    case 0x2f: // Westmere EX
667
0
      *Type = X86::INTEL_COREI7; // "westmere"
668
0
      *Subtype = X86::INTEL_COREI7_WESTMERE;
669
0
      break;
670
0
    case 0x2a: // Intel Core i7 processor. All processors are manufactured
671
0
               // using the 32 nm process.
672
0
    case 0x2d:
673
0
      *Type = X86::INTEL_COREI7; //"sandybridge"
674
0
      *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
675
0
      break;
676
0
    case 0x3a:
677
0
    case 0x3e:             // Ivy Bridge EP
678
0
      *Type = X86::INTEL_COREI7; // "ivybridge"
679
0
      *Subtype = X86::INTEL_COREI7_IVYBRIDGE;
680
0
      break;
681
0
682
0
    // Haswell:
683
0
    case 0x3c:
684
0
    case 0x3f:
685
0
    case 0x45:
686
0
    case 0x46:
687
0
      *Type = X86::INTEL_COREI7; // "haswell"
688
0
      *Subtype = X86::INTEL_COREI7_HASWELL;
689
0
      break;
690
0
691
0
    // Broadwell:
692
0
    case 0x3d:
693
0
    case 0x47:
694
0
    case 0x4f:
695
0
    case 0x56:
696
0
      *Type = X86::INTEL_COREI7; // "broadwell"
697
0
      *Subtype = X86::INTEL_COREI7_BROADWELL;
698
0
      break;
699
0
700
0
    // Skylake:
701
0
    case 0x4e:              // Skylake mobile
702
0
    case 0x5e:              // Skylake desktop
703
0
    case 0x8e:              // Kaby Lake mobile
704
0
    case 0x9e:              // Kaby Lake desktop
705
0
    case 0xa5:              // Comet Lake-H/S
706
0
    case 0xa6:              // Comet Lake-U
707
0
      *Type = X86::INTEL_COREI7; // "skylake"
708
0
      *Subtype = X86::INTEL_COREI7_SKYLAKE;
709
0
      break;
710
0
711
0
    // Skylake Xeon:
712
0
    case 0x55:
713
0
      *Type = X86::INTEL_COREI7;
714
0
      if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32)))
715
0
        *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake"
716
0
      else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32)))
717
0
        *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake"
718
0
      else
719
0
        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
720
0
      break;
721
0
722
0
    // Cannonlake:
723
0
    case 0x66:
724
0
      *Type = X86::INTEL_COREI7;
725
0
      *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake"
726
0
      break;
727
0
728
0
    // Icelake:
729
0
    case 0x7d:
730
0
    case 0x7e:
731
0
      *Type = X86::INTEL_COREI7;
732
0
      *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client"
733
0
      break;
734
0
735
0
    // Icelake Xeon:
736
0
    case 0x6a:
737
0
    case 0x6c:
738
0
      *Type = X86::INTEL_COREI7;
739
0
      *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server"
740
0
      break;
741
0
742
0
    case 0x1c: // Most 45 nm Intel Atom processors
743
0
    case 0x26: // 45 nm Atom Lincroft
744
0
    case 0x27: // 32 nm Atom Medfield
745
0
    case 0x35: // 32 nm Atom Midview
746
0
    case 0x36: // 32 nm Atom Midview
747
0
      *Type = X86::INTEL_BONNELL;
748
0
      break; // "bonnell"
749
0
750
0
    // Atom Silvermont codes from the Intel software optimization guide.
751
0
    case 0x37:
752
0
    case 0x4a:
753
0
    case 0x4d:
754
0
    case 0x5a:
755
0
    case 0x5d:
756
0
    case 0x4c: // really airmont
757
0
      *Type = X86::INTEL_SILVERMONT;
758
0
      break; // "silvermont"
759
0
    // Goldmont:
760
0
    case 0x5c: // Apollo Lake
761
0
    case 0x5f: // Denverton
762
0
      *Type = X86::INTEL_GOLDMONT;
763
0
      break; // "goldmont"
764
0
    case 0x7a:
765
0
      *Type = X86::INTEL_GOLDMONT_PLUS;
766
0
      break;
767
0
    case 0x86:
768
0
      *Type = X86::INTEL_TREMONT;
769
0
      break;
770
0
771
0
    case 0x57:
772
0
      *Type = X86::INTEL_KNL; // knl
773
0
      break;
774
0
775
0
    case 0x85:
776
0
      *Type = X86::INTEL_KNM; // knm
777
0
      break;
778
0
779
0
    default: // Unknown family 6 CPU, try to guess.
780
0
      // TODO detect tigerlake host
781
0
      if (Features2 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 32))) {
782
0
        *Type = X86::INTEL_COREI7;
783
0
        *Subtype = X86::INTEL_COREI7_TIGERLAKE;
784
0
        break;
785
0
      }
786
0
787
0
      if (Features & (1 << X86::FEATURE_AVX512VBMI2)) {
788
0
        *Type = X86::INTEL_COREI7;
789
0
        *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
790
0
        break;
791
0
      }
792
0
793
0
      if (Features & (1 << X86::FEATURE_AVX512VBMI)) {
794
0
        *Type = X86::INTEL_COREI7;
795
0
        *Subtype = X86::INTEL_COREI7_CANNONLAKE;
796
0
        break;
797
0
      }
798
0
799
0
      if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) {
800
0
        *Type = X86::INTEL_COREI7;
801
0
        *Subtype = X86::INTEL_COREI7_COOPERLAKE;
802
0
        break;
803
0
      }
804
0
805
0
      if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) {
806
0
        *Type = X86::INTEL_COREI7;
807
0
        *Subtype = X86::INTEL_COREI7_CASCADELAKE;
808
0
        break;
809
0
      }
810
0
811
0
      if (Features & (1 << X86::FEATURE_AVX512VL)) {
812
0
        *Type = X86::INTEL_COREI7;
813
0
        *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
814
0
        break;
815
0
      }
816
0
817
0
      if (Features & (1 << X86::FEATURE_AVX512ER)) {
818
0
        *Type = X86::INTEL_KNL; // knl
819
0
        break;
820
0
      }
821
0
822
0
      if (Features3 & (1 << (X86::FEATURE_CLFLUSHOPT - 64))) {
823
0
        if (Features3 & (1 << (X86::FEATURE_SHA - 64))) {
824
0
          *Type = X86::INTEL_GOLDMONT;
825
0
        } else {
826
0
          *Type = X86::INTEL_COREI7;
827
0
          *Subtype = X86::INTEL_COREI7_SKYLAKE;
828
0
        }
829
0
        break;
830
0
      }
831
0
      if (Features3 & (1 << (X86::FEATURE_ADX - 64))) {
832
0
        *Type = X86::INTEL_COREI7;
833
0
        *Subtype = X86::INTEL_COREI7_BROADWELL;
834
0
        break;
835
0
      }
836
0
      if (Features & (1 << X86::FEATURE_AVX2)) {
837
0
        *Type = X86::INTEL_COREI7;
838
0
        *Subtype = X86::INTEL_COREI7_HASWELL;
839
0
        break;
840
0
      }
841
0
      if (Features & (1 << X86::FEATURE_AVX)) {
842
0
        *Type = X86::INTEL_COREI7;
843
0
        *Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
844
0
        break;
845
0
      }
846
0
      if (Features & (1 << X86::FEATURE_SSE4_2)) {
847
0
        if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
848
0
          *Type = X86::INTEL_SILVERMONT;
849
0
        } else {
850
0
          *Type = X86::INTEL_COREI7;
851
0
          *Subtype = X86::INTEL_COREI7_NEHALEM;
852
0
        }
853
0
        break;
854
0
      }
855
0
      if (Features & (1 << X86::FEATURE_SSE4_1)) {
856
0
        *Type = X86::INTEL_CORE2; // "penryn"
857
0
        *Subtype = X86::INTEL_CORE2_45;
858
0
        break;
859
0
      }
860
0
      if (Features & (1 << X86::FEATURE_SSSE3)) {
861
0
        if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
862
0
          *Type = X86::INTEL_BONNELL; // "bonnell"
863
0
        } else {
864
0
          *Type = X86::INTEL_CORE2; // "core2"
865
0
          *Subtype = X86::INTEL_CORE2_65;
866
0
        }
867
0
        break;
868
0
      }
869
0
      if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
870
0
        *Type = X86::INTEL_CORE2; // "core2"
871
0
        *Subtype = X86::INTEL_CORE2_65;
872
0
        break;
873
0
      }
874
0
      if (Features & (1 << X86::FEATURE_SSE3)) {
875
0
        *Type = X86::INTEL_CORE_DUO;
876
0
        break;
877
0
      }
878
0
      if (Features & (1 << X86::FEATURE_SSE2)) {
879
0
        *Type = X86::INTEL_PENTIUM_M;
880
0
        break;
881
0
      }
882
0
      if (Features & (1 << X86::FEATURE_SSE)) {
883
0
        *Type = X86::INTEL_PENTIUM_III;
884
0
        break;
885
0
      }
886
0
      if (Features & (1 << X86::FEATURE_MMX)) {
887
0
        *Type = X86::INTEL_PENTIUM_II;
888
0
        break;
889
0
      }
890
0
      *Type = X86::INTEL_PENTIUM_PRO;
891
0
      break;
892
0
    }
893
0
    break;
894
0
  case 15: {
895
0
    if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
896
0
      *Type = X86::INTEL_NOCONA;
897
0
      break;
898
0
    }
899
0
    if (Features & (1 << X86::FEATURE_SSE3)) {
900
0
      *Type = X86::INTEL_PRESCOTT;
901
0
      break;
902
0
    }
903
0
    *Type = X86::INTEL_PENTIUM_IV;
904
0
    break;
905
0
  }
906
0
  default:
907
0
    break; /*"generic"*/
908
0
  }
909
0
}
910
911
static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
912
                                          unsigned Features, unsigned *Type,
913
0
                                          unsigned *Subtype) {
914
0
  // FIXME: this poorly matches the generated SubtargetFeatureKV table.  There
915
0
  // appears to be no way to generate the wide variety of AMD-specific targets
916
0
  // from the information returned from CPUID.
917
0
  switch (Family) {
918
0
  case 4:
919
0
    *Type = X86::AMD_i486;
920
0
    break;
921
0
  case 5:
922
0
    *Type = X86::AMDPENTIUM;
923
0
    switch (Model) {
924
0
    case 6:
925
0
    case 7:
926
0
      *Subtype = X86::AMDPENTIUM_K6;
927
0
      break; // "k6"
928
0
    case 8:
929
0
      *Subtype = X86::AMDPENTIUM_K62;
930
0
      break; // "k6-2"
931
0
    case 9:
932
0
    case 13:
933
0
      *Subtype = X86::AMDPENTIUM_K63;
934
0
      break; // "k6-3"
935
0
    case 10:
936
0
      *Subtype = X86::AMDPENTIUM_GEODE;
937
0
      break; // "geode"
938
0
    }
939
0
    break;
940
0
  case 6:
941
0
    if (Features & (1 << X86::FEATURE_SSE)) {
942
0
      *Type = X86::AMD_ATHLON_XP;
943
0
      break; // "athlon-xp"
944
0
    }
945
0
    *Type = X86::AMD_ATHLON;
946
0
    break; // "athlon"
947
0
  case 15:
948
0
    if (Features & (1 << X86::FEATURE_SSE3)) {
949
0
      *Type = X86::AMD_K8SSE3;
950
0
      break; // "k8-sse3"
951
0
    }
952
0
    *Type = X86::AMD_K8;
953
0
    break; // "k8"
954
0
  case 16:
955
0
    *Type = X86::AMDFAM10H; // "amdfam10"
956
0
    switch (Model) {
957
0
    case 2:
958
0
      *Subtype = X86::AMDFAM10H_BARCELONA;
959
0
      break;
960
0
    case 4:
961
0
      *Subtype = X86::AMDFAM10H_SHANGHAI;
962
0
      break;
963
0
    case 8:
964
0
      *Subtype = X86::AMDFAM10H_ISTANBUL;
965
0
      break;
966
0
    }
967
0
    break;
968
0
  case 20:
969
0
    *Type = X86::AMD_BTVER1;
970
0
    break; // "btver1";
971
0
  case 21:
972
0
    *Type = X86::AMDFAM15H;
973
0
    if (Model >= 0x60 && Model <= 0x7f) {
974
0
      *Subtype = X86::AMDFAM15H_BDVER4;
975
0
      break; // "bdver4"; 60h-7Fh: Excavator
976
0
    }
977
0
    if (Model >= 0x30 && Model <= 0x3f) {
978
0
      *Subtype = X86::AMDFAM15H_BDVER3;
979
0
      break; // "bdver3"; 30h-3Fh: Steamroller
980
0
    }
981
0
    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
982
0
      *Subtype = X86::AMDFAM15H_BDVER2;
983
0
      break; // "bdver2"; 02h, 10h-1Fh: Piledriver
984
0
    }
985
0
    if (Model <= 0x0f) {
986
0
      *Subtype = X86::AMDFAM15H_BDVER1;
987
0
      break; // "bdver1"; 00h-0Fh: Bulldozer
988
0
    }
989
0
    break;
990
0
  case 22:
991
0
    *Type = X86::AMD_BTVER2;
992
0
    break; // "btver2"
993
0
  case 23:
994
0
    *Type = X86::AMDFAM17H;
995
0
    if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) {
996
0
      *Subtype = X86::AMDFAM17H_ZNVER2;
997
0
      break; // "znver2"; 30h-3fh, 71h: Zen2
998
0
    }
999
0
    if (Model <= 0x0f) {
1000
0
      *Subtype = X86::AMDFAM17H_ZNVER1;
1001
0
      break; // "znver1"; 00h-0Fh: Zen1
1002
0
    }
1003
0
    break;
1004
0
  default:
1005
0
    break; // "generic"
1006
0
  }
1007
0
}
1008
1009
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
1010
                                 unsigned *FeaturesOut, unsigned *Features2Out,
1011
0
                                 unsigned *Features3Out) {
1012
0
  unsigned Features = 0;
1013
0
  unsigned Features2 = 0;
1014
0
  unsigned Features3 = 0;
1015
0
  unsigned EAX, EBX;
1016
0
1017
0
  auto setFeature = [&](unsigned F) {
1018
0
    if (F < 32)
1019
0
      Features |= 1U << (F & 0x1f);
1020
0
    else if (F < 64)
1021
0
      Features2 |= 1U << ((F - 32) & 0x1f);
1022
0
    else if (F < 96)
1023
0
      Features3 |= 1U << ((F - 64) & 0x1f);
1024
0
    else
1025
0
      llvm_unreachable("Unexpected FeatureBit");
1026
0
  };
1027
0
1028
0
  if ((EDX >> 15) & 1)
1029
0
    setFeature(X86::FEATURE_CMOV);
1030
0
  if ((EDX >> 23) & 1)
1031
0
    setFeature(X86::FEATURE_MMX);
1032
0
  if ((EDX >> 25) & 1)
1033
0
    setFeature(X86::FEATURE_SSE);
1034
0
  if ((EDX >> 26) & 1)
1035
0
    setFeature(X86::FEATURE_SSE2);
1036
0
1037
0
  if ((ECX >> 0) & 1)
1038
0
    setFeature(X86::FEATURE_SSE3);
1039
0
  if ((ECX >> 1) & 1)
1040
0
    setFeature(X86::FEATURE_PCLMUL);
1041
0
  if ((ECX >> 9) & 1)
1042
0
    setFeature(X86::FEATURE_SSSE3);
1043
0
  if ((ECX >> 12) & 1)
1044
0
    setFeature(X86::FEATURE_FMA);
1045
0
  if ((ECX >> 19) & 1)
1046
0
    setFeature(X86::FEATURE_SSE4_1);
1047
0
  if ((ECX >> 20) & 1)
1048
0
    setFeature(X86::FEATURE_SSE4_2);
1049
0
  if ((ECX >> 23) & 1)
1050
0
    setFeature(X86::FEATURE_POPCNT);
1051
0
  if ((ECX >> 25) & 1)
1052
0
    setFeature(X86::FEATURE_AES);
1053
0
1054
0
  if ((ECX >> 22) & 1)
1055
0
    setFeature(X86::FEATURE_MOVBE);
1056
0
1057
0
  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1058
0
  // indicates that the AVX registers will be saved and restored on context
1059
0
  // switch, then we have full AVX support.
1060
0
  const unsigned AVXBits = (1 << 27) | (1 << 28);
1061
0
  bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
1062
0
                ((EAX & 0x6) == 0x6);
1063
#if defined(__APPLE__)
1064
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1065
  // save the AVX512 context if we use AVX512 instructions, even the bit is not
1066
  // set right now.
1067
  bool HasAVX512Save = true;
1068
#else
1069
  // AVX512 requires additional context to be saved by the OS.
1070
0
  bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
1071
0
#endif
1072
0
1073
0
  if (HasAVX)
1074
0
    setFeature(X86::FEATURE_AVX);
1075
0
1076
0
  bool HasLeaf7 =
1077
0
      MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1078
0
1079
0
  if (HasLeaf7 && ((EBX >> 3) & 1))
1080
0
    setFeature(X86::FEATURE_BMI);
1081
0
  if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
1082
0
    setFeature(X86::FEATURE_AVX2);
1083
0
  if (HasLeaf7 && ((EBX >> 8) & 1))
1084
0
    setFeature(X86::FEATURE_BMI2);
1085
0
  if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
1086
0
    setFeature(X86::FEATURE_AVX512F);
1087
0
  if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
1088
0
    setFeature(X86::FEATURE_AVX512DQ);
1089
0
  if (HasLeaf7 && ((EBX >> 19) & 1))
1090
0
    setFeature(X86::FEATURE_ADX);
1091
0
  if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
1092
0
    setFeature(X86::FEATURE_AVX512IFMA);
1093
0
  if (HasLeaf7 && ((EBX >> 23) & 1))
1094
0
    setFeature(X86::FEATURE_CLFLUSHOPT);
1095
0
  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
1096
0
    setFeature(X86::FEATURE_AVX512PF);
1097
0
  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
1098
0
    setFeature(X86::FEATURE_AVX512ER);
1099
0
  if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
1100
0
    setFeature(X86::FEATURE_AVX512CD);
1101
0
  if (HasLeaf7 && ((EBX >> 29) & 1))
1102
0
    setFeature(X86::FEATURE_SHA);
1103
0
  if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
1104
0
    setFeature(X86::FEATURE_AVX512BW);
1105
0
  if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
1106
0
    setFeature(X86::FEATURE_AVX512VL);
1107
0
1108
0
  if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
1109
0
    setFeature(X86::FEATURE_AVX512VBMI);
1110
0
  if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
1111
0
    setFeature(X86::FEATURE_AVX512VBMI2);
1112
0
  if (HasLeaf7 && ((ECX >> 8) & 1))
1113
0
    setFeature(X86::FEATURE_GFNI);
1114
0
  if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
1115
0
    setFeature(X86::FEATURE_VPCLMULQDQ);
1116
0
  if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
1117
0
    setFeature(X86::FEATURE_AVX512VNNI);
1118
0
  if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save)
1119
0
    setFeature(X86::FEATURE_AVX512BITALG);
1120
0
  if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
1121
0
    setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
1122
0
1123
0
  if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
1124
0
    setFeature(X86::FEATURE_AVX5124VNNIW);
1125
0
  if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
1126
0
    setFeature(X86::FEATURE_AVX5124FMAPS);
1127
0
  if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
1128
0
    setFeature(X86::FEATURE_AVX512VP2INTERSECT);
1129
0
1130
0
  bool HasLeaf7Subleaf1 =
1131
0
      MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1132
0
  if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
1133
0
    setFeature(X86::FEATURE_AVX512BF16);
1134
0
1135
0
  unsigned MaxExtLevel;
1136
0
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1137
0
1138
0
  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1139
0
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1140
0
  if (HasExtLeaf1 && ((ECX >> 6) & 1))
1141
0
    setFeature(X86::FEATURE_SSE4_A);
1142
0
  if (HasExtLeaf1 && ((ECX >> 11) & 1))
1143
0
    setFeature(X86::FEATURE_XOP);
1144
0
  if (HasExtLeaf1 && ((ECX >> 16) & 1))
1145
0
    setFeature(X86::FEATURE_FMA4);
1146
0
1147
0
  if (HasExtLeaf1 && ((EDX >> 29) & 1))
1148
0
    setFeature(X86::FEATURE_EM64T);
1149
0
1150
0
  *FeaturesOut  = Features;
1151
0
  *Features2Out = Features2;
1152
0
  *Features3Out = Features3;
1153
0
}
1154
1155
0
StringRef sys::getHostCPUName() {
1156
0
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1157
0
  unsigned MaxLeaf, Vendor;
1158
0
1159
0
#if defined(__GNUC__) || defined(__clang__)
1160
0
  //FIXME: include cpuid.h from clang or copy __get_cpuid_max here
1161
0
  // and simplify it to not invoke __cpuid (like cpu_model.c in
1162
0
  // compiler-rt/lib/builtins/cpu_model.c?
1163
0
  // Opting for the second option.
1164
0
  if(!isCpuIdSupported())
1165
0
    return "generic";
1166
0
#endif
1167
0
  if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1)
1168
0
    return "generic";
1169
0
  getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
1170
0
1171
0
  unsigned Brand_id = EBX & 0xff;
1172
0
  unsigned Family = 0, Model = 0;
1173
0
  unsigned Features = 0, Features2 = 0, Features3 = 0;
1174
0
  detectX86FamilyModel(EAX, &Family, &Model);
1175
0
  getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2, &Features3);
1176
0
1177
0
  unsigned Type = 0;
1178
0
  unsigned Subtype = 0;
1179
0
1180
0
  if (Vendor == SIG_INTEL) {
1181
0
    getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features,
1182
0
                                    Features2, Features3, &Type, &Subtype);
1183
0
  } else if (Vendor == SIG_AMD) {
1184
0
    getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype);
1185
0
  }
1186
0
1187
0
  // Check subtypes first since those are more specific.
1188
0
#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \
1189
0
  if (Subtype == X86::ENUM) \
1190
0
    return ARCHNAME;
1191
0
#include "llvm/Support/X86TargetParser.def"
1192
0
1193
0
  // Now check types.
1194
0
#define X86_CPU_TYPE(ARCHNAME, ENUM) \
1195
0
  if (Type == X86::ENUM) \
1196
0
    return ARCHNAME;
1197
0
#include "llvm/Support/X86TargetParser.def"
1198
0
1199
0
  return "generic";
1200
0
}
1201
1202
#elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
1203
StringRef sys::getHostCPUName() {
1204
  host_basic_info_data_t hostInfo;
1205
  mach_msg_type_number_t infoCount;
1206
1207
  infoCount = HOST_BASIC_INFO_COUNT;
1208
  mach_port_t hostPort = mach_host_self();
1209
  host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1210
            &infoCount);
1211
  mach_port_deallocate(mach_task_self(), hostPort);
1212
1213
  if (hostInfo.cpu_type != CPU_TYPE_POWERPC)
1214
    return "generic";
1215
1216
  switch (hostInfo.cpu_subtype) {
1217
  case CPU_SUBTYPE_POWERPC_601:
1218
    return "601";
1219
  case CPU_SUBTYPE_POWERPC_602:
1220
    return "602";
1221
  case CPU_SUBTYPE_POWERPC_603:
1222
    return "603";
1223
  case CPU_SUBTYPE_POWERPC_603e:
1224
    return "603e";
1225
  case CPU_SUBTYPE_POWERPC_603ev:
1226
    return "603ev";
1227
  case CPU_SUBTYPE_POWERPC_604:
1228
    return "604";
1229
  case CPU_SUBTYPE_POWERPC_604e:
1230
    return "604e";
1231
  case CPU_SUBTYPE_POWERPC_620:
1232
    return "620";
1233
  case CPU_SUBTYPE_POWERPC_750:
1234
    return "750";
1235
  case CPU_SUBTYPE_POWERPC_7400:
1236
    return "7400";
1237
  case CPU_SUBTYPE_POWERPC_7450:
1238
    return "7450";
1239
  case CPU_SUBTYPE_POWERPC_970:
1240
    return "970";
1241
  default:;
1242
  }
1243
1244
  return "generic";
1245
}
1246
#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
1247
StringRef sys::getHostCPUName() {
1248
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1249
  StringRef Content = P ? P->getBuffer() : "";
1250
  return detail::getHostCPUNameForPowerPC(Content);
1251
}
1252
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1253
StringRef sys::getHostCPUName() {
1254
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1255
  StringRef Content = P ? P->getBuffer() : "";
1256
  return detail::getHostCPUNameForARM(Content);
1257
}
1258
#elif defined(__linux__) && defined(__s390x__)
1259
StringRef sys::getHostCPUName() {
1260
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1261
  StringRef Content = P ? P->getBuffer() : "";
1262
  return detail::getHostCPUNameForS390x(Content);
1263
}
1264
#elif defined(__APPLE__) && defined(__aarch64__)
1265
StringRef sys::getHostCPUName() {
1266
  return "cyclone";
1267
}
1268
#elif defined(__APPLE__) && defined(__arm__)
1269
StringRef sys::getHostCPUName() {
1270
  host_basic_info_data_t hostInfo;
1271
  mach_msg_type_number_t infoCount;
1272
1273
  infoCount = HOST_BASIC_INFO_COUNT;
1274
  mach_port_t hostPort = mach_host_self();
1275
  host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo,
1276
            &infoCount);
1277
  mach_port_deallocate(mach_task_self(), hostPort);
1278
1279
  if (hostInfo.cpu_type != CPU_TYPE_ARM) {
1280
    assert(false && "CPUType not equal to ARM should not be possible on ARM");
1281
    return "generic";
1282
  }
1283
  switch (hostInfo.cpu_subtype) {
1284
    case CPU_SUBTYPE_ARM_V7S:
1285
      return "swift";
1286
    default:;
1287
    }
1288
1289
  return "generic";
1290
}
1291
#else
1292
StringRef sys::getHostCPUName() { return "generic"; }
1293
#endif
1294
1295
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
1296
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
1297
// using the number of unique physical/core id pairs. The following
1298
// implementation reads the /proc/cpuinfo format on an x86_64 system.
1299
0
int computeHostNumPhysicalCores() {
1300
0
  // Enabled represents the number of physical id/core id pairs with at least
1301
0
  // one processor id enabled by the CPU affinity mask.
1302
0
  cpu_set_t Affinity, Enabled;
1303
0
  if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
1304
0
    return -1;
1305
0
  CPU_ZERO(&Enabled);
1306
0
1307
0
  // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
1308
0
  // mmapped because it appears to have 0 size.
1309
0
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
1310
0
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
1311
0
  if (std::error_code EC = Text.getError()) {
1312
0
    llvm::errs() << "Can't read "
1313
0
                 << "/proc/cpuinfo: " << EC.message() << "\n";
1314
0
    return -1;
1315
0
  }
1316
0
  SmallVector<StringRef, 8> strs;
1317
0
  (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
1318
0
                             /*KeepEmpty=*/false);
1319
0
  int CurProcessor = -1;
1320
0
  int CurPhysicalId = -1;
1321
0
  int CurSiblings = -1;
1322
0
  int CurCoreId = -1;
1323
0
  for (StringRef Line : strs) {
1324
0
    std::pair<StringRef, StringRef> Data = Line.split(':');
1325
0
    auto Name = Data.first.trim();
1326
0
    auto Val = Data.second.trim();
1327
0
    // These fields are available if the kernel is configured with CONFIG_SMP.
1328
0
    if (Name == "processor")
1329
0
      Val.getAsInteger(10, CurProcessor);
1330
0
    else if (Name == "physical id")
1331
0
      Val.getAsInteger(10, CurPhysicalId);
1332
0
    else if (Name == "siblings")
1333
0
      Val.getAsInteger(10, CurSiblings);
1334
0
    else if (Name == "core id") {
1335
0
      Val.getAsInteger(10, CurCoreId);
1336
0
      // The processor id corresponds to an index into cpu_set_t.
1337
0
      if (CPU_ISSET(CurProcessor, &Affinity))
1338
0
        CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
1339
0
    }
1340
0
  }
1341
0
  return CPU_COUNT(&Enabled);
1342
0
}
1343
#elif defined(__APPLE__) && defined(__x86_64__)
1344
#include <sys/param.h>
1345
#include <sys/sysctl.h>
1346
1347
// Gets the number of *physical cores* on the machine.
1348
int computeHostNumPhysicalCores() {
1349
  uint32_t count;
1350
  size_t len = sizeof(count);
1351
  sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
1352
  if (count < 1) {
1353
    int nm[2];
1354
    nm[0] = CTL_HW;
1355
    nm[1] = HW_AVAILCPU;
1356
    sysctl(nm, 2, &count, &len, NULL, 0);
1357
    if (count < 1)
1358
      return -1;
1359
  }
1360
  return count;
1361
}
1362
#elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0
1363
// Defined in llvm/lib/Support/Windows/Threading.inc
1364
int computeHostNumPhysicalCores();
1365
#else
1366
// On other systems, return -1 to indicate unknown.
1367
static int computeHostNumPhysicalCores() { return -1; }
1368
#endif
1369
1370
0
int sys::getHostNumPhysicalCores() {
1371
0
  static int NumCores = computeHostNumPhysicalCores();
1372
0
  return NumCores;
1373
0
}
1374
1375
#if defined(__i386__) || defined(_M_IX86) || \
1376
    defined(__x86_64__) || defined(_M_X64)
1377
0
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1378
0
  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
1379
0
  unsigned MaxLevel;
1380
0
  union {
1381
0
    unsigned u[3];
1382
0
    char c[12];
1383
0
  } text;
1384
0
1385
0
  if (getX86CpuIDAndInfo(0, &MaxLevel, text.u + 0, text.u + 2, text.u + 1) ||
1386
0
      MaxLevel < 1)
1387
0
    return false;
1388
0
1389
0
  getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX);
1390
0
1391
0
  Features["cx8"]    = (EDX >>  8) & 1;
1392
0
  Features["cmov"]   = (EDX >> 15) & 1;
1393
0
  Features["mmx"]    = (EDX >> 23) & 1;
1394
0
  Features["fxsr"]   = (EDX >> 24) & 1;
1395
0
  Features["sse"]    = (EDX >> 25) & 1;
1396
0
  Features["sse2"]   = (EDX >> 26) & 1;
1397
0
1398
0
  Features["sse3"]   = (ECX >>  0) & 1;
1399
0
  Features["pclmul"] = (ECX >>  1) & 1;
1400
0
  Features["ssse3"]  = (ECX >>  9) & 1;
1401
0
  Features["cx16"]   = (ECX >> 13) & 1;
1402
0
  Features["sse4.1"] = (ECX >> 19) & 1;
1403
0
  Features["sse4.2"] = (ECX >> 20) & 1;
1404
0
  Features["movbe"]  = (ECX >> 22) & 1;
1405
0
  Features["popcnt"] = (ECX >> 23) & 1;
1406
0
  Features["aes"]    = (ECX >> 25) & 1;
1407
0
  Features["rdrnd"]  = (ECX >> 30) & 1;
1408
0
1409
0
  // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
1410
0
  // indicates that the AVX registers will be saved and restored on context
1411
0
  // switch, then we have full AVX support.
1412
0
  bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) &&
1413
0
                    !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6);
1414
#if defined(__APPLE__)
1415
  // Darwin lazily saves the AVX512 context on first use: trust that the OS will
1416
  // save the AVX512 context if we use AVX512 instructions, even the bit is not
1417
  // set right now.
1418
  bool HasAVX512Save = true;
1419
#else
1420
  // AVX512 requires additional context to be saved by the OS.
1421
0
  bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
1422
0
#endif
1423
0
1424
0
  Features["avx"]   = HasAVXSave;
1425
0
  Features["fma"]   = ((ECX >> 12) & 1) && HasAVXSave;
1426
0
  // Only enable XSAVE if OS has enabled support for saving YMM state.
1427
0
  Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave;
1428
0
  Features["f16c"]  = ((ECX >> 29) & 1) && HasAVXSave;
1429
0
1430
0
  unsigned MaxExtLevel;
1431
0
  getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
1432
0
1433
0
  bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
1434
0
                     !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
1435
0
  Features["sahf"]   = HasExtLeaf1 && ((ECX >>  0) & 1);
1436
0
  Features["lzcnt"]  = HasExtLeaf1 && ((ECX >>  5) & 1);
1437
0
  Features["sse4a"]  = HasExtLeaf1 && ((ECX >>  6) & 1);
1438
0
  Features["prfchw"] = HasExtLeaf1 && ((ECX >>  8) & 1);
1439
0
  Features["xop"]    = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave;
1440
0
  Features["lwp"]    = HasExtLeaf1 && ((ECX >> 15) & 1);
1441
0
  Features["fma4"]   = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave;
1442
0
  Features["tbm"]    = HasExtLeaf1 && ((ECX >> 21) & 1);
1443
0
  Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
1444
0
1445
0
  Features["64bit"]  = HasExtLeaf1 && ((EDX >> 29) & 1);
1446
0
1447
0
  // Miscellaneous memory related features, detected by
1448
0
  // using the 0x80000008 leaf of the CPUID instruction
1449
0
  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
1450
0
                     !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
1451
0
  Features["clzero"]   = HasExtLeaf8 && ((EBX >> 0) & 1);
1452
0
  Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1);
1453
0
1454
0
  bool HasLeaf7 =
1455
0
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
1456
0
1457
0
  Features["fsgsbase"]   = HasLeaf7 && ((EBX >>  0) & 1);
1458
0
  Features["sgx"]        = HasLeaf7 && ((EBX >>  2) & 1);
1459
0
  Features["bmi"]        = HasLeaf7 && ((EBX >>  3) & 1);
1460
0
  // AVX2 is only supported if we have the OS save support from AVX.
1461
0
  Features["avx2"]       = HasLeaf7 && ((EBX >>  5) & 1) && HasAVXSave;
1462
0
  Features["bmi2"]       = HasLeaf7 && ((EBX >>  8) & 1);
1463
0
  Features["invpcid"]    = HasLeaf7 && ((EBX >> 10) & 1);
1464
0
  Features["rtm"]        = HasLeaf7 && ((EBX >> 11) & 1);
1465
0
  // AVX512 is only supported if the OS supports the context save for it.
1466
0
  Features["avx512f"]    = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
1467
0
  Features["avx512dq"]   = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
1468
0
  Features["rdseed"]     = HasLeaf7 && ((EBX >> 18) & 1);
1469
0
  Features["adx"]        = HasLeaf7 && ((EBX >> 19) & 1);
1470
0
  Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
1471
0
  Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
1472
0
  Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
1473
0
  Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
1474
0
  Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
1475
0
  Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
1476
0
  Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
1477
0
  Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
1478
0
  Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
1479
0
1480
0
  Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
1481
0
  Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
1482
0
  Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
1483
0
  Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
1484
0
  Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
1485
0
  Features["shstk"]           = HasLeaf7 && ((ECX >>  7) & 1);
1486
0
  Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);
1487
0
  Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;
1488
0
  Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
1489
0
  Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
1490
0
  Features["avx512bitalg"]    = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
1491
0
  Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
1492
0
  Features["rdpid"]           = HasLeaf7 && ((ECX >> 22) & 1);
1493
0
  Features["cldemote"]        = HasLeaf7 && ((ECX >> 25) & 1);
1494
0
  Features["movdiri"]         = HasLeaf7 && ((ECX >> 27) & 1);
1495
0
  Features["movdir64b"]       = HasLeaf7 && ((ECX >> 28) & 1);
1496
0
  Features["enqcmd"]          = HasLeaf7 && ((ECX >> 29) & 1);
1497
0
1498
0
  Features["avx512vp2intersect"] =
1499
0
      HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save;
1500
0
  Features["serialize"]       = HasLeaf7 && ((EDX >> 14) & 1);
1501
0
  Features["tsxldtrk"]        = HasLeaf7 && ((EDX >> 16) & 1);
1502
0
  // There are two CPUID leafs which information associated with the pconfig
1503
0
  // instruction:
1504
0
  // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th
1505
0
  // bit of EDX), while the EAX=0x1b leaf returns information on the
1506
0
  // availability of specific pconfig leafs.
1507
0
  // The target feature here only refers to the the first of these two.
1508
0
  // Users might need to check for the availability of specific pconfig
1509
0
  // leaves using cpuid, since that information is ignored while
1510
0
  // detecting features using the "-march=native" flag.
1511
0
  // For more info, see X86 ISA docs.
1512
0
  Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
1513
0
  bool HasLeaf7Subleaf1 =
1514
0
      MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
1515
0
  Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1516
0
1517
0
  bool HasLeafD = MaxLevel >= 0xd &&
1518
0
                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
1519
0
1520
0
  // Only enable XSAVE if OS has enabled support for saving YMM state.
1521
0
  Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave;
1522
0
  Features["xsavec"]   = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave;
1523
0
  Features["xsaves"]   = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave;
1524
0
1525
0
  bool HasLeaf14 = MaxLevel >= 0x14 &&
1526
0
                  !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
1527
0
1528
0
  Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1);
1529
0
1530
0
  return true;
1531
0
}
1532
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
1533
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1534
  std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
1535
  if (!P)
1536
    return false;
1537
1538
  SmallVector<StringRef, 32> Lines;
1539
  P->getBuffer().split(Lines, "\n");
1540
1541
  SmallVector<StringRef, 32> CPUFeatures;
1542
1543
  // Look for the CPU features.
1544
  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
1545
    if (Lines[I].startswith("Features")) {
1546
      Lines[I].split(CPUFeatures, ' ');
1547
      break;
1548
    }
1549
1550
#if defined(__aarch64__)
1551
  // Keep track of which crypto features we have seen
1552
  enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 };
1553
  uint32_t crypto = 0;
1554
#endif
1555
1556
  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
1557
    StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
1558
#if defined(__aarch64__)
1559
                                   .Case("asimd", "neon")
1560
                                   .Case("fp", "fp-armv8")
1561
                                   .Case("crc32", "crc")
1562
#else
1563
                                   .Case("half", "fp16")
1564
                                   .Case("neon", "neon")
1565
                                   .Case("vfpv3", "vfp3")
1566
                                   .Case("vfpv3d16", "d16")
1567
                                   .Case("vfpv4", "vfp4")
1568
                                   .Case("idiva", "hwdiv-arm")
1569
                                   .Case("idivt", "hwdiv")
1570
#endif
1571
                                   .Default("");
1572
1573
#if defined(__aarch64__)
1574
    // We need to check crypto separately since we need all of the crypto
1575
    // extensions to enable the subtarget feature
1576
    if (CPUFeatures[I] == "aes")
1577
      crypto |= CAP_AES;
1578
    else if (CPUFeatures[I] == "pmull")
1579
      crypto |= CAP_PMULL;
1580
    else if (CPUFeatures[I] == "sha1")
1581
      crypto |= CAP_SHA1;
1582
    else if (CPUFeatures[I] == "sha2")
1583
      crypto |= CAP_SHA2;
1584
#endif
1585
1586
    if (LLVMFeatureStr != "")
1587
      Features[LLVMFeatureStr] = true;
1588
  }
1589
1590
#if defined(__aarch64__)
1591
  // If we have all crypto bits we can add the feature
1592
  if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
1593
    Features["crypto"] = true;
1594
#endif
1595
1596
  return true;
1597
}
1598
#elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64))
1599
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
1600
  if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE))
1601
    Features["neon"] = true;
1602
  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
1603
    Features["crc"] = true;
1604
  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
1605
    Features["crypto"] = true;
1606
1607
  return true;
1608
}
1609
#else
1610
bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
1611
#endif
1612
1613
2
std::string sys::getProcessTriple() {
1614
2
  std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
1615
2
  Triple PT(Triple::normalize(TargetTripleString));
1616
2
1617
2
  if (sizeof(void *) == 8 && PT.isArch32Bit())
1618
0
    PT = PT.get64BitArchVariant();
1619
2
  if (sizeof(void *) == 4 && PT.isArch64Bit())
1620
0
    PT = PT.get32BitArchVariant();
1621
2
1622
2
  return PT.str();
1623
2
}