1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
/* Forward declaration of the entry point that is defined further down
   in this file. */
27 const char *host_detect_local_cpu (int argc, const char **argv);
/* The code that follows is only compiled when __GNUC__ is defined and
   either __GNUC__ is at least 5 or __PIC__ is not defined. */
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
39 /* Returns command line parameters that describe size and
40 cache line size of the processor caches.

   LEVEL1 supplies the L1 size (sizekb) and L1 line size (line);
   LEVEL2 supplies only the L2 size (sizekb). The result is the
   concatenation of three "--param name=value " fragments, each of
   which ends with a space. */
43 describe_cache (struct cache_desc level1, struct cache_desc level2)
/* One scratch buffer per emitted --param fragment. */
45 char size[100], line[100], size2[100];
47 /* At the moment, gcc does not use the information
48 about the associativity of the cache. */
50 snprintf (size, sizeof (size),
51 "--param l1-cache-size=%u ", level1.sizekb);
52 snprintf (line, sizeof (line),
53 "--param l1-cache-line-size=%u ", level1.line);
55 snprintf (size2, sizeof (size2),
56 "--param l2-cache-size=%u ", level2.sizekb);
/* NOTE(review): concat is not defined in the code shown here; the
   trailing NULL presumably marks the end of its argument list and the
   result is presumably a newly allocated string -- confirm. */
58 return concat (size, line, size2, NULL);
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006.

   The sizekb, line and assoc fields of *LEVEL2 are all filled in from
   the ECX value returned by that function. */
64 detect_l2_cache (struct cache_desc *level2)
66 unsigned eax, ebx, ecx, edx;
69 __cpuid (0x80000006, eax, ebx, ecx, edx);
/* ECX bits 31..16 go to sizekb and ECX bits 7..0 go to line. */
71 level2->sizekb = (ecx >> 16) & 0xffff;
72 level2->line = ecx & 0xff;
/* ECX bits 15..12 are an encoded associativity that the chain below
   rewrites before it is stored. */
74 assoc = (ecx >> 12) & 0xf;
/* Encodings 0xa, 0xb and 0xc become 32, 48 and 64. */
79 else if (assoc >= 0xa && assoc <= 0xc)
80 assoc = 32 + (assoc - 0xa) * 16;
/* Encodings 0xd and 0xe become 96 and 128. */
81 else if (assoc >= 0xd && assoc <= 0xe)
82 assoc = 96 + (assoc - 0xd) * 32;
84 level2->assoc = assoc;
87 /* Returns the description of caches for an AMD processor.

   MAX_EXT_LEVEL is compared against the extended CPUID function
   numbers 0x80000005 and 0x80000006 before each one is used. The
   returned string is the one built by describe_cache. */
90 detect_caches_amd (unsigned max_ext_level)
92 unsigned eax, ebx, ecx, edx;
/* level2 starts zeroed, so it stays all-zero when function 0x80000006
   is not queried below. */
94 struct cache_desc level1, level2 = {0, 0, 0};
/* Checked before function 0x80000005 is queried below. */
96 if (max_ext_level < 0x80000005)
99 __cpuid (0x80000005, eax, ebx, ecx, edx);
/* L1 fields come from ECX: bits 31..24 (sizekb), bits 23..16 (assoc)
   and bits 7..0 (line). */
101 level1.sizekb = (ecx >> 24) & 0xff;
102 level1.assoc = (ecx >> 16) & 0xff;
103 level1.line = ecx & 0xff;
/* L2 data is optional and only read when the function is available. */
105 if (max_ext_level >= 0x80000006)
106 detect_l2_cache (&level2);
108 return describe_cache (level1, level2);
111 /* Decodes the size, the associativity and the cache line size of
112 L1/L2 caches of an Intel processor. Values are based on
113 "Intel Processor Identification and the CPUID Instruction"
114 [Application Note 485], revision -032, December 2007.

   REG is treated as four bytes; each byte, starting with bits 31..24
   and ending with bits 7..0, is the selector of the switch below, and
   an entry stores its values into either *LEVEL1 or *LEVEL2. */
117 decode_caches_intel (unsigned reg, bool xeon_mp,
118 struct cache_desc *level1, struct cache_desc *level2)
/* Step through the bytes of REG from the most significant one down. */
122 for (i = 24; i >= 0; i -= 8)
123 switch ((reg >> i) & 0xff)
/* Every entry below assigns the three fields sizekb, assoc and line of
   one cache level. */
126 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
129 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
132 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
135 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
138 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
141 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
144 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
147 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
150 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
153 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
156 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
159 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
162 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
165 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
168 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
171 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
174 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
177 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
180 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
185 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
188 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
191 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
194 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
197 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
200 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
203 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
206 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
209 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
212 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
215 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
218 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
221 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
224 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
227 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
230 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
233 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
236 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
239 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
242 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
249 /* Detect cache parameters using CPUID function 2.

   XEON_MP, LEVEL1 and LEVEL2 are handed on unchanged to
   decode_caches_intel, which receives one register value per call. */
252 detect_caches_cpuid2 (bool xeon_mp,
253 struct cache_desc *level1, struct cache_desc *level2)
258 __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
/* nreps is the low four bits of the first register (EAX). */
260 nreps = regs[0] & 0x0f;
/* Only registers that are non-zero and have bit 31 clear are decoded. */
265 for (i = 0; i < 4; i++)
266 if (regs[i] && !((regs[i] >> 31) & 1))
267 decode_caches_intel (regs[i], xeon_mp, level1, level2);
/* Issue function 2 again, refreshing all four registers. */
270 __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
274 /* Detect cache parameters using CPUID function 4. This
275 method doesn't require hardcoded tables.

   Results are written through a struct cache_desc pointer; LEVEL1,
   LEVEL2 and LEVEL3 are the candidate destinations. */
286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287 struct cache_desc *level3)
289 struct cache_desc *cache;
291 unsigned eax, ebx, ecx, edx;
/* The loop header has no termination condition; COUNT is passed as the
   second argument of each __cpuid_count call. */
294 for (count = 0;; count++)
296 __cpuid_count(4, count, eax, ebx, ecx, edx);
/* The selector is the three-bit field in EAX bits 7..5. */
304 switch ((eax >> 5) & 0x07)
/* Each raw field has 1 added: sets is ECX + 1, part is EBX bits 21..12
   plus 1, assoc is EBX bits 31..22 plus 1, line is EBX bits 11..0
   plus 1. */
321 unsigned sets = ecx + 1;
322 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
324 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325 cache->line = (ebx & 0x0fff) + 1;
/* The product of the four values is divided by 1024 to give sizekb
   (presumably a byte count turned into KB -- confirm). */
327 cache->sizekb = (cache->assoc * part
328 * cache->line * sets) / 1024;
337 /* Returns the description of caches for an Intel processor.

   XEON_MP is passed on to detect_caches_cpuid2. MAX_LEVEL and
   MAX_EXT_LEVEL decide which CPUID queries are attempted. On the path
   that reaches the final return, *L2SIZEKB receives level2.sizekb and
   the result is the string built by describe_cache. */
340 detect_caches_intel (bool xeon_mp, unsigned max_level,
341 unsigned max_ext_level, unsigned *l2sizekb)
/* All three descriptions start zeroed, so a zero sizekb can be tested
   below to see whether a level was filled in. */
343 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
/* The CPUID function 4 method is tried first; the function 2 method is
   the alternative when MAX_LEVEL is at least 2. */
346 detect_caches_cpuid4 (&level1, &level2, &level3);
347 else if (max_level >= 2)
348 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
/* A zero L1 size means no L1 description was filled in above. */
352 if (level1.sizekb == 0)
355 /* Let the L3 replace the L2. This assumes inclusive caches
356 and single threaded program for now. */
360 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
361 method if other methods fail to provide L2 cache parameters. */
362 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363 detect_l2_cache (&level2);
365 *l2sizekb = level2.sizekb;
367 return describe_cache (level1, level2);
370 /* This will be called by the spec parser in gcc.c when it sees
371 a %:local_cpu_detect(args) construct. Currently it will be called
372 with either "arch" or "tune" as argument depending on if -march=native
373 or -mtune=native is to be substituted.
375 It returns a string containing new command line parameters to be
376 put at the place of the above two options, depending on the CPU
377 this is executed on. E.g. "-march=k8" on an AMD64 machine
The result is the concatenation of the cache --param string, "-m",
ARGV[0], "=", the selected CPU name and the per-feature options.
380 ARGC and ARGV are set depending on the actual arguments given
383 const char *host_detect_local_cpu (int argc, const char **argv)
385 enum processor_type processor = PROCESSOR_I386;
386 const char *cpu = "i386";
388 const char *cache = "";
389 const char *options = "";
391 unsigned int eax, ebx, ecx, edx;
393 unsigned int max_level, ext_level;
396 unsigned int model, family;
398 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
401 /* Extended features */
402 unsigned int has_lahf_lm = 0, has_sse4a = 0;
403 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410 unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421 unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422 unsigned int has_avx512bitalg = 0;
423 unsigned int has_shstk = 0;
424 unsigned int has_avx512vnni = 0, has_vaes = 0;
425 unsigned int has_vpclmulqdq = 0;
426 unsigned int has_movdiri = 0, has_movdir64b = 0;
/* Passed by address to detect_caches_intel further down; it keeps its
   initial value of 0 on the paths that do not make that call. */
430 unsigned int l2sizekb = 0;
/* ARGV[0] selects the mode: ARCH is true for "arch", and the check
   that follows singles out anything that is neither "arch" nor
   "tune". */
435 arch = !strcmp (argv[0], "arch");
437 if (!arch && strcmp (argv[0], "tune"))
/* VENDOR is written through the second argument and is later compared
   with the signature_*_ebx constants. */
440 max_level = __get_cpuid_max (0, &vendor);
/* CPUID function 1: family and model come from EAX, feature bits from
   ECX and EDX. */
444 __cpuid (1, eax, ebx, ecx, edx);
/* Base model is EAX bits 7..4 and base family is EAX bits 11..8. */
446 model = (eax >> 4) & 0x0f;
447 family = (eax >> 8) & 0x0f;
448 if (vendor == signature_INTEL_ebx
449 || vendor == signature_AMD_ebx)
451 unsigned int extended_model, extended_family;
/* extended_model is EAX bits 19..16 already positioned as the high
   nibble, so it can be added to the base model directly;
   extended_family is EAX bits 27..20. */
453 extended_model = (eax >> 12) & 0xf0;
454 extended_family = (eax >> 20) & 0xff;
457 family += extended_family;
458 model += extended_model;
460 else if (family == 0x06)
461 model += extended_model;
/* Feature bits taken from ECX of function 1. */
464 has_sse3 = ecx & bit_SSE3;
465 has_ssse3 = ecx & bit_SSSE3;
466 has_sse4_1 = ecx & bit_SSE4_1;
467 has_sse4_2 = ecx & bit_SSE4_2;
468 has_avx = ecx & bit_AVX;
469 has_osxsave = ecx & bit_OSXSAVE;
470 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
471 has_movbe = ecx & bit_MOVBE;
472 has_popcnt = ecx & bit_POPCNT;
473 has_aes = ecx & bit_AES;
474 has_pclmul = ecx & bit_PCLMUL;
475 has_fma = ecx & bit_FMA;
476 has_f16c = ecx & bit_F16C;
477 has_rdrnd = ecx & bit_RDRND;
478 has_xsave = ecx & bit_XSAVE;
/* Feature bits taken from EDX of function 1. */
480 has_cmpxchg8b = edx & bit_CMPXCHG8B;
481 has_cmov = edx & bit_CMOV;
482 has_mmx = edx & bit_MMX;
483 has_fxsr = edx & bit_FXSAVE;
484 has_sse = edx & bit_SSE;
485 has_sse2 = edx & bit_SSE2;
/* CPUID function 7, sub-function 0: further feature bits from EBX, ECX
   and EDX. */
489 __cpuid_count (7, 0, eax, ebx, ecx, edx);
491 has_bmi = ebx & bit_BMI;
492 has_sgx = ebx & bit_SGX;
493 has_hle = ebx & bit_HLE;
494 has_rtm = ebx & bit_RTM;
495 has_avx2 = ebx & bit_AVX2;
496 has_bmi2 = ebx & bit_BMI2;
497 has_fsgsbase = ebx & bit_FSGSBASE;
498 has_rdseed = ebx & bit_RDSEED;
499 has_adx = ebx & bit_ADX;
500 has_avx512f = ebx & bit_AVX512F;
501 has_avx512er = ebx & bit_AVX512ER;
502 has_avx512pf = ebx & bit_AVX512PF;
503 has_avx512cd = ebx & bit_AVX512CD;
504 has_sha = ebx & bit_SHA;
505 has_clflushopt = ebx & bit_CLFLUSHOPT;
506 has_clwb = ebx & bit_CLWB;
507 has_avx512dq = ebx & bit_AVX512DQ;
508 has_avx512bw = ebx & bit_AVX512BW;
509 has_avx512vl = ebx & bit_AVX512VL;
510 has_avx512ifma = ebx & bit_AVX512IFMA;
512 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
513 has_avx512vbmi = ecx & bit_AVX512VBMI;
514 has_pku = ecx & bit_OSPKE;
515 has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
516 has_avx512vnni = ecx & bit_AVX512VNNI;
517 has_rdpid = ecx & bit_RDPID;
518 has_gfni = ecx & bit_GFNI;
519 has_vaes = ecx & bit_VAES;
520 has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
521 has_avx512bitalg = ecx & bit_AVX512BITALG;
522 has_movdiri = ecx & bit_MOVDIRI;
523 has_movdir64b = ecx & bit_MOVDIR64B;
525 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
526 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
528 has_shstk = ecx & bit_SHSTK;
529 has_pconfig = edx & bit_PCONFIG;
/* CPUID function 13, sub-function 1: the XSAVEOPT, XSAVEC and XSAVES
   bits are read from EAX. */
534 __cpuid_count (13, 1, eax, ebx, ecx, edx);
536 has_xsaveopt = eax & bit_XSAVEOPT;
537 has_xsavec = eax & bit_XSAVEC;
538 has_xsaves = eax & bit_XSAVES;
541 /* Check cpuid level of extended features. */
542 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
544 if (ext_level >= 0x80000001)
/* Extended function 0x80000001: feature bits from ECX and EDX. */
546 __cpuid (0x80000001, eax, ebx, ecx, edx);
548 has_lahf_lm = ecx & bit_LAHF_LM;
549 has_sse4a = ecx & bit_SSE4a;
550 has_abm = ecx & bit_ABM;
551 has_lwp = ecx & bit_LWP;
552 has_fma4 = ecx & bit_FMA4;
553 has_xop = ecx & bit_XOP;
554 has_tbm = ecx & bit_TBM;
555 has_lzcnt = ecx & bit_LZCNT;
556 has_prfchw = ecx & bit_PRFCHW;
558 has_longmode = edx & bit_LM;
559 has_3dnowp = edx & bit_3DNOWP;
560 has_3dnow = edx & bit_3DNOW;
561 has_mwaitx = ecx & bit_MWAITX;
564 if (ext_level >= 0x80000008)
/* Extended function 0x80000008: the CLZERO and WBNOINVD bits are read
   from EBX. */
566 __cpuid (0x80000008, eax, ebx, ecx, edx);
567 has_clzero = ebx & bit_CLZERO;
568 has_wbnoinvd = ebx & bit_WBNOINVD;
571 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
572 #define XCR_XFEATURE_ENABLED_MASK 0x0
573 #define XSTATE_FP 0x1
574 #define XSTATE_SSE 0x2
575 #define XSTATE_YMM 0x4
576 #define XSTATE_OPMASK 0x20
577 #define XSTATE_ZMM 0x40
578 #define XSTATE_HI_ZMM 0x80
580 #define XCR_AVX_ENABLED_MASK \
581 (XSTATE_SSE | XSTATE_YMM)
582 #define XCR_AVX512F_ENABLED_MASK \
583 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
/* The instruction is emitted as raw bytes. ECX is loaded with
   XCR_XFEATURE_ENABLED_MASK and the outputs are taken from EAX and
   EDX. */
586 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
587 : "=a" (eax), "=d" (edx)
588 : "c" (XCR_XFEATURE_ENABLED_MASK));
592 /* Check if AVX registers are supported. */
593 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
607 /* Check if AVX512F registers are supported. */
608 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
/* The AMD, Centaur, Cyrix and NSC vendor signatures all use
   detect_caches_amd; the Intel signature uses detect_caches_intel. */
621 if (vendor == signature_AMD_ebx
622 || vendor == signature_CENTAUR_ebx
623 || vendor == signature_CYRIX_ebx
624 || vendor == signature_NSC_ebx)
625 cache = detect_caches_amd (ext_level);
626 else if (vendor == signature_INTEL_ebx)
/* xeon_mp is true only for family 15, model 6. */
628 bool xeon_mp = (family == 15 && model == 6);
629 cache = detect_caches_intel (xeon_mp, max_level,
630 ext_level, &l2sizekb);
/* From here the processor type is chosen, starting with the AMD
   vendor signature and using the feature bits gathered above. */
634 if (vendor == signature_AMD_ebx)
638 /* Detect geode processor by its processor signature. */
639 if (ext_level >= 0x80000002)
640 __cpuid (0x80000002, name, ebx, ecx, edx);
644 if (name == signature_NSC_ebx)
645 processor = PROCESSOR_GEODE;
646 else if (has_movbe && family == 22)
647 processor = PROCESSOR_BTVER2;
649 processor = PROCESSOR_ZNVER1;
651 processor = PROCESSOR_BDVER4;
652 else if (has_xsaveopt)
653 processor = PROCESSOR_BDVER3;
655 processor = PROCESSOR_BDVER2;
657 processor = PROCESSOR_BDVER1;
658 else if (has_sse4a && has_ssse3)
659 processor = PROCESSOR_BTVER1;
661 processor = PROCESSOR_AMDFAM10;
662 else if (has_sse2 || has_longmode)
663 processor = PROCESSOR_K8;
664 else if (has_3dnowp && family == 6)
665 processor = PROCESSOR_ATHLON;
667 processor = PROCESSOR_K6;
669 processor = PROCESSOR_PENTIUM;
671 else if (vendor == signature_CENTAUR_ebx)
673 processor = PROCESSOR_GENERIC;
678 /* We have no idea. */
682 if (has_3dnow || has_mmx)
683 processor = PROCESSOR_I486;
688 processor = PROCESSOR_K8;
690 processor = PROCESSOR_PENTIUMPRO;
692 processor = PROCESSOR_I486;
700 processor = PROCESSOR_I486;
703 processor = PROCESSOR_PENTIUM;
706 processor = PROCESSOR_PENTIUMPRO;
709 processor = PROCESSOR_PENTIUM4;
712 /* We have no idea. */
713 processor = PROCESSOR_GENERIC;
/* From here the CPU name string is chosen, keyed on the processor type
   and refined in places by ARCH, the vendor and the feature bits. */
723 if (arch && vendor == signature_CENTAUR_ebx)
730 /* Assume WinChip C6. */
736 case PROCESSOR_PENTIUM:
742 case PROCESSOR_PENTIUMPRO:
811 /* Skylake with AVX-512. */
812 cpu = "skylake-avx512";
815 /* Knights Landing. */
829 /* This is unknown family 0x6 CPU. */
830 /* Assume Ice Lake Server. */
832 cpu = "icelake-server";
833 /* Assume Ice Lake. */
835 cpu = "icelake-client";
836 /* Assume Cannon Lake. */
837 else if (has_avx512vbmi)
839 /* Assume Knights Mill. */
840 else if (has_avx5124vnniw)
842 /* Assume Knights Landing. */
843 else if (has_avx512er)
845 /* Assume Skylake with AVX-512. */
846 else if (has_avx512f)
847 cpu = "skylake-avx512";
848 /* Assume Skylake. */
849 else if (has_clflushopt)
851 /* Assume Broadwell. */
855 /* Assume Haswell. */
858 /* Assume Sandy Bridge. */
863 /* Assume Silvermont. */
866 /* Assume Nehalem. */
872 /* Assume Bonnell. */
878 else if (has_longmode)
879 /* Perhaps some emulator? Assume x86-64, otherwise gcc
880 -march=native would be unusable for 64-bit compilations,
881 as all the CPUs below are 32-bit only. */
885 if (vendor == signature_CENTAUR_ebx)
886 /* C7 / Eden "Esther" */
889 /* It is Core Duo. */
893 /* It is Pentium M. */
897 if (vendor == signature_CENTAUR_ebx)
900 /* Eden "Nehemiah" */
906 /* It is Pentium III. */
910 /* It is Pentium II. */
913 /* Default to Pentium Pro. */
917 /* For -mtune, we default to -mtune=generic. */
922 case PROCESSOR_PENTIUM4:
933 case PROCESSOR_GEODE:
937 if (arch && has_3dnow)
942 case PROCESSOR_ATHLON:
951 if (vendor == signature_CENTAUR_ebx)
954 /* Nano 3000 | Nano dual / quad core | Eden X4 */
957 /* Nano 1000 | Nano 2000 */
972 /* For -mtune, we default to -mtune=k8 */
975 case PROCESSOR_AMDFAM10:
978 case PROCESSOR_BDVER1:
981 case PROCESSOR_BDVER2:
984 case PROCESSOR_BDVER3:
987 case PROCESSOR_BDVER4:
990 case PROCESSOR_ZNVER1:
993 case PROCESSOR_BTVER1:
996 case PROCESSOR_BTVER2:
1001 /* Use something reasonable. */
1013 else if (has_longmode)
1014 /* Perhaps some emulator? Assume x86-64, otherwise gcc
1015 -march=native would be unusable for 64-bit compilations,
1016 as all the CPUs below are 32-bit only. */
1023 cpu = "pentium-mmx";
1024 else if (has_cmpxchg8b)
/* For each feature flag pick either the enabling or the disabling
   option text. Every fragment starts with a space so the fragments
   can be joined without further separators. */
1033 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1034 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1035 const char *sse = has_sse ? " -msse" : " -mno-sse";
1036 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1037 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1038 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1039 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1040 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1041 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1042 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1043 const char *aes = has_aes ? " -maes" : " -mno-aes";
1044 const char *sha = has_sha ? " -msha" : " -mno-sha";
1045 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1046 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1047 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1048 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1049 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1050 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1051 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1052 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1053 const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1054 const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1055 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1056 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1057 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1058 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1059 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1060 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1061 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1062 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1063 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1064 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1065 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1066 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1067 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1068 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1069 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1070 const char *adx = has_adx ? " -madx" : " -mno-adx";
1071 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1072 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1073 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1074 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1075 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1076 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1077 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1078 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1079 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1080 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1081 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1082 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1083 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1084 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1085 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1086 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1087 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1088 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1089 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1090 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1091 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1092 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1093 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1094 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1095 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1096 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1097 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1098 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1099 const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1100 const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1101 const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1102 const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
/* Append the option fragments to the current value of OPTIONS. */
1103 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1104 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1105 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1107 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1108 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1109 fxsr, xsave, xsaveopt, avx512f, avx512er,
1110 avx512cd, avx512pf, prefetchwt1, clflushopt,
1111 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1112 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1113 clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1114 avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1115 avx512bitalg, movdiri, movdir64b, NULL);
/* Result layout: the cache parameters, then "-m" followed by ARGV[0]
   (so -march= or -mtune=), the CPU name, and finally the options. */
1119 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1123 /* If we are compiling with GCC where %EBX register is fixed, then the
1124 driver will just ignore -march and -mtune "native" target and will leave
1125 it to the newly built compiler to generate code for its default target.
   Both parameters are left unnamed in this definition. */
1127 const char *host_detect_local_cpu (int, const char **)
1131 #endif /* __GNUC__ */