diff options
Diffstat (limited to 'libFLAC/cpu.c')
-rw-r--r-- | libFLAC/cpu.c | 605 |
1 files changed, 339 insertions, 266 deletions
diff --git a/libFLAC/cpu.c b/libFLAC/cpu.c index 60b73bf..40c7950 100644 --- a/libFLAC/cpu.c +++ b/libFLAC/cpu.c @@ -1,5 +1,6 @@ /* libFLAC - Free Lossless Audio Codec library - * Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson + * Copyright (C) 2001-2009 Josh Coalson + * Copyright (C) 2011-2014 Xiph.Org Foundation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -29,47 +30,46 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#if HAVE_CONFIG_H +#ifdef HAVE_CONFIG_H # include <config.h> #endif #include "private/cpu.h" #include <stdlib.h> -#include <stdio.h> +#include <memory.h> +#ifdef DEBUG +# include <stdio.h> +#endif #if defined FLAC__CPU_IA32 # include <signal.h> -#elif defined FLAC__CPU_PPC -# if !defined FLAC__NO_ASM -# if defined FLAC__SYS_DARWIN -# include <sys/sysctl.h> -# include <mach/mach.h> -# include <mach/mach_host.h> -# include <mach/host_info.h> -# include <mach/machine.h> -# ifndef CPU_SUBTYPE_POWERPC_970 -# define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100) -# endif -# else /* FLAC__SYS_DARWIN */ - -# include <signal.h> -# include <setjmp.h> - -static sigjmp_buf jmpbuf; -static volatile sig_atomic_t canjump = 0; - -static void sigill_handler (int sig) + +static void disable_sse(FLAC__CPUInfo *info) { - if (!canjump) { - signal (sig, SIG_DFL); - raise (sig); - } - canjump = 0; - siglongjmp (jmpbuf, 1); + info->ia32.sse = false; + info->ia32.sse2 = false; + info->ia32.sse3 = false; + info->ia32.ssse3 = false; + info->ia32.sse41 = false; + info->ia32.sse42 = false; +} + +static void disable_avx(FLAC__CPUInfo *info) +{ + info->ia32.avx = false; + info->ia32.avx2 = false; + info->ia32.fma = false; } -# endif /* FLAC__SYS_DARWIN */ -# endif /* FLAC__NO_ASM */ -#endif /* FLAC__CPU_PPC */ + +#elif defined FLAC__CPU_X86_64 + +static void disable_avx(FLAC__CPUInfo *info) +{ + info->x86.avx = false; + info->x86.avx2 = false; + info->x86.fma = false; +} +#endif #if defined (__NetBSD__) || defined(__OpenBSD__) #include <sys/param.h> @@ -86,25 +86,34 @@ static void sigill_handler (int sig) /* how to get sysctlbyname()? */ #endif +#ifdef FLAC__CPU_IA32 /* these are flags in EDX of CPUID AX=00000001 */ static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000; static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000; static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000; +#endif + /* these are flags in ECX of CPUID AX=00000001 */ static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001; static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200; -/* these are flags in EDX of CPUID AX=80000001 */ -static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000; -static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000; -static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 0x00080000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 0x00100000; +#if defined FLAC__AVX_SUPPORTED +/* these are flags in ECX of CPUID AX=00000001 */ +static const unsigned FLAC__CPUINFO_IA32_CPUID_OSXSAVE = 0x08000000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX = 0x10000000; +static const unsigned FLAC__CPUINFO_IA32_CPUID_FMA = 0x00001000; +/* these are flags in EBX of CPUID AX=00000007 */ +static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX2 = 0x00000020; +#endif /* * Extra stuff needed for detection of OS support for SSE on IA-32 */ -#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && defined FLAC__HAS_NASM && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS +#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS # if defined(__linux__) /* * If the OS doesn't support SSE, we will get here with a SIGILL. We @@ -119,35 +128,14 @@ static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000; * 6 bytes extra in case our estimate is wrong * 12 bytes puts us in the NOP "landing zone" */ -# undef USE_OBSOLETE_SIGCONTEXT_FLAVOR /* #define this to use the older signal handler method */ -# ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR - static void sigill_handler_sse_os(int signal, struct sigcontext sc) - { - (void)signal; - sc.eip += 3 + 3 + 6; - } -# else # include <sys/ucontext.h> static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc) { (void)signal, (void)si; ((ucontext_t*)uc)->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6; } -# endif # elif defined(_MSC_VER) # include <windows.h> -# undef USE_TRY_CATCH_FLAVOR /* #define this to use the try/catch method for catching illegal opcode exception */ -# ifdef USE_TRY_CATCH_FLAVOR -# else - LONG CALLBACK sigill_handler_sse_os(EXCEPTION_POINTERS *ep) - { - if(ep->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) { - ep->ContextRecord->Eip += 3 + 3 + 6; - return EXCEPTION_CONTINUE_EXECUTION; - } - return EXCEPTION_CONTINUE_SEARCH; - } -# endif # endif #endif @@ -158,261 +146,346 @@ void FLAC__cpu_info(FLAC__CPUInfo *info) * IA32-specific */ #ifdef FLAC__CPU_IA32 + FLAC__bool ia32_fxsr = false; + FLAC__bool ia32_osxsave = false; + (void) ia32_fxsr; (void) ia32_osxsave; /* to avoid warnings about unused variables */ + memset(info, 0, sizeof(*info)); info->type = FLAC__CPUINFO_TYPE_IA32; -#if !defined FLAC__NO_ASM && defined FLAC__HAS_NASM +#if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */ - info->data.ia32.cpuid = FLAC__cpu_have_cpuid_asm_ia32()? true : false; - info->data.ia32.bswap = info->data.ia32.cpuid; /* CPUID => BSWAP since it came after */ - info->data.ia32.cmov = false; - info->data.ia32.mmx = false; - info->data.ia32.fxsr = false; - info->data.ia32.sse = false; - info->data.ia32.sse2 = false; - info->data.ia32.sse3 = false; - info->data.ia32.ssse3 = false; - info->data.ia32._3dnow = false; - info->data.ia32.ext3dnow = false; - info->data.ia32.extmmx = false; - if(info->data.ia32.cpuid) { - /* http://www.sandpile.org/ia32/cpuid.htm */ - FLAC__uint32 flags_edx, flags_ecx; - FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx); - info->data.ia32.cmov = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false; - info->data.ia32.mmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false; - info->data.ia32.fxsr = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false; - info->data.ia32.sse = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false; - info->data.ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false; - info->data.ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; - info->data.ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; - -#ifdef FLAC__USE_3DNOW - flags_edx = FLAC__cpu_info_extended_amd_asm_ia32(); - info->data.ia32._3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW )? true : false; - info->data.ia32.ext3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false; - info->data.ia32.extmmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX )? true : false; +#ifdef FLAC__HAS_X86INTRIN + if(!FLAC__cpu_have_cpuid_x86()) + return; #else - info->data.ia32._3dnow = info->data.ia32.ext3dnow = info->data.ia32.extmmx = false; + if(!FLAC__cpu_have_cpuid_asm_ia32()) + return; #endif + { + /* http://www.sandpile.org/x86/cpuid.htm */ +#ifdef FLAC__HAS_X86INTRIN + FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; + FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->ia32.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */ + FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); +#else + FLAC__uint32 flags_ecx, flags_edx; + FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx); +#endif + info->ia32.cmov = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false; + info->ia32.mmx = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX )? true : false; + ia32_fxsr = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false; + info->ia32.sse = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE )? true : false; + info->ia32.sse2 = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false; + info->ia32.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; + info->ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; + info->ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false; + info->ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false; +#if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED + ia32_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false; + info->ia32.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX )? true : false; + info->ia32.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA )? true : false; + FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->ia32.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 )? true : false; +#endif + } #ifdef DEBUG - fprintf(stderr, "CPU info (IA-32):\n"); - fprintf(stderr, " CPUID ...... %c\n", info->data.ia32.cpuid ? 'Y' : 'n'); - fprintf(stderr, " BSWAP ...... %c\n", info->data.ia32.bswap ? 'Y' : 'n'); - fprintf(stderr, " CMOV ....... %c\n", info->data.ia32.cmov ? 'Y' : 'n'); - fprintf(stderr, " MMX ........ %c\n", info->data.ia32.mmx ? 'Y' : 'n'); - fprintf(stderr, " FXSR ....... %c\n", info->data.ia32.fxsr ? 'Y' : 'n'); - fprintf(stderr, " SSE ........ %c\n", info->data.ia32.sse ? 'Y' : 'n'); - fprintf(stderr, " SSE2 ....... %c\n", info->data.ia32.sse2 ? 'Y' : 'n'); - fprintf(stderr, " SSE3 ....... %c\n", info->data.ia32.sse3 ? 'Y' : 'n'); - fprintf(stderr, " SSSE3 ...... %c\n", info->data.ia32.ssse3 ? 'Y' : 'n'); - fprintf(stderr, " 3DNow! ..... %c\n", info->data.ia32._3dnow ? 'Y' : 'n'); - fprintf(stderr, " 3DNow!-ext . %c\n", info->data.ia32.ext3dnow? 'Y' : 'n'); - fprintf(stderr, " 3DNow!-MMX . %c\n", info->data.ia32.extmmx ? 'Y' : 'n'); + fprintf(stderr, "CPU info (IA-32):\n"); + fprintf(stderr, " CMOV ....... %c\n", info->ia32.cmov ? 'Y' : 'n'); + fprintf(stderr, " MMX ........ %c\n", info->ia32.mmx ? 'Y' : 'n'); + fprintf(stderr, " SSE ........ %c\n", info->ia32.sse ? 'Y' : 'n'); + fprintf(stderr, " SSE2 ....... %c\n", info->ia32.sse2 ? 'Y' : 'n'); + fprintf(stderr, " SSE3 ....... %c\n", info->ia32.sse3 ? 'Y' : 'n'); + fprintf(stderr, " SSSE3 ...... %c\n", info->ia32.ssse3 ? 'Y' : 'n'); + fprintf(stderr, " SSE41 ...... %c\n", info->ia32.sse41 ? 'Y' : 'n'); + fprintf(stderr, " SSE42 ...... %c\n", info->ia32.sse42 ? 'Y' : 'n'); +# if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED + fprintf(stderr, " AVX ........ %c\n", info->ia32.avx ? 'Y' : 'n'); + fprintf(stderr, " FMA ........ %c\n", info->ia32.fma ? 'Y' : 'n'); + fprintf(stderr, " AVX2 ....... %c\n", info->ia32.avx2 ? 'Y' : 'n'); +# endif #endif - /* - * now have to check for OS support of SSE/SSE2 - */ - if(info->data.ia32.fxsr || info->data.ia32.sse || info->data.ia32.sse2) { + /* + * now have to check for OS support of SSE instructions + */ + if(info->ia32.sse) { #if defined FLAC__NO_SSE_OS - /* assume user knows better than us; turn it off */ - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; + /* assume user knows better than us; turn it off */ + disable_sse(info); #elif defined FLAC__SSE_OS - /* assume user knows better than us; leave as detected above */ + /* assume user knows better than us; leave as detected above */ #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__) - int sse = 0; - size_t len; - /* at least one of these must work: */ - len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse); - len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse" , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */ - if(!sse) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; + int sse = 0; + size_t len; + /* at least one of these must work: */ + len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse); + len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse" , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */ + if(!sse) + disable_sse(info); #elif defined(__NetBSD__) || defined (__OpenBSD__) # if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__) - int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE }; - size_t len = sizeof(val); - if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; - else { /* double-check SSE2 */ - mib[1] = CPU_SSE2; - len = sizeof(val); - if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) - info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; + int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE }; + size_t len = sizeof(val); + if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) + disable_sse(info); + else { /* double-check SSE2 */ + mib[1] = CPU_SSE2; + len = sizeof(val); + if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) { + disable_sse(info); + info->ia32.sse = true; } + } # else - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; + disable_sse(info); # endif #elif defined(__linux__) - int sse = 0; - struct sigaction sigill_save; -#ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR - if(0 == sigaction(SIGILL, NULL, &sigill_save) && signal(SIGILL, (void (*)(int))sigill_handler_sse_os) != SIG_ERR) -#else - struct sigaction sigill_sse; - sigill_sse.sa_sigaction = sigill_handler_sse_os; - __sigemptyset(&sigill_sse.sa_mask); - sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */ - if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save)) -#endif - { - /* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */ - /* see sigill_handler_sse_os() for an explanation of the following: */ - asm volatile ( - "xorl %0,%0\n\t" /* for some reason, still need to do this to clear 'sse' var */ - "xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */ - "incl %0\n\t" /* SIGILL handler will jump over this */ - /* landing zone */ - "nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */ - "nop\n\t" - "nop\n\t" - "nop\n\t" - "nop\n\t" - "nop\n\t" - "nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */ - "nop\n\t" - "nop" /* SIGILL jump lands here if "inc" is 1 byte */ - : "=r"(sse) - : "r"(sse) - ); - - sigaction(SIGILL, &sigill_save, NULL); - } + int sse = 0; + struct sigaction sigill_save; + struct sigaction sigill_sse; + sigill_sse.sa_sigaction = sigill_handler_sse_os; + __sigemptyset(&sigill_sse.sa_mask); + sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */ + if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save)) + { + /* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */ + /* see sigill_handler_sse_os() for an explanation of the following: */ + asm volatile ( + "xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */ + "incl %0\n\t" /* SIGILL handler will jump over this */ + /* landing zone */ + "nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */ + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */ + "nop\n\t" + "nop" /* SIGILL jump lands here if "inc" is 1 byte */ + : "=r"(sse) + : "0"(sse) + ); - if(!sse) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; + sigaction(SIGILL, &sigill_save, NULL); + } + + if(!sse) + disable_sse(info); #elif defined(_MSC_VER) -# ifdef USE_TRY_CATCH_FLAVOR - _try { - __asm { -# if _MSC_VER <= 1200 - /* VC6 assembler doesn't know SSE, have to emit bytecode instead */ - _emit 0x0F - _emit 0x57 - _emit 0xC0 -# else - xorps xmm0,xmm0 -# endif - } - } - _except(EXCEPTION_EXECUTE_HANDLER) { - if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; - } -# else - int sse = 0; - LPTOP_LEVEL_EXCEPTION_FILTER save = SetUnhandledExceptionFilter(sigill_handler_sse_os); - /* see GCC version above for explanation */ - /* http://msdn2.microsoft.com/en-us/library/4ks26t93.aspx */ - /* http://www.codeproject.com/cpp/gccasm.asp */ - /* http://www.hick.org/~mmiller/msvc_inline_asm.html */ + __try { __asm { -# if _MSC_VER <= 1200 - /* VC6 assembler doesn't know SSE, have to emit bytecode instead */ - _emit 0x0F - _emit 0x57 - _emit 0xC0 -# else xorps xmm0,xmm0 -# endif - inc sse - nop - nop - nop - nop - nop - nop - nop - nop - nop } - SetUnhandledExceptionFilter(save); - if(!sse) - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; -# endif + } + __except(EXCEPTION_EXECUTE_HANDLER) { + if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION) + disable_sse(info); + } +#elif defined(__GNUC__) /* MinGW goes here */ + int sse = 0; + /* Based on the idea described in Agner Fog's manual "Optimizing subroutines in assembly language" */ + /* In theory, not guaranteed to detect lack of OS SSE support on some future Intel CPUs, but in practice works (see the aforementioned manual) */ + if (ia32_fxsr) { + struct { + FLAC__uint32 buff[128]; + } __attribute__((aligned(16))) fxsr; + FLAC__uint32 old_val, new_val; + + asm volatile ("fxsave %0" : "=m" (fxsr) : "m" (fxsr)); + old_val = fxsr.buff[50]; + fxsr.buff[50] ^= 0x0013c0de; /* change value in the buffer */ + asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr)); /* try to change SSE register */ + fxsr.buff[50] = old_val; /* restore old value in the buffer */ + asm volatile ("fxsave %0 " : "=m" (fxsr) : "m" (fxsr)); /* old value will be overwritten if SSE register was changed */ + new_val = fxsr.buff[50]; /* == old_val if FXRSTOR didn't change SSE register and (old_val ^ 0x0013c0de) otherwise */ + fxsr.buff[50] = old_val; /* again restore old value in the buffer */ + asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr)); /* restore old values of registers */ + + if ((old_val^new_val) == 0x0013c0de) + sse = 1; + } + if(!sse) + disable_sse(info); #else - /* no way to test, disable to be safe */ - info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false; + /* no way to test, disable to be safe */ + disable_sse(info); #endif #ifdef DEBUG - fprintf(stderr, " SSE OS sup . %c\n", info->data.ia32.sse ? 'Y' : 'n'); + fprintf(stderr, " SSE OS sup . %c\n", info->ia32.sse ? 'Y' : 'n'); #endif + } + else /* info->ia32.sse == false */ + disable_sse(info); - } + /* + * now have to check for OS support of AVX instructions + */ + if(info->ia32.avx && ia32_osxsave) { + FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86(); + if ((ecr & 0x6) != 0x6) + disable_avx(info); +#ifdef DEBUG + fprintf(stderr, " AVX OS sup . %c\n", info->ia32.avx ? 'Y' : 'n'); +#endif } + else /* no OS AVX support*/ + disable_avx(info); #else info->use_asm = false; #endif /* - * PPC-specific + * x86-64-specific */ -#elif defined FLAC__CPU_PPC - info->type = FLAC__CPUINFO_TYPE_PPC; -# if !defined FLAC__NO_ASM +#elif defined FLAC__CPU_X86_64 + FLAC__bool x86_osxsave = false; + (void) x86_osxsave; /* to avoid warnings about unused variables */ + memset(info, 0, sizeof(*info)); + info->type = FLAC__CPUINFO_TYPE_X86_64; +#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN info->use_asm = true; -# ifdef FLAC__USE_ALTIVEC -# if defined FLAC__SYS_DARWIN { - int val = 0, mib[2] = { CTL_HW, HW_VECTORUNIT }; - size_t len = sizeof(val); - info->data.ppc.altivec = !(sysctl(mib, 2, &val, &len, NULL, 0) || !val); + /* http://www.sandpile.org/x86/cpuid.htm */ + FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx; + FLAC__cpu_info_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->x86.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E)? true : false; /* GenuineIntel */ + FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->x86.sse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false; + info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false; + info->x86.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false; + info->x86.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false; +#if defined FLAC__AVX_SUPPORTED + x86_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false; + info->x86.avx = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX )? true : false; + info->x86.fma = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA )? true : false; + FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx); + info->x86.avx2 = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2 )? true : false; +#endif } - { - host_basic_info_data_t hostInfo; - mach_msg_type_number_t infoCount; - - infoCount = HOST_BASIC_INFO_COUNT; - host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount); +#ifdef DEBUG + fprintf(stderr, "CPU info (x86-64):\n"); + fprintf(stderr, " SSE3 ....... %c\n", info->x86.sse3 ? 'Y' : 'n'); + fprintf(stderr, " SSSE3 ...... %c\n", info->x86.ssse3 ? 'Y' : 'n'); + fprintf(stderr, " SSE41 ...... %c\n", info->x86.sse41 ? 'Y' : 'n'); + fprintf(stderr, " SSE42 ...... %c\n", info->x86.sse42 ? 'Y' : 'n'); +# if defined FLAC__AVX_SUPPORTED + fprintf(stderr, " AVX ........ %c\n", info->x86.avx ? 'Y' : 'n'); + fprintf(stderr, " FMA ........ %c\n", info->x86.fma ? 'Y' : 'n'); + fprintf(stderr, " AVX2 ....... %c\n", info->x86.avx2 ? 'Y' : 'n'); +# endif +#endif - info->data.ppc.ppc64 = (hostInfo.cpu_type == CPU_TYPE_POWERPC) && (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970); + /* + * now have to check for OS support of AVX instructions + */ + if(info->x86.avx && x86_osxsave) { + FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86(); + if ((ecr & 0x6) != 0x6) + disable_avx(info); +#ifdef DEBUG + fprintf(stderr, " AVX OS sup . %c\n", info->x86.avx ? 'Y' : 'n'); +#endif } -# else /* FLAC__USE_ALTIVEC && !FLAC__SYS_DARWIN */ - { - /* no Darwin, do it the brute-force way */ - /* @@@@@@ this is not thread-safe; replace with SSE OS method above or remove */ - info->data.ppc.altivec = 0; - info->data.ppc.ppc64 = 0; - - signal (SIGILL, sigill_handler); - canjump = 0; - if (!sigsetjmp (jmpbuf, 1)) { - canjump = 1; + else /* no OS AVX support*/ + disable_avx(info); +#else + info->use_asm = false; +#endif - asm volatile ( - "mtspr 256, %0\n\t" - "vand %%v0, %%v0, %%v0" - : - : "r" (-1) - ); +/* + * unknown CPU + */ +#else + info->type = FLAC__CPUINFO_TYPE_UNKNOWN; + info->use_asm = false; +#endif +} - info->data.ppc.altivec = 1; - } - canjump = 0; - if (!sigsetjmp (jmpbuf, 1)) { - int x = 0; - canjump = 1; +#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN - /* PPC64 hardware implements the cntlzd instruction */ - asm volatile ("cntlzd %0, %1" : "=r" (x) : "r" (x) ); +#if defined _MSC_VER +#include <intrin.h> /* for __cpuid() and _xgetbv() */ +#elif defined __GNUC__ && defined HAVE_CPUID_H +#include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */ +#endif - info->data.ppc.ppc64 = 1; - } - signal (SIGILL, SIG_DFL); /*@@@@@@ should save and restore old signal */ +FLAC__uint32 FLAC__cpu_have_cpuid_x86(void) +{ +#ifdef FLAC__CPU_X86_64 + return 1; +#else +# if defined _MSC_VER || defined __INTEL_COMPILER /* Do they support CPUs w/o CPUID support (or OSes that work on those CPUs)? */ + FLAC__uint32 flags1, flags2; + __asm { + pushfd + pushfd + pop eax + mov flags1, eax + xor eax, 0x200000 + push eax + popfd + pushfd + pop eax + mov flags2, eax + popfd } -# endif -# else /* !FLAC__USE_ALTIVEC */ - info->data.ppc.altivec = 0; - info->data.ppc.ppc64 = 0; -# endif + if (((flags1^flags2) & 0x200000) != 0) + return 1; + else + return 0; +# elif defined __GNUC__ && defined HAVE_CPUID_H + if (__get_cpuid_max(0, 0) != 0) + return 1; + else + return 0; # else - info->use_asm = false; + return 0; # endif +#endif +} -/* - * unknown CPI - */ +void FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx) +{ +#if defined _MSC_VER || defined __INTEL_COMPILER + int cpuinfo[4]; + int ext = level & 0x80000000; + __cpuid(cpuinfo, ext); + if((unsigned)cpuinfo[0] < level) { + *eax = *ebx = *ecx = *edx = 0; + return; + } +#if defined FLAC__AVX_SUPPORTED + __cpuidex(cpuinfo, level, 0); /* for AVX2 detection */ #else - info->type = FLAC__CPUINFO_TYPE_UNKNOWN; - info->use_asm = false; + __cpuid(cpuinfo, level); /* some old compilers don't support __cpuidex */ +#endif + *eax = cpuinfo[0]; *ebx = cpuinfo[1]; *ecx = cpuinfo[2]; *edx = cpuinfo[3]; +#elif defined __GNUC__ && defined HAVE_CPUID_H + FLAC__uint32 ext = level & 0x80000000; + __cpuid(ext, *eax, *ebx, *ecx, *edx); + if (*eax < level) { + *eax = *ebx = *ecx = *edx = 0; + return; + } + __cpuid_count(level, 0, *eax, *ebx, *ecx, *edx); +#else + *eax = *ebx = *ecx = *edx = 0; #endif } + +FLAC__uint32 FLAC__cpu_xgetbv_x86(void) +{ +#if (defined _MSC_VER || defined __INTEL_COMPILER) && defined FLAC__AVX_SUPPORTED + return (FLAC__uint32)_xgetbv(0); +#elif defined __GNUC__ + FLAC__uint32 lo, hi; + asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c" (0)); + return lo; +#else + return 0; +#endif +} + +#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */ |