@@ -422,14 +422,128 @@ void ffCPUDetectByCpuid(FFCPUResult* cpu)
422422
423423 #undef HAS_CAP
424424}
425+ #elif _WIN32
426+ #include <processthreadsapi.h>
427+
// Processor feature IDs for IsProcessorFeaturePresent() that are missing from
// winnt.h of MinGW-w64. Each one is guarded so that a toolchain whose headers
// already provide the constant keeps its own definition instead of triggering
// a macro redefinition diagnostic.
#ifndef PF_ARM_LSE2_AVAILABLE
    #define PF_ARM_LSE2_AVAILABLE 62
#endif
#ifndef PF_RESERVED_FEATURE
    #define PF_RESERVED_FEATURE 63
#endif
#ifndef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE 64
#endif
#ifndef PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE 65
#endif
#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
#endif
#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
#endif
#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
#endif
#ifndef PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE 69
#endif
#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
#endif
#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
#endif
#ifndef PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE 72
#endif
#ifndef PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE 73
#endif
#ifndef PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE 74
#endif
#ifndef PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE 75
#endif
#ifndef PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE 76
#endif
#ifndef PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE 77
#endif
#ifndef PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE 78
#endif
#ifndef PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE 79
#endif
#ifndef PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE 80
#endif
#ifndef PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE 81
#endif
#ifndef PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE 82
#endif
#ifndef PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE 83
#endif
#ifndef PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE 84
#endif
#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
#endif
#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
#endif
#ifndef PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE 87
#endif
#ifndef PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE
    #define PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE 88
#endif
457+ void ffCPUDetectByCpuid (FFCPUResult * cpu )
458+ {
459+ // ARMv8-A
460+ bool has_vfp = IsProcessorFeaturePresent (PF_ARM_VFP_32_REGISTERS_AVAILABLE ); // Implies basic FP support
461+ bool has_neon = IsProcessorFeaturePresent (PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ); // NEON (ASIMD)
462+
463+ // ARMv8.1-A
464+ bool has_atomics = IsProcessorFeaturePresent (PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE ); // LSE atomics
465+ bool has_crc32 = IsProcessorFeaturePresent (PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE ); // CRC32
466+
467+ // ARMv8.2-A
468+ bool has_fp16 = IsProcessorFeaturePresent (PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE ); // Half-precision FP
469+
470+ // ARMv8.3-A
471+ bool has_lrcpc = IsProcessorFeaturePresent (PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE ); // LDAPR/LR with RCPC semantics
472+ bool has_jscvt = IsProcessorFeaturePresent (PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE ); // FJCVTZS
473+
474+ // ARMv8.4-A
475+ // My CPU (Apple M1 Pro in VM) does support LSE2, but Windows doesn't detect it for some reason
476+ // bool has_lse2 = IsProcessorFeaturePresent(PF_ARM_LSE2_AVAILABLE); // Large System Extensions version 2, optional from v8.2
477+ bool has_dp = IsProcessorFeaturePresent (PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE ); // DotProd, optional from v8.1 (*)
478+
479+ // ARMv9.0-A
480+ bool has_sve2 = IsProcessorFeaturePresent (PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE ); // SVE2
481+
482+ // ARMv9.1-A
483+ // ARMv8.6-A
484+ bool has_bf16 = IsProcessorFeaturePresent (PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE ); // BF16, optional from v8.2
485+ bool has_i8mm = IsProcessorFeaturePresent (PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE ); // Int8 matrix multiply, optional from v8.2
486+
487+ // ARMv8.7-A
488+ bool has_ebf16 = IsProcessorFeaturePresent (PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE ); // Extended BFloat16 behaviors, optional from v8.2
489+
490+ // ARMv9.2-A
491+ bool has_sme = IsProcessorFeaturePresent (PF_ARM_SME_INSTRUCTIONS_AVAILABLE ); // SME
492+
493+ // ARMv9.3-A
494+ bool has_sme2 = IsProcessorFeaturePresent (PF_ARM_SME2_INSTRUCTIONS_AVAILABLE ); // SME2
495+
496+ // ARMv9.4-A
497+ bool has_sme2p1 = IsProcessorFeaturePresent (PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE ); // SME2.1
498+
499+
500+ if (has_sve2 || has_sme )
501+ {
502+ // ARMv9 family
503+ if (has_sme2p1 ) {
504+ cpu -> march = "ARMv9.4-A" ;
505+ } else if (has_sme2 ) {
506+ cpu -> march = "ARMv9.3-A" ;
507+ } else if (has_sme ) {
508+ cpu -> march = "ARMv9.2-A" ;
509+ } else if (has_i8mm && has_bf16 ) {
510+ cpu -> march = "ARMv9.1-A" ;
511+ } else {
512+ cpu -> march = "ARMv9.0-A" ;
513+ }
514+ }
515+ else
516+ {
517+ // ARMv8 family
518+ if (has_ebf16 ) {
519+ cpu -> march = "ARMv8.7-A" ;
520+ } else if (has_i8mm && has_bf16 ) {
521+ cpu -> march = "ARMv8.6-A" ;
522+ } else if (has_dp ) {
523+ cpu -> march = "ARMv8.4-A" ;
524+ } else if (has_lrcpc && has_jscvt ) {
525+ cpu -> march = "ARMv8.3-A" ;
526+ } else if (has_fp16 ) {
527+ cpu -> march = "ARMv8.2-A" ;
528+ } else if (has_atomics && has_crc32 ) {
529+ cpu -> march = "ARMv8.1-A" ;
530+ } else if (has_neon && has_vfp ) {
531+ cpu -> march = "ARMv8-A" ;
532+ }
533+ }
534+ }
425535#else
426- #endif // __linux__
536+ void ffCPUDetectByCpuid (FF_MAYBE_UNUSED FFCPUResult * cpu )
537+ {
538+ // Unsupported system
539+ }
540+ #endif
427541
428542#else
429543
430544void ffCPUDetectByCpuid (FF_MAYBE_UNUSED FFCPUResult * cpu )
431545{
432- // Unsupported platform
546+ // Unsupported architecture
433547}
434548
435549#endif
0 commit comments