Add Win64 and AArch64 builds of the AVX2/AVX-512 block compares; fix AVX-512F detection.

Review notes on this patch:
  * CMakeLists.txt: the x86-64 GCC assembly stays inside `if (IS_ARCH_AMD64)`.
    Listing it for every Linux target would hand x86-64 assembly to ARM64
    builds and define the same symbols as the new *_AARCH64.cpp files.
  * AVX512_MSVC_AMD64.asm: a qword VPCMPEQQ only writes mask bits 0-7, so the
    result is tested with KMOVW/CMP instead of KORTESTQ, whose carry flag
    requires all 64 mask bits to be set.
  * *_AARCH64.cpp: the fallbacks compare the block with std::memcmp instead of
    unconditionally reporting a mismatch.
  * AVX512.h: VMOVDQA64 on a ZMM register needs 64-byte alignment. Only the
    SInt_64 overload's note is inside this hunk; check the UInt_64 overload.
  * Whitespace of context lines was reconstructed; if a hunk is rejected, apply
    with `git apply --ignore-whitespace`.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f9acbc..cd92063 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -217,6 +217,7 @@ if (IS_OS_WINDOWS)
             src/system/CPU_MSVC_AMD64.asm src/HRNG_MSVC.asm src/Math_MSVC_AMD64.asm
             src/io/Directory_W32.cpp include/ehs/io/Directory_W32.h
             include/ehs/io/socket/ICMP_W32.h src/io/socket/ICMP_W32.cpp
+            src/system/AVX2_MSVC_AMD64.asm src/system/AVX512_MSVC_AMD64.asm
     )
 
 elseif (IS_OS_LINUX)
@@ -254,9 +255,9 @@ elseif (IS_OS_LINUX)
     endif ()
 
     if (IS_ARCH_AMD64)
         list(APPEND EHS_SOURCES src/system/CPU_GCC_AMD64.asm src/HRNG_GCC.asm src/Math_GCC_AMD64.asm src/system/AVX2_GCC_AMD64.asm src/system/AVX512_GCC_AMD64.asm)
     elseif (IS_ARCH_ARM64)
-        list(APPEND EHS_SOURCES src/system/CPU_ARM64.cpp src/HRNG_ARM64.cpp src/Math_GCC_ARM64.s)
+        list(APPEND EHS_SOURCES src/system/CPU_ARM64.cpp src/HRNG_ARM64.cpp src/Math_GCC_ARM64.s src/system/AVX2_AARCH64.cpp src/system/AVX512_AARCH64.cpp)
     endif ()
 endif()
 
diff --git a/include/ehs/system/AVX512.h b/include/ehs/system/AVX512.h
index ca13a1a..73e256d 100644
--- a/include/ehs/system/AVX512.h
+++ b/include/ehs/system/AVX512.h
@@ -33,4 +33,4 @@ namespace ehs
-        /// @note The parameters "a", and "b" must have alignas(32).
+        /// @note The parameters "a", and "b" must have alignas(64).
         static bool CompareAligned(const SInt_64 *a, const SInt_64 *b);
     };
-}
\ No newline at end of file
+}
diff --git a/src/Util.cpp b/src/Util.cpp
index c373964..dcbbe4c 100644
--- a/src/Util.cpp
+++ b/src/Util.cpp
@@ -15,21 +15,7 @@ namespace ehs
 
         while (i < size)
         {
-            if (CPU::hasAVX512F && remainder >= 64)
-            {
-                if (!AVX512::CompareUnaligned((UInt_64*)&aBytes[i], (UInt_64*)&bBytes[i]))
-                    return false;
-
-                i += 64;
-            }
-            else if (CPU::hasAVX2 && remainder >= 32)
-            {
-                if (!AVX2::CompareUnaligned((UInt_64*)&aBytes[i], (UInt_64*)&bBytes[i]))
-                    return false;
-
-                i += 32;
-            }
-            else if (remainder >= sizeof(UInt_64))
+            if (remainder >= sizeof(UInt_64))
             {
                 if (*(UInt_64*)&aBytes[i] != *(UInt_64*)&bBytes[i])
                     return false;
diff --git a/src/system/AVX2_AARCH64.cpp b/src/system/AVX2_AARCH64.cpp
new file mode 100644
index 0000000..654853b
--- /dev/null
+++ b/src/system/AVX2_AARCH64.cpp
@@ -0,0 +1,34 @@
+#include "ehs/system/AVX2.h"
+
+#include <cstring>
+
+namespace ehs
+{
+    /// Bytes covered by one 256-bit compare (the width of the YMM loads on x86-64).
+    static constexpr std::size_t avx2BlockSize = 32;
+
+    /// Portable fallback: true when the 32 bytes at "a" and "b" are identical.
+    /// @note std::memcmp has no alignment requirement.
+    bool AVX2::CompareUnaligned(const UInt_64* a, const UInt_64* b)
+    {
+        return std::memcmp(a, b, avx2BlockSize) == 0;
+    }
+
+    /// Signed overload; equality is sign-agnostic, so the same byte compare applies.
+    bool AVX2::CompareUnaligned(const SInt_64* a, const SInt_64* b)
+    {
+        return std::memcmp(a, b, avx2BlockSize) == 0;
+    }
+
+    /// Aligned overload; the fallback itself does not rely on the alignment.
+    bool AVX2::CompareAligned(const UInt_64* a, const UInt_64* b)
+    {
+        return std::memcmp(a, b, avx2BlockSize) == 0;
+    }
+
+    /// Signed, aligned overload.
+    bool AVX2::CompareAligned(const SInt_64* a, const SInt_64* b)
+    {
+        return std::memcmp(a, b, avx2BlockSize) == 0;
+    }
+}
diff --git a/src/system/AVX2_MSVC_AMD64.asm b/src/system/AVX2_MSVC_AMD64.asm
new file mode 100644
index 0000000..93cd8d8
--- /dev/null
+++ b/src/system/AVX2_MSVC_AMD64.asm
@@ -0,0 +1,39 @@
+; 256-bit (32-byte) equality compares for the Win64 build, NASM syntax.
+; The code reads "a" through RCX and "b" through RDX and returns the bool in AL.
+;
+; NOTE(review): "PEBK"/"PEBJ" decorate const unsigned long* / const long*. If
+; UInt_64/SInt_64 are (unsigned) long long under MSVC the decoration would be
+; "PEB_K"/"PEB_J" instead - confirm against the project's type definitions.
+global ?CompareUnaligned@AVX2@ehs@@SA_NPEBK0@Z
+global ?CompareUnaligned@AVX2@ehs@@SA_NPEBJ0@Z
+global ?CompareAligned@AVX2@ehs@@SA_NPEBK0@Z
+global ?CompareAligned@AVX2@ehs@@SA_NPEBJ0@Z
+
+section .text
+    ; Equality is sign-agnostic, so the UInt_64 and SInt_64 overloads share a body.
+    ?CompareUnaligned@AVX2@ehs@@SA_NPEBK0@Z:
+    ?CompareUnaligned@AVX2@ehs@@SA_NPEBJ0@Z:
+        VMOVDQU YMM0, [RCX]
+        VMOVDQU YMM1, [RDX]
+        VPCMPEQQ YMM2, YMM0, YMM1       ; every equal qword becomes all ones
+
+        VPMOVMSKB EAX, YMM2             ; one mask bit per byte, 32 bits in total
+        CMP EAX, 0xFFFFFFFF             ; set only when all 32 bytes matched
+        SETE AL
+
+        VZEROUPPER                      ; avoid AVX/SSE transition stalls in the caller; leaves AL and flags alone
+        RET
+
+    ; Aligned variant: VMOVDQA faults unless both pointers are 32-byte aligned.
+    ?CompareAligned@AVX2@ehs@@SA_NPEBK0@Z:
+    ?CompareAligned@AVX2@ehs@@SA_NPEBJ0@Z:
+        VMOVDQA YMM0, [RCX]
+        VMOVDQA YMM1, [RDX]
+        VPCMPEQQ YMM2, YMM0, YMM1
+
+        VPMOVMSKB EAX, YMM2
+        CMP EAX, 0xFFFFFFFF
+        SETE AL
+
+        VZEROUPPER
+        RET
diff --git a/src/system/AVX512_AARCH64.cpp b/src/system/AVX512_AARCH64.cpp
new file mode 100644
index 0000000..46890be
--- /dev/null
+++ b/src/system/AVX512_AARCH64.cpp
@@ -0,0 +1,34 @@
+#include "ehs/system/AVX512.h"
+
+#include <cstring>
+
+namespace ehs
+{
+    /// Bytes covered by one 512-bit compare (the width of the ZMM loads on x86-64).
+    static constexpr std::size_t avx512BlockSize = 64;
+
+    /// Portable fallback: true when the 64 bytes at "a" and "b" are identical.
+    /// @note std::memcmp has no alignment requirement.
+    bool AVX512::CompareUnaligned(const UInt_64* a, const UInt_64* b)
+    {
+        return std::memcmp(a, b, avx512BlockSize) == 0;
+    }
+
+    /// Signed overload; equality is sign-agnostic, so the same byte compare applies.
+    bool AVX512::CompareUnaligned(const SInt_64* a, const SInt_64* b)
+    {
+        return std::memcmp(a, b, avx512BlockSize) == 0;
+    }
+
+    /// Aligned overload; the fallback itself does not rely on the alignment.
+    bool AVX512::CompareAligned(const UInt_64* a, const UInt_64* b)
+    {
+        return std::memcmp(a, b, avx512BlockSize) == 0;
+    }
+
+    /// Signed, aligned overload.
+    bool AVX512::CompareAligned(const SInt_64* a, const SInt_64* b)
+    {
+        return std::memcmp(a, b, avx512BlockSize) == 0;
+    }
+}
diff --git a/src/system/AVX512_MSVC_AMD64.asm b/src/system/AVX512_MSVC_AMD64.asm
new file mode 100644
index 0000000..49321cb
--- /dev/null
+++ b/src/system/AVX512_MSVC_AMD64.asm
@@ -0,0 +1,43 @@
+; 512-bit (64-byte) equality compares for the Win64 build, NASM syntax.
+; The code reads "a" through RCX and "b" through RDX and returns the bool in AL.
+;
+; NOTE(review): "PEBK"/"PEBJ" decorate const unsigned long* / const long*. If
+; UInt_64/SInt_64 are (unsigned) long long under MSVC the decoration would be
+; "PEB_K"/"PEB_J" instead - confirm against the project's type definitions.
+global ?CompareUnaligned@AVX512@ehs@@SA_NPEBK0@Z
+global ?CompareUnaligned@AVX512@ehs@@SA_NPEBJ0@Z
+global ?CompareAligned@AVX512@ehs@@SA_NPEBK0@Z
+global ?CompareAligned@AVX512@ehs@@SA_NPEBJ0@Z
+
+section .text
+    ; Equality is sign-agnostic, so the UInt_64 and SInt_64 overloads share a body.
+    ?CompareUnaligned@AVX512@ehs@@SA_NPEBK0@Z:
+    ?CompareUnaligned@AVX512@ehs@@SA_NPEBJ0@Z:
+        VMOVDQU64 ZMM0, [RCX]
+        VMOVDQU64 ZMM1, [RDX]
+
+        VPCMPEQQ K1, ZMM0, ZMM1         ; K1[7:0] = one bit per equal qword, upper bits cleared
+
+        ; Do not use KORTESTQ here: it sets CF only when all 64 mask bits are
+        ; ones, which an 8-lane compare can never produce. Test the 8 bits instead.
+        KMOVW EAX, K1
+        CMP AL, 0xFF
+        SETE AL
+
+        VZEROUPPER                      ; avoid AVX/SSE transition stalls in the caller; leaves AL and flags alone
+        RET
+
+    ; Aligned variant: VMOVDQA64 faults unless both pointers are 64-byte aligned.
+    ?CompareAligned@AVX512@ehs@@SA_NPEBK0@Z:
+    ?CompareAligned@AVX512@ehs@@SA_NPEBJ0@Z:
+        VMOVDQA64 ZMM0, [RCX]
+        VMOVDQA64 ZMM1, [RDX]
+
+        VPCMPEQQ K1, ZMM0, ZMM1
+
+        KMOVW EAX, K1                   ; see the note in the unaligned variant
+        CMP AL, 0xFF
+        SETE AL
+
+        VZEROUPPER
+        RET
diff --git a/src/system/CPU.cpp b/src/system/CPU.cpp
index 7cc8c75..935a628 100644
--- a/src/system/CPU.cpp
+++ b/src/system/CPU.cpp
@@ -744,7 +744,7 @@ namespace ehs
     bool CPU::RetrieveAVX512F()
     {
 #ifdef EHS_ARCH_X64
-        return GetExtFeatureBits_1() & 0b00000000000000001000000000000000;
+        return GetExtFeatureBits_1() & 0b00000000000000010000000000000000;
 #else
         return false;
 #endif