diff --git a/src/pew/vector/intrinsic.hpp b/src/pew/vector/intrinsic.hpp index 8891a75402b2f2dd7e0a88b693629677d2aa0bbe..43c0dd3b82893d511827031cad61aa1698d5fb62 100644 --- a/src/pew/vector/intrinsic.hpp +++ b/src/pew/vector/intrinsic.hpp @@ -1,6 +1,7 @@ #ifndef PEW_INTRINSIC #define PEW_INTRINSIC +#include <cstdint> #include <emmintrin.h> #include <immintrin.h> #include <smmintrin.h> @@ -8,6 +9,7 @@ #include <xmmintrin.h> namespace pew { + using imm = uint8_t; template<typename T> struct intrinsic; @@ -187,11 +189,10 @@ namespace pew { return _mm_sub_ps(left, right); } - inline static float + template<imm mask = 0xff> + inline static auto dot(const type& left, const type& right) { - alignas(alignment) float arr[4]; - store(arr, mul(left, right)); - return arr[0] + arr[1] + arr[2]; + return _mm_dp_ps(left, right, mask); } inline static auto @@ -200,10 +201,9 @@ namespace pew { } inline static auto - abs(const type& abs) { - alignas(alignment) float arr[4]; - store(arr, abs); - return set(std::abs(arr[0]), std::abs(arr[1]), std::abs(arr[2])); + abs(const type& reg) { + auto mask = _mm_cmplt_ps(reg, set(0)); + return _mm_blendv_ps(reg, sign(reg), mask); } }; } // namespace pew