From 7ac1274e2c5a14402992926839ff967662ad53b9 Mon Sep 17 00:00:00 2001
From: Evy Garden <evysgarden@protonmail.com>
Date: Wed, 5 Oct 2022 14:32:38 +0200
Subject: [PATCH] updated intrinsics

---
Review notes (free-text area after the separator; not part of the commit):

* Layout: this patch was recovered from a whitespace-collapsed copy. Line
  breaks and blank context lines were rebuilt from the hunk counts, and the
  indentation inside the hunks is a guess. Verify against the target file,
  or apply with whitespace checks relaxed.

* Hunks 1-2: add <cstdint> and the alias `imm` (uint8_t) inside namespace
  pew; `imm` is the type of the new `mask` template parameter of dot().
  uint8_t is written unqualified, but <cstdint> only guarantees
  std::uint8_t, so this relies on the implementation also declaring the
  global name.

* dot(): the removed code multiplied lane-wise, stored to a float[4] and
  returned the float arr[0] + arr[1] + arr[2]. The new code returns the
  result of _mm_dp_ps(left, right, mask) through an `auto` return type, so
  the return type is no longer float.
  With the default mask 0xff all four lane products are summed and the sum
  is written to every output lane. The result therefore differs from the
  old one whenever the fourth-lane product is non-zero; 0x7f would sum
  only the first three lanes.
  TODO confirm that callers expect a vector result and that the fourth
  lane is always zero.

* abs(): the removed code applied std::abs to arr[0..2] and rebuilt the
  register with the three-argument set(). The new code builds a per-lane
  mask with _mm_cmplt_ps(reg, set(0)) and uses _mm_blendv_ps to take
  sign(reg) in the lanes where the mask is set, and reg elsewhere.
  sign() and the one-argument set() are not shown in this patch; the
  result is an absolute value only if sign(reg) is a lane-wise negation.
  TODO confirm.
  A lane holding -0.0f does not compare less than zero, so it stays -0.0f.
  The parameter rename abs -> reg stops the parameter from shadowing the
  function name.

* _mm_dp_ps and _mm_blendv_ps are SSE4.1 intrinsics; <smmintrin.h> is
  already included (see the context lines of hunk 1).

 src/pew/vector/intrinsic.hpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/pew/vector/intrinsic.hpp b/src/pew/vector/intrinsic.hpp
index 8891a75..43c0dd3 100644
--- a/src/pew/vector/intrinsic.hpp
+++ b/src/pew/vector/intrinsic.hpp
@@ -1,6 +1,7 @@
 #ifndef PEW_INTRINSIC
 #define PEW_INTRINSIC
 
+#include <cstdint>
 #include <emmintrin.h>
 #include <immintrin.h>
 #include <smmintrin.h>
@@ -8,6 +9,7 @@
 #include <xmmintrin.h>
 
 namespace pew {
+    using imm = uint8_t;
     template<typename T>
     struct intrinsic;
 
@@ -187,11 +189,10 @@ namespace pew {
             return _mm_sub_ps(left, right);
         }
 
-        inline static float
+        template<imm mask = 0xff>
+        inline static auto
         dot(const type& left, const type& right) {
-            alignas(alignment) float arr[4];
-            store(arr, mul(left, right));
-            return arr[0] + arr[1] + arr[2];
+            return _mm_dp_ps(left, right, mask);
         }
 
         inline static auto
@@ -200,10 +201,9 @@ namespace pew {
         }
 
         inline static auto
-        abs(const type& abs) {
-            alignas(alignment) float arr[4];
-            store(arr, abs);
-            return set(std::abs(arr[0]), std::abs(arr[1]), std::abs(arr[2]));
+        abs(const type& reg) {
+            auto mask = _mm_cmplt_ps(reg, set(0));
+            return _mm_blendv_ps(reg, sign(reg), mask);
         }
     };
 } // namespace pew
-- 
GitLab