Skip to content
Snippets Groups Projects
Commit 7ac1274e authored by Evy Storozhenko's avatar Evy Storozhenko
Browse files

updated intrinsics

parent c222159c
No related branches found
No related tags found
No related merge requests found
#ifndef PEW_INTRINSIC #ifndef PEW_INTRINSIC
#define PEW_INTRINSIC #define PEW_INTRINSIC
#include <cstdint>
#include <emmintrin.h> #include <emmintrin.h>
#include <immintrin.h> #include <immintrin.h>
#include <smmintrin.h> #include <smmintrin.h>
...@@ -8,6 +9,7 @@ ...@@ -8,6 +9,7 @@
#include <xmmintrin.h> #include <xmmintrin.h>
namespace pew { namespace pew {
using imm = uint8_t;
template<typename T> template<typename T>
struct intrinsic; struct intrinsic;
...@@ -187,11 +189,10 @@ namespace pew { ...@@ -187,11 +189,10 @@ namespace pew {
return _mm_sub_ps(left, right); return _mm_sub_ps(left, right);
} }
inline static float template<imm mask = 0xff>
inline static auto
dot(const type& left, const type& right) { dot(const type& left, const type& right) {
alignas(alignment) float arr[4]; return _mm_dp_ps(left, right, mask);
store(arr, mul(left, right));
return arr[0] + arr[1] + arr[2];
} }
inline static auto inline static auto
...@@ -200,10 +201,9 @@ namespace pew { ...@@ -200,10 +201,9 @@ namespace pew {
} }
inline static auto inline static auto
abs(const type& abs) { abs(const type& reg) {
alignas(alignment) float arr[4]; auto mask = _mm_cmplt_ps(reg, set(0));
store(arr, abs); return _mm_blendv_ps(reg, sign(reg), mask);
return set(std::abs(arr[0]), std::abs(arr[1]), std::abs(arr[2]));
} }
}; };
} // namespace pew } // namespace pew
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment