54 #ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a_H
55 #define INCLUDED_volk_16ic_deinterleave_real_8i_a_H
61 #include <tmmintrin.h>
64 volk_16ic_deinterleave_real_8i_a_ssse3(
int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
66 unsigned int number = 0;
68 int8_t* iBufferPtr = iBuffer;
69 __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
70 __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
71 __m128i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;
73 unsigned int sixteenthPoints = num_points / 16;
75 for(number = 0; number < sixteenthPoints; number++){
76 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
77 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
79 complexVal3 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
80 complexVal4 = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
82 complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
83 complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
85 complexVal1 = _mm_or_si128(complexVal1, complexVal2);
87 complexVal3 = _mm_shuffle_epi8(complexVal3, iMoveMask1);
88 complexVal4 = _mm_shuffle_epi8(complexVal4, iMoveMask2);
90 complexVal3 = _mm_or_si128(complexVal3, complexVal4);
93 complexVal1 = _mm_srai_epi16(complexVal1, 8);
94 complexVal3 = _mm_srai_epi16(complexVal3, 8);
96 iOutputVal = _mm_packs_epi16(complexVal1, complexVal3);
98 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
103 number = sixteenthPoints * 16;
105 for(; number < num_points; number++){
106 *iBufferPtr++ = ((
int8_t)(*int16ComplexVectorPtr++ >> 8));
107 int16ComplexVectorPtr++;
112 #ifdef LV_HAVE_GENERIC
115 volk_16ic_deinterleave_real_8i_generic(
int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
117 unsigned int number = 0;
119 int8_t* iBufferPtr = iBuffer;
120 for(number = 0; number < num_points; number++){
121 *iBufferPtr++ = ((
int8_t)(*complexVectorPtr++ >> 8));
128 #include <arm_neon.h>
131 volk_16ic_deinterleave_real_8i_neon(
int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
134 int8_t* iBufferPtr = iBuffer;
135 unsigned int eighth_points = num_points / 8;
138 int16x8x2_t complexInput;
140 for(number = 0; number < eighth_points; number++){
141 complexInput = vld2q_s16(complexVectorPtr);
142 realOutput = vshrn_n_s16(complexInput.val[0], 8);
143 vst1_s8(iBufferPtr, realOutput);
144 complexVectorPtr += 16;
148 for(number = eighth_points*8; number < num_points; number++){
149 *iBufferPtr++ = ((
int8_t)(*complexVectorPtr++ >> 8));
158 volk_16ic_deinterleave_real_8i_a_orc_impl(
int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points);
161 volk_16ic_deinterleave_real_8i_u_orc(
int8_t* iBuffer,
const lv_16sc_t* complexVector,
unsigned int num_points)
163 volk_16ic_deinterleave_real_8i_a_orc_impl(iBuffer, complexVector, num_points);
/*
 * Referenced types:
 *   lv_16sc_t - short complex  (Definition: volk_complex.h:53)
 *   int16_t   - signed short   (Definition: stdint.h:76)
 *   int8_t    - signed char    (Definition: stdint.h:75)
 */