56 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
57 #define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Deinterleaves a 16-bit complex vector into separate I and Q float
 *        vectors, dividing every sample by \p scalar (SSE version).
 *
 * The input is read as interleaved int16 values (I0, Q0, I1, Q1, ...).
 * Four complex points are processed per vector iteration; any remaining
 * points are handled by a scalar tail loop.
 *
 * \param iBuffer       Output: num_points in-phase samples. Written with
 *                      _mm_store_ps, so it must be 16-byte aligned.
 * \param qBuffer       Output: num_points quadrature samples. Written with
 *                      _mm_store_ps, so it must be 16-byte aligned.
 * \param complexVector Input: num_points interleaved 16-bit complex samples.
 * \param scalar        Value each sample is divided by.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
                                         const float scalar, unsigned int num_points)
{
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;

  uint64_t number = 0;
  const uint64_t quarterPoints = num_points / 4;
  __m128 cplxValue1, cplxValue2, iValue, qValue;

  /* The vector path multiplies by the reciprocal instead of dividing. */
  const __m128 invScalar = _mm_set_ps1(1.0f / scalar);
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;

  /* Staging area for the int16 -> float conversion; aligned for _mm_load_ps. */
  __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];

  for(; number < quarterPoints; number++){
    /* Convert four complex points (eight int16 values) to float. */
    floatBuffer[0] = (float)(complexVectorPtr[0]);
    floatBuffer[1] = (float)(complexVectorPtr[1]);
    floatBuffer[2] = (float)(complexVectorPtr[2]);
    floatBuffer[3] = (float)(complexVectorPtr[3]);

    floatBuffer[4] = (float)(complexVectorPtr[4]);
    floatBuffer[5] = (float)(complexVectorPtr[5]);
    floatBuffer[6] = (float)(complexVectorPtr[6]);
    floatBuffer[7] = (float)(complexVectorPtr[7]);

    cplxValue1 = _mm_load_ps(&floatBuffer[0]);
    cplxValue2 = _mm_load_ps(&floatBuffer[4]);

    complexVectorPtr += 8;

    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);

    /* Even lanes of both registers: i1 i2 i3 i4 */
    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
    /* Odd lanes of both registers: q1 q2 q3 q4 */
    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));

    _mm_store_ps(iBufferPtr, iValue);
    _mm_store_ps(qBufferPtr, qValue);

    /* Advance past the four samples just written; without this every
       iteration would overwrite the start of the output buffers. */
    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  /* Scalar tail for the last (num_points % 4) points. */
  number = quarterPoints * 4;
  complexVectorPtr = (const int16_t*)&complexVector[number];
  for(; number < num_points; number++){
    *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
  }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_GENERIC
/*!
 * \brief Deinterleaves a 16-bit complex vector into separate I and Q float
 *        vectors, dividing every sample by \p scalar (portable C version).
 *
 * The input is read as interleaved int16 values (I0, Q0, I1, Q1, ...).
 *
 * \param iBuffer       Output: num_points in-phase samples.
 * \param qBuffer       Output: num_points quadrature samples.
 * \param complexVector Input: num_points interleaved 16-bit complex samples.
 * \param scalar        Value each sample is divided by.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_32f_x2_generic(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
                                           const float scalar, unsigned int num_points)
{
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    /* Each complex point contributes one I value followed by one Q value. */
    *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
  }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*!
 * \brief Deinterleaves a 16-bit complex vector into separate I and Q float
 *        vectors, dividing every sample by \p scalar (NEON version).
 *
 * Four complex points are processed per vector iteration: vld2_s16 loads
 * eight int16 values and splits them into an I lane group and a Q lane
 * group. Any remaining points are handled by a scalar tail loop.
 *
 * \param iBuffer       Output: num_points in-phase samples.
 * \param qBuffer       Output: num_points quadrature samples.
 * \param complexVector Input: num_points interleaved 16-bit complex samples.
 * \param scalar        Value each sample is divided by.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_32f_x2_neon(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
                                        const float scalar, unsigned int num_points)
{
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  float* iBufferPtr = iBuffer;
  float* qBufferPtr = qBuffer;
  /* Number of full four-point groups. */
  unsigned int quarter_points = num_points / 4;
  unsigned int number;

  /* The vector path multiplies by the reciprocal instead of dividing. */
  float iScalar = 1.f/scalar;
  float32x4_t invScalar;
  invScalar = vld1q_dup_f32(&iScalar);

  int16x4x2_t complexInput_s16;
  int32x4x2_t complexInput_s32;
  float32x4x2_t complexFloat;

  for(number = 0; number < quarter_points; number++){
    /* De-interleaving load: val[0] = I0..I3, val[1] = Q0..Q3 */
    complexInput_s16 = vld2_s16(complexVectorPtr);
    /* Widen int16 -> int32, then convert to float. */
    complexInput_s32.val[0] = vmovl_s16(complexInput_s16.val[0]);
    complexInput_s32.val[1] = vmovl_s16(complexInput_s16.val[1]);
    complexFloat.val[0] = vcvtq_f32_s32(complexInput_s32.val[0]);
    complexFloat.val[1] = vcvtq_f32_s32(complexInput_s32.val[1]);
    complexFloat.val[0] = vmulq_f32(complexFloat.val[0], invScalar);
    complexFloat.val[1] = vmulq_f32(complexFloat.val[1], invScalar);
    vst1q_f32(iBufferPtr, complexFloat.val[0]);
    vst1q_f32(qBufferPtr, complexFloat.val[1]);
    complexVectorPtr += 8;
    /* Advance past the four samples just written; without this every
       iteration would overwrite the start of the output buffers. */
    iBufferPtr += 4;
    qBufferPtr += 4;
  }

  /* Scalar tail for the last (num_points % 4) points. */
  for(number = quarter_points*4; number < num_points; number++){
    *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
    *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
  }
}
#endif /* LV_HAVE_NEON */
#ifdef LV_HAVE_ORC
/*!
 * \brief ORC implementation of the deinterleave kernel; defined outside
 *        this header.
 */
extern void
volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
                                              const float scalar, unsigned int num_points);

/*!
 * \brief Thin wrapper that forwards all arguments unchanged to
 *        volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl.
 *
 * \param iBuffer       Output buffer for the in-phase samples.
 * \param qBuffer       Output buffer for the quadrature samples.
 * \param complexVector Input vector of interleaved 16-bit complex samples.
 * \param scalar        Scaling value passed through to the implementation.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_deinterleave_32f_x2_u_orc(float* iBuffer, float* qBuffer, const lv_16sc_t* complexVector,
                                         const float scalar, unsigned int num_points)
{
  volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(iBuffer, qBuffer, complexVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC */
/*
 * Cross-references from the generated documentation:
 *   lv_16sc_t              : short complex      (volk_complex.h:53)
 *   int16_t                : signed short       (stdint.h:76)
 *   uint64_t               : unsigned __int64   (stdint.h:90)
 *   __VOLK_ATTR_ALIGNED(x) : macro              (volk_common.h:27)
 */