73 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
74 #define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
80 #include <immintrin.h>
82 volk_32fc_deinterleave_32f_x2_a_avx(
float* iBuffer,
float* qBuffer,
const lv_32fc_t* complexVector,
83 unsigned int num_points)
85 const float* complexVectorPtr = (
float*)complexVector;
86 float* iBufferPtr = iBuffer;
87 float* qBufferPtr = qBuffer;
89 unsigned int number = 0;
91 const unsigned int eighthPoints = num_points / 8;
92 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
93 for(;number < eighthPoints; number++){
94 cplxValue1 = _mm256_load_ps(complexVectorPtr);
95 complexVectorPtr += 8;
97 cplxValue2 = _mm256_load_ps(complexVectorPtr);
98 complexVectorPtr += 8;
100 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
101 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
104 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
106 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
108 _mm256_store_ps(iBufferPtr, iValue);
109 _mm256_store_ps(qBufferPtr, qValue);
115 number = eighthPoints * 8;
116 for(; number < num_points; number++){
117 *iBufferPtr++ = *complexVectorPtr++;
118 *qBufferPtr++ = *complexVectorPtr++;
124 #include <xmmintrin.h>
127 volk_32fc_deinterleave_32f_x2_a_sse(
float* iBuffer,
float* qBuffer,
const lv_32fc_t* complexVector,
128 unsigned int num_points)
130 const float* complexVectorPtr = (
float*)complexVector;
131 float* iBufferPtr = iBuffer;
132 float* qBufferPtr = qBuffer;
134 unsigned int number = 0;
135 const unsigned int quarterPoints = num_points / 4;
136 __m128 cplxValue1, cplxValue2, iValue, qValue;
137 for(;number < quarterPoints; number++){
138 cplxValue1 = _mm_load_ps(complexVectorPtr);
139 complexVectorPtr += 4;
141 cplxValue2 = _mm_load_ps(complexVectorPtr);
142 complexVectorPtr += 4;
145 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
147 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
149 _mm_store_ps(iBufferPtr, iValue);
150 _mm_store_ps(qBufferPtr, qValue);
156 number = quarterPoints * 4;
157 for(; number < num_points; number++){
158 *iBufferPtr++ = *complexVectorPtr++;
159 *qBufferPtr++ = *complexVectorPtr++;
166 #include <arm_neon.h>
/*
 * Deinterleave a complex vector into separate I (real) and Q (imaginary)
 * float buffers using NEON, four complex samples per vector iteration.
 *
 * iBuffer       - output, receives num_points real parts
 * qBuffer       - output, receives num_points imaginary parts
 * complexVector - input, num_points interleaved (real, imag) samples
 * num_points    - number of complex samples to process
 */
static inline void
volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
                                   float* qBuffer,
                                   const lv_32fc_t* complexVector,
                                   unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;
    /* View the input as a flat float array: re0, im0, re1, im1, ... */
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float* qBufferPtr = qBuffer;

    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 reads 8 floats and splits them by stride 2:
         * val[0] holds the even elements (real), val[1] the odd (imaginary). */
        const float32x4x2_t complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;

        /* Advance the outputs; without this every iteration would overwrite
         * the first four results and the tail loop would start at offset 0. */
        iBufferPtr += 4;
        qBufferPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++;
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
196 #ifdef LV_HAVE_GENERIC
199 volk_32fc_deinterleave_32f_x2_generic(
float* iBuffer,
float* qBuffer,
const lv_32fc_t* complexVector,
200 unsigned int num_points)
202 const float* complexVectorPtr = (
float*)complexVector;
203 float* iBufferPtr = iBuffer;
204 float* qBufferPtr = qBuffer;
206 for(number = 0; number < num_points; number++){
207 *iBufferPtr++ = *complexVectorPtr++;
208 *qBufferPtr++ = *complexVectorPtr++;
/* lv_32fc_t is defined as "float complex" in volk_complex.h (line 56). */