71 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
79 #include <immintrin.h>
82 volk_32fc_magnitude_squared_32f_u_avx(
float* magnitudeVector,
const lv_32fc_t* complexVector,
83 unsigned int num_points)
85 unsigned int number = 0;
86 const unsigned int eighthPoints = num_points / 8;
88 const float* complexVectorPtr = (
float*)complexVector;
89 float* magnitudeVectorPtr = magnitudeVector;
91 __m256 cplxValue1, cplxValue2, complex1, complex2, result;
92 for(;number < eighthPoints; number++){
93 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
94 complexVectorPtr += 8;
96 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
97 complexVectorPtr += 8;
99 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
100 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
102 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
103 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
105 result = _mm256_hadd_ps(complex1, complex2);
107 _mm256_storeu_ps(magnitudeVectorPtr, result);
108 magnitudeVectorPtr += 8;
111 number = eighthPoints * 8;
112 for(; number < num_points; number++){
113 float val1Real = *complexVectorPtr++;
114 float val1Imag = *complexVectorPtr++;
115 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
122 #include <pmmintrin.h>
125 volk_32fc_magnitude_squared_32f_u_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
126 unsigned int num_points)
128 unsigned int number = 0;
129 const unsigned int quarterPoints = num_points / 4;
131 const float* complexVectorPtr = (
float*)complexVector;
132 float* magnitudeVectorPtr = magnitudeVector;
134 __m128 cplxValue1, cplxValue2, result;
135 for(;number < quarterPoints; number++){
136 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
137 complexVectorPtr += 4;
139 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
140 complexVectorPtr += 4;
142 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
143 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
145 result = _mm_hadd_ps(cplxValue1, cplxValue2);
147 _mm_storeu_ps(magnitudeVectorPtr, result);
148 magnitudeVectorPtr += 4;
151 number = quarterPoints * 4;
152 for(; number < num_points; number++){
153 float val1Real = *complexVectorPtr++;
154 float val1Imag = *complexVectorPtr++;
155 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
162 #include <xmmintrin.h>
165 volk_32fc_magnitude_squared_32f_u_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
166 unsigned int num_points)
168 unsigned int number = 0;
169 const unsigned int quarterPoints = num_points / 4;
171 const float* complexVectorPtr = (
float*)complexVector;
172 float* magnitudeVectorPtr = magnitudeVector;
174 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
175 for(;number < quarterPoints; number++){
176 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
177 complexVectorPtr += 4;
179 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
180 complexVectorPtr += 4;
183 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
185 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
187 iValue = _mm_mul_ps(iValue, iValue);
188 qValue = _mm_mul_ps(qValue, qValue);
190 result = _mm_add_ps(iValue, qValue);
192 _mm_storeu_ps(magnitudeVectorPtr, result);
193 magnitudeVectorPtr += 4;
196 number = quarterPoints * 4;
197 for(; number < num_points; number++){
198 float val1Real = *complexVectorPtr++;
199 float val1Imag = *complexVectorPtr++;
200 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
206 #ifdef LV_HAVE_GENERIC
209 volk_32fc_magnitude_squared_32f_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
210 unsigned int num_points)
212 const float* complexVectorPtr = (
float*)complexVector;
213 float* magnitudeVectorPtr = magnitudeVector;
214 unsigned int number = 0;
215 for(number = 0; number < num_points; number++){
216 const float real = *complexVectorPtr++;
217 const float imag = *complexVectorPtr++;
218 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
226 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
227 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
234 #include <immintrin.h>
237 volk_32fc_magnitude_squared_32f_a_avx(
float* magnitudeVector,
const lv_32fc_t* complexVector,
238 unsigned int num_points)
240 unsigned int number = 0;
241 const unsigned int eighthPoints = num_points / 8;
243 const float* complexVectorPtr = (
float*)complexVector;
244 float* magnitudeVectorPtr = magnitudeVector;
246 __m256 cplxValue1, cplxValue2, complex1, complex2, result;
247 for(;number < eighthPoints; number++){
248 cplxValue1 = _mm256_load_ps(complexVectorPtr);
249 complexVectorPtr += 8;
251 cplxValue2 = _mm256_load_ps(complexVectorPtr);
252 complexVectorPtr += 8;
254 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
255 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
257 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
258 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
260 result = _mm256_hadd_ps(complex1, complex2);
262 _mm256_store_ps(magnitudeVectorPtr, result);
263 magnitudeVectorPtr += 8;
266 number = eighthPoints * 8;
267 for(; number < num_points; number++){
268 float val1Real = *complexVectorPtr++;
269 float val1Imag = *complexVectorPtr++;
270 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
277 #include <pmmintrin.h>
280 volk_32fc_magnitude_squared_32f_a_sse3(
float* magnitudeVector,
const lv_32fc_t* complexVector,
281 unsigned int num_points)
283 unsigned int number = 0;
284 const unsigned int quarterPoints = num_points / 4;
286 const float* complexVectorPtr = (
float*)complexVector;
287 float* magnitudeVectorPtr = magnitudeVector;
289 __m128 cplxValue1, cplxValue2, result;
290 for(;number < quarterPoints; number++){
291 cplxValue1 = _mm_load_ps(complexVectorPtr);
292 complexVectorPtr += 4;
294 cplxValue2 = _mm_load_ps(complexVectorPtr);
295 complexVectorPtr += 4;
297 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
298 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
300 result = _mm_hadd_ps(cplxValue1, cplxValue2);
302 _mm_store_ps(magnitudeVectorPtr, result);
303 magnitudeVectorPtr += 4;
306 number = quarterPoints * 4;
307 for(; number < num_points; number++){
308 float val1Real = *complexVectorPtr++;
309 float val1Imag = *complexVectorPtr++;
310 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
317 #include <xmmintrin.h>
320 volk_32fc_magnitude_squared_32f_a_sse(
float* magnitudeVector,
const lv_32fc_t* complexVector,
321 unsigned int num_points)
323 unsigned int number = 0;
324 const unsigned int quarterPoints = num_points / 4;
326 const float* complexVectorPtr = (
float*)complexVector;
327 float* magnitudeVectorPtr = magnitudeVector;
329 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
330 for(;number < quarterPoints; number++){
331 cplxValue1 = _mm_load_ps(complexVectorPtr);
332 complexVectorPtr += 4;
334 cplxValue2 = _mm_load_ps(complexVectorPtr);
335 complexVectorPtr += 4;
338 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
340 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
342 iValue = _mm_mul_ps(iValue, iValue);
343 qValue = _mm_mul_ps(qValue, qValue);
345 result = _mm_add_ps(iValue, qValue);
347 _mm_store_ps(magnitudeVectorPtr, result);
348 magnitudeVectorPtr += 4;
351 number = quarterPoints * 4;
352 for(; number < num_points; number++){
353 float val1Real = *complexVectorPtr++;
354 float val1Imag = *complexVectorPtr++;
355 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
362 #include <arm_neon.h>
/*!
 * \brief Squared magnitude of each complex sample, ARM NEON.
 *
 * For every i in [0, num_points):
 *   magnitudeVector[i] = re(complexVector[i])^2 + im(complexVector[i])^2
 *
 * \param magnitudeVector Output buffer; num_points floats are written.
 * \param complexVector   Input buffer; num_points complex samples are read
 *                        as interleaved (real, imag) float pairs.
 * \param num_points      Number of complex samples to process.
 */
static inline void
volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
                                     const lv_32fc_t* complexVector,
                                     unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  /* Keep the const qualifier when viewing the samples as raw floats. */
  const float* complexVectorPtr = (const float*)complexVector;
  float* magnitudeVectorPtr = magnitudeVector;

  float32x4x2_t cmplx_val;
  float32x4_t result; /* holds the four magnitudes of one iteration */

  for (; number < quarterPoints; number++) {
    /*
     * vld2q_f32 loads 8 floats and de-interleaves them: val[0] receives the
     * four real parts, val[1] the four imaginary parts.
     */
    cmplx_val = vld2q_f32(complexVectorPtr);
    complexVectorPtr += 8;

    cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]);
    cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]);

    result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]);

    vst1q_f32(magnitudeVectorPtr, result);
    magnitudeVectorPtr += 4;
  }

  /* Scalar tail for the remaining num_points % 4 samples. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    float val1Real = *complexVectorPtr++;
    float val1Imag = *complexVectorPtr++;
    *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
  }
}
399 #ifdef LV_HAVE_GENERIC
402 volk_32fc_magnitude_squared_32f_a_generic(
float* magnitudeVector,
const lv_32fc_t* complexVector,
403 unsigned int num_points)
405 const float* complexVectorPtr = (
float*)complexVector;
406 float* magnitudeVectorPtr = magnitudeVector;
407 unsigned int number = 0;
408 for(number = 0; number < num_points; number++){
409 const float real = *complexVectorPtr++;
410 const float imag = *complexVectorPtr++;
411 *magnitudeVectorPtr++ = (real*real) + (imag*imag);
/* Cross-reference: lv_32fc_t is `float complex` (definition: volk_complex.h:56). */