GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32fc_magnitude_squared_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32fc_magnitude_squared_32f
25  *
26  * \b Overview
27  *
28  * Calculates the magnitude squared of the complexVector and stores
29  * the results in the magnitudeVector.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32fc_magnitude_squared_32f(float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li complexVector: The complex input vector.
38  * \li num_points: The number of samples.
39  *
40  * \b Outputs
41  * \li magnitudeVector: The output vector of squared magnitudes (one float per input sample).
42  *
43  * \b Example
44  * Calculate the magnitude squared of \f$x^2 + x\f$ for points around the unit circle.
45  * \code
46  * int N = 10;
47  * unsigned int alignment = volk_get_alignment();
48  * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
49  * float* magnitude = (float*)volk_malloc(sizeof(float)*N, alignment);
50  *
51  * for(unsigned int ii = 0; ii < N/2; ++ii){
52  * float real = 2.f * ((float)ii / (float)N) - 1.f;
53  * float imag = std::sqrt(1.f - real * real);
54  * in[ii] = lv_cmake(real, imag);
55  * in[ii] = in[ii] * in[ii] + in[ii];
56  * in[N-ii-1] = lv_cmake(real, imag);
57  * in[N-ii-1] = in[N-ii-1] * in[N-ii-1] + in[N-ii-1];
58  * }
59  *
60  * volk_32fc_magnitude_squared_32f(magnitude, in, N);
61  *
62  * for(unsigned int ii = 0; ii < N; ++ii){
63  * printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
64  * }
65  *
66  * volk_free(in);
67  * volk_free(magnitude);
68  * \endcode
69  */
70 
71 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
72 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 #include <math.h>
77 
78 #ifdef LV_HAVE_AVX
79 #include <immintrin.h>
80 
81 static inline void
82 volk_32fc_magnitude_squared_32f_u_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
83  unsigned int num_points)
84 {
85  unsigned int number = 0;
86  const unsigned int eighthPoints = num_points / 8;
87 
88  const float* complexVectorPtr = (float*)complexVector;
89  float* magnitudeVectorPtr = magnitudeVector;
90 
91  __m256 cplxValue1, cplxValue2, complex1, complex2, result;
92  for(;number < eighthPoints; number++){
93  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
94  complexVectorPtr += 8;
95 
96  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
97  complexVectorPtr += 8;
98 
99  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
100  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
101 
102  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
103  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
104 
105  result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
106 
107  _mm256_storeu_ps(magnitudeVectorPtr, result);
108  magnitudeVectorPtr += 8;
109  }
110 
111  number = eighthPoints * 8;
112  for(; number < num_points; number++){
113  float val1Real = *complexVectorPtr++;
114  float val1Imag = *complexVectorPtr++;
115  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
116  }
117 }
118 #endif /* LV_HAVE_AVX */
119 
120 
121 #ifdef LV_HAVE_SSE3
122 #include <pmmintrin.h>
123 
124 static inline void
125 volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
126  unsigned int num_points)
127 {
128  unsigned int number = 0;
129  const unsigned int quarterPoints = num_points / 4;
130 
131  const float* complexVectorPtr = (float*)complexVector;
132  float* magnitudeVectorPtr = magnitudeVector;
133 
134  __m128 cplxValue1, cplxValue2, result;
135  for(;number < quarterPoints; number++){
136  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
137  complexVectorPtr += 4;
138 
139  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
140  complexVectorPtr += 4;
141 
142  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
143  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
144 
145  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
146 
147  _mm_storeu_ps(magnitudeVectorPtr, result);
148  magnitudeVectorPtr += 4;
149  }
150 
151  number = quarterPoints * 4;
152  for(; number < num_points; number++){
153  float val1Real = *complexVectorPtr++;
154  float val1Imag = *complexVectorPtr++;
155  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
156  }
157 }
158 #endif /* LV_HAVE_SSE3 */
159 
160 
161 #ifdef LV_HAVE_SSE
162 #include <xmmintrin.h>
163 
164 static inline void
165 volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector, const lv_32fc_t* complexVector,
166  unsigned int num_points)
167 {
168  unsigned int number = 0;
169  const unsigned int quarterPoints = num_points / 4;
170 
171  const float* complexVectorPtr = (float*)complexVector;
172  float* magnitudeVectorPtr = magnitudeVector;
173 
174  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
175  for(;number < quarterPoints; number++){
176  cplxValue1 = _mm_loadu_ps(complexVectorPtr);
177  complexVectorPtr += 4;
178 
179  cplxValue2 = _mm_loadu_ps(complexVectorPtr);
180  complexVectorPtr += 4;
181 
182  // Arrange in i1i2i3i4 format
183  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
184  // Arrange in q1q2q3q4 format
185  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
186 
187  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
188  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
189 
190  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
191 
192  _mm_storeu_ps(magnitudeVectorPtr, result);
193  magnitudeVectorPtr += 4;
194  }
195 
196  number = quarterPoints * 4;
197  for(; number < num_points; number++){
198  float val1Real = *complexVectorPtr++;
199  float val1Imag = *complexVectorPtr++;
200  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
201  }
202 }
203 #endif /* LV_HAVE_SSE */
204 
205 
206 #ifdef LV_HAVE_GENERIC
207 
208 static inline void
209 volk_32fc_magnitude_squared_32f_generic(float* magnitudeVector, const lv_32fc_t* complexVector,
210  unsigned int num_points)
211 {
212  const float* complexVectorPtr = (float*)complexVector;
213  float* magnitudeVectorPtr = magnitudeVector;
214  unsigned int number = 0;
215  for(number = 0; number < num_points; number++){
216  const float real = *complexVectorPtr++;
217  const float imag = *complexVectorPtr++;
218  *magnitudeVectorPtr++ = (real*real) + (imag*imag);
219  }
220 }
221 #endif /* LV_HAVE_GENERIC */
222 
223 
224 
225 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
226 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
227 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
228 
229 #include <inttypes.h>
230 #include <stdio.h>
231 #include <math.h>
232 
233 #ifdef LV_HAVE_AVX
234 #include <immintrin.h>
235 
236 static inline void
237 volk_32fc_magnitude_squared_32f_a_avx(float* magnitudeVector, const lv_32fc_t* complexVector,
238  unsigned int num_points)
239 {
240  unsigned int number = 0;
241  const unsigned int eighthPoints = num_points / 8;
242 
243  const float* complexVectorPtr = (float*)complexVector;
244  float* magnitudeVectorPtr = magnitudeVector;
245 
246  __m256 cplxValue1, cplxValue2, complex1, complex2, result;
247  for(;number < eighthPoints; number++){
248  cplxValue1 = _mm256_load_ps(complexVectorPtr);
249  complexVectorPtr += 8;
250 
251  cplxValue2 = _mm256_load_ps(complexVectorPtr);
252  complexVectorPtr += 8;
253 
254  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
255  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
256 
257  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
258  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
259 
260  result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
261 
262  _mm256_store_ps(magnitudeVectorPtr, result);
263  magnitudeVectorPtr += 8;
264  }
265 
266  number = eighthPoints * 8;
267  for(; number < num_points; number++){
268  float val1Real = *complexVectorPtr++;
269  float val1Imag = *complexVectorPtr++;
270  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
271  }
272 }
273 #endif /* LV_HAVE_AVX */
274 
275 
276 #ifdef LV_HAVE_SSE3
277 #include <pmmintrin.h>
278 
279 static inline void
280 volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector, const lv_32fc_t* complexVector,
281  unsigned int num_points)
282 {
283  unsigned int number = 0;
284  const unsigned int quarterPoints = num_points / 4;
285 
286  const float* complexVectorPtr = (float*)complexVector;
287  float* magnitudeVectorPtr = magnitudeVector;
288 
289  __m128 cplxValue1, cplxValue2, result;
290  for(;number < quarterPoints; number++){
291  cplxValue1 = _mm_load_ps(complexVectorPtr);
292  complexVectorPtr += 4;
293 
294  cplxValue2 = _mm_load_ps(complexVectorPtr);
295  complexVectorPtr += 4;
296 
297  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
298  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
299 
300  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
301 
302  _mm_store_ps(magnitudeVectorPtr, result);
303  magnitudeVectorPtr += 4;
304  }
305 
306  number = quarterPoints * 4;
307  for(; number < num_points; number++){
308  float val1Real = *complexVectorPtr++;
309  float val1Imag = *complexVectorPtr++;
310  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
311  }
312 }
313 #endif /* LV_HAVE_SSE3 */
314 
315 
316 #ifdef LV_HAVE_SSE
317 #include <xmmintrin.h>
318 
319 static inline void
320 volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector, const lv_32fc_t* complexVector,
321  unsigned int num_points)
322 {
323  unsigned int number = 0;
324  const unsigned int quarterPoints = num_points / 4;
325 
326  const float* complexVectorPtr = (float*)complexVector;
327  float* magnitudeVectorPtr = magnitudeVector;
328 
329  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
330  for(;number < quarterPoints; number++){
331  cplxValue1 = _mm_load_ps(complexVectorPtr);
332  complexVectorPtr += 4;
333 
334  cplxValue2 = _mm_load_ps(complexVectorPtr);
335  complexVectorPtr += 4;
336 
337  // Arrange in i1i2i3i4 format
338  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
339  // Arrange in q1q2q3q4 format
340  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
341 
342  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
343  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
344 
345  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
346 
347  _mm_store_ps(magnitudeVectorPtr, result);
348  magnitudeVectorPtr += 4;
349  }
350 
351  number = quarterPoints * 4;
352  for(; number < num_points; number++){
353  float val1Real = *complexVectorPtr++;
354  float val1Imag = *complexVectorPtr++;
355  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
356  }
357 }
358 #endif /* LV_HAVE_SSE */
359 
360 
361 #ifdef LV_HAVE_NEON
362 #include <arm_neon.h>
363 
364 static inline void
365 volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector, const lv_32fc_t* complexVector,
366  unsigned int num_points)
367 {
368  unsigned int number = 0;
369  const unsigned int quarterPoints = num_points / 4;
370 
371  const float* complexVectorPtr = (float*)complexVector;
372  float* magnitudeVectorPtr = magnitudeVector;
373 
374  float32x4x2_t cmplx_val;
375  float32x4_t result;
376  for(;number < quarterPoints; number++){
377  cmplx_val = vld2q_f32(complexVectorPtr);
378  complexVectorPtr += 8;
379 
380  cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]); // Square the values
381  cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]); // Square the values
382 
383  result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]); // Add the I2 and Q2 values
384 
385  vst1q_f32(magnitudeVectorPtr, result);
386  magnitudeVectorPtr += 4;
387  }
388 
389  number = quarterPoints * 4;
390  for(; number < num_points; number++){
391  float val1Real = *complexVectorPtr++;
392  float val1Imag = *complexVectorPtr++;
393  *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
394  }
395 }
396 #endif /* LV_HAVE_NEON */
397 
398 
399 #ifdef LV_HAVE_GENERIC
400 
401 static inline void
402 volk_32fc_magnitude_squared_32f_a_generic(float* magnitudeVector, const lv_32fc_t* complexVector,
403  unsigned int num_points)
404 {
405  const float* complexVectorPtr = (float*)complexVector;
406  float* magnitudeVectorPtr = magnitudeVector;
407  unsigned int number = 0;
408  for(number = 0; number < num_points; number++){
409  const float real = *complexVectorPtr++;
410  const float imag = *complexVectorPtr++;
411  *magnitudeVectorPtr++ = (real*real) + (imag*imag);
412  }
413 }
414 #endif /* LV_HAVE_GENERIC */
415 
416 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */
float complex lv_32fc_t
Definition: volk_complex.h:56