GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_16ic_magnitude_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_16ic_magnitude_16i
25  *
26  * \b Overview
27  *
28  * Computes the magnitude of the complexVector and stores the results
29  * in the magnitudeVector.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_16ic_magnitude_16i(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li complexVector: The complex input vector.
38  * \li num_points: The number of samples.
39  *
40  * \b Outputs
41  * \li magnitudeVector: The magnitude of the complex values.
42  *
43  * \b Example
44  * \code
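45  * unsigned int N = 10000;
46  * lv_16sc_t* x = (lv_16sc_t*)volk_malloc(N * sizeof(lv_16sc_t), volk_get_alignment());
47  * int16_t* t = (int16_t*)volk_malloc(N * sizeof(int16_t), volk_get_alignment());
48  * volk_16ic_magnitude_16i(t, x, N); // x must already hold the N input samples
49  * volk_free(x);
50  * volk_free(t);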
51  * \endcode
52  */
53 
54 #ifndef INCLUDED_volk_16ic_magnitude_16i_a_H
55 #define INCLUDED_volk_16ic_magnitude_16i_a_H
56 
57 #include <volk/volk_common.h>
58 #include <inttypes.h>
59 #include <stdio.h>
60 #include <math.h>
61 
62 #ifdef LV_HAVE_SSE3
63 #include <pmmintrin.h>
64 
65 static inline void
66 volk_16ic_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points)
67 {
68  unsigned int number = 0;
69  const unsigned int quarterPoints = num_points / 4;
70 
71  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
72  int16_t* magnitudeVectorPtr = magnitudeVector;
73 
74  __m128 vScalar = _mm_set_ps1(32768.0);
75  __m128 invScalar = _mm_set_ps1(1.0/32768.0);
76 
77  __m128 cplxValue1, cplxValue2, result;
78 
79  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
80  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
81 
82  for(;number < quarterPoints; number++){
83 
84  inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
85  inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
86  inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
87  inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
88 
89  inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
90  inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
91  inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
92  inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
93 
94  cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
95  cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
96 
97  complexVectorPtr += 8;
98 
99  cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
100  cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
101 
102  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
103  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
104 
105  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
106 
107  result = _mm_sqrt_ps(result); // Square root the values
108 
109  result = _mm_mul_ps(result, vScalar); // Scale the results
110 
111  _mm_store_ps(outputFloatBuffer, result);
112  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
113  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
114  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
115  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
116  }
117 
118  number = quarterPoints * 4;
119  magnitudeVectorPtr = &magnitudeVector[number];
120  complexVectorPtr = (const int16_t*)&complexVector[number];
121  for(; number < num_points; number++){
122  const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
123  const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
124  const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
125  *magnitudeVectorPtr++ = (int16_t)(val1Result);
126  }
127 }
128 #endif /* LV_HAVE_SSE3 */
129 
130 #ifdef LV_HAVE_SSE
131 #include <xmmintrin.h>
132 
133 static inline void
134 volk_16ic_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points)
135 {
136  unsigned int number = 0;
137  const unsigned int quarterPoints = num_points / 4;
138 
139  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
140  int16_t* magnitudeVectorPtr = magnitudeVector;
141 
142  __m128 vScalar = _mm_set_ps1(32768.0);
143  __m128 invScalar = _mm_set_ps1(1.0/32768.0);
144 
145  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
146 
147  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[4];
148  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
149 
150  for(;number < quarterPoints; number++){
151 
152  inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
153  inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
154  inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
155  inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
156 
157  cplxValue1 = _mm_load_ps(inputFloatBuffer);
158  complexVectorPtr += 4;
159 
160  inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
161  inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
162  inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
163  inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
164 
165  cplxValue2 = _mm_load_ps(inputFloatBuffer);
166  complexVectorPtr += 4;
167 
168  cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
169  cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
170 
171  // Arrange in i1i2i3i4 format
172  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
173  // Arrange in q1q2q3q4 format
174  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
175 
176  iValue = _mm_mul_ps(iValue, iValue); // Square the I values
177  qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
178 
179  result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
180 
181  result = _mm_sqrt_ps(result); // Square root the values
182 
183  result = _mm_mul_ps(result, vScalar); // Scale the results
184 
185  _mm_store_ps(outputFloatBuffer, result);
186  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
187  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
188  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
189  *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
190  }
191 
192  number = quarterPoints * 4;
193  magnitudeVectorPtr = &magnitudeVector[number];
194  complexVectorPtr = (const int16_t*)&complexVector[number];
195  for(; number < num_points; number++){
196  const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
197  const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
198  const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
199  *magnitudeVectorPtr++ = (int16_t)(val1Result);
200  }
201 }
202 #endif /* LV_HAVE_SSE */
203 
204 #ifdef LV_HAVE_GENERIC
205 
206 static inline void
207 volk_16ic_magnitude_16i_generic(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points)
208 {
209  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
210  int16_t* magnitudeVectorPtr = magnitudeVector;
211  unsigned int number = 0;
212  const float scalar = 32768.0;
213  for(number = 0; number < num_points; number++){
214  float real = ((float)(*complexVectorPtr++)) / scalar;
215  float imag = ((float)(*complexVectorPtr++)) / scalar;
216  *magnitudeVectorPtr++ = (int16_t)(sqrtf((real*real) + (imag*imag)) * scalar);
217  }
218 }
219 #endif /* LV_HAVE_GENERIC */
220 
221 #ifdef LV_HAVE_ORC_DISABLED
222 extern void
223 volk_16ic_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const lv_16sc_t* complexVector, float scalar, unsigned int num_points);
224 
225 static inline void
226 volk_16ic_magnitude_16i_u_orc(int16_t* magnitudeVector, const lv_16sc_t* complexVector, unsigned int num_points)
227 {
228  volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
229 }
230 #endif /* LV_HAVE_ORC_DISABLED */
231 
232 
233 #endif /* INCLUDED_volk_16ic_magnitude_16i_a_H */
short complex lv_16sc_t
Definition: volk_complex.h:53
signed short int16_t
Definition: stdint.h:76
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27