54 #ifndef INCLUDED_volk_16ic_magnitude_16i_a_H
55 #define INCLUDED_volk_16ic_magnitude_16i_a_H
63 #include <pmmintrin.h>
66 volk_16ic_magnitude_16i_a_sse3(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points)
68 unsigned int number = 0;
69 const unsigned int quarterPoints = num_points / 4;
72 int16_t* magnitudeVectorPtr = magnitudeVector;
74 __m128 vScalar = _mm_set_ps1(32768.0);
75 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
77 __m128 cplxValue1, cplxValue2, result;
82 for(;number < quarterPoints; number++){
84 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
85 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
86 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
87 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
89 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
90 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
91 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
92 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
94 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
95 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
97 complexVectorPtr += 8;
99 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
100 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
102 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
103 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
105 result = _mm_hadd_ps(cplxValue1, cplxValue2);
107 result = _mm_sqrt_ps(result);
109 result = _mm_mul_ps(result, vScalar);
111 _mm_store_ps(outputFloatBuffer, result);
112 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[0]);
113 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[1]);
114 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[2]);
115 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[3]);
118 number = quarterPoints * 4;
119 magnitudeVectorPtr = &magnitudeVector[number];
120 complexVectorPtr = (
const int16_t*)&complexVector[number];
121 for(; number < num_points; number++){
122 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
123 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
124 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
125 *magnitudeVectorPtr++ = (
int16_t)(val1Result);
131 #include <xmmintrin.h>
134 volk_16ic_magnitude_16i_a_sse(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points)
136 unsigned int number = 0;
137 const unsigned int quarterPoints = num_points / 4;
140 int16_t* magnitudeVectorPtr = magnitudeVector;
142 __m128 vScalar = _mm_set_ps1(32768.0);
143 __m128 invScalar = _mm_set_ps1(1.0/32768.0);
145 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
150 for(;number < quarterPoints; number++){
152 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
153 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
154 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
155 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
157 cplxValue1 = _mm_load_ps(inputFloatBuffer);
158 complexVectorPtr += 4;
160 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
161 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
162 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
163 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
165 cplxValue2 = _mm_load_ps(inputFloatBuffer);
166 complexVectorPtr += 4;
168 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
169 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
172 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
174 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
176 iValue = _mm_mul_ps(iValue, iValue);
177 qValue = _mm_mul_ps(qValue, qValue);
179 result = _mm_add_ps(iValue, qValue);
181 result = _mm_sqrt_ps(result);
183 result = _mm_mul_ps(result, vScalar);
185 _mm_store_ps(outputFloatBuffer, result);
186 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[0]);
187 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[1]);
188 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[2]);
189 *magnitudeVectorPtr++ = (
int16_t)(outputFloatBuffer[3]);
192 number = quarterPoints * 4;
193 magnitudeVectorPtr = &magnitudeVector[number];
194 complexVectorPtr = (
const int16_t*)&complexVector[number];
195 for(; number < num_points; number++){
196 const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
197 const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
198 const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * 32768.0;
199 *magnitudeVectorPtr++ = (
int16_t)(val1Result);
204 #ifdef LV_HAVE_GENERIC
207 volk_16ic_magnitude_16i_generic(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points)
210 int16_t* magnitudeVectorPtr = magnitudeVector;
211 unsigned int number = 0;
212 const float scalar = 32768.0;
213 for(number = 0; number < num_points; number++){
214 float real = ((float)(*complexVectorPtr++)) / scalar;
215 float imag = ((float)(*complexVectorPtr++)) / scalar;
216 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((real*real) + (imag*imag)) * scalar);
221 #ifdef LV_HAVE_ORC_DISABLED
223 volk_16ic_magnitude_16i_a_orc_impl(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
float scalar,
unsigned int num_points);
226 volk_16ic_magnitude_16i_u_orc(
int16_t* magnitudeVector,
const lv_16sc_t* complexVector,
unsigned int num_points)
228 volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 32768.0, num_points);
/*
 * Definitions referenced above:
 *   lv_16sc_t              - short complex              (volk_complex.h:53)
 *   int16_t                - signed short               (stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x) - macro                      (volk_common.h:27)
 */