73 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
74 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
82 #include <pmmintrin.h>
85 volk_32fc_s32f_magnitude_16i_a_sse3(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
86 const float scalar,
unsigned int num_points)
88 unsigned int number = 0;
89 const unsigned int quarterPoints = num_points / 4;
91 const float* complexVectorPtr = (
const float*)complexVector;
92 int16_t* magnitudeVectorPtr = magnitudeVector;
94 __m128 vScalar = _mm_set_ps1(scalar);
96 __m128 cplxValue1, cplxValue2, result;
100 for(;number < quarterPoints; number++){
101 cplxValue1 = _mm_load_ps(complexVectorPtr);
102 complexVectorPtr += 4;
104 cplxValue2 = _mm_load_ps(complexVectorPtr);
105 complexVectorPtr += 4;
107 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
108 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
110 result = _mm_hadd_ps(cplxValue1, cplxValue2);
112 result = _mm_sqrt_ps(result);
114 result = _mm_mul_ps(result, vScalar);
116 _mm_store_ps(floatBuffer, result);
117 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[0]);
118 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[1]);
119 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[2]);
120 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[3]);
123 number = quarterPoints * 4;
124 magnitudeVectorPtr = &magnitudeVector[number];
125 for(; number < num_points; number++){
126 float val1Real = *complexVectorPtr++;
127 float val1Imag = *complexVectorPtr++;
128 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
135 #include <xmmintrin.h>
138 volk_32fc_s32f_magnitude_16i_a_sse(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
139 const float scalar,
unsigned int num_points)
141 unsigned int number = 0;
142 const unsigned int quarterPoints = num_points / 4;
144 const float* complexVectorPtr = (
const float*)complexVector;
145 int16_t* magnitudeVectorPtr = magnitudeVector;
147 __m128 vScalar = _mm_set_ps1(scalar);
149 __m128 cplxValue1, cplxValue2, iValue, qValue, result;
153 for(;number < quarterPoints; number++){
154 cplxValue1 = _mm_load_ps(complexVectorPtr);
155 complexVectorPtr += 4;
157 cplxValue2 = _mm_load_ps(complexVectorPtr);
158 complexVectorPtr += 4;
161 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
163 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
165 iValue = _mm_mul_ps(iValue, iValue);
166 qValue = _mm_mul_ps(qValue, qValue);
168 result = _mm_add_ps(iValue, qValue);
170 result = _mm_sqrt_ps(result);
172 result = _mm_mul_ps(result, vScalar);
174 _mm_store_ps(floatBuffer, result);
175 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[0]);
176 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[1]);
177 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[2]);
178 *magnitudeVectorPtr++ = (
int16_t)(floatBuffer[3]);
181 number = quarterPoints * 4;
182 magnitudeVectorPtr = &magnitudeVector[number];
183 for(; number < num_points; number++){
184 float val1Real = *complexVectorPtr++;
185 float val1Imag = *complexVectorPtr++;
186 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((val1Real * val1Real) + (val1Imag * val1Imag)) * scalar);
191 #ifdef LV_HAVE_GENERIC
194 volk_32fc_s32f_magnitude_16i_generic(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
195 const float scalar,
unsigned int num_points)
197 const float* complexVectorPtr = (
float*)complexVector;
198 int16_t* magnitudeVectorPtr = magnitudeVector;
199 unsigned int number = 0;
200 for(number = 0; number < num_points; number++){
201 const float real = *complexVectorPtr++;
202 const float imag = *complexVectorPtr++;
203 *magnitudeVectorPtr++ = (
int16_t)(sqrtf((real*real) + (imag*imag)) * scalar);
212 volk_32fc_s32f_magnitude_16i_a_orc_impl(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
213 const float scalar,
unsigned int num_points);
216 volk_32fc_s32f_magnitude_16i_u_orc(
int16_t* magnitudeVector,
const lv_32fc_t* complexVector,
217 const float scalar,
unsigned int num_points)
219 volk_32fc_s32f_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
/*
 * Cross-references carried over from the source listing:
 *
 *   signed short int16_t
 *       Definition: stdint.h:76
 *   #define __VOLK_ATTR_ALIGNED(x)
 *       Definition: volk_common.h:27
 *   float complex lv_32fc_t
 *       Definition: volk_complex.h:56
 */