55 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
56 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
64 #include <pmmintrin.h>
67 volk_16ic_s32f_magnitude_32f_a_sse3(
float* magnitudeVector,
const lv_16sc_t* complexVector,
68 const float scalar,
unsigned int num_points)
70 unsigned int number = 0;
71 const unsigned int quarterPoints = num_points / 4;
74 float* magnitudeVectorPtr = magnitudeVector;
76 __m128 invScalar = _mm_set_ps1(1.0/scalar);
78 __m128 cplxValue1, cplxValue2, result;
82 for(;number < quarterPoints; number++){
84 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
85 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
86 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
87 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
89 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
90 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
91 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
92 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
94 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
95 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
97 complexVectorPtr += 8;
99 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
100 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
102 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
103 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
105 result = _mm_hadd_ps(cplxValue1, cplxValue2);
107 result = _mm_sqrt_ps(result);
109 _mm_store_ps(magnitudeVectorPtr, result);
111 magnitudeVectorPtr += 4;
114 number = quarterPoints * 4;
115 magnitudeVectorPtr = &magnitudeVector[number];
116 complexVectorPtr = (
const int16_t*)&complexVector[number];
117 for(; number < num_points; number++){
118 float val1Real = (float)(*complexVectorPtr++) / scalar;
119 float val1Imag = (float)(*complexVectorPtr++) / scalar;
120 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
126 #include <xmmintrin.h>
129 volk_16ic_s32f_magnitude_32f_a_sse(
float* magnitudeVector,
const lv_16sc_t* complexVector,
130 const float scalar,
unsigned int num_points)
132 unsigned int number = 0;
133 const unsigned int quarterPoints = num_points / 4;
136 float* magnitudeVectorPtr = magnitudeVector;
138 const float iScalar = 1.0 / scalar;
139 __m128 invScalar = _mm_set_ps1(iScalar);
141 __m128 cplxValue1, cplxValue2, result, re, im;
145 for(;number < quarterPoints; number++){
146 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
147 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
148 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
149 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
151 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
152 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
153 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
154 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
156 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
157 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
159 re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
160 im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
162 complexVectorPtr += 8;
164 cplxValue1 = _mm_mul_ps(re, invScalar);
165 cplxValue2 = _mm_mul_ps(im, invScalar);
167 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
168 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);
170 result = _mm_add_ps(cplxValue1, cplxValue2);
172 result = _mm_sqrt_ps(result);
174 _mm_store_ps(magnitudeVectorPtr, result);
176 magnitudeVectorPtr += 4;
179 number = quarterPoints * 4;
180 magnitudeVectorPtr = &magnitudeVector[number];
181 complexVectorPtr = (
const int16_t*)&complexVector[number];
182 for(; number < num_points; number++){
183 float val1Real = (float)(*complexVectorPtr++) * iScalar;
184 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
185 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
#ifdef LV_HAVE_GENERIC
/*
 * Scale each complex 16-bit sample by 1/scalar and write its magnitude
 * (portable scalar kernel).
 *
 * magnitudeVector  Output; receives num_points floats.
 * complexVector    Input; num_points samples, read as interleaved
 *                  (real, imag) int16_t pairs.
 * scalar           Divisor applied to both components before the magnitude
 *                  is taken. A zero scalar is not special-cased.
 * num_points       Number of complex samples to process.
 */
static inline void
volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector, const lv_16sc_t* complexVector,
                                     const float scalar, unsigned int num_points)
{
  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
  float* magnitudeVectorPtr = magnitudeVector;
  unsigned int number = 0;
  /* Multiply by the reciprocal instead of dividing twice per sample. */
  const float invScalar = 1.0 / scalar;
  for(number = 0; number < num_points; number++){
    float real = ( (float) (*complexVectorPtr++)) * invScalar;
    float imag = ( (float) (*complexVectorPtr++)) * invScalar;
    *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
  }
}
#endif /* LV_HAVE_GENERIC */
#ifdef LV_HAVE_ORC_DISABLED
/* Implemented outside this header; only the prototype is visible here. */
extern void
volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, const lv_16sc_t* complexVector,
                                        const float scalar, unsigned int num_points);

/*
 * Thin wrapper that forwards all four arguments unchanged to
 * volk_16ic_s32f_magnitude_32f_a_orc_impl.
 *
 * NOTE(review): this wrapper is named `_u_` but forwards to the `_a_`
 * implementation; confirm the implementation's alignment requirements
 * before calling it with arbitrary buffers.
 */
static inline void
volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector, const lv_16sc_t* complexVector,
                                   const float scalar, unsigned int num_points)
{
  volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC_DISABLED */
/*
 * Definitions referenced by this file:
 *   lv_16sc_t              -- short complex   (Definition: volk_complex.h:53)
 *   int16_t                -- signed short    (Definition: stdint.h:76)
 *   __VOLK_ATTR_ALIGNED(x) -- macro           (Definition: volk_common.h:27)
 */