75 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
76 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
83 #include <emmintrin.h>
86 volk_32f_x2_s32f_interleave_16ic_a_sse2(
lv_16sc_t* complexVector,
const float* iBuffer,
87 const float* qBuffer,
const float scalar,
unsigned int num_points)
89 unsigned int number = 0;
90 const float* iBufferPtr = iBuffer;
91 const float* qBufferPtr = qBuffer;
93 __m128 vScalar = _mm_set_ps1(scalar);
95 const unsigned int quarterPoints = num_points / 4;
97 __m128 iValue, qValue, cplxValue1, cplxValue2;
98 __m128i intValue1, intValue2;
102 for(;number < quarterPoints; number++){
103 iValue = _mm_load_ps(iBufferPtr);
104 qValue = _mm_load_ps(qBufferPtr);
107 cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
108 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
111 cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
112 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
114 intValue1 = _mm_cvtps_epi32(cplxValue1);
115 intValue2 = _mm_cvtps_epi32(cplxValue2);
117 intValue1 = _mm_packs_epi32(intValue1, intValue2);
119 _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
120 complexVectorPtr += 8;
126 number = quarterPoints * 4;
127 complexVectorPtr = (
int16_t*)(&complexVector[number]);
128 for(; number < num_points; number++){
129 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
130 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
137 #include <xmmintrin.h>
140 volk_32f_x2_s32f_interleave_16ic_a_sse(
lv_16sc_t* complexVector,
const float* iBuffer,
141 const float* qBuffer,
const float scalar,
unsigned int num_points)
143 unsigned int number = 0;
144 const float* iBufferPtr = iBuffer;
145 const float* qBufferPtr = qBuffer;
147 __m128 vScalar = _mm_set_ps1(scalar);
149 const unsigned int quarterPoints = num_points / 4;
151 __m128 iValue, qValue, cplxValue;
157 for(;number < quarterPoints; number++){
158 iValue = _mm_load_ps(iBufferPtr);
159 qValue = _mm_load_ps(qBufferPtr);
162 cplxValue = _mm_unpacklo_ps(iValue, qValue);
163 cplxValue = _mm_mul_ps(cplxValue, vScalar);
165 _mm_store_ps(floatBuffer, cplxValue);
167 *complexVectorPtr++ = (
int16_t)(floatBuffer[0]);
168 *complexVectorPtr++ = (
int16_t)(floatBuffer[1]);
169 *complexVectorPtr++ = (
int16_t)(floatBuffer[2]);
170 *complexVectorPtr++ = (
int16_t)(floatBuffer[3]);
173 cplxValue = _mm_unpackhi_ps(iValue, qValue);
174 cplxValue = _mm_mul_ps(cplxValue, vScalar);
176 _mm_store_ps(floatBuffer, cplxValue);
178 *complexVectorPtr++ = (
int16_t)(floatBuffer[0]);
179 *complexVectorPtr++ = (
int16_t)(floatBuffer[1]);
180 *complexVectorPtr++ = (
int16_t)(floatBuffer[2]);
181 *complexVectorPtr++ = (
int16_t)(floatBuffer[3]);
187 number = quarterPoints * 4;
188 complexVectorPtr = (
int16_t*)(&complexVector[number]);
189 for(; number < num_points; number++){
190 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
191 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
197 #ifdef LV_HAVE_GENERIC
200 volk_32f_x2_s32f_interleave_16ic_generic(
lv_16sc_t* complexVector,
const float* iBuffer,
201 const float* qBuffer,
const float scalar,
unsigned int num_points)
204 const float* iBufferPtr = iBuffer;
205 const float* qBufferPtr = qBuffer;
206 unsigned int number = 0;
208 for(number = 0; number < num_points; number++){
209 *complexVectorPtr++ = (
int16_t)(*iBufferPtr++ * scalar);
210 *complexVectorPtr++ = (
int16_t)(*qBufferPtr++ * scalar);
/*
 * Cross-references emitted with this listing:
 *   lv_16sc_t              -- short complex;  definition: volk_complex.h:53
 *   int16_t                -- signed short;   definition: stdint.h:76
 *   __VOLK_ATTR_ALIGNED(x) -- macro;          definition: volk_common.h:27
 */