54 #ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
55 #define INCLUDED_volk_8i_s32f_convert_32f_u_H
61 #include <smmintrin.h>
/*!
 * \brief Converts signed 8-bit samples to floats and scales them by 1/scalar
 *        using SSE4.1 with unaligned loads and stores.
 *
 * Every output element is (float)inputVector[i] * (1.0 / scalar).
 *
 * \param outputVector destination for num_points floats (unaligned access is used)
 * \param inputVector  source of num_points int8_t samples (unaligned access is used)
 * \param scalar       divisor; its reciprocal is computed once and multiplied in
 * \param num_points   number of samples to convert
 */
static inline void
volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector,
                                  const int8_t* inputVector,
                                  const float scalar,
                                  unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  const float iScalar = 1.0 / scalar;
  __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  /* Main loop: 16 input bytes become 16 output floats per iteration. */
  for (; number < sixteenthPoints; number++) {
    inputVal = _mm_loadu_si128((const __m128i*)inputVectorPtr);

    /* Bytes 0..3: sign-extend to 32 bits, convert to float, scale, store. */
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    /* The output pointer must advance after every store; otherwise the
       following groups overwrite these four floats. */
    outputVectorPtr += 4;

    /* Bytes 4..7: shift the next four bytes into the low lanes. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Bytes 8..11. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Bytes 12..15. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for (; number < num_points; number++) {
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
119 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Portable conversion of signed 8-bit samples to floats scaled by 1/scalar.
 *
 * Every output element is (float)inputVector[i] * (1.0 / scalar).
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points int8_t samples
 * \param scalar       divisor; its reciprocal is computed once and multiplied in
 * \param num_points   number of samples to convert
 */
static inline void
volk_8i_s32f_convert_32f_generic(float* outputVector,
                                 const int8_t* inputVector,
                                 const float scalar,
                                 unsigned int num_points)
{
  float* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number = 0;
  /* One division up front; the loop only multiplies. */
  const float iScalar = 1.0 / scalar;

  for (number = 0; number < num_points; number++) {
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
  }
}
139 #ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H
140 #define INCLUDED_volk_8i_s32f_convert_32f_a_H
145 #ifdef LV_HAVE_SSE4_1
146 #include <smmintrin.h>
/*!
 * \brief Converts signed 8-bit samples to floats and scales them by 1/scalar
 *        using SSE4.1 with aligned loads and stores.
 *
 * Every output element is (float)inputVector[i] * (1.0 / scalar).
 *
 * \param outputVector destination for num_points floats; accessed with
 *                     _mm_store_ps, so it must be 16-byte aligned
 * \param inputVector  source of num_points int8_t samples; accessed with
 *                     _mm_load_si128, so it must be 16-byte aligned
 * \param scalar       divisor; its reciprocal is computed once and multiplied in
 * \param num_points   number of samples to convert
 */
static inline void
volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector,
                                  const int8_t* inputVector,
                                  const float scalar,
                                  unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  const float iScalar = 1.0 / scalar;
  __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  /* Main loop: 16 input bytes become 16 output floats per iteration. */
  for (; number < sixteenthPoints; number++) {
    inputVal = _mm_load_si128((const __m128i*)inputVectorPtr);

    /* Bytes 0..3: sign-extend to 32 bits, convert to float, scale, store. */
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Bytes 4..7: shift the next four bytes into the low lanes. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Bytes 8..11. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Bytes 12..15. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for (; number < num_points; number++) {
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
204 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Portable conversion of signed 8-bit samples to floats scaled by 1/scalar.
 *
 * Every output element is (float)inputVector[i] * (1.0 / scalar). The loop
 * uses plain scalar accesses only.
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points int8_t samples
 * \param scalar       divisor; its reciprocal is computed once and multiplied in
 * \param num_points   number of samples to convert
 */
static inline void
volk_8i_s32f_convert_32f_a_generic(float* outputVector,
                                   const int8_t* inputVector,
                                   const float scalar,
                                   unsigned int num_points)
{
  float* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number = 0;
  /* One division up front; the loop only multiplies. */
  const float iScalar = 1.0 / scalar;

  for (number = 0; number < num_points; number++) {
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
  }
}
/*!
 * \brief Kernel implemented outside this file.
 *
 * NOTE(review): the return type is reconstructed as void because the only
 * visible call site discards any result; confirm against the definition.
 */
extern void
volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector,
                                    const int8_t* inputVector,
                                    const float scalar,
                                    unsigned int num_points);

/*!
 * \brief Forwards the conversion to volk_8i_s32f_convert_32f_a_orc_impl.
 *
 * The reciprocal of \p scalar is computed here and passed as the callee's
 * scalar argument; presumably the callee multiplies by the value it receives
 * (confirm against its definition).
 *
 * \param outputVector destination buffer, passed through unchanged
 * \param inputVector  source int8_t samples, passed through unchanged
 * \param scalar       divisor; 1.0 / scalar is what the callee receives
 * \param num_points   number of samples, passed through unchanged
 */
static inline void
volk_8i_s32f_convert_32f_u_orc(float* outputVector,
                               const int8_t* inputVector,
                               const float scalar,
                               unsigned int num_points)
{
  float invscalar = 1.0 / scalar;
  volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points);
}
/* Cross-reference: int8_t is signed char (definition at stdint.h:75). */