66 #ifndef INCLUDED_volk_32f_convert_64f_u_H
67 #define INCLUDED_volk_32f_convert_64f_u_H
73 #include <immintrin.h>
/*!
 * \brief Converts single-precision floats to double precision (AVX, unaligned buffers).
 *
 * Four floats are fetched per iteration with an unaligned 128-bit load, widened
 * to four doubles, and written with an unaligned 256-bit store. The remaining
 * num_points % 4 values are converted one at a time by a scalar loop.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of values to convert
 */
static inline void volk_32f_convert_64f_u_avx(double* outputVector,
                                              const float* inputVector,
                                              unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m256d ret;
  __m128 inputVal;

  for (; number < quarterPoints; number++) {
    inputVal = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    ret = _mm256_cvtps_pd(inputVal);
    _mm256_storeu_pd(outputVectorPtr, ret);

    /* Step past the four doubles just stored so the next iteration does not
     * overwrite them. */
    outputVectorPtr += 4;
  }

  /* Scalar tail: handle the values that did not fill a whole group of four. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    outputVector[number] = (double)(inputVector[number]);
  }
}
103 #include <emmintrin.h>
/*!
 * \brief Converts single-precision floats to double precision (SSE2, unaligned buffers).
 *
 * Four floats are fetched per iteration with an unaligned 128-bit load. The low
 * pair is widened and stored first; the high pair is then moved into the low
 * half of the register and widened the same way. The remaining num_points % 4
 * values are converted one at a time by a scalar loop.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of values to convert
 */
static inline void volk_32f_convert_64f_u_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m128d ret;
  __m128 inputVal;

  for (; number < quarterPoints; number++) {
    inputVal = _mm_loadu_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* _mm_cvtps_pd widens only the two low floats of its argument. */
    ret = _mm_cvtps_pd(inputVal);
    _mm_storeu_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;

    /* Bring the two high floats down into the low half, then widen them. */
    inputVal = _mm_movehl_ps(inputVal, inputVal);
    ret = _mm_cvtps_pd(inputVal);
    _mm_storeu_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;
  }

  /* Scalar tail: handle the values that did not fill a whole group of four. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    outputVector[number] = (double)(inputVector[number]);
  }
}
139 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts single-precision floats to double precision (portable scalar version).
 *
 * Each input value is cast to double and written to the matching output slot.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of values to convert
 */
static inline void volk_32f_convert_64f_generic(double* outputVector,
                                                const float* inputVector,
                                                unsigned int num_points)
{
  double* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number;

  for (number = 0; number < num_points; number++) {
    *outputVectorPtr++ = (double)(*inputVectorPtr++);
  }
}
158 #ifndef INCLUDED_volk_32f_convert_64f_a_H
159 #define INCLUDED_volk_32f_convert_64f_a_H
165 #include <immintrin.h>
/*!
 * \brief Converts single-precision floats to double precision (AVX, aligned buffers).
 *
 * Four floats are fetched per iteration with an aligned 128-bit load, widened
 * to four doubles, and written with an aligned 256-bit store. The remaining
 * num_points % 4 values are converted one at a time by a scalar loop.
 *
 * The aligned intrinsics used here (_mm_load_ps, _mm256_store_pd) require
 * inputVector to be 16-byte aligned and outputVector to be 32-byte aligned.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of values to convert
 */
static inline void volk_32f_convert_64f_a_avx(double* outputVector,
                                              const float* inputVector,
                                              unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m256d ret;
  __m128 inputVal;

  for (; number < quarterPoints; number++) {
    inputVal = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    ret = _mm256_cvtps_pd(inputVal);
    _mm256_store_pd(outputVectorPtr, ret);

    outputVectorPtr += 4;
  }

  /* Scalar tail: handle the values that did not fill a whole group of four. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    outputVector[number] = (double)(inputVector[number]);
  }
}
194 #include <emmintrin.h>
/*!
 * \brief Converts single-precision floats to double precision (SSE2, aligned buffers).
 *
 * Four floats are fetched per iteration with an aligned 128-bit load. The low
 * pair is widened and stored first; the high pair is then moved into the low
 * half of the register and widened the same way. The remaining num_points % 4
 * values are converted one at a time by a scalar loop.
 *
 * The aligned intrinsics used here (_mm_load_ps, _mm_store_pd) require both
 * inputVector and outputVector to be 16-byte aligned.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of values to convert
 */
static inline void volk_32f_convert_64f_a_sse2(double* outputVector,
                                               const float* inputVector,
                                               unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  double* outputVectorPtr = outputVector;
  __m128d ret;
  __m128 inputVal;

  for (; number < quarterPoints; number++) {
    inputVal = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* _mm_cvtps_pd widens only the two low floats of its argument. */
    ret = _mm_cvtps_pd(inputVal);
    _mm_store_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;

    /* Bring the two high floats down into the low half, then widen them. */
    inputVal = _mm_movehl_ps(inputVal, inputVal);
    ret = _mm_cvtps_pd(inputVal);
    _mm_store_pd(outputVectorPtr, ret);
    outputVectorPtr += 2;
  }

  /* Scalar tail: handle the values that did not fill a whole group of four. */
  number = quarterPoints * 4;
  for (; number < num_points; number++) {
    outputVector[number] = (double)(inputVector[number]);
  }
}
230 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Converts single-precision floats to double precision (portable scalar version).
 *
 * Each input value is cast to double and written to the matching output slot.
 *
 * \param outputVector destination buffer; num_points doubles are written
 * \param inputVector  source buffer; num_points floats are read
 * \param num_points   number of values to convert
 */
static inline void volk_32f_convert_64f_a_generic(double* outputVector,
                                                  const float* inputVector,
                                                  unsigned int num_points)
{
  double* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number;

  for (number = 0; number < num_points; number++) {
    *outputVectorPtr++ = (double)(*inputVectorPtr++);
  }
}