53 #ifndef INCLUDED_volk_8i_convert_16i_u_H
54 #define INCLUDED_volk_8i_convert_16i_u_H
60 #include <smmintrin.h>
/*
 * Convert 8-bit signed samples to 16-bit signed samples, scaling each value
 * by 256 (unaligned SSE4.1 variant).
 *
 * outputVector: destination for num_points int16_t results
 * inputVector:  num_points int8_t samples to convert
 * num_points:   number of samples to process
 *
 * Uses unaligned loads/stores, so neither buffer has to be 16-byte aligned.
 */
static inline void
volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const int8_t* inputVector,
                             unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  const __m128i* inputVectorPtr = (const __m128i*)inputVector;
  __m128i* outputVectorPtr = (__m128i*)outputVector;
  __m128i inputVal;
  __m128i ret;

  /* Each pass consumes 16 input bytes and produces two 8-lane int16 vectors. */
  for(; number < sixteenthPoints; number++){
    inputVal = _mm_loadu_si128(inputVectorPtr);

    /* Low 8 bytes: sign-extend to 16 bit, then shift left by 8 (x256). */
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_storeu_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    /* Bring the high 8 bytes down into the low half and repeat. */
    inputVal = _mm_srli_si128(inputVal, 8);
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_storeu_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    inputVectorPtr++;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop skipped. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (int16_t)(inputVector[number]) * 256;
  }
}
100 #ifdef LV_HAVE_GENERIC
/*
 * Convert 8-bit signed samples to 16-bit signed samples, scaling each value
 * by 256 (portable C variant).
 *
 * outputVector: destination for num_points int16_t results
 * inputVector:  num_points int8_t samples to convert
 * num_points:   number of samples to process
 */
static inline void
volk_8i_convert_16i_generic(int16_t* outputVector, const int8_t* inputVector,
                            unsigned int num_points)
{
  int16_t* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    /* The int8 range times 256 is [-32768, 32512], which fits in int16_t. */
    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
  }
}
121 #ifndef INCLUDED_volk_8i_convert_16i_a_H
122 #define INCLUDED_volk_8i_convert_16i_a_H
127 #ifdef LV_HAVE_SSE4_1
128 #include <smmintrin.h>
/*
 * Convert 8-bit signed samples to 16-bit signed samples, scaling each value
 * by 256 (aligned SSE4.1 variant).
 *
 * outputVector: destination for num_points int16_t results
 * inputVector:  num_points int8_t samples to convert
 * num_points:   number of samples to process
 *
 * Uses _mm_load_si128/_mm_store_si128, so both buffers must be 16-byte
 * aligned whenever num_points >= 16.
 */
static inline void
volk_8i_convert_16i_a_sse4_1(int16_t* outputVector, const int8_t* inputVector,
                             unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  const __m128i* inputVectorPtr = (const __m128i*)inputVector;
  __m128i* outputVectorPtr = (__m128i*)outputVector;
  __m128i inputVal;
  __m128i ret;

  /* Each pass consumes 16 input bytes and produces two 8-lane int16 vectors. */
  for(; number < sixteenthPoints; number++){
    inputVal = _mm_load_si128(inputVectorPtr);

    /* Low 8 bytes: sign-extend to 16 bit, then shift left by 8 (x256). */
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_store_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    /* Bring the high 8 bytes down into the low half and repeat. */
    inputVal = _mm_srli_si128(inputVal, 8);
    ret = _mm_cvtepi8_epi16(inputVal);
    ret = _mm_slli_epi16(ret, 8);
    _mm_store_si128(outputVectorPtr, ret);
    outputVectorPtr++;

    inputVectorPtr++;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop skipped. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (int16_t)(inputVector[number]) * 256;
  }
}
168 #ifdef LV_HAVE_GENERIC
/*
 * Convert 8-bit signed samples to 16-bit signed samples, scaling each value
 * by 256 (portable C variant registered under the aligned kernel name).
 *
 * outputVector: destination for num_points int16_t results
 * inputVector:  num_points int8_t samples to convert
 * num_points:   number of samples to process
 *
 * The loop is plain element-wise C and places no alignment requirement on
 * either buffer.
 */
static inline void
volk_8i_convert_16i_a_generic(int16_t* outputVector, const int8_t* inputVector,
                              unsigned int num_points)
{
  int16_t* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    /* The int8 range times 256 is [-32768, 32512], which fits in int16_t. */
    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
  }
}
186 #include <arm_neon.h>
/*
 * Convert 8-bit signed samples to 16-bit signed samples, scaling each value
 * by 256 (ARM NEON variant).
 *
 * outputVector: destination for num_points int16_t results
 * inputVector:  num_points int8_t samples to convert
 * num_points:   number of samples to process
 */
static inline void
volk_8i_convert_16i_neon(int16_t* outputVector, const int8_t* inputVector,
                         unsigned int num_points)
{
  int16_t* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number;
  const unsigned int eighth_points = num_points / 8;

  int8x8_t input_vec;
  int16x8_t converted_vec;

  /* Each pass widens 8 int8 lanes to int16 and shifts left by 8 (x256). */
  for(number = 0; number < eighth_points; ++number) {
    input_vec = vld1_s8(inputVectorPtr);
    converted_vec = vmovl_s8(input_vec);
    converted_vec = vshlq_n_s16(converted_vec, 8);
    vst1q_s16(outputVectorPtr, converted_vec);

    inputVectorPtr += 8;
    outputVectorPtr += 8;
  }

  /* Scalar tail for the num_points % 8 samples the vector loop skipped. */
  for(number = eighth_points * 8; number < num_points; number++){
    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
  }
}
/*
 * ORC-generated implementation of the conversion kernel; defined outside
 * this header.
 */
extern void
volk_8i_convert_16i_a_orc_impl(int16_t* outputVector, const int8_t* inputVector,
                               unsigned int num_points);

/*
 * ORC variant of the 8-bit to 16-bit conversion.
 *
 * outputVector: destination for num_points int16_t results
 * inputVector:  num_points int8_t samples to convert
 * num_points:   number of samples to process
 *
 * Thin wrapper that forwards all arguments unchanged to
 * volk_8i_convert_16i_a_orc_impl.
 * NOTE(review): this "_u_" entry point calls the "_a_" implementation;
 * presumably the ORC kernel tolerates unaligned buffers - confirm.
 */
static inline void
volk_8i_convert_16i_u_orc(int16_t* outputVector, const int8_t* inputVector,
                          unsigned int num_points)
{
  volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
}
/*
 * Type reference:
 *   int16_t - signed short (defined at stdint.h:76)
 *   int8_t  - signed char  (defined at stdint.h:75)
 */