53 #ifndef INCLUDED_volk_16i_convert_8i_u_H
54 #define INCLUDED_volk_16i_convert_8i_u_H
60 #include <emmintrin.h>
/**
 * Convert 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of every input value (arithmetic right shift by 8).
 *
 * SSE2 variant using unaligned loads and stores, so neither buffer needs
 * any particular alignment.
 *
 * @param outputVector  destination buffer, at least num_points bytes
 * @param inputVector   source buffer, at least num_points 16-bit samples
 * @param num_points    number of samples to convert
 */
static inline void
volk_16i_convert_8i_u_sse2(int8_t* outputVector,
                           const int16_t* inputVector,
                           unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const int16_t* inputPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    /* Main loop: 16 samples (two 128-bit registers of 8 x int16) per pass. */
    for (number = 0; number < sixteenthPoints; number++) {
        __m128i inputVal1 = _mm_loadu_si128((const __m128i*)inputPtr);
        __m128i inputVal2 = _mm_loadu_si128((const __m128i*)(inputPtr + 8));
        __m128i ret;

        inputPtr += 16;

        /* Arithmetic shift leaves every lane within [-128, 127]. */
        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        /* The pack saturates, but after the shift no lane can exceed the
         * int8 range, so it acts as a plain narrowing here. */
        ret = _mm_packs_epi16(inputVal1, inputVal2);

        _mm_storeu_si128((__m128i*)outputVectorPtr, ret);
        outputVectorPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples.
     * Note: >> on a negative value is implementation-defined in C; the
     * result matches the SIMD path on compilers that shift arithmetically. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
98 #ifdef LV_HAVE_GENERIC
/**
 * Convert 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of every input value (right shift by 8, then narrow).
 *
 * Portable scalar implementation.
 *
 * @param outputVector  destination buffer, at least num_points bytes
 * @param inputVector   source buffer, at least num_points 16-bit samples
 * @param num_points    number of samples to convert
 */
static inline void
volk_16i_convert_8i_generic(int8_t* outputVector,
                            const int16_t* inputVector,
                            unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    unsigned int number;

    /* Note: >> on a negative value is implementation-defined in C; common
     * compilers perform an arithmetic (sign-preserving) shift. */
    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ >> 8);
    }
}
117 #ifndef INCLUDED_volk_16i_convert_8i_a_H
118 #define INCLUDED_volk_16i_convert_8i_a_H
124 #include <emmintrin.h>
/**
 * Convert 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of every input value (arithmetic right shift by 8).
 *
 * SSE2 variant using aligned loads and stores: both outputVector and
 * inputVector must be 16-byte aligned, as required by _mm_load_si128 and
 * _mm_store_si128.
 *
 * @param outputVector  16-byte aligned destination, at least num_points bytes
 * @param inputVector   16-byte aligned source, at least num_points samples
 * @param num_points    number of samples to convert
 */
static inline void
volk_16i_convert_8i_a_sse2(int8_t* outputVector,
                           const int16_t* inputVector,
                           unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;
    const int16_t* inputPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    /* Main loop: 16 samples (two 128-bit registers of 8 x int16) per pass. */
    for (number = 0; number < sixteenthPoints; number++) {
        __m128i inputVal1 = _mm_load_si128((const __m128i*)inputPtr);
        __m128i inputVal2 = _mm_load_si128((const __m128i*)(inputPtr + 8));
        __m128i ret;

        inputPtr += 16;

        /* Arithmetic shift leaves every lane within [-128, 127]. */
        inputVal1 = _mm_srai_epi16(inputVal1, 8);
        inputVal2 = _mm_srai_epi16(inputVal2, 8);

        /* The pack saturates, but after the shift no lane can exceed the
         * int8 range, so it acts as a plain narrowing here. */
        ret = _mm_packs_epi16(inputVal1, inputVal2);

        _mm_store_si128((__m128i*)outputVectorPtr, ret);
        outputVectorPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples.
     * Note: >> on a negative value is implementation-defined in C; the
     * result matches the SIMD path on compilers that shift arithmetically. */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = (int8_t)(inputVector[number] >> 8);
    }
}
163 #include <arm_neon.h>
/**
 * Convert 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of every input value (right shift by 8, then narrow).
 *
 * ARM NEON variant.
 *
 * @param outputVector  destination buffer, at least num_points bytes
 * @param inputVector   source buffer, at least num_points 16-bit samples
 * @param num_points    number of samples to convert
 */
static inline void
volk_16i_convert_8i_neon(int8_t* outputVector,
                         const int16_t* inputVector,
                         unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    const unsigned int sixteenth_points = num_points / 16;
    unsigned int number;

    /* Main loop: 16 samples (two registers of 8 x int16) per pass. */
    for (number = 0; number < sixteenth_points; number++) {
        const int16x8_t inputVal0 = vld1q_s16(inputVectorPtr);
        const int16x8_t inputVal1 = vld1q_s16(inputVectorPtr + 8);

        /* Shift right by 8 and narrow each lane to 8 bits in one step. */
        const int8x8_t outputVal0 = vshrn_n_s16(inputVal0, 8);
        const int8x8_t outputVal1 = vshrn_n_s16(inputVal1, 8);

        /* Join the two 8-lane halves and store all 16 output bytes. */
        const int8x16_t outputVal = vcombine_s8(outputVal0, outputVal1);
        vst1q_s8(outputVectorPtr, outputVal);

        inputVectorPtr += 16;
        outputVectorPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples; the pointers
     * already sit just past the last vectorised element. */
    for (number = sixteenth_points * 16; number < num_points; number++) {
        *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ >> 8);
    }
}
200 #ifdef LV_HAVE_GENERIC
/**
 * Convert 16-bit signed samples to 8-bit signed samples by keeping the
 * upper byte of every input value (right shift by 8, then narrow).
 *
 * Portable scalar implementation; the code itself performs only
 * element-wise scalar accesses.
 *
 * @param outputVector  destination buffer, at least num_points bytes
 * @param inputVector   source buffer, at least num_points 16-bit samples
 * @param num_points    number of samples to convert
 */
static inline void
volk_16i_convert_8i_a_generic(int8_t* outputVector,
                              const int16_t* inputVector,
                              unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const int16_t* inputVectorPtr = inputVector;
    unsigned int number;

    /* Note: >> on a negative value is implementation-defined in C; common
     * compilers perform an arithmetic (sign-preserving) shift. */
    for (number = 0; number < num_points; number++) {
        *outputVectorPtr++ = (int8_t)(*inputVectorPtr++ >> 8);
    }
}
/* Type reference: int16_t is `signed short` (definition: stdint.h:76);
 * int8_t is `signed char` (definition: stdint.h:75). */