GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_s32f_convert_8i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_s32f_convert_8i
25  *
26  * \b Overview
27  *
28  * Converts a floating point number to a 8-bit char after applying a
29  * scaling factor.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32f_s32f_convert_8i(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li inputVector: the input vector of floats.
38  * \li scalar: The value multiplied against each point in the input buffer.
39  * \li num_points: The number of data points.
40  *
41  * \b Outputs
42  * \li outputVector: The output vector.
43  *
44  * \b Example
45  * Convert floats from [-1,1] to 8-bit integers with a scale of 5 to maintain smallest delta
46  * int N = 10;
47  * unsigned int alignment = volk_get_alignment();
48  * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
49  * int8_t* out = (int8_t*)volk_malloc(sizeof(int8_t)*N, alignment);
50  *
51  * for(unsigned int ii = 0; ii < N; ++ii){
52  * increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
53  * }
54  *
55  * // Normalize by the smallest delta (0.2 in this example)
56  * // With float -> 8 bit ints be careful of scaling
57 
58  * float scale = 5.1f;
59  *
60  * volk_32f_s32f_convert_8i(out, increasing, scale, N);
61  *
62  * for(unsigned int ii = 0; ii < N; ++ii){
63  * printf("out[%u] = %i\n", ii, out[ii]);
64  * }
65  *
66  * volk_free(increasing);
67  * volk_free(out);
68  * \endcode
69  */
70 
71 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
72 #define INCLUDED_volk_32f_s32f_convert_8i_u_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 
77 #ifdef LV_HAVE_SSE2
78 #include <emmintrin.h>
79 
/*!
 * \brief Scale floats and convert them to saturated 8-bit integers
 *        (SSE2, no alignment requirement on either buffer).
 *
 * Every input is multiplied by \p scalar, clamped to [-128, 127] and then
 * converted with the SSE float-to-int conversion, which rounds according to
 * the current MXCSR rounding mode. The trailing (num_points % 16) elements
 * are pushed through the scalar forms of the same instructions so that they
 * are rounded and clamped exactly like the vectorised elements.
 *
 * \param outputVector destination buffer receiving num_points int8_t values
 * \param inputVector  source buffer holding num_points floats
 * \param scalar       factor applied to each input before conversion
 * \param num_points   number of elements to convert
 */
static inline void
volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector,
                                const float scalar, unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vmin_val = _mm_set_ps1(-128.0f);
    const __m128 vmax_val = _mm_set_ps1(127.0f);

    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;

    /* Main loop: 16 floats in, 16 int8_t out per iteration. */
    for (number = 0; number < sixteenthPoints; number++) {
        inputVal1 = _mm_loadu_ps(inputVectorPtr);
        inputVal2 = _mm_loadu_ps(inputVectorPtr + 4);
        inputVal3 = _mm_loadu_ps(inputVectorPtr + 8);
        inputVal4 = _mm_loadu_ps(inputVectorPtr + 12);
        inputVectorPtr += 16;

        /* Scale, then clamp while still in float so the conversion cannot overflow. */
        inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
        inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
        inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

        intInputVal1 = _mm_cvtps_epi32(inputVal1);
        intInputVal2 = _mm_cvtps_epi32(inputVal2);
        intInputVal3 = _mm_cvtps_epi32(inputVal3);
        intInputVal4 = _mm_cvtps_epi32(inputVal4);

        /* Narrow 4x(4 x int32) -> 2x(8 x int16) -> 16 x int8 with signed saturation. */
        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
        intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
        intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

        _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
        outputVectorPtr += 16;
    }

    /*
     * Tail: use the scalar variants of the same instructions instead of a C
     * cast. A cast truncates toward zero, whereas the vector body rounds, so
     * casting here would make the last few outputs differ from the rest.
     */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        __m128 v = _mm_mul_ss(_mm_set_ss(inputVector[number]), vScalar);
        v = _mm_max_ss(_mm_min_ss(v, vmax_val), vmin_val);
        outputVector[number] = (int8_t)_mm_cvtss_si32(v);
    }
}
136 
137 #endif /* LV_HAVE_SSE2 */
138 
139 
140 #ifdef LV_HAVE_SSE
141 #include <xmmintrin.h>
142 
143 static inline void
144 volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector,
145  const float scalar, unsigned int num_points)
146 {
147  unsigned int number = 0;
148 
149  const unsigned int quarterPoints = num_points / 4;
150 
151  const float* inputVectorPtr = (const float*)inputVector;
152  int8_t* outputVectorPtr = outputVector;
153 
154  float min_val = -128;
155  float max_val = 127;
156  float r;
157 
158  __m128 vScalar = _mm_set_ps1(scalar);
159  __m128 ret;
160  __m128 vmin_val = _mm_set_ps1(min_val);
161  __m128 vmax_val = _mm_set_ps1(max_val);
162 
163  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
164 
165  for(;number < quarterPoints; number++){
166  ret = _mm_loadu_ps(inputVectorPtr);
167  inputVectorPtr += 4;
168 
169  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
170 
171  _mm_store_ps(outputFloatBuffer, ret);
172  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
173  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
174  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
175  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
176  }
177 
178  number = quarterPoints * 4;
179  for(; number < num_points; number++){
180  r = inputVector[number] * scalar;
181  if(r > max_val)
182  r = max_val;
183  else if(r < min_val)
184  r = min_val;
185  outputVector[number] = (int16_t)(r);
186  }
187 }
188 
189 #endif /* LV_HAVE_SSE */
190 
191 
192 #ifdef LV_HAVE_GENERIC
193 
/*!
 * \brief Scale floats and convert them to saturated 8-bit integers
 *        (portable C implementation).
 *
 * Every input is multiplied by \p scalar, clamped to [-128, 127] and then
 * converted with a C cast, i.e. truncated toward zero.
 *
 * \param outputVector destination buffer receiving num_points int8_t values
 * \param inputVector  source buffer holding num_points floats
 * \param scalar       factor applied to each input before conversion
 * \param num_points   number of elements to convert
 */
static inline void
volk_32f_s32f_convert_8i_generic(int8_t* outputVector, const float* inputVector,
                                 const float scalar, unsigned int num_points)
{
    int8_t* outputVectorPtr = outputVector;
    const float* inputVectorPtr = inputVector;
    const float min_val = -128;
    const float max_val = 127;
    unsigned int number;
    float r;

    for (number = 0; number < num_points; number++) {
        r = *inputVectorPtr++ * scalar;
        /* Clamp in float first so the narrowing cast below stays in range. */
        if (r > max_val)
            r = max_val;
        else if (r < min_val)
            r = min_val;
        /* Cast to the real output type (the original went through int16_t). */
        *outputVectorPtr++ = (int8_t)(r);
    }
}
214 
215 #endif /* LV_HAVE_GENERIC */
216 
217 
218 #endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */
219 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
220 #define INCLUDED_volk_32f_s32f_convert_8i_a_H
221 
222 #include <volk/volk_common.h>
223 #include <inttypes.h>
224 #include <stdio.h>
225 
226 #ifdef LV_HAVE_SSE2
227 #include <emmintrin.h>
228 
/*!
 * \brief Scale floats and convert them to saturated 8-bit integers
 *        (SSE2, aligned loads and stores).
 *
 * Every input is multiplied by \p scalar, clamped to [-128, 127] and then
 * converted with the SSE float-to-int conversion, which rounds according to
 * the current MXCSR rounding mode. The trailing (num_points % 16) elements
 * are pushed through the scalar forms of the same instructions so that they
 * are rounded and clamped exactly like the vectorised elements.
 *
 * The vector loop uses _mm_load_ps / _mm_store_si128, so both buffers must be
 * 16-byte aligned whenever num_points >= 16.
 *
 * \param outputVector destination buffer receiving num_points int8_t values
 * \param inputVector  source buffer holding num_points floats
 * \param scalar       factor applied to each input before conversion
 * \param num_points   number of elements to convert
 */
static inline void
volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector,
                                const float scalar, unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;
    unsigned int number;

    const __m128 vScalar = _mm_set_ps1(scalar);
    const __m128 vmin_val = _mm_set_ps1(-128.0f);
    const __m128 vmax_val = _mm_set_ps1(127.0f);

    __m128 inputVal1, inputVal2, inputVal3, inputVal4;
    __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;

    /* Main loop: 16 floats in, 16 int8_t out per iteration. */
    for (number = 0; number < sixteenthPoints; number++) {
        inputVal1 = _mm_load_ps(inputVectorPtr);
        inputVal2 = _mm_load_ps(inputVectorPtr + 4);
        inputVal3 = _mm_load_ps(inputVectorPtr + 8);
        inputVal4 = _mm_load_ps(inputVectorPtr + 12);
        inputVectorPtr += 16;

        /* Scale, then clamp while still in float so the conversion cannot overflow. */
        inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
        inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
        inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
        inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

        intInputVal1 = _mm_cvtps_epi32(inputVal1);
        intInputVal2 = _mm_cvtps_epi32(inputVal2);
        intInputVal3 = _mm_cvtps_epi32(inputVal3);
        intInputVal4 = _mm_cvtps_epi32(inputVal4);

        /* Narrow 4x(4 x int32) -> 2x(8 x int16) -> 16 x int8 with signed saturation. */
        intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
        intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
        intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

        _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
        outputVectorPtr += 16;
    }

    /*
     * Tail: use the scalar variants of the same instructions instead of a C
     * cast. A cast truncates toward zero, whereas the vector body rounds, so
     * casting here would make the last few outputs differ from the rest.
     */
    for (number = sixteenthPoints * 16; number < num_points; number++) {
        __m128 v = _mm_mul_ss(_mm_set_ss(inputVector[number]), vScalar);
        v = _mm_max_ss(_mm_min_ss(v, vmax_val), vmin_val);
        outputVector[number] = (int8_t)_mm_cvtss_si32(v);
    }
}
285 #endif /* LV_HAVE_SSE2 */
286 
287 
288 #ifdef LV_HAVE_SSE
289 #include <xmmintrin.h>
290 
291 static inline void
292 volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector,
293  const float scalar, unsigned int num_points)
294 {
295  unsigned int number = 0;
296 
297  const unsigned int quarterPoints = num_points / 4;
298 
299  const float* inputVectorPtr = (const float*)inputVector;
300 
301  float min_val = -128;
302  float max_val = 127;
303  float r;
304 
305  int8_t* outputVectorPtr = outputVector;
306  __m128 vScalar = _mm_set_ps1(scalar);
307  __m128 ret;
308  __m128 vmin_val = _mm_set_ps1(min_val);
309  __m128 vmax_val = _mm_set_ps1(max_val);
310 
311  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
312 
313  for(;number < quarterPoints; number++){
314  ret = _mm_load_ps(inputVectorPtr);
315  inputVectorPtr += 4;
316 
317  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
318 
319  _mm_store_ps(outputFloatBuffer, ret);
320  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
321  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
322  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
323  *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
324  }
325 
326  number = quarterPoints * 4;
327  for(; number < num_points; number++){
328  r = inputVector[number] * scalar;
329  if(r > max_val)
330  r = max_val;
331  else if(r < min_val)
332  r = min_val;
333  outputVector[number] = (int8_t)(r);
334  }
335 }
336 
337 #endif /* LV_HAVE_SSE */
338 
339 
340 #ifdef LV_HAVE_GENERIC
341 
/*!
 * \brief Scale floats and convert them to saturated 8-bit integers
 *        (portable C implementation).
 *
 * Each input is multiplied by \p scalar, limited to [-128, 127] and narrowed
 * with a C cast.
 *
 * \param outputVector destination buffer receiving num_points int8_t values
 * \param inputVector  source buffer holding num_points floats
 * \param scalar       factor applied to each input before conversion
 * \param num_points   number of elements to convert
 */
static inline void
volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector,
                                   const float scalar, unsigned int num_points)
{
    const float lowerBound = -128;
    const float upperBound = 127;
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        const float scaled = inputVector[idx] * scalar;
        const float limited = (scaled > upperBound)
                                  ? upperBound
                                  : ((scaled < lowerBound) ? lowerBound : scaled);
        outputVector[idx] = (int8_t)(limited);
    }
}
362 
363 #endif /* LV_HAVE_GENERIC */
364 
365 #endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27