GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_8ic_deinterleave_16i_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_8ic_deinterleave_16i_x2
25  *
26  * \b Overview
27  *
28  * Deinterleaves the complex 8-bit char vector into separate I & Q vectors,
29  * converting each value to a 16-bit short scaled by 256 (left-shifted 8 bits).
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_8ic_deinterleave_16i_x2(int16_t* iBuffer, int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li complexVector: The complex input vector.
38  * \li num_points: The number of complex data values to be deinterleaved.
39  *
40  * \b Outputs
41  * \li iBuffer: The I buffer output data.
42  * \li qBuffer: The Q buffer output data.
43  *
44  * \b Example
45  * \code
46  * int N = 10000;
47  *
48  * volk_8ic_deinterleave_16i_x2();
49  *
50  * volk_free(x);
51  * \endcode
52  */
53 
54 #ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
55 #define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
56 
57 #include <inttypes.h>
58 #include <stdio.h>
59 
60 #ifdef LV_HAVE_SSE4_1
61 #include <smmintrin.h>
62 
63 static inline void
64 volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer, int16_t* qBuffer,
65  const lv_8sc_t* complexVector, unsigned int num_points)
66 {
67  unsigned int number = 0;
68  const int8_t* complexVectorPtr = (int8_t*)complexVector;
69  int16_t* iBufferPtr = iBuffer;
70  int16_t* qBufferPtr = qBuffer;
71  __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0); // set 16 byte values
72  __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
73  __m128i complexVal, iOutputVal, qOutputVal;
74 
75  unsigned int eighthPoints = num_points / 8;
76 
77  for(number = 0; number < eighthPoints; number++){
78  complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16; // aligned load
79 
80  iOutputVal = _mm_shuffle_epi8(complexVal, iMoveMask); // shuffle 16 bytes of 128bit complexVal
81  qOutputVal = _mm_shuffle_epi8(complexVal, qMoveMask);
82 
83  iOutputVal = _mm_cvtepi8_epi16(iOutputVal); // fills 2-byte sign extended versions of lower 8 bytes of input to output
84  iOutputVal = _mm_slli_epi16(iOutputVal, 8); // shift in left by 8 bits, each of the 8 16-bit integers, shift in with zeros
85 
86  qOutputVal = _mm_cvtepi8_epi16(qOutputVal);
87  qOutputVal = _mm_slli_epi16(qOutputVal, 8);
88 
89  _mm_store_si128((__m128i*)iBufferPtr, iOutputVal); // aligned store
90  _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
91 
92  iBufferPtr += 8;
93  qBufferPtr += 8;
94  }
95 
96  number = eighthPoints * 8;
97  for(; number < num_points; number++){
98  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256; // load 8 bit Complexvector into 16 bit, shift left by 8 bits and store
99  *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
100  }
101 }
102 #endif /* LV_HAVE_SSE4_1 */
103 
104 
105 #ifdef LV_HAVE_AVX
106 #include <immintrin.h>
107 
108 static inline void
109 volk_8ic_deinterleave_16i_x2_a_avx(int16_t* iBuffer, int16_t* qBuffer,
110  const lv_8sc_t* complexVector, unsigned int num_points)
111 {
112  unsigned int number = 0;
113  const int8_t* complexVectorPtr = (int8_t*)complexVector;
114  int16_t* iBufferPtr = iBuffer;
115  int16_t* qBufferPtr = qBuffer;
116  __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0); // set 16 byte values
117  __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
118  __m256i complexVal, iOutputVal, qOutputVal;
119  __m128i complexVal1, complexVal0;
120  __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
121 
122  unsigned int sixteenthPoints = num_points / 16;
123 
124  for(number = 0; number < sixteenthPoints; number++){
125  complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); complexVectorPtr += 32; // aligned load
126 
127  // Extract from complexVal to iOutputVal and qOutputVal
128  complexVal1 = _mm256_extractf128_si256(complexVal, 1);
129  complexVal0 = _mm256_extractf128_si256(complexVal, 0);
130 
131  iOutputVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask); // shuffle 16 bytes of 128bit complexVal
132  iOutputVal0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
133  qOutputVal1 = _mm_shuffle_epi8(complexVal1, qMoveMask);
134  qOutputVal0 = _mm_shuffle_epi8(complexVal0, qMoveMask);
135 
136  iOutputVal1 = _mm_cvtepi8_epi16(iOutputVal1); // fills 2-byte sign extended versions of lower 8 bytes of input to output
137  iOutputVal1 = _mm_slli_epi16(iOutputVal1, 8); // shift in left by 8 bits, each of the 8 16-bit integers, shift in with zeros
138  iOutputVal0 = _mm_cvtepi8_epi16(iOutputVal0);
139  iOutputVal0 = _mm_slli_epi16(iOutputVal0, 8);
140 
141  qOutputVal1 = _mm_cvtepi8_epi16(qOutputVal1);
142  qOutputVal1 = _mm_slli_epi16(qOutputVal1, 8);
143  qOutputVal0 = _mm_cvtepi8_epi16(qOutputVal0);
144  qOutputVal0 = _mm_slli_epi16(qOutputVal0, 8);
145 
146  // Pack iOutputVal0,1 to iOutputVal
147  __m256i dummy = _mm256_setzero_si256();
148  iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
149  iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
150  qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
151  qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
152 
153  _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal); // aligned store
154  _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
155 
156  iBufferPtr += 16;
157  qBufferPtr += 16;
158  }
159 
160  number = sixteenthPoints * 16;
161  for(; number < num_points; number++){
162  *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256; // load 8 bit Complexvector into 16 bit, shift left by 8 bits and store
163  *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
164  }
165 }
166 #endif /* LV_HAVE_AVX */
167 
168 
169 #ifdef LV_HAVE_GENERIC
170 
171 static inline void
172 volk_8ic_deinterleave_16i_x2_generic(int16_t* iBuffer, int16_t* qBuffer,
173  const lv_8sc_t* complexVector, unsigned int num_points)
174 {
175  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
176  int16_t* iBufferPtr = iBuffer;
177  int16_t* qBufferPtr = qBuffer;
178  unsigned int number;
179  for(number = 0; number < num_points; number++){
180  *iBufferPtr++ = (int16_t)(*complexVectorPtr++)*256;
181  *qBufferPtr++ = (int16_t)(*complexVectorPtr++)*256;
182  }
183 }
184 #endif /* LV_HAVE_GENERIC */
185 
186 
187 
188 #endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a_H */
signed short int16_t
Definition: stdint.h:76
signed char int8_t
Definition: stdint.h:75
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:52