GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_8ic_x2_s32f_multiply_conjugate_32fc.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_8ic_x2_s32f_multiply_conjugate_32fc
25  *
26  * \b Overview
27  *
28  * Multiplies one complex vector by the complex conjugate of the
29  * second complex vector, scales each product by 1/scalar, and stores the results in the third vector.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_8ic_x2_s32f_multiply_conjugate_32fc(lv_32fc_t* cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li aVector: One of the complex vectors to be multiplied.
38  * \li bVector: The complex vector which will be converted to complex conjugate and multiplied.
39  * \li scalar: each output value is scaled by 1/scalar.
40  * \li num_points: The number of complex values in aVector and bVector to be multiplied together and stored into cVector.
41  *
42  * \b Outputs
43  * \li cVector: The complex vector where the results will be stored.
44  *
45  * \b Example
46  * \code
47  * int N = 10000;
48  *
49  * <FIXME>
50  *
51  * volk_8ic_x2_s32f_multiply_conjugate_32fc();
52  *
53  * \endcode
54  */
55 
56 #ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
57 #define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
58 
59 #include <inttypes.h>
60 #include <stdio.h>
61 #include <volk/volk_complex.h>
62 
63 #ifdef LV_HAVE_SSE4_1
64 #include <smmintrin.h>
65 
66 static inline void
67 volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const lv_8sc_t* aVector,
68  const lv_8sc_t* bVector, const float scalar,
69  unsigned int num_points)
70 {
71  unsigned int number = 0;
72  const unsigned int quarterPoints = num_points / 4;
73 
74  __m128i x, y, realz, imagz;
75  __m128 ret;
76  lv_32fc_t* c = cVector;
77  const lv_8sc_t* a = aVector;
78  const lv_8sc_t* b = bVector;
79  __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
80 
81  __m128 invScalar = _mm_set_ps1(1.0/scalar);
82 
83  for(;number < quarterPoints; number++){
84  // Convert into 8 bit values into 16 bit values
85  x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
86  y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
87 
88  // Calculate the ar*cr - ai*(-ci) portions
89  realz = _mm_madd_epi16(x,y);
90 
91  // Calculate the complex conjugate of the cr + ci j values
92  y = _mm_sign_epi16(y, conjugateSign);
93 
94  // Shift the order of the cr and ci values
95  y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), _MM_SHUFFLE(2,3,0,1));
96 
97  // Calculate the ar*(-ci) + cr*(ai)
98  imagz = _mm_madd_epi16(x,y);
99 
100  // Interleave real and imaginary and then convert to float values
101  ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
102 
103  // Normalize the floating point values
104  ret = _mm_mul_ps(ret, invScalar);
105 
106  // Store the floating point values
107  _mm_store_ps((float*)c, ret);
108  c += 2;
109 
110  // Interleave real and imaginary and then convert to float values
111  ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
112 
113  // Normalize the floating point values
114  ret = _mm_mul_ps(ret, invScalar);
115 
116  // Store the floating point values
117  _mm_store_ps((float*)c, ret);
118  c += 2;
119 
120  a += 4;
121  b += 4;
122  }
123 
124  number = quarterPoints * 4;
125  float* cFloatPtr = (float*)&cVector[number];
126  int8_t* a8Ptr = (int8_t*)&aVector[number];
127  int8_t* b8Ptr = (int8_t*)&bVector[number];
128  for(; number < num_points; number++){
129  float aReal = (float)*a8Ptr++;
130  float aImag = (float)*a8Ptr++;
131  lv_32fc_t aVal = lv_cmake(aReal, aImag );
132  float bReal = (float)*b8Ptr++;
133  float bImag = (float)*b8Ptr++;
134  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
135  lv_32fc_t temp = aVal * bVal;
136 
137  *cFloatPtr++ = lv_creal(temp) / scalar;
138  *cFloatPtr++ = lv_cimag(temp) / scalar;
139  }
140 }
141 #endif /* LV_HAVE_SSE4_1 */
142 
143 
144 #ifdef LV_HAVE_GENERIC
145 
146 static inline void
147 volk_8ic_x2_s32f_multiply_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_8sc_t* aVector,
148  const lv_8sc_t* bVector, const float scalar,
149  unsigned int num_points)
150 {
151  unsigned int number = 0;
152  float* cPtr = (float*)cVector;
153  const float invScalar = 1.0 / scalar;
154  int8_t* a8Ptr = (int8_t*)aVector;
155  int8_t* b8Ptr = (int8_t*)bVector;
156  for(number = 0; number < num_points; number++){
157  float aReal = (float)*a8Ptr++;
158  float aImag = (float)*a8Ptr++;
159  lv_32fc_t aVal = lv_cmake(aReal, aImag );
160  float bReal = (float)*b8Ptr++;
161  float bImag = (float)*b8Ptr++;
162  lv_32fc_t bVal = lv_cmake( bReal, -bImag );
163  lv_32fc_t temp = aVal * bVal;
164 
165  *cPtr++ = (lv_creal(temp) * invScalar);
166  *cPtr++ = (lv_cimag(temp) * invScalar);
167  }
168 }
169 #endif /* LV_HAVE_GENERIC */
170 
171 
172 #endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */
#define lv_cmake(r, i)
Definition: volk_complex.h:59
signed char int8_t
Definition: stdint.h:75
float complex lv_32fc_t
Definition: volk_complex.h:56
#define lv_creal(x)
Definition: volk_complex.h:76
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:52
#define lv_cimag(x)
Definition: volk_complex.h:78