GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_s32f_power_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_s32f_power_32f
25  *
26  * \b Overview
27  *
28  * Takes each input vector value to the specified power and stores the
29  * results in the return vector.
30  *
31  * <b>Dispatcher Prototype</b>
32  * \code
33  * void volk_32f_s32f_power_32f(float* cVector, const float* aVector, const float power, unsigned int num_points)
34  * \endcode
35  *
36  * \b Inputs
37  * \li aVector: The input vector of floats.
38  * \li power: The power to raise the input value to.
39  * \li num_points: The number of data points.
40  *
41  * \b Outputs
42  * \li cVector: The output vector.
43  *
44  * \b Example
45  * Square the numbers (0,9)
46  * \code
47  * int N = 10;
48  * unsigned int alignment = volk_get_alignment();
49  * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
50  * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
51  *
52  *
53  * for(unsigned int ii = 0; ii < N; ++ii){
54  * increasing[ii] = (float)ii;
55  * }
56  *
57  * // Exponent applied to every element (2.0 squares each value)
58  * float scale = 2.0f;
59  *
60  * volk_32f_s32f_power_32f(out, increasing, scale, N);
61  *
62  * for(unsigned int ii = 0; ii < N; ++ii){
63  * printf("out[%u] = %f\n", ii, out[ii]);
64  * }
65  *
66  * volk_free(increasing);
67  * volk_free(out);
68  * \endcode
69  */
70 
71 #ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
72 #define INCLUDED_volk_32f_s32f_power_32f_a_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 #include <math.h>
77 
78 #ifdef LV_HAVE_SSE4_1
79 #include <tmmintrin.h>
80 
81 #ifdef LV_HAVE_LIB_SIMDMATH
82 #include <simdmath.h>
83 #endif /* LV_HAVE_LIB_SIMDMATH */
84 
85 static inline void
86 volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const float* aVector,
87  const float power, unsigned int num_points)
88 {
89  unsigned int number = 0;
90 
91  float* cPtr = cVector;
92  const float* aPtr = aVector;
93 
94 #ifdef LV_HAVE_LIB_SIMDMATH
95  const unsigned int quarterPoints = num_points / 4;
96  __m128 vPower = _mm_set_ps1(power);
97  __m128 zeroValue = _mm_setzero_ps();
98  __m128 signMask;
99  __m128 negatedValues;
100  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
101  __m128 onesMask = _mm_set_ps1(1);
102 
103  __m128 aVal, cVal;
104  for(;number < quarterPoints; number++){
105 
106  aVal = _mm_load_ps(aPtr);
107  signMask = _mm_cmplt_ps(aVal, zeroValue);
108  negatedValues = _mm_sub_ps(zeroValue, aVal);
109  aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
110 
111  // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
112  cVal = powf4(aVal, vPower); // Takes each input value to the specified power
113 
114  cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
115 
116  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
117 
118  aPtr += 4;
119  cPtr += 4;
120  }
121 
122  number = quarterPoints * 4;
123 #endif /* LV_HAVE_LIB_SIMDMATH */
124 
125  for(;number < num_points; number++){
126  *cPtr++ = powf((*aPtr++), power);
127  }
128 }
129 
130 #endif /* LV_HAVE_SSE4_1 */
131 
132 
133 #ifdef LV_HAVE_SSE
134 #include <xmmintrin.h>
135 
136 #ifdef LV_HAVE_LIB_SIMDMATH
137 #include <simdmath.h>
138 #endif /* LV_HAVE_LIB_SIMDMATH */
139 
140 static inline void
141 volk_32f_s32f_power_32f_a_sse(float* cVector, const float* aVector,
142  const float power, unsigned int num_points)
143 {
144  unsigned int number = 0;
145 
146  float* cPtr = cVector;
147  const float* aPtr = aVector;
148 
149 #ifdef LV_HAVE_LIB_SIMDMATH
150  const unsigned int quarterPoints = num_points / 4;
151  __m128 vPower = _mm_set_ps1(power);
152  __m128 zeroValue = _mm_setzero_ps();
153  __m128 signMask;
154  __m128 negatedValues;
155  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
156  __m128 onesMask = _mm_set_ps1(1);
157 
158  __m128 aVal, cVal;
159  for(;number < quarterPoints; number++){
160 
161  aVal = _mm_load_ps(aPtr);
162  signMask = _mm_cmplt_ps(aVal, zeroValue);
163  negatedValues = _mm_sub_ps(zeroValue, aVal);
164  aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues) );
165 
166  // powf4 doesn't support negative values in the base, so we mask them off and then apply the negative after
167  cVal = powf4(aVal, vPower); // Takes each input value to the specified power
168 
169  cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), _mm_and_ps(signMask, negativeOneToPower) ), cVal);
170 
171  _mm_store_ps(cPtr,cVal); // Store the results back into the C container
172 
173  aPtr += 4;
174  cPtr += 4;
175  }
176 
177  number = quarterPoints * 4;
178 #endif /* LV_HAVE_LIB_SIMDMATH */
179 
180  for(;number < num_points; number++){
181  *cPtr++ = powf((*aPtr++), power);
182  }
183 }
184 
185 #endif /* LV_HAVE_SSE */
186 
187 
188 #ifdef LV_HAVE_GENERIC
189 
190 static inline void
191 volk_32f_s32f_power_32f_generic(float* cVector, const float* aVector,
192  const float power, unsigned int num_points)
193 {
194  float* cPtr = cVector;
195  const float* aPtr = aVector;
196  unsigned int number = 0;
197 
198  for(number = 0; number < num_points; number++){
199  *cPtr++ = powf((*aPtr++), power);
200  }
201 }
202 #endif /* LV_HAVE_GENERIC */
203 
204 
205 #endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */