GNU Radio Manual and C++ API Reference  3.7.7
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
volk_32f_stddev_and_mean_32f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
23 /*!
24  * \page volk_32f_stddev_and_mean_32f_x2
25  *
26  * \b Overview
27  *
28  * Computes the standard deviation and mean of the input buffer.
29  *
30  * <b>Dispatcher Prototype</b>
31  * \code
32  * void volk_32f_stddev_and_mean_32f_x2(float* stddev, float* mean, const float* inputBuffer, unsigned int num_points)
33  * \endcode
34  *
35  * \b Inputs
36  * \li inputBuffer: The buffer of points.
37  * \li num_points The number of values in input buffer.
38  *
39  * \b Outputs
40  * \li stddev: The calculated standard deviation.
41  * \li mean: The mean of the input buffer.
42  *
43  * \b Example
44  * Generate random numbers with c++11's normal distribution and estimate the mean and standard deviation
45  * \code
46  * int N = 1000;
47  * unsigned int alignment = volk_get_alignment();
48  * float* rand_numbers = (float*)volk_malloc(sizeof(float)*N, alignment);
49  * float* mean = (float*)volk_malloc(sizeof(float), alignment);
50  * float* stddev = (float*)volk_malloc(sizeof(float), alignment);
51  *
52  * // Use a normal generator with 0 mean, stddev 1
53  * std::default_random_engine generator;
54  * std::normal_distribution<float> distribution(0,1);
55  *
56  * for(unsigned int ii = 0; ii < N; ++ii){
57  * rand_numbers[ii] = distribution(generator);
58  * }
59  *
60  * volk_32f_stddev_and_mean_32f_x2(stddev, mean, rand_numbers, N);
61  *
62  * printf("std. dev. = %f\n", *stddev);
63  * printf("mean = %f\n", *mean);
64  *
65  * volk_free(rand_numbers);
66  * volk_free(mean);
67  * volk_free(stddev);
68  * \endcode
69  */
70 
71 #ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
72 #define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
73 
74 #include <volk/volk_common.h>
75 #include <inttypes.h>
76 #include <stdio.h>
77 #include <math.h>
78 
79 #ifdef LV_HAVE_SSE4_1
80 #include <smmintrin.h>
81 
82 static inline void
83 volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, float* mean,
84  const float* inputBuffer,
85  unsigned int num_points)
86 {
87  float returnValue = 0;
88  float newMean = 0;
89  if(num_points > 0){
90  unsigned int number = 0;
91  const unsigned int sixteenthPoints = num_points / 16;
92 
93  const float* aPtr = inputBuffer;
94  __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
95  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
96 
97  __m128 accumulator = _mm_setzero_ps();
98  __m128 squareAccumulator = _mm_setzero_ps();
99  __m128 aVal1, aVal2, aVal3, aVal4;
100  __m128 cVal1, cVal2, cVal3, cVal4;
101  for(;number < sixteenthPoints; number++) {
102  aVal1 = _mm_load_ps(aPtr); aPtr += 4;
103  cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
104  accumulator = _mm_add_ps(accumulator, aVal1); // accumulator += x
105 
106  aVal2 = _mm_load_ps(aPtr); aPtr += 4;
107  cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
108  accumulator = _mm_add_ps(accumulator, aVal2); // accumulator += x
109 
110  aVal3 = _mm_load_ps(aPtr); aPtr += 4;
111  cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
112  accumulator = _mm_add_ps(accumulator, aVal3); // accumulator += x
113 
114  aVal4 = _mm_load_ps(aPtr); aPtr += 4;
115  cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
116  accumulator = _mm_add_ps(accumulator, aVal4); // accumulator += x
117 
118  cVal1 = _mm_or_ps(cVal1, cVal2);
119  cVal3 = _mm_or_ps(cVal3, cVal4);
120  cVal1 = _mm_or_ps(cVal1, cVal3);
121 
122  squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // squareAccumulator += x^2
123  }
124  _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
125  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
126  newMean = meanBuffer[0];
127  newMean += meanBuffer[1];
128  newMean += meanBuffer[2];
129  newMean += meanBuffer[3];
130  returnValue = squareBuffer[0];
131  returnValue += squareBuffer[1];
132  returnValue += squareBuffer[2];
133  returnValue += squareBuffer[3];
134 
135  number = sixteenthPoints * 16;
136  for(;number < num_points; number++){
137  returnValue += (*aPtr) * (*aPtr);
138  newMean += *aPtr++;
139  }
140  newMean /= num_points;
141  returnValue /= num_points;
142  returnValue -= (newMean * newMean);
143  returnValue = sqrtf(returnValue);
144  }
145  *stddev = returnValue;
146  *mean = newMean;
147 }
148 #endif /* LV_HAVE_SSE4_1 */
149 
150 
151 #ifdef LV_HAVE_SSE
152 #include <xmmintrin.h>
153 
154 static inline void
155 volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* mean,
156  const float* inputBuffer,
157  unsigned int num_points)
158 {
159  float returnValue = 0;
160  float newMean = 0;
161  if(num_points > 0){
162  unsigned int number = 0;
163  const unsigned int quarterPoints = num_points / 4;
164 
165  const float* aPtr = inputBuffer;
166  __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
167  __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
168 
169  __m128 accumulator = _mm_setzero_ps();
170  __m128 squareAccumulator = _mm_setzero_ps();
171  __m128 aVal = _mm_setzero_ps();
172  for(;number < quarterPoints; number++) {
173  aVal = _mm_load_ps(aPtr); // aVal = x
174  accumulator = _mm_add_ps(accumulator, aVal); // accumulator += x
175  aVal = _mm_mul_ps(aVal, aVal); // squareAccumulator += x^2
176  squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
177  aPtr += 4;
178  }
179  _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C container
180  _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back into the C container
181  newMean = meanBuffer[0];
182  newMean += meanBuffer[1];
183  newMean += meanBuffer[2];
184  newMean += meanBuffer[3];
185  returnValue = squareBuffer[0];
186  returnValue += squareBuffer[1];
187  returnValue += squareBuffer[2];
188  returnValue += squareBuffer[3];
189 
190  number = quarterPoints * 4;
191  for(;number < num_points; number++){
192  returnValue += (*aPtr) * (*aPtr);
193  newMean += *aPtr++;
194  }
195  newMean /= num_points;
196  returnValue /= num_points;
197  returnValue -= (newMean * newMean);
198  returnValue = sqrtf(returnValue);
199  }
200  *stddev = returnValue;
201  *mean = newMean;
202 }
203 #endif /* LV_HAVE_SSE */
204 
205 
206 #ifdef LV_HAVE_GENERIC
207 
208 static inline void
209 volk_32f_stddev_and_mean_32f_x2_generic(float* stddev, float* mean,
210  const float* inputBuffer,
211  unsigned int num_points)
212 {
213  float returnValue = 0;
214  float newMean = 0;
215  if(num_points > 0){
216  const float* aPtr = inputBuffer;
217  unsigned int number = 0;
218 
219  for(number = 0; number < num_points; number++){
220  returnValue += (*aPtr) * (*aPtr);
221  newMean += *aPtr++;
222  }
223  newMean /= num_points;
224  returnValue /= num_points;
225  returnValue -= (newMean * newMean);
226  returnValue = sqrtf(returnValue);
227  }
228  *stddev = returnValue;
229  *mean = newMean;
230 }
231 #endif /* LV_HAVE_GENERIC */
232 
233 
234 
235 
236 #endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27