Vector Optimized Library of Kernels  3.1.2
Architecture-tuned implementations of math kernels
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
10 #ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
11 #define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
12 
#include <string.h>
#include <volk/volk.h>
#include <volk/volk_8u_x4_conv_k7_r2_8u.h>
#include <volk/volk_malloc.h>
16 
/*
 * Pointer to one decision vector, which is a BIT vector. The same address can
 * be held either as a byte pointer or as a pointer to unsigned int words.
 */
typedef union {
    unsigned char* t; /* byte view of the decision bits */
    unsigned int* w;  /* word view of the decision bits */
} p_decision_t;
22 
/*
 * Look up the parity of x in a byte-indexed parity table.
 *
 * x      - value whose bits are XOR-folded down to a single byte
 * Partab - table with at least 256 entries, indexed by the folded byte
 *
 * Returns Partab[b], where b is the XOR of the four bytes of x.
 *
 * The fold is done on an unsigned copy and masked to 8 bits. Without the mask
 * the upper bits survive the fold (0x100 becomes 0x101) and the lookup would
 * run past the end of a 256-entry table.
 */
static inline int parity(int x, unsigned char* Partab)
{
    unsigned int folded = (unsigned int)x;

    folded ^= folded >> 16;
    folded ^= folded >> 8;
    return Partab[folded & 0xffu];
}
29 
/*
 * Trace back through stored Viterbi decisions and write out the decoded bits.
 *
 * data      - output buffer; receives nbits entries, one decoded bit (0 or 1)
 *             per byte, at index (bit + tailsize - 6) % nbits
 * nbits     - number of bits to recover
 * endstate  - state to start the traceback from; reduced modulo 64
 * tailsize  - number of 8-byte decision rows at the start of `decisions`
 *             that are skipped before the rows addressed by bit index
 * decisions - decision storage, 8 bytes (64 one-bit decisions) per step,
 *             read as unsigned int words in host byte order
 *
 * Returns the state reached after tracing back the newest 6 (K - 1) bits.
 * When nbits is smaller than 6 the state after tracing back all nbits bits is
 * returned (this is endstate % 64 when nbits is 0); the previous code returned
 * an uninitialised value in that case.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int numstates = 1u << 6;
    const unsigned int row_bytes = numstates / 8;
    const unsigned int constraint_len = 7;
    const unsigned int framebits = nbits;
    /* Unsigned wrap-around is intended when tailsize < K - 1. */
    const unsigned int dif = tailsize - (constraint_len - 1);
    /* Look past tail */
    const unsigned char* rows = decisions + tailsize * row_bytes;

    unsigned int state = endstate % numstates;
    int retval = (int)state;

    for (unsigned int bit = nbits; bit-- > 0;) {
        unsigned int word;
        unsigned int k;

        /* Copy the word out instead of dereferencing a punned pointer. */
        memcpy(&word,
               rows + bit * row_bytes + (state / 32) * sizeof(word),
               sizeof(word));
        k = (word >> (state % 32)) & 1u;

        state = (state >> 1) | (k << (constraint_len - 2));
        data[(bit + dif) % framebits] = (unsigned char)k;

        /* Only the newest K - 1 steps contribute to the returned state. */
        if (framebits - bit <= constraint_len - 1) {
            retval = (int)state;
        }
    }

    return retval;
}
90 
91 
92 #if LV_HAVE_SSE3
93 
94 #include <emmintrin.h>
95 #include <mmintrin.h>
96 #include <pmmintrin.h>
97 #include <stdio.h>
98 #include <xmmintrin.h>
99 
/*
 * Test wrapper ("puppet") that runs volk_8u_x4_conv_k7_r2_8u_spiral() on one
 * frame and then traces back the result with chainback_viterbi().
 *
 * dec       - output; chainback_viterbi() stores framebits / 2 - 6 decoded
 *             bits here, one bit per byte
 * syms      - input symbols, passed unchanged to the kernel
 * framebits - frame length parameter; calls with framebits < 12 return
 *             without touching dec
 *
 * The working buffers are function-local statics that are allocated on demand
 * and never released, so they are shared between calls. The decision buffer
 * is grown whenever a call needs more room than an earlier call allocated
 * (previously it kept the size of the first call and a larger frame overran
 * it). If an allocation fails the call returns without decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* dec,
                                                      unsigned char* syms,
                                                      unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* D;
    static size_t D_size; /* bytes currently allocated for D */
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_polys[2] = { 79, 109 };

    /* Bytes of decision storage used by this call. */
    const size_t decisions_size = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (metrics == NULL || branches == NULL) {
            /* Release whichever half succeeded so a later call can retry. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }

        /* X and Y are the two halves of one metric allocation. */
        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Grow the decision buffer when this frame needs more than we have. */
    if (D_size < decisions_size) {
        unsigned char* grown =
            (unsigned char*)volk_malloc(decisions_size, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_size = decisions_size;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decisions_size);

    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
176 
177 #endif /*LV_HAVE_SSE3*/
178 
179 
180 #if LV_HAVE_NEON
181 
/*
 * Test wrapper ("puppet") that runs volk_8u_x4_conv_k7_r2_8u_neonspiral() on
 * one frame and then traces back the result with chainback_viterbi().
 *
 * dec       - output; chainback_viterbi() stores framebits / 2 - 6 decoded
 *             bits here, one bit per byte
 * syms      - input symbols, passed unchanged to the kernel
 * framebits - frame length parameter; calls with framebits < 12 return
 *             without touching dec
 *
 * The working buffers are function-local statics that are allocated on demand
 * and never released, so they are shared between calls. The decision buffer
 * is grown whenever a call needs more room than an earlier call allocated
 * (previously it kept the size of the first call and a larger frame overran
 * it). If an allocation fails the call returns without decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* dec,
                                                          unsigned char* syms,
                                                          unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* D;
    static size_t D_size; /* bytes currently allocated for D */
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_polys[2] = { 79, 109 };

    /* Bytes of decision storage used by this call. */
    const size_t decisions_size = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (metrics == NULL || branches == NULL) {
            /* Release whichever half succeeded so a later call can retry. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }

        /* X and Y are the two halves of one metric allocation. */
        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Grow the decision buffer when this frame needs more than we have. */
    if (D_size < decisions_size) {
        unsigned char* grown =
            (unsigned char*)volk_malloc(decisions_size, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_size = decisions_size;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decisions_size);

    volk_8u_x4_conv_k7_r2_8u_neonspiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
258 
259 #endif /*LV_HAVE_NEON*/
260 
261 
262 #if LV_HAVE_AVX2
263 
264 #include <immintrin.h>
265 #include <stdio.h>
266 
/*
 * Test wrapper ("puppet") that runs volk_8u_x4_conv_k7_r2_8u_avx2() on one
 * frame and then traces back the result with chainback_viterbi().
 *
 * dec       - output; chainback_viterbi() stores framebits / 2 - 6 decoded
 *             bits here, one bit per byte
 * syms      - input symbols, passed unchanged to the kernel
 * framebits - frame length parameter; calls with framebits < 12 return
 *             without touching dec
 *
 * The working buffers are function-local statics that are allocated on demand
 * and never released, so they are shared between calls. The decision buffer
 * is grown whenever a call needs more room than an earlier call allocated
 * (previously it kept the size of the first call and a larger frame overran
 * it). If an allocation fails the call returns without decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* dec,
                                                    unsigned char* syms,
                                                    unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* D;
    static size_t D_size; /* bytes currently allocated for D */
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_polys[2] = { 79, 109 };

    /* Bytes of decision storage used by this call. */
    const size_t decisions_size = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (metrics == NULL || branches == NULL) {
            /* Release whichever half succeeded so a later call can retry. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }

        /* X and Y are the two halves of one metric allocation. */
        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Grow the decision buffer when this frame needs more than we have. */
    if (D_size < decisions_size) {
        unsigned char* grown =
            (unsigned char*)volk_malloc(decisions_size, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_size = decisions_size;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decisions_size);

    volk_8u_x4_conv_k7_r2_8u_avx2(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
343 
344 #endif /*LV_HAVE_AVX2*/
345 
346 
347 #if LV_HAVE_GENERIC
348 
349 
/*
 * Test wrapper ("puppet") that runs volk_8u_x4_conv_k7_r2_8u_generic() on one
 * frame and then traces back the result with chainback_viterbi().
 *
 * dec       - output; chainback_viterbi() stores framebits / 2 - 6 decoded
 *             bits here, one bit per byte
 * syms      - input symbols, passed unchanged to the kernel
 * framebits - frame length parameter; calls with framebits < 12 return
 *             without touching dec
 *
 * The working buffers are function-local statics that are allocated on demand
 * and never released, so they are shared between calls. The decision buffer
 * is grown whenever a call needs more room than an earlier call allocated
 * (previously it kept the size of the first call and a larger frame overran
 * it). If an allocation fails the call returns without decoding.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* dec,
                                                       unsigned char* syms,
                                                       unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    int d_numstates = (1 << 6);
    int rate = 2;
    static unsigned char* Y;
    static unsigned char* X;
    static unsigned char* D;
    static size_t D_size; /* bytes currently allocated for D */
    static unsigned int excess = 6;
    static unsigned char* Branchtab;
    static unsigned char Partab[256];

    int d_polys[2] = { 79, 109 };

    /* Bytes of decision storage used by this call. */
    const size_t decisions_size = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (metrics == NULL || branches == NULL) {
            /* Release whichever half succeeded so a later call can retry. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }

        /* X and Y are the two halves of one metric allocation. */
        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Grow the decision buffer when this frame needs more than we have. */
    if (D_size < decisions_size) {
        unsigned char* grown =
            (unsigned char*)volk_malloc(decisions_size, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_size = decisions_size;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decisions_size);

    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
427 
428 #endif /* LV_HAVE_GENERIC */
429 
430 #endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
static void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:182
unsigned char * t
Definition: volk_8u_conv_k7_r2puppet_8u.h:19
size_t volk_get_alignment(void)
Get the machine alignment in bytes.
Definition: volk.tmpl.c:90
static void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:100
Definition: volk_8u_conv_k7_r2puppet_8u.h:17
static void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:208
static void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:439
static int chainback_viterbi(unsigned char *data, unsigned int nbits, unsigned int endstate, unsigned int tailsize, unsigned char *decisions)
Definition: volk_8u_conv_k7_r2puppet_8u.h:30
static void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char *dec, unsigned char *syms, unsigned int framebits)
Definition: volk_8u_conv_k7_r2puppet_8u.h:350
for i
Definition: volk_config_fixed.tmpl.h:13
__VOLK_DECL_BEGIN VOLK_API void * volk_malloc(size_t size, size_t alignment)
Allocate size bytes of data aligned to alignment.
Definition: volk_malloc.c:38
static void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char *Y, unsigned char *X, unsigned char *syms, unsigned char *dec, unsigned int framebits, unsigned int excess, unsigned char *Branchtab)
Definition: volk_8u_x4_conv_k7_r2_8u.h:306
tuple data
Definition: plot_best_vs_generic.py:23
static int parity(int x, unsigned char *Partab)
Definition: volk_8u_conv_k7_r2puppet_8u.h:23
unsigned int * w
Definition: volk_8u_conv_k7_r2puppet_8u.h:20