rpm  5.4.15
set.c
Go to the documentation of this file.
1 /*
2  * set.c - base62, golomb and set-string routines
3  *
4  * Copyright (C) 2010, 2011, 2012 Alexey Tourbin <at@altlinux.org>
5  *
6  * License: GPLv2+ or LGPL, see RPM COPYING
7  */
8 
9 /* XXX FIXME: avoid <fcntl.h> borkage on RHEL for now. */
10 #define _FCNTL_H 1
11 
12 #include "system.h"
13 
14 #include <rpmio.h>
15 #ifdef SELF_TEST
16 #include <poptIO.h>
17 #endif
18 
19 #define _RPMSET_INTERNAL
20 #include "set.h"
21 
22 #include "debug.h"
23 
24 int _rpmset_debug = 0;
25 
26 /*==============================================================*/
27 
28 /*
29  * Base62 routines - encode bits with alnum characters.
30  *
31  * This is a base64-based base62 implementation. Values 0..61 are encoded
32  * with '0'..'9', 'a'..'z', and 'A'..'Z'. However, 'Z' is special: it will
33  * also encode 62 and 63. To achieve this, 'Z' will occupy two high bits in
34  * the next character. Thus 'Z' can be interpreted as an escape character
35  * (which indicates that the next character must be handled specially).
36  * Note that setting high bits to "00", "01" or "10" cannot contribute
37  * to another 'Z' (which would require high bits set to "11"). This is
38  * how multiple escapes are avoided.
39  */
40 
41 /* Estimate base62 buffer size required to encode a given number of bits. */
42 static inline
43 int encode_base62_size(int bitc)
44 {
45  /*
46  * In the worst case, which is ZxZxZx..., five bits can make a character;
47  * the remaining bits can make a character, too. And the string must be
48  * null-terminated.
49  */
50  return bitc / 5 + 2;
51 }
52 
53  static
54  void put_digit(char *base62, int c)
55  {
56  assert(c >= 0 && c <= 61);
57  if (c < 10)
58  *base62++ = c + '0';
59  else if (c < 36)
60  *base62++ = c - 10 + 'a';
61  else if (c < 62)
62  *base62++ = c - 36 + 'A';
63  }
64 
65 /* Main base62 encoding routine: pack bitv into base62 string. */
66 static
67 int encode_base62(int bitc, const char *bitv, char *base62)
68 {
69  char *base62_start = base62;
70  int bits2 = 0; /* number of high bits set */
71  int bits6 = 0; /* number of regular bits set */
72  int num6b = 0; /* pending 6-bit number */
73  while (bitc-- > 0) {
74  num6b |= (*bitv++ << bits6++);
75  if (bits6 + bits2 < 6)
76  continue;
77  switch (num6b) {
78  case 61:
79  /* escape */
80  put_digit(base62, 61);
81  /* extra "00...." high bits (in the next character) */
82  bits2 = 2;
83  bits6 = 0;
84  num6b = 0;
85  break;
86  case 62:
87  put_digit(base62, 61);
88  /* extra "01...." high bits */
89  bits2 = 2;
90  bits6 = 0;
91  num6b = 16;
92  break;
93  case 63:
94  put_digit(base62, 61);
95  /* extra "10...." high bits */
96  bits2 = 2;
97  bits6 = 0;
98  num6b = 32;
99  break;
100  default:
101  assert(num6b < 61);
102  put_digit(base62, num6b);
103  bits2 = 0;
104  bits6 = 0;
105  num6b = 0;
106  break;
107  }
108  }
109  if (bits6 + bits2) {
110  assert(num6b < 61);
111  put_digit(base62, num6b);
112  }
113  *base62 = '\0';
114  return base62 - base62_start;
115 }
116 
117 /* Estimate how many bits will result from decoding a base62 string. */
118 static inline
120 {
121  /* Each character will fill at most 6 bits. */
122  return len * 6;
123 }
124 
125 /* This table maps alnum characters to their numeric values. */
126 static
127 const int char_to_num[256] = {
128  [0 ... 255] = 0xee,
129  [0] = 0xff,
130 #define C1(c, b) [c] = c - b
131 #define C2(c, b) C1(c, b), C1(c + 1, b)
132 #define C5(c, b) C1(c, b), C2(c + 1, b), C2(c + 3, b)
133 #define C10(c, b) C5(c, b), C5(c + 5, b)
134  C10('0', '0'),
135 #define C26(c, b) C1(c, b), C5(c + 1, b), C10(c + 6, b), C10(c + 16, b)
136  C26('a', 'a' + 10),
137  C26('A', 'A' + 36),
138 };
139 
140  static inline
141  void put6bits(char *bitv, int c)
142  {
143  *bitv++ = (c >> 0) & 1;
144  *bitv++ = (c >> 1) & 1;
145  *bitv++ = (c >> 2) & 1;
146  *bitv++ = (c >> 3) & 1;
147  *bitv++ = (c >> 4) & 1;
148  *bitv++ = (c >> 5) & 1;
149  }
150  static inline
151  void put4bits(char *bitv, int c)
152  {
153  *bitv++ = (c >> 0) & 1;
154  *bitv++ = (c >> 1) & 1;
155  *bitv++ = (c >> 2) & 1;
156  *bitv++ = (c >> 3) & 1;
157  }
158 
159 /* Main base62 decoding routine: unpack base62 string into bitv[]. */
160 static
161 int decode_base62(const char *base62, char *bitv)
162 {
163  char *bitv_start = bitv;
164  while (1) {
165  long c = (unsigned char) *base62++;
166  int num6b = char_to_num[c];
167  while (num6b < 61) {
168  put6bits(bitv, num6b);
169  c = (unsigned char) *base62++;
170  num6b = char_to_num[c];
171  }
172  if (num6b == 0xff)
173  break;
174  if (num6b == 0xee)
175  return -1;
176  assert(num6b == 61);
177  c = (unsigned char) *base62++;
178  int num4b = char_to_num[c];
179  if (num4b == 0xff)
180  return -2;
181  if (num4b == 0xee)
182  return -3;
183  switch (num4b & (16 + 32)) {
184  case 0:
185  break;
186  case 16:
187  num6b = 62;
188  num4b &= ~16;
189  break;
190  case 32:
191  num6b = 63;
192  num4b &= ~32;
193  break;
194  default:
195  return -4;
196  }
197  put6bits(bitv, num6b);
198  put4bits(bitv, num4b);
199  }
200  return bitv - bitv_start;
201 }
202 
203 #ifdef SELF_TEST
204 static
205 void test_base62(void)
206 {
207  const char rnd_bitv[] = {
208  1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
209  1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1,
210  0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
211  0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
212  /* trigger some 'Z' */
213  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
214  };
215  const int rnd_bitc = sizeof rnd_bitv;
216  /* encode */
217  char base62[encode_base62_size(rnd_bitc)];
218  int len = encode_base62(rnd_bitc, rnd_bitv, base62);
219  assert(len > 0);
220  assert(len == (int)strlen(base62));
221  fprintf(stderr, "len=%d base62=%s\n", len, base62);
222  /* The length cannot be shorter than 6 bits per symbol. */
223  assert(len >= rnd_bitc / 6);
224  /* Neither too long: each second character must fill at least 4 bits. */
225  assert(len <= rnd_bitc / 2 / 4 + rnd_bitc / 2 / 6 + 1);
226  /* decode */
227  char bitv[decode_base62_size(len)];
228  int bitc = decode_base62(base62, bitv);
229  fprintf(stderr, "rnd_bitc=%d bitc=%d\n", rnd_bitc, bitc);
230  assert(bitc >= rnd_bitc);
231  /* Decoded bits must match. */
232  int i;
233  for (i = 0; i < rnd_bitc; i++)
234  assert(rnd_bitv[i] == bitv[i]);
235  // The remaining bits must be zero bits.
236  for (i = rnd_bitc; i < bitc; i++)
237  assert(bitv[i] == 0);
238  fprintf(stderr, "%s: base62 test OK\n", __FILE__);
239 }
240 #endif
241 
242 /*
243  * Golomb-Rice routines - compress integer values into bits.
244  *
245  * The idea is as follows. Input values are assumed to be small integers.
246  * Each value is split into two parts: an integer resulting from its higher
247  * bits and an integer resulting from its lower bits (with the number of lower
248  * bits specified by the Mshift parameter). The frist integer is then stored
249  * in unary coding (which is a variable-length sequence of '0' followed by a
250  * terminating '1'); the second part is stored in normal binary coding (using
251  * Mshift bits).
252  *
253  * The method is justified by the fact that, since most of the values are
254  * small, their first parts will be short (typically 1..3 bits). In particular,
255  * the method is known to be optimal for uniformly distributed hash values,
256  * after the values are sorted and delta-encoded. See e.g.
257  * Putze, F.; Sanders, P.; Singler, J. (2007),
258  * "Cache-, Hash- and Space-Efficient Bloom Filters",
259  * http://algo2.iti.uni-karlsruhe.de/singler/publications/cacheefficientbloomfilters-wea2007.pdf
260  */
261 
262  static
263  int log2i(int n)
264  {
265  int m = 0;
266  while (n >>= 1)
267  m++;
268  return m;
269  }
270 
271 /* Calculate Mshift paramter for encoding. */
272 static
273 int encode_golomb_Mshift(int c, int bpp)
274 {
275  /*
276  * XXX Slightly better Mshift estimations are probably possible.
277  * Recheck "Compression and coding algorithms" by Moffat & Turpin.
278  */
279  int Mshift = bpp - log2i(c) - 1;
280  /* Adjust out-of-range values. */
281  if (Mshift < 7)
282  Mshift = 7;
283  if (Mshift > 31)
284  Mshift = 31;
285  assert(Mshift < bpp);
286  return Mshift;
287 }
288 
289 /* Estimate how many bits can be filled up. */
290 static inline
291 int encode_golomb_size(int c, int Mshift)
292 {
293  /*
294  * XXX No precise estimation. However, we do not expect unary-encoded bits
295  * to take more than binary-encoded Mshift bits.
296  */
297  return Mshift * 2 * c + 16;
298 }
299 
300 /* Main golomb encoding routine: package integers into bits. */
301 static
302 int encode_golomb(int c, const unsigned *v, int Mshift, char *bitv)
303 {
304  char *bitv_start = bitv;
305  const unsigned mask = (1 << Mshift) - 1;
306  while (c > 0) {
307  c--;
308  unsigned v0 = *v++;
309  int i;
310  /* first part: variable-length sequence */
311  unsigned q = v0 >> Mshift;
312  for (i = 0; i < (int)q; i++)
313  *bitv++ = 0;
314  *bitv++ = 1;
315  /* second part: lower Mshift bits */
316  unsigned r = v0 & mask;
317  for (i = 0; i < Mshift; i++)
318  *bitv++ = (r >> i) & 1;
319  }
320  return bitv - bitv_start;
321 }
322 
323 /* Estimate how many values will emerge. */
324 static inline
325 int decode_golomb_size(int bitc, int Mshift)
326 {
327  /*
328  * Each (Mshift + 1) bits can make a value.
329  * The remaining bits cannot make a value, though.
330  */
331  return bitc / (Mshift + 1);
332 }
333 
334 /* Main golomb decoding routine: unpackage bits into values. */
335 static
336 int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
337 {
338  unsigned *v_start = v;
339  /* next value */
340  while (bitc > 0) {
341  /* first part */
342  unsigned q = 0;
343  char bit = 0;
344  while (bitc > 0) {
345  bitc--;
346  bit = *bitv++;
347  if (bit == 0)
348  q++;
349  else
350  break;
351  }
352  /* trailing zero bits in the input are okay */
353  if (bitc == 0 && bit == 0) {
354  /* up to 5 bits can be used to complete last character */
355  if (q > 5)
356  return -10;
357  break;
358  }
359  /* otherwise, incomplete value is not okay */
360  if (bitc < Mshift)
361  return -11;
362  /* second part */
363  unsigned r = 0;
364  int i;
365  for (i = 0; i < Mshift; i++) {
366  bitc--;
367  if (*bitv++)
368  r |= (1 << i);
369  }
370  /* the value */
371  *v++ = (q << Mshift) | r;
372  }
373  return v - v_start;
374 }
375 
376 #ifdef SELF_TEST
377 static
378 void test_golomb(void)
379 {
380  const unsigned rnd_v[] = {
381  /* do re mi fa sol la si */
382  1, 2, 3, 4, 5, 6, 7,
383  /* koshka sela na taksi */
384  7, 6, 5, 4, 3, 2, 1,
385  };
386  const int rnd_c = sizeof rnd_v / sizeof *rnd_v;
387  int bpp = 10;
388  int Mshift = encode_golomb_Mshift(rnd_c, bpp);
389  fprintf(stderr, "rnd_c=%d bpp=%d Mshift=%d\n", rnd_c, bpp, Mshift);
390  assert(Mshift > 0);
391  assert(Mshift < bpp);
392  /* encode */
393  int alloc_bitc = encode_golomb_size(rnd_c, Mshift);
394  assert(alloc_bitc > rnd_c);
395  char bitv[alloc_bitc];
396  int bitc = encode_golomb(rnd_c, rnd_v, Mshift, bitv);
397  fprintf(stderr, "alloc_bitc=%d bitc=%d\n", alloc_bitc, bitc);
398  assert(bitc > rnd_c);
399  assert(bitc <= alloc_bitc);
400  /* decode */
401  int alloc_c = decode_golomb_size(bitc, Mshift);
402  assert(alloc_c >= rnd_c);
403  unsigned v[alloc_c];
404  int c = decode_golomb(bitc, bitv, Mshift, v);
405  fprintf(stderr, "rnd_c=%d alloc_c=%d c=%d\n", rnd_c, alloc_c, c);
406  assert(alloc_c >= c);
407  /* Decoded values must match. */
408  assert(rnd_c == c);
409  int i;
410  for (i = 0; i < c; i++)
411  assert(rnd_v[i] == v[i]);
412  /* At the end of the day, did it save your money? */
413  int golomb_bpp = bitc / c;
414  fprintf(stderr, "bpp=%d golomb_bpp=%d\n", bpp, golomb_bpp);
415  assert(golomb_bpp < bpp);
416  fprintf(stderr, "%s: golomb test OK\n", __FILE__);
417 }
418 #endif
419 
420 /*
421  * Combined base62+gololb decoding routine - implemented for efficiency.
422  *
423  * As Dmitry V. Levin once noticed, when it comes to speed, very few objections
424  * can be made against complicating the code. Which reminds me of Karl Marx,
425  * who said that there is not a crime at which a capitalist will scruple for
426  * the sake of 300 per cent profit, even at the chance of being hanged. Anyway,
427  * here Alexey Tourbin demonstrates that by using sophisticated - or should he
428  * say "ridiculously complicated" - techniques it is indeed possible to gain
429  * some profit, albeit of another kind.
430  */
431 
432 /* Word types (when two bytes from base62 string cast to unsigned short). */
433 enum {
434  W_AA = 0x0000,
435  W_AZ = 0x1000,
436  W_ZA = 0x2000,
437  W_A0 = 0x3000,
438  W_0X = 0x4000,
439  W_EE = 0xeeee,
440 };
441 
442 /* Combine two characters into array index (with respect to endianness). */
443 #if BYTE_ORDER && BYTE_ORDER == LITTLE_ENDIAN
444 #define CCI(c1, c2) ((c1) | ((c2) << 8))
445 #elif BYTE_ORDER && BYTE_ORDER == BIG_ENDIAN
446 #define CCI(c1, c2) ((c2) | ((c1) << 8))
447 #else
448 #error "unknown byte order"
449 #endif
450 
451 /* Maps base62 word into numeric value (decoded bits) ORed with word type. */
452 static
453 const unsigned short word_to_num[65536] = {
454  [0 ... 65535] = W_EE,
455 #define AA1(c1, c2, b1, b2) [CCI(c1, c2)] = (c1 - b1) | ((c2 - b2) << 6)
456 #define AA1x2(c1, c2, b1, b2) AA1(c1, c2, b1, b2), AA1(c1, c2 + 1, b1, b2)
457 #define AA1x3(c1, c2, b1, b2) AA1(c1, c2, b1, b2), AA1x2(c1, c2 + 1, b1, b2)
458 #define AA1x5(c1, c2, b1, b2) AA1x2(c1, c2, b1, b2), AA1x3(c1, c2 + 2, b1, b2)
459 #define AA1x10(c1, c2, b1, b2) AA1x5(c1, c2, b1, b2), AA1x5(c1, c2 + 5, b1, b2)
460 #define AA1x20(c1, c2, b1, b2) AA1x10(c1, c2, b1, b2), AA1x10(c1, c2 + 10, b1, b2)
461 #define AA1x25(c1, c2, b1, b2) AA1x5(c1, c2, b1, b2), AA1x20(c1, c2 + 5, b1, b2)
462 #define AA2x1(c1, c2, b1, b2) AA1(c1, c2, b1, b2), AA1(c1 + 1, c2, b1, b2)
463 #define AA3x1(c1, c2, b1, b2) AA1(c1, c2, b1, b2), AA2x1(c1 + 1, c2, b1, b2)
464 #define AA5x1(c1, c2, b1, b2) AA2x1(c1, c2, b1, b2), AA3x1(c1 + 2, c2, b1, b2)
465 #define AA10x1(c1, c2, b1, b2) AA5x1(c1, c2, b1, b2), AA5x1(c1 + 5, c2, b1, b2)
466 #define AA20x1(c1, c2, b1, b2) AA10x1(c1, c2, b1, b2), AA10x1(c1 + 10, c2, b1, b2)
467 #define AA25x1(c1, c2, b1, b2) AA5x1(c1, c2, b1, b2), AA20x1(c1 + 5, c2, b1, b2)
468 #define AA26x1(c1, c2, b1, b2) AA1(c1, c2, b1, b2), AA25x1(c1 + 1, c2, b1, b2)
469 #define AA2x5(c1, c2, b1, b2) AA1x5(c1, c2, b1, b2), AA1x5(c1 + 1, c2, b1, b2)
470 #define AA3x5(c1, c2, b1, b2) AA1x5(c1, c2, b1, b2), AA2x5(c1 + 1, c2, b1, b2)
471 #define AA5x5(c1, c2, b1, b2) AA2x5(c1, c2, b1, b2), AA3x5(c1 + 2, c2, b1, b2)
472 #define AA5x10(c1, c2, b1, b2) AA5x5(c1, c2, b1, b2), AA5x5(c1, c2 + 5, b1, b2)
473 #define AA10x5(c1, c2, b1, b2) AA5x5(c1, c2, b1, b2), AA5x5(c1 + 5, c2, b1, b2)
474 #define AA20x5(c1, c2, b1, b2) AA10x5(c1, c2, b1, b2), AA10x5(c1 + 10, c2, b1, b2)
475 #define AA25x5(c1, c2, b1, b2) AA5x5(c1, c2, b1, b2), AA20x5(c1 + 5, c2, b1, b2)
476 #define AA10x10(c1, c2, b1, b2) AA5x10(c1, c2, b1, b2), AA5x10(c1 + 5, c2, b1, b2)
477 #define AA10x20(c1, c2, b1, b2) AA10x10(c1, c2, b1, b2), AA10x10(c1, c2 + 10, b1, b2)
478 #define AA10x25(c1, c2, b1, b2) AA10x5(c1, c2, b1, b2), AA10x20(c1, c2 + 5, b1, b2)
479 #define AA10x26(c1, c2, b1, b2) AA10x1(c1, c2, b1, b2), AA10x25(c1, c2 + 1, b1, b2)
480 #define AA20x10(c1, c2, b1, b2) AA10x10(c1, c2, b1, b2), AA10x10(c1 + 10, c2, b1, b2)
481 #define AA25x10(c1, c2, b1, b2) AA5x10(c1, c2, b1, b2), AA20x10(c1 + 5, c2, b1, b2)
482 #define AA26x10(c1, c2, b1, b2) AA1x10(c1, c2, b1, b2), AA25x10(c1 + 1, c2, b1, b2)
483 #define AA25x20(c1, c2, b1, b2) AA25x10(c1, c2, b1, b2), AA25x10(c1, c2 + 10, b1, b2)
484 #define AA25x25(c1, c2, b1, b2) AA25x5(c1, c2, b1, b2), AA25x20(c1, c2 + 5, b1, b2)
485 #define AA25x26(c1, c2, b1, b2) AA25x1(c1, c2, b1, b2), AA25x25(c1, c2 + 1, b1, b2)
486 #define AA26x25(c1, c2, b1, b2) AA1x25(c1, c2, b1, b2), AA25x25(c1 + 1, c2, b1, b2)
487 #define AA26x26(c1, c2, b1, b2) AA26x1(c1, c2, b1, b2), AA26x25(c1, c2 + 1, b1, b2)
488  AA10x10('0', '0', '0', '0'),
489  AA10x26('0', 'a', '0', 'a' + 10),
490  AA10x25('0', 'A', '0', 'A' + 36),
491  AA26x10('a', '0', 'a' + 10, '0'),
492  AA25x10('A', '0', 'A' + 36, '0'),
493  AA26x26('a', 'a', 'a' + 10, 'a' + 10),
494  AA26x25('a', 'A', 'a' + 10, 'A' + 36),
495  AA25x26('A', 'a', 'A' + 36, 'a' + 10),
496  AA25x25('A', 'A', 'A' + 36, 'A' + 36),
497 #define AZ1(c, b) [CCI(c, 'Z')] = (c - b) | W_AZ
498 #define AZ2(c, b) AZ1(c, b), AZ1(c + 1, b)
499 #define AZ5(c, b) AZ1(c, b), AZ2(c + 1, b), AZ2(c + 3, b)
500 #define AZ10(c, b) AZ5(c, b), AZ5(c + 5, b)
501 #define AZ25(c, b) AZ5(c, b), AZ10(c + 5, b), AZ10(c + 15, b)
502 #define AZ26(c, b) AZ1(c, b), AZ25(c + 1, b)
503  AZ10('0', '0'),
504  AZ26('a', 'a' + 10),
505  AZ25('A', 'A' + 36),
506 #define ZA1(c, b) [CCI('Z', c)] = (61 + ((c - b) >> 4)) | (((c - b) & 0xf) << 6) | W_ZA
507 #define ZA2(c, b) ZA1(c, b), ZA1(c + 1, b)
508 #define ZA5(c, b) ZA1(c, b), ZA2(c + 1, b), ZA2(c + 3, b)
509 #define ZA10(c, b) ZA5(c, b), ZA5(c + 5, b)
510 #define ZA25(c, b) ZA5(c, b), ZA10(c + 5, b), ZA10(c + 15, b)
511 #define ZA26(c, b) ZA1(c, b), ZA25(c + 1, b)
512  ZA10('0', '0'),
513  ZA26('a', 'a' + 10),
514  ZA25('A', 'A' + 36),
515 #define A01(c, b) [CCI(c, 0)] = (c - b) | W_A0
516 #define A02(c, b) A01(c, b), A01(c + 1, b)
517 #define A05(c, b) A01(c, b), A02(c + 1, b), A02(c + 3, b)
518 #define A010(c, b) A05(c, b), A05(c + 5, b)
519 #define A025(c, b) A05(c, b), A010(c + 5, b), A010(c + 15, b)
520 #define A026(c, b) A01(c, b), A025(c + 1, b)
521  A010('0', '0'),
522  A026('a', 'a' + 10),
523  A025('A', 'A' + 36),
524 #define OX(c) [CCI(0, c)] = W_0X
525 #define OX4(c) OX(c), OX(c + 1), OX(c + 2), OX(c + 3)
526 #define OX16(c) OX4(c), OX4(c + 4), OX4(c + 8), OX4(c + 12)
527 #define OX64(c) OX16(c), OX16(c + 16), OX16(c + 32), OX16(c + 48)
528 #define OX256(c) OX64(c), OX64(c + 64), OX64(c + 128), OX64(c + 192)
529  OX256('\0'),
530 };
531 
532 /* Combined base62+golomb decoding routine. */
533 static
534 int decode_base62_golomb(const char *base62, int Mshift, unsigned *v)
535 {
536  unsigned *v_start = v;
537  unsigned mask = (1 << Mshift) - 1;
538  unsigned q = 0;
539  unsigned r = 0;
540  int rfill = 0;
541  long c, w;
542  int n, vbits, left;
543  unsigned bits, morebits;
544  /* need align */
545  if (1 & (long) base62) {
546  c = (unsigned char) *base62++;
547  bits = char_to_num[c];
548  if (bits < 61)
549  goto put6q_align;
550  else {
551  if (bits == 0xff)
552  goto eolq;
553  if (bits == 0xee)
554  return -1;
555  assert(bits == 61);
556  goto esc1q;
557  }
558  }
559  /* regular mode, process two-byte words */
560 #define Get24(X) \
561  w = *(unsigned short *) base62; \
562  base62 += 2; \
563  bits = word_to_num[w]; \
564  if (bits >= 0x1000) \
565  goto gotNN ## X; \
566  w = *(unsigned short *) base62; \
567  base62 += 2; \
568  morebits = word_to_num[w]; \
569  if (morebits >= 0x1000) \
570  goto put12 ## X; \
571  bits |= (morebits << 12); \
572  goto put24 ## X
573 #define Get12(X) \
574  bits = morebits
575 #define GotNN(X) \
576  switch (bits & 0xf000) { \
577  case W_AZ: \
578  bits &= 0x0fff; \
579  goto put6 ## X ## _AZ; \
580  case W_ZA: \
581  bits &= 0x0fff; \
582  goto put10 ## X ## _ZA; \
583  case W_A0: \
584  bits &= 0x0fff; \
585  goto put6 ## X ## _A0; \
586  case W_0X: \
587  goto eol ## X; \
588  default: \
589  return -2; \
590  }
591  /* make coroutines */
592  get24q: Get24(q);
593  get24r: Get24(r);
594  get12q: Get12(q);
595  gotNNq: GotNN(q);
596  get12r: Get12(r);
597  gotNNr: GotNN(r);
598  /* escape mode, handle 2 bytes one by one */
599 #define Esc1(X) \
600  bits = 61; \
601  c = (unsigned char) *base62++; \
602  morebits = char_to_num[c]; \
603  if (morebits == 0xff) \
604  return -3; \
605  if (morebits == 0xee) \
606  return -4; \
607  switch (morebits & (16 + 32)) { \
608  case 0: \
609  break; \
610  case 16: \
611  bits = 62; \
612  morebits &= ~16; \
613  break; \
614  case 32: \
615  bits = 63; \
616  morebits &= ~32; \
617  break; \
618  default: \
619  return -5; \
620  } \
621  bits |= (morebits << 6); \
622  goto put10 ## X ## _esc1
623 #define Esc2(X) \
624  c = (unsigned char) *base62++; \
625  bits = char_to_num[c]; \
626  if (bits < 61) \
627  goto put6 ## X ## _esc2; \
628  else { \
629  if (bits == 0xff) \
630  goto eol ## X; \
631  if (bits == 0xee) \
632  return -6; \
633  goto esc1 ## X; \
634  }
635  /* make coroutines */
636  esc1q: Esc1(q);
637  esc2q: Esc2(q);
638  esc1r: Esc1(r);
639  esc2r: Esc2(r);
640  /* golomb pieces */
641 #define QInit(N) \
642  n = N
643 #define RInit(N) \
644  n = N; \
645  r |= (bits << rfill); \
646  rfill += n
647 #define RMake(Get) \
648  left = rfill - Mshift; \
649  if (left < 0) \
650  goto Get ## r; \
651  r &= mask; \
652  *v++ = (q << Mshift) | r; \
653  q = 0; \
654  bits >>= n - left; \
655  n = left
656 #define QMake(Get) \
657  if (bits == 0) { \
658  q += n; \
659  goto Get ## q; \
660  } \
661  vbits = __builtin_ffs(bits); \
662  n -= vbits; \
663  bits >>= vbits; \
664  q += vbits - 1; \
665  r = bits; \
666  rfill = n
667  /* this assumes that minimum Mshift value is 7 */
668 #define Put24Q(Get) \
669  QInit(24); \
670  QMake(Get); RMake(Get); \
671  QMake(Get); RMake(Get); \
672  QMake(Get); RMake(Get); \
673  goto Get ## q
674 #define Put24R(Get) \
675  RInit(24); \
676  RMake(Get); \
677  QMake(Get); RMake(Get); \
678  QMake(Get); RMake(Get); \
679  QMake(Get); goto Get ## r
680 #define Put12Q(Get) \
681  QInit(12); \
682  QMake(Get); RMake(Get); \
683  QMake(Get); goto Get ## r
684 #define Put12R(Get) \
685  RInit(12); \
686  RMake(Get); \
687  QMake(Get); RMake(Get); \
688  QMake(Get); goto Get ## r
689 #define Put10Q(Get) \
690  QInit(10); \
691  QMake(Get); RMake(Get); \
692  QMake(Get); goto Get ## r
693 #define Put10R(Get) \
694  RInit(10); \
695  RMake(Get); \
696  QMake(Get); RMake(Get); \
697  QMake(Get); goto Get ## r
698 #define Put6Q(Get) \
699  QInit(6); \
700  QMake(Get); goto Get ## r
701 #define Put6R(Get) \
702  RInit(6); \
703  RMake(Get); \
704  QMake(Get); goto Get ## r
705  /* make coroutines */
706  put24q: Put24Q(get24);
707  put24r: Put24R(get24);
708  put12q: Put12Q(get12);
709  put12r: Put12R(get12);
710  put6q_align:
711  put6q_esc2: Put6Q(get24);
712  put6r_esc2: Put6R(get24);
713  put6q_AZ: Put6Q(esc1);
714  put6r_AZ: Put6R(esc1);
715  put10q_esc1: Put10Q(esc2);
716  put10r_esc1: Put10R(esc2);
717  put10q_ZA: Put10Q(get24);
718  put10r_ZA: Put10R(get24);
719  put6q_A0: Put6Q(eol);
720  put6r_A0: Put6R(eol);
721  /* handle end of line and return */
722  eolq:
723  if (q > 5)
724  return -10;
725  return v - v_start;
726  eolr:
727  return -11;
728 }
729 
730 #ifdef SELF_TEST
731 static
732 void test_word_table(void)
733 {
734  int i, j;
735  for (i = 0; i < 256; i++)
736  for (j = 0; j < 256; j++) {
737  unsigned char u[2] __attribute__((aligned(2))) = { i, j };
738  unsigned short ix = *(unsigned short *) u;
739  int w = word_to_num[ix];
740  if (w < 0x1000)
741  assert(w == (char_to_num[i] | (char_to_num[j] << 6)));
742  else
743  assert(char_to_num[i] >= 61 || char_to_num[j] >= 61);
744  }
745  fprintf(stderr, "%s: word table test OK\n", __FILE__);
746 }
747 
748 static
749 void test_base62_golomb(void)
750 {
751  const char str[] = "set:hdf7q2P5VZwtLGr9TKxhrEM1";
752  const char *base62 = str + 4 + 2;
753  int Mshift = 10;
754  char bitv[256];
755  int bitc = decode_base62(base62, bitv);
756  assert(bitc > 0);
757  unsigned v1[32], v2[32];
758  int c1 = decode_golomb(bitc, bitv, Mshift, v1);
759  assert(c1 > 0);
760  int c2 = decode_base62_golomb(base62, Mshift, v2);
761  assert(c2 > 0);
762  assert(c1 == c2);
763  int i;
764  for (i = 0; i < c1; i++)
765  assert(v1[i] == v2[i]);
766  fprintf(stderr, "%s: base62_golomb test OK\n", __FILE__);
767 }
768 #endif
769 
770 /*
771  * Delta encoding routines - replace an increasing sequence of integer values
772  * by the sequence of their differences.
773  */
774 
775 static
776 void encode_delta(int c, unsigned *v)
777 {
778  assert(c > 0);
779  unsigned *v_end = v + c;
780  unsigned v0 = *v++;
781  while (v < v_end) {
782  *v -= v0;
783  v0 += *v++;
784  }
785 }
786 
787 static
788 void decode_delta(int c, unsigned *v)
789 {
790  assert(c > 0);
791  unsigned *v_end = v + c;
792  unsigned v0 = *v++;
793  while (v < v_end) {
794  *v += v0;
795  v0 = *v++;
796  }
797 }
798 
799 #ifdef SELF_TEST
800 static
801 void test_delta(void)
802 {
803  unsigned v[] = {
804  1, 3, 7, 0
805  };
806  int c = 3;
807  encode_delta(c, v);
808  assert(v[0] == 1);
809  assert(v[1] == 2);
810  assert(v[2] == 4);
811  assert(v[3] == 0);
812  decode_delta(c, v);
813  assert(v[0] == 1);
814  assert(v[1] == 3);
815  assert(v[2] == 7);
816  assert(v[3] == 0);
817  fprintf(stderr, "%s: delta test OK\n", __FILE__);
818 }
819 #endif
820 
821 /*
822  * Higher-level set-string routines - serialize integers into set-string.
823  *
824  * A set-string looks like this: "set:bMxyz..."
825  *
826  * The "set:" prefix marks set-versions in rpm (to distinguish them between
827  * regular rpm versions). It is assumed to be stripped here.
828  *
829  * The next two characters (denoted 'b' and 'M') encode two small integers
830  * in the range 7..32 using 'a'..'z'. The first character encodes bpp.
831  * Valid bpp range is 10..32. The second character encodes Mshift. Valid
832  * Mshift range is 7..31. Also, valid Mshift must be less than bpp.
833  *
834  * The rest ("xyz...") is a variable-length sequence of alnum characters.
835  * It encodes a (sorted) set of (non-negative) integer values, as follows:
836  * integers are delta-encoded, golomb-compressed and base62-serialized.
837  */
838 
839 static
840 int encode_set_size(int c, int bpp)
841 {
842  int Mshift = encode_golomb_Mshift(c, bpp);
843  int bitc = encode_golomb_size(c, Mshift);
844  /* two leading characters are special */
845  return 2 + encode_base62_size(bitc);
846 }
847 
848 static
849 int encode_set(int c, unsigned *v, int bpp, char *base62)
850 {
851  /* XXX v is non-const due to encode_delta */
852  int Mshift = encode_golomb_Mshift(c, bpp);
853  int bitc = encode_golomb_size(c, Mshift);
854  char bitv[bitc];
855  /* bpp */
856  if (bpp < 10 || bpp > 32)
857  return -1;
858  *base62++ = bpp - 7 + 'a';
859  /* golomb parameter */
860  if (Mshift < 7 || Mshift > 31)
861  return -2;
862  *base62++ = Mshift - 7 + 'a';
863  /* delta */
864  encode_delta(c, v);
865  /* golomb */
866  bitc = encode_golomb(c, v, Mshift, bitv);
867 #ifdef SELF_TEST
868  decode_delta(c, v);
869 #endif
870  if (bitc < 0)
871  return -3;
872  /* base62 */
873  int len = encode_base62(bitc, bitv, base62);
874  if (len < 0)
875  return -4;
876  return 2 + len;
877 }
878 
879 static
880 int decode_set_init(const char *str, int *pbpp, int *pMshift)
881 {
882  /* 7..32 values encoded with 'a'..'z' */
883  int bpp = *str++ + 7 - 'a';
884  if (bpp < 10 || bpp > 32)
885  return -1;
886  /* golomb parameter */
887  int Mshift = *str++ + 7 - 'a';
888  if (Mshift < 7 || Mshift > 31)
889  return -2;
890  if (Mshift >= bpp)
891  return -3;
892  /* no empty sets for now */
893  if (*str == '\0')
894  return -4;
895  *pbpp = bpp;
896  *pMshift = Mshift;
897  return 0;
898 }
899 
900 static inline
901 int decode_set_size(int len, int Mshift)
902 {
903  int bitc = decode_base62_size(len - 2);
904  return decode_golomb_size(bitc, Mshift);
905 }
906 
907 static
908 int decode_set(const char *str, int Mshift, unsigned *v)
909 {
910  const char *base62 = str + 2;
911  /* separate base62+golomb stages, for reference */
912  if (0) {
913  /* base62 */
914  int len = strlen(base62);
915  char bitv[decode_base62_size(len)];
916  int bitc = decode_base62(base62, bitv);
917  if (bitc < 0)
918  return bitc;
919  /* golomb */
920  int c = decode_golomb(bitc, bitv, Mshift, v);
921  if (c < 0)
922  return c;
923  /* delta */
924  decode_delta(c, v);
925  return c;
926  }
927  /* combined base62+golomb stage */
928  int c = decode_base62_golomb(base62, Mshift, v);
929  if (c < 0)
930  return c;
931  /* delta */
932  decode_delta(c, v);
933  return c;
934 }
935 
936 /* Special decode_set version with LRU caching. */
937 static
938 int cache_decode_set(const char *str, int Mshift, const unsigned **pv)
939 {
940  struct cache_ent {
941  char *str;
942  int len;
943  int c;
944  unsigned v[];
945  };
946 #define CACHE_SIZE 256
947 #define PIVOT_SIZE 243
948  static int hc;
949  static unsigned hv[CACHE_SIZE];
950  static struct cache_ent *ev[CACHE_SIZE];
951  /* look up in the cache */
952  int i;
953  unsigned *hp;
954  struct cache_ent *ent;
955  unsigned hash = str[0] | (str[2] << 8) | (str[3] << 16);
956  for (hp = hv; hp < hv + hc; hp++) {
957  if (hash == *hp) {
958  i = hp - hv;
959  ent = ev[i];
960  if (memcmp(str, ent->str, ent->len + 1) == 0) {
961  /* hit, move to front */
962  if (i) {
963  memmove(hv + 1, hv, i * sizeof(hv[0]));
964  memmove(ev + 1, ev, i * sizeof(ev[0]));
965  hv[0] = hash;
966  ev[0] = ent;
967  }
968  *pv = ent->v;
969  return ent->c;
970  }
971  }
972  }
973  /* decode */
974  int len = strlen(str);
975  int c = decode_set_size(len, Mshift);
976 #define SENTINELS 8
977  ent = malloc(sizeof(*ent) + len + 1 + (c + SENTINELS) * sizeof(unsigned));
978  assert(ent);
979  c = ent->c = decode_set(str, Mshift, ent->v);
980  if (c <= 0) {
981  free(ent);
982  return c;
983  }
984  for (i = 0; i < SENTINELS; i++)
985  ent->v[c + i] = ~0u;
986  ent->str = (char *)(ent->v + c + SENTINELS);
987  memcpy(ent->str, str, len + 1);
988  ent->len = len;
989  /* insert */
990  if (hc < CACHE_SIZE)
991  i = hc++;
992  else {
993  /* free last entry */
994  free(ev[CACHE_SIZE - 1]);
995  /* position at midpoint */
996  i = PIVOT_SIZE;
997  memmove(hv + i + 1, hv + i, (CACHE_SIZE - i - 1) * sizeof(hv[0]));
998  memmove(ev + i + 1, ev + i, (CACHE_SIZE - i - 1) * sizeof(ev[0]));
999  }
1000  hv[i] = hash;
1001  ev[i] = ent;
1002  *pv = ent->v;
1003  return c;
1004 }
1005 
1006 /* Reduce a set of (bpp + 1) values to a set of bpp values. */
1007 static
1008 int downsample_set(int c, const unsigned *v, unsigned *w, int bpp)
1009 {
1010  unsigned mask = (1 << bpp) - 1;
1011  /* find the first element with high bit set */
1012  int l = 0;
1013  int u = c;
1014  while (l < u) {
1015  int i = (l + u) / 2;
1016  if (v[i] <= mask)
1017  l = i + 1;
1018  else
1019  u = i;
1020  }
1021  /* initialize parts */
1022  const unsigned *w_start = w;
1023  const unsigned *v1 = v + 0, *v1end = v + u;
1024  const unsigned *v2 = v + u, *v2end = v + c;
1025  /* merge v1 and v2 into w */
1026  if (v1 < v1end && v2 < v2end) {
1027  unsigned v1val = *v1;
1028  unsigned v2val = *v2 & mask;
1029  while (1) {
1030  if (v1val < v2val) {
1031  *w++ = v1val;
1032  v1++;
1033  if (v1 == v1end)
1034  break;
1035  v1val = *v1;
1036  }
1037  else if (v2val < v1val) {
1038  *w++ = v2val;
1039  v2++;
1040  if (v2 == v2end)
1041  break;
1042  v2val = *v2 & mask;
1043  }
1044  else {
1045  *w++ = v1val;
1046  v1++;
1047  v2++;
1048  if (v1 == v1end)
1049  break;
1050  if (v2 == v2end)
1051  break;
1052  v1val = *v1;
1053  v2val = *v2 & mask;
1054  }
1055  }
1056  }
1057  /* append what's left */
1058  while (v1 < v1end)
1059  *w++ = *v1++;
1060  while (v2 < v2end)
1061  *w++ = *v2++ & mask;
1062  return w - w_start;
1063 }
1064 
1065 #ifdef SELF_TEST
1066 static
1067 void test_set(void)
1068 {
1069  unsigned rnd_v[] = {
1070  0x020a, 0x07e5, 0x3305, 0x35f5,
1071  0x4980, 0x4c4f, 0x74ef, 0x7739,
1072  0x82ae, 0x8415, 0xa3e7, 0xb07e,
1073  0xb584, 0xb89f, 0xbb40, 0xf39e,
1074  };
1075  int rnd_c = sizeof rnd_v / sizeof *rnd_v;
1076 
1077  /* encode */
1078  int bpp = 16;
1079  char base62[encode_set_size(rnd_c, bpp)];
1080  int len = encode_set(rnd_c, rnd_v, bpp, base62);
1081  assert(len > 0);
1082  fprintf(stderr, "len=%d set=%s\n", len, base62);
1083 
1084  /* decode */
1085  int Mshift = bpp;
1086  int rc = decode_set_init(base62, &bpp, &Mshift);
1087  assert(rc == 0);
1088  assert(bpp == 16);
1089  assert(Mshift < bpp);
1090  int c = decode_set_size(len, Mshift);
1091  assert(c >= rnd_c);
1092  unsigned vbuf[c];
1093  const unsigned *v = vbuf;
1094  c = decode_set(base62, Mshift, vbuf);
1095 
1096  /* Decoded values must match. */
1097  assert(c == rnd_c);
1098  int i;
1099  for (i = 0; i < c; i++)
1100  assert(v[i] == rnd_v[i]);
1101 
1102  /* Cached version. */
1103  c = cache_decode_set(base62, Mshift, &v);
1104  assert(c == rnd_c);
1105  for (i = 0; i < c; i++)
1106  assert(v[i] == rnd_v[i]);
1107  fprintf(stderr, "%s: set test OK\n", __FILE__);
1108 }
1109 #endif
1110 
1111 /*==============================================================*/
1112 
1113 int rpmsetCmp(const char * str1, const char * str2)
1114 {
1115  int rc = 0;
1116 
1117  if (strncmp(str1, "set:", 4) == 0)
1118  str1 += 4;
1119  if (strncmp(str2, "set:", 4) == 0)
1120  str2 += 4;
1121 
1122  /* initialize decoding */
1123  int bpp1, Mshift1;
1124  int bpp2, Mshift2;
1125  if (decode_set_init(str1, &bpp1, &Mshift1) < 0) {
1126  rc = -3;
1127 if (_rpmset_debug)
1128 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1129  return rc;
1130  }
1131  if (decode_set_init(str2, &bpp2, &Mshift2) < 0) {
1132  rc = -4;
1133 if (_rpmset_debug)
1134 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1135  return rc;
1136  }
1137 
1138  /* decode set1 (comes on behalf of provides) */
1139  const unsigned *v1 = NULL;
1140  int c1 = cache_decode_set(str1, Mshift1, &v1);
1141  if (c1 < 0) {
1142  rc = -3;
1143 if (_rpmset_debug)
1144 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1145  return rc;
1146  }
1147  unsigned v1bufA[c1 + 1];
1148  unsigned v1bufB[c1 + 1];
1149 
1150  /* decode set2 (on the stack) */
1151  int len2 = strlen(str2);
1152  int c2 = decode_set_size(len2, Mshift2);
1153  unsigned v2bufA[c2];
1154  unsigned v2bufB[c2];
1155  const unsigned *v2 = v2bufA;
1156  c2 = decode_set(str2, Mshift2, v2bufA);
1157  if (c2 < 0) {
1158  rc = -4;
1159 if (_rpmset_debug)
1160 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1161  return rc;
1162  }
1163 
1164  /* adjust for comparison */
1165  int i;
1166  while (bpp1 > bpp2) {
1167  unsigned *v1buf = v1bufA;
1168  if (v1 == v1buf)
1169  v1buf = v1bufB;
1170  bpp1--;
1171  c1 = downsample_set(c1, v1, v1buf, bpp1);
1172  for (i = 0; i < SENTINELS; i++)
1173  v1buf[c1 + i] = ~0u;
1174  v1 = v1buf;
1175  }
1176  while (bpp2 > bpp1) {
1177  unsigned *v2buf = v2bufA;
1178  if (v2 == v2buf)
1179  v2buf = v2bufB;
1180  bpp2--;
1181  c2 = downsample_set(c2, v2, v2buf, bpp2);
1182  v2 = v2buf;
1183  }
1184 
1185  /* compare */
1186  int ge = 1;
1187  int le = 1;
1188  const unsigned *v1end = v1 + c1;
1189  const unsigned *v2end = v2 + c2;
1190  for (i = 0; i < SENTINELS; i++)
1191  assert(v1end[i] == ~0u);
1192  unsigned v2val = *v2;
1193 
1194  /* loop pieces */
1195 #define IFLT4 \
1196  if (*v1 < v2val) { \
1197  le = 0; \
1198  v1 += 4; \
1199  while (*v1 < v2val) \
1200  v1 += 4; \
1201  v1 -= 2; \
1202  if (*v1 < v2val) \
1203  v1++; \
1204  else \
1205  v1--; \
1206  if (*v1 < v2val) \
1207  v1++; \
1208  if (v1 == v1end) \
1209  break; \
1210  }
1211 #define IFLT8 \
1212  if (*v1 < v2val) { \
1213  le = 0; \
1214  v1 += 8; \
1215  while (*v1 < v2val) \
1216  v1 += 8; \
1217  v1 -= 4; \
1218  if (*v1 < v2val) \
1219  v1 += 2; \
1220  else \
1221  v1 -= 2; \
1222  if (*v1 < v2val) \
1223  v1++; \
1224  else \
1225  v1--; \
1226  if (*v1 < v2val) \
1227  v1++; \
1228  if (v1 == v1end) \
1229  break; \
1230  }
1231 #define IFGE \
1232  if (*v1 == v2val) { \
1233  v1++; \
1234  v2++; \
1235  if (v1 == v1end) \
1236  break; \
1237  if (v2 == v2end) \
1238  break; \
1239  v2val = *v2; \
1240  } \
1241  else { \
1242  ge = 0; \
1243  v2++; \
1244  if (v2 == v2end) \
1245  break; \
1246  v2val = *v2; \
1247  }
1248 
1249  /* choose the right stepper */
1250  if (c1 >= 16 * c2) {
1251  while (1) {
1252  IFLT8;
1253  IFGE;
1254  }
1255  }
1256  else {
1257  while (1) {
1258  IFLT4;
1259  IFGE;
1260  }
1261  }
1262 
1263  /* return */
1264  if (v1 < v1end)
1265  le = 0;
1266  if (v2 < v2end)
1267  ge = 0;
1268  if (le && ge) {
1269  rc = 0;
1270 if (_rpmset_debug)
1271 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1272  return rc;
1273  }
1274  if (ge) {
1275  rc = 1;
1276 if (_rpmset_debug)
1277 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1278  return rc;
1279  }
1280  if (le) {
1281  rc = -1;
1282 if (_rpmset_debug)
1283 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1284  return rc;
1285  }
1286  rc = -2;
1287 if (_rpmset_debug)
1288 fprintf(stderr, "<-- %s(%s,%s) rc %d\n", __FUNCTION__, str1, str2, rc);
1289  return rc;
1290 }
1291 
1292 /*==============================================================*/
1293 
1294 static void rpmsetFini(void * _set)
1295  /*@globals fileSystem @*/
1296  /*@modifies *_set, fileSystem @*/
1297 {
1298  rpmset set = _set;
1299 
1300  if (set) {
1301  int i;
1302  for (i = 0; i < set->c; i++)
1303  set->sv[i].s = _free(set->sv[i].s);
1304  set->sv = _free(set->sv);
1305  }
1306 }
1307 
1308 /*@unchecked@*/ /*@only@*/ /*@null@*/
1310 
1312  /*@globals _rpmsetPool, fileSystem @*/
1313  /*@modifies pool, _rpmsetPool, fileSystem @*/
1314 {
1315  rpmset set;
1316 
1317  if (_rpmsetPool == NULL) {
1318  _rpmsetPool = rpmioNewPool("set", sizeof(*set), -1, _rpmset_debug,
1319  NULL, NULL, rpmsetFini);
1320  pool = _rpmsetPool;
1321  }
1322  set = (rpmset) rpmioGetPool(pool, sizeof(*set));
1323  memset(((char *)set)+sizeof(set->_item), 0, sizeof(*set)-sizeof(set->_item));
1324  return set;
1325 }
1326 
1327 rpmset rpmsetNew(const char * fn, int flags)
1328 {
1329  rpmset set = rpmsetGetPool(_rpmsetPool);
1330 
1331  return rpmsetLink(set);
1332 }
1333 
1334 void rpmsetAdd(rpmset set, const char * sym)
1335 {
1336  const int delta = 1024;
1337  if ((set->c & (delta - 1)) == 0)
1338  set->sv = xrealloc(set->sv, sizeof(*set->sv) * (set->c + delta));
1339  set->sv[set->c].s = xstrdup(sym);
1340  set->sv[set->c].v = 0;
1341  set->c++;
1342 }
1343 
1344  /* Jenkins' one-at-a-time hash */
1345  static
1346  unsigned int hash(const char *str)
1347  {
1348  unsigned int hash = 0x9e3779b9;
1349  const unsigned char *p = (const unsigned char *) str;
1350  while (*p) {
1351  hash += *p++;
1352  hash += (hash << 10);
1353  hash ^= (hash >> 6);
1354  }
1355  hash += (hash << 3);
1356  hash ^= (hash >> 11);
1357  hash += (hash << 15);
1358  return hash;
1359  }
1360 
1361  /* sort by hash value */
1362  static
1363  int cmp(const void *arg1, const void *arg2)
1364  {
1365  struct sv *sv1 = (struct sv *) arg1;
1366  struct sv *sv2 = (struct sv *) arg2;
1367  if (sv1->v > sv2->v)
1368  return 1;
1369  if (sv2->v > sv1->v)
1370  return -1;
1371  return 0;
1372  }
1373 
1374  static
1375  int uniqv(int c, unsigned *v)
1376  {
1377  int i, j;
1378  for (i = 0, j = 0; i < c; i++) {
1379  while (i + 1 < c && v[i] == v[i+1])
1380  i++;
1381  v[j++] = v[i];
1382  }
1383  return j;
1384  }
1385 
1386 /* This routine does the whole job. */
1387 const char * rpmsetFinish(rpmset set, int bpp)
1388 {
1389  char * t = NULL;
1390 
1391  if (set->c < 1 || bpp < 10 || bpp > 32) {
1392 if (_rpmset_debug)
1393 fprintf(stderr, "<-- %s(%p,%d) rc %s\n", __FUNCTION__, set, bpp, t);
1394  }
1395 
1396  unsigned mask = (bpp < 32) ? (1u << bpp) - 1 : ~0u;
1397 
1398  /* hash sv strings */
1399  int i;
1400  for (i = 0; i < set->c; i++)
1401  set->sv[i].v = hash(set->sv[i].s) & mask;
1402 
1403  qsort(set->sv, set->c, sizeof *set->sv, cmp);
1404 
1405  /* warn on hash collisions */
1406  for (i = 0; i < set->c - 1; i++) {
1407  if (set->sv[i].v != set->sv[i+1].v)
1408  continue;
1409  if (strcmp(set->sv[i].s, set->sv[i+1].s) == 0)
1410  continue;
1411  fprintf(stderr, "warning: hash collision: %s %s\n",
1412  set->sv[i].s, set->sv[i+1].s);
1413  }
1414 
1415  /* encode */
1416  unsigned v[set->c];
1417  for (i = 0; i < set->c; i++)
1418  v[i] = set->sv[i].v;
1419  int c = uniqv(set->c, v);
1420  char base62[encode_set_size(c, bpp)];
1421  int len = encode_set(c, v, bpp, base62);
1422  if (len >= 0)
1423  t = xstrdup(base62);
1424 
1425 if (_rpmset_debug)
1426 fprintf(stderr, "<-- %s(%p,%d) rc %s\n", __FUNCTION__, set, bpp, t);
1427  return t;
1428 }
1429 
1430 #ifdef SELF_TEST
1431 static
1432 void test_api(void)
1433 {
1434  rpmset set1 = rpmsetNew(NULL, 0);
1435  rpmsetAdd(set1, "mama");
1436  rpmsetAdd(set1, "myla");
1437  rpmsetAdd(set1, "ramu");
1438  const char *str10 = rpmsetFinish(set1, 16);
1439  fprintf(stderr, "set10=%s\n", str10);
1440 
1441  int cmp;
1442  rpmset set2 = rpmsetNew(NULL, 0);
1443  rpmsetAdd(set2, "myla");
1444  rpmsetAdd(set2, "mama");
1445  const char *str20 = rpmsetFinish(set2, 16);
1446  fprintf(stderr, "set20=%s\n", str20);
1447  cmp = rpmsetCmp(str10, str20);
1448  assert(cmp == 1);
1449 
1450  rpmsetAdd(set2, "ramu");
1451  const char *str21 = rpmsetFinish(set2, 16);
1452  fprintf(stderr, "set21=%s\n", str21);
1453  cmp = rpmsetCmp(str10, str21);
1454  assert(cmp == 0);
1455 
1456  rpmsetAdd(set2, "baba");
1457  const char *str22 = rpmsetFinish(set2, 16);
1458  cmp = rpmsetCmp(str10, str22);
1459  assert(cmp == -1);
1460 
1461  rpmsetAdd(set1, "deda");
1462  const char *str11 = rpmsetFinish(set1, 16);
1463  cmp = rpmsetCmp(str11, str22);
1464  assert(cmp == -2);
1465 
1466  set1 = rpmsetFree(set1);
1467  set2 = rpmsetFree(set2);
1468  str10 = _free(str10);
1469  str11 = _free(str11);
1470  str20 = _free(str20);
1471  str21 = _free(str21);
1472  str22 = _free(str22);
1473 
1474  fprintf(stderr, "%s: api test OK\n", __FILE__);
1475 }
1476 #endif
1477 
1478 #ifdef SELF_TEST
1479 /*==============================================================*/
1480 static struct poptOption rpmsetOptionsTable[] = {
1481  { "debug", 'd', POPT_ARG_VAL, &_rpmset_debug, -1, NULL, NULL },
1482 
1483  { NULL, '\0', POPT_ARG_INCLUDE_TABLE, rpmioAllPoptTable, 0,
1484  N_("Common options for all rpmio executables:"), NULL },
1485 
1486  POPT_AUTOALIAS
1487  POPT_AUTOHELP
1488  POPT_TABLEEND
1489 };
1490 
1491 int main(int argc, char *argv[])
1492 {
1493  poptContext con = rpmioInit(argc, argv, rpmsetOptionsTable);
1494  int rc = 0;
1495 
1496  test_base62();
1497  test_golomb();
1498  test_word_table();
1499  test_base62_golomb();
1500  test_delta();
1501  test_set();
1502  test_api();
1503 
1504  con = rpmioFini(con);
1505  return rc;
1506 }
1507 #endif
#define AZ10(c, b)
static int encode_set(int c, unsigned *v, int bpp, char *base62)
Definition: set.c:849
poptContext rpmioInit(int argc, char *const argv[], struct poptOption *optionsTable)
Initialize most everything needed by an rpmio executable context.
Definition: poptIO.c:767
rpmioPool _rpmsetPool
Definition: set.c:1309
#define AZ26(c, b)
static int decode_base62_size(int len)
Definition: set.c:119
const char const char size_t len
Definition: bson.h:823
#define AA10x25(c1, c2, b1, b2)
static void rpmsetFini(void *_set)
Definition: set.c:1294
#define IFLT8
static int encode_base62(int bitc, const char *bitv, char *base62)
Definition: set.c:67
char * xstrdup(const char *str)
Definition: rpmmalloc.c:321
#define AZ25(c, b)
static int decode_set_size(int len, int Mshift)
Definition: set.c:901
static void decode_delta(int c, unsigned *v)
Definition: set.c:788
#define Put24R(Get)
int main(int argc, const char **argv, char **envp)
Definition: rpmqv.c:453
static int encode_golomb_size(int c, int Mshift)
Definition: set.c:291
int rpmsetCmp(const char *str1, const char *str2)
Definition: set.c:1113
Definition: set.c:435
static int cache_decode_set(const char *str, int Mshift, const unsigned **pv)
Definition: set.c:938
#define C26(c, b)
static int decode_base62(const char *base62, char *bitv)
Definition: set.c:161
static int encode_set_size(int c, int bpp)
Definition: set.c:840
#define A010(c, b)
#define CACHE_SIZE
struct set * rpmset
Definition: set.h:6
static int encode_base62_size(int bitc)
Definition: set.c:43
#define Put12Q(Get)
#define Put24Q(Get)
const char * str
Definition: bson.h:593
Definition: set.c:436
void rpmsetAdd(rpmset set, const char *sym)
Add new symbol to set.
Definition: set.c:1334
const char const bson_bool_t v
Definition: bson.h:919
#define Esc2(X)
#define AA26x10(c1, c2, b1, b2)
int w
Definition: mongo.h:436
rpmioItem rpmioGetPool(rpmioPool pool, size_t size)
Get unused item from pool, or alloc a new item.
Definition: rpmmalloc.c:220
#define IFLT4
#define N_(Text)
Definition: system.h:531
#define AA26x26(c1, c2, b1, b2)
#define Put10Q(Get)
static void put6bits(char *bitv, int c)
Definition: set.c:141
rpmset rpmsetFree(rpmset set)
Destroy a set wrapper.
#define AA25x26(c1, c2, b1, b2)
#define Esc1(X)
static int encode_golomb(int c, const unsigned *v, int Mshift, char *bitv)
Definition: set.c:302
static int uniqv(int c, unsigned *v)
Definition: set.c:1375
static unsigned int hash(const char *str)
Definition: set.c:1346
rpmset rpmsetNew(const char *fn, int flags)
Create and load a set wrapper.
Definition: set.c:1327
static int decode_golomb_size(int bitc, int Mshift)
Definition: set.c:325
Definition: set.c:437
#define AA10x10(c1, c2, b1, b2)
#define A025(c, b)
#define C10(c, b)
#define Put6Q(Get)
#define Get12(X)
#define Put6R(Get)
#define ZA26(c, b)
#define AA26x25(c1, c2, b1, b2)
#define A026(c, b)
#define Put12R(Get)
const char const bson int mongo_write_concern int flags
Definition: mongo.h:485
#define Put10R(Get)
static const unsigned short word_to_num[65536]
Definition: set.c:453
Definition: set.c:439
static const int char_to_num[256]
Definition: set.c:127
const char const int i
Definition: bson.h:778
static void encode_delta(int c, unsigned *v)
Definition: set.c:776
static rpmset rpmsetGetPool(rpmioPool pool)
Definition: set.c:1311
static int log2i(int n)
Definition: set.c:263
static void set(char *t, NODE *ip)
Definition: rpmmtree.c:1408
Definition: set.c:438
static int cmp(const void *arg1, const void *arg2)
Definition: set.c:1363
rpmioPool rpmioNewPool(const char *name, size_t size, int limit, int flags, char *(*dbg)(void *item), void(*init)(void *item), void(*fini)(void *item))
Create a memory pool.
Definition: rpmmalloc.c:109
static void put4bits(char *bitv, int c)
Definition: set.c:151
#define GotNN(X)
#define ZA10(c, b)
static void * _free(const void *p)
Wrapper to free(3), hides const compilation noise, permit NULL, return NULL.
Definition: rpmiotypes.h:756
rpmset rpmsetLink(rpmset set)
Reference a set wrapper instance.
#define PIVOT_SIZE
static int downsample_set(int c, const unsigned *v, unsigned *w, int bpp)
Definition: set.c:1008
Definition: set.c:434
const char * rpmsetFinish(rpmset set, int bpp)
Make set-version.
Definition: set.c:1387
#define SENTINELS
static int decode_base62_golomb(const char *base62, int Mshift, unsigned *v)
Definition: set.c:534
#define OX256(c)
static int decode_set_init(const char *str, int *pbpp, int *pMshift)
Definition: set.c:880
#define ZA25(c, b)
struct poptOption rpmioAllPoptTable[]
Popt option table for options shared by all modes and executables.
Definition: poptIO.c:564
poptContext rpmioFini(poptContext optCon)
Destroy most everything needed by an rpm CLI executable context.
Definition: poptIO.c:734
#define IFGE
#define Get24(X)
#define AA25x10(c1, c2, b1, b2)
static int decode_golomb(int bitc, const char *bitv, int Mshift, unsigned *v)
Definition: set.c:336
int _rpmset_debug
Definition: set.c:24
#define xrealloc
Definition: system.h:35
#define AA25x25(c1, c2, b1, b2)
static int decode_set(const char *str, int Mshift, unsigned *v)
Definition: set.c:908
int j
Definition: mongo.h:438
__attribute__((visibility("hidden"))) int mayAddToFilesAwaitingFiletriggers(const char *rootDir
static void put_digit(char *base62, int c)
Definition: set.c:54
#define AA10x26(c1, c2, b1, b2)
static int encode_golomb_Mshift(int c, int bpp)
Definition: set.c:273