AlcapDAQ  1
huffman.cpp
Go to the documentation of this file.
1 //
3 // Huffman coding.
4 //
6 
7 #include <stdio.h>
8 #include <stdlib.h>
9 
10 #include "midas.h"
11 
12 #include "mucap_compress.h"
13 
14 
15 typedef struct huffman_node {
16  // tree left/right pointers
19 
20  // linked list pointer
21  struct huffman_node *next;
22 
23  int symbol;
24  int frequency;
25 } huffman_node;
26 
27 void
29 {
30  // Huffman encode a single symbol and send it to an output buffer.
31  // This function is in an inner loop and therefore
32  // performance-critical.
33 
34  // Update the frequency count for this symbol.
35  table->frequency[symbol]++;
36 
37  // Get the code corresponding to this symbol.
38  unsigned int code_bits = table->code_bits[symbol];
39  int code_length = table->code_length[symbol];
40 
41  // Put it in the buffer
42  io_buffer_put(buffer, code_bits, code_length);
43 
44  buffer->num_codes++;
45 }
46 
47 #if 0
48 int huffman_get_symbol(huffman_table * table, io_buffer * buffer)
49 {
50  // Get the next symbol from a Huffman-coded buffer. Again, this
51  // function is in an inner loop and therefore performance-critical.
52  // However, the current bit-by-bit implementation is for testing only
53  // and is likely to be extremely slow.
54 
55  if (buffer->num_codes <= 0) {
56  return -1;
57  }
58  buffer->num_codes--;
59 
60  huffman_node *node = table->tree;
61 
62  while (1) {
63  // We may already be there...
64  if (node->symbol != -1) {
65  return node->symbol;
66  } else {
67  // If not, consume a bit from the input and follow the tree
68  if (buffer->w_bits == 0) {
69  buffer->w = *((buffer->p)++);
70  buffer->w_bits = 32;
71  }
72 
73  unsigned int bit = buffer->w & (1u << 31);
74  buffer->w <<= 1;
75  buffer->w_bits--;
76 
77  if (bit == 0) {
78  node = node->branch0;
79  } else {
80  node = node->branch1;
81  }
82  }
83  }
84 }
85 #endif
86 
88 {
89  // Get the next symbol from a Huffman-coded buffer. Again, this
90  // function is in an inner loop and therefore performance-critical.
91 
92  if (buffer->num_codes <= 0) {
93  return -1;
94  }
95  buffer->num_codes--;
96 
97  // If we have at least 8 bits pre-read from the buffer, then check the
98  // table of pre-computed short symbols.
99  if(buffer->w_bits >= 8) {
100  unsigned int byte = (buffer->w >> 24) & 0xff;
101  int symbol = table->decode_symbols[byte];
102  if(symbol != -1) {
103  int code_length = table->code_length[symbol];
104  buffer->w <<= code_length;
105  buffer->w_bits -= code_length;
106  return symbol;
107  }
108  }
109 
110  // Otherwise, do it the slow way, walking bit-by-bit through the tree
111  huffman_node *node = table->tree;
112 
113  while (1) {
114  // We may already be there...
115  if (node->symbol != -1) {
116  return node->symbol;
117  } else {
118  // If not, consume a bit from the input and follow the tree
119  if (buffer->w_bits == 0) {
120  buffer->w = *((buffer->p)++);
121  buffer->w_bits = 32;
122  }
123 
124  unsigned int bit = buffer->w & (1u << 31);
125  buffer->w <<= 1;
126  buffer->w_bits--;
127 
128  if (bit == 0) {
129  node = node->branch0;
130  } else {
131  node = node->branch1;
132  }
133  }
134  }
135 }
136 
138 {
139  table->decode_symbols = new int[256];
140 
141  for(int i = 0; i < 256; i++) {
142  int w = i << 24;
143  int w_bits = 8;
144 
145  huffman_node *node = table->tree;
146 
147  while (1) {
148  // We may already be there...
149  if (node == NULL) {
150  table->decode_symbols[i] = -1;
151  break;
152  } else if (node->symbol != -1) {
153  table->decode_symbols[i] = node->symbol;
154  break;
155  } else {
156  // If not, consume a bit from the input and follow the tree
157  if (w_bits == 0) {
158  table->decode_symbols[i] = -1;
159  break;
160  }
161 
162  unsigned int bit = w & (1u << 31);
163  w <<= 1;
164  w_bits--;
165 
166  if (bit == 0) {
167  node = node->branch0;
168  } else {
169  node = node->branch1;
170  }
171  }
172  }
173  }
174 }
175 
176 int huffman_node_compare(const void *p1, const void *p2)
177 {
178  huffman_node **node1 = (huffman_node **) p1;
179  huffman_node **node2 = (huffman_node **) p2;
180 
181  return (*node1)->frequency - (*node2)->frequency;
182 }
183 
184 void
186  int length, unsigned int bits)
187 {
188  int symbol = node->symbol;
189 
190  if (symbol == -1) {
191  huffman_visit(table, node->branch0, length + 1, bits << 1);
192  huffman_visit(table, node->branch1, length + 1, (bits << 1) | 1);
193  } else {
194  table->code_bits[symbol] = bits;
195  table->code_length[symbol] = length;
196  }
197 }
198 
199 
201 {
202  // Start by allocating terminal nodes for the symbols.
203  huffman_node *terminals[table->num_symbols];
204  for (int i = 0; i < table->num_symbols; i++) {
205  huffman_node *node = new huffman_node;
206  node->branch0 = NULL;
207  node->branch1 = NULL;
208  node->next = NULL;
209  node->symbol = i;
210  node->frequency = table->frequency[i];
211  terminals[i] = node;
212  }
213 
214  // Now sort the array of terminals by frequency.
215  qsort(terminals, table->num_symbols,
217 
218  // Set up the linked list pointers
219  for (int i = 0; i < table->num_symbols - 1; i++) {
220  terminals[i]->next = terminals[i + 1];
221  }
222  huffman_node *root = terminals[0];
223 
224  // Now repeat: draw the two least probable nodes
225  // and combine them, sorting the resulting combination
226  // into the correct position in the list.
227  while (root->next != NULL) {
228  huffman_node *node0 = root;
229  huffman_node *node1 = node0->next;
230  root = node1->next;
231 
232  huffman_node *combined = new huffman_node;
233  combined->branch0 = node0;
234  combined->branch1 = node1;
235  combined->next = NULL;
236  combined->symbol = -1;
237  combined->frequency = node0->frequency + node1->frequency;
238 
239  if (root == NULL) {
240  root = combined;
241  } else {
242  // Search through the list to find the right position for the
243  // new
244  // node, and put it there.
245  huffman_node *pos = root;
246  huffman_node *prev = NULL;
247  while (pos != NULL) {
248  if (combined->frequency <= pos->frequency) {
249  combined->next = pos;
250  if (prev != NULL) {
251  prev->next = combined;
252  } else {
253  root = combined;
254  }
255  break;
256  }
257  prev = pos;
258  pos = pos->next;
259  }
260 
261  if (pos == NULL) {
262  combined->next = NULL;
263  prev->next = combined;
264  }
265 
266 #ifdef DEBUG
267  // Check that we didn't screw up
268  pos = root;
269  prev = NULL;
270  int node_count = 0;
271  while (pos != NULL) {
272  if (prev != NULL && pos->frequency < prev->frequency) {
273  cm_msg(MERROR, "huffman_optimize_tree", "List out of order");
274  return !SUCCESS;
275  }
276  prev = pos;
277  pos = pos->next;
278  node_count++;
279  }
280 #endif
281  }
282  }
283 
284  table->tree = root;
285 
286  // Now we have the tree; read off the codes by recursively visiting
287  // each node.
288  huffman_visit(table, root, 0, 0);
289 
290  // Check that the number of bits used by each key is less than 32.
291  for (int i = 0; i < table->num_symbols; i++) {
292  if(table->code_length[i] > 32) {
293  cm_msg(MERROR, "huffman_optimize_tree",
294  "Code length for symbol %d is too long (%d bits)", i,
295  table->code_length[i]);
296  return !SUCCESS;
297  }
298  }
299 
301 
302  return SUCCESS;
303 }
304 
305 void huffman_init_default(huffman_table * table, int num_symbols)
306 {
307  table->num_symbols = num_symbols;
308  table->frequency = new int[num_symbols];
309  table->code_bits = new unsigned int[num_symbols];
310  table->code_length = new int[num_symbols];
311  table->tree = NULL;
312 
313  // Assume that all symbols have equal probability
314  for (int i = 0; i < num_symbols; i++) {
315  table->frequency[i] = 1;
316  }
317 
318  huffman_optimize_tree(table);
319 }
320 
321 BOOL save_huffman(char *key_dir, huffman_table * table)
322 {
323  char key_name[MAX_ODB_PATH];
324  int size;
325 
326  sprintf(key_name, "%s/code_bits", key_dir);
327  size = table->num_symbols * sizeof(DWORD);
328  int stat = db_set_value(hDB, 0, key_name, table->code_bits, size,
329  table->num_symbols, TID_DWORD);
330  if(stat != DB_SUCCESS) {
331  cm_msg(MERROR, "save_huffman", "Failed to save ODB key %s", key_name);
332  return !SUCCESS;
333  }
334 
335  sprintf(key_name, "%s/code_length", key_dir);
336  stat = db_set_value(hDB, 0, key_name, table->code_length, size,
337  table->num_symbols, TID_DWORD);
338  if(stat != DB_SUCCESS) {
339  cm_msg(MERROR, "save_huffman", "Failed to save ODB key %s", key_name);
340  return !SUCCESS;
341  }
342 
343  sprintf(key_name, "%s/frequency", key_dir);
344  stat = db_set_value(hDB, 0, key_name, table->frequency, size,
345  table->num_symbols, TID_DWORD);
346  if(stat != DB_SUCCESS) {
347  cm_msg(MERROR, "save_huffman", "Failed to save ODB key %s", key_name);
348  return !SUCCESS;
349  }
350 
351  return SUCCESS;
352 }
353 
355 {
356  node->branch0 = NULL;
357  node->branch1 = NULL;
358  node->next = NULL;
359  node->symbol = -1;
360  node->frequency = 0;
361 }
362 
364 {
365  if(node->branch0 != NULL) {
367  }
368 
369  if(node->branch1 != NULL) {
371  }
372 
373  delete node;
374 }
375 
377 {
378  // build the Huffman tree corresponding to the bit strings in code_bits
379  // and code_length
380 
381  // initialize root of tree
382  huffman_node *root = new huffman_node;
383  table->tree = root;
384  init_huffman_node(root);
385 
386  for(int i = 0; i < table->num_symbols; i++) {
387  DWORD code_bits = table->code_bits[i];
388  int code_length = table->code_length[i];
389  code_bits <<= 32-code_length;
390 
391  huffman_node *node = root;
392 
393  for(int bit_number = 0; bit_number < code_length; bit_number++) {
394 
395  BOOL bit = ((code_bits & (1u << 31)) != 0);
396  code_bits <<= 1;
397 
398  if(bit == 0) {
399  if(node->branch0 == NULL) {
400  node->branch0 = new huffman_node;
401  init_huffman_node(node->branch0);
402  }
403  node = node->branch0;
404 
405  if(bit_number == code_length - 1) {
406  node->symbol = i;
407  node->frequency = table->frequency[i];
408  }
409  } else {
410  if(node->branch1 == NULL) {
411  node->branch1 = new huffman_node;
412  init_huffman_node(node->branch1);
413  }
414  node = node->branch1;
415 
416  if(bit_number == code_length - 1) {
417  node->symbol = i;
418  node->frequency = table->frequency[i];
419  }
420  }
421  }
422  }
423 
425 
426  return SUCCESS;
427 }
428 
429 BOOL load_huffman(char *key_dir, huffman_table * table)
430 {
431  char key_name[MAX_ODB_PATH];
432  int size;
433 
434  for(int i = 0; i < table->num_symbols; i++) {
435  table->frequency[i] = 1;
436  }
437 
438  sprintf(key_name, "%s/code_bits", key_dir);
439  size = table->num_symbols * sizeof(DWORD);
440  int stat = db_get_value(hDB, 0, key_name, table->code_bits, &size,
441  TID_DWORD, FALSE);
442  if(stat != DB_SUCCESS || size != table->num_symbols * sizeof(DWORD)) {
443  cm_msg(MERROR, "load_huffman", "Failed to load ODB key %s", key_name);
444  return !SUCCESS;
445  }
446 
447  sprintf(key_name, "%s/code_length", key_dir);
448  size = table->num_symbols * sizeof(DWORD);
449  db_get_value(hDB, 0, key_name, table->code_length, &size, TID_DWORD, FALSE);
450  if(stat != DB_SUCCESS || size != table->num_symbols * sizeof(DWORD)) {
451  cm_msg(MERROR, "load_huffman", "Failed to load ODB key %s", key_name);
452  return !SUCCESS;
453  }
454 
455  huffman_build_tree(table);
456 
457  return SUCCESS;
458 }