1 /* This code is part of the tng compression routines.
3 * Written by Daniel Spangberg
4 * Copyright (c) 2010, 2013, The GROMACS development team.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the Revised BSD License.
15 #include "../../include/compression/warnmalloc.h"
16 #include "../../include/compression/merge_sort.h"
17 #include "../../include/compression/huffman.h"
19 #define MAX_HUFFMAN_LEN 31
21 enum htree_type { htree_leaf, htree_node };
25 enum htree_type nodeleaf;
26 unsigned int idict; /* Index into input dictionary */
28 unsigned int bit; /* One or zero */
33 enum htree_type nodeleaf;
34 union htree_nodeleaf *n1;
35 union htree_nodeleaf *n2;
36 unsigned int bit; /* One or zero */
42 enum htree_type nodeleaf;
43 struct htree_node node;
44 struct htree_leaf leaf;
55 static int comp_htree(const void *leafptr1, const void *leafptr2, const void *private)
57 const union htree_nodeleaf *leaf1=(union htree_nodeleaf *)leafptr1;
58 const union htree_nodeleaf *leaf2=(union htree_nodeleaf *)leafptr2;
62 if (leaf1->leaf.prob<leaf2->leaf.prob)
64 else if (leaf1->leaf.prob>leaf2->leaf.prob)
69 static void assign_codes(union htree_nodeleaf *htree,
70 struct codelength *codelength,
76 printf("Assign codes called with code %d length %d\n",code,length);
78 if (htree->nodeleaf==htree_leaf)
80 codelength[htree->leaf.idict].length=length+1;
81 codelength[htree->leaf.idict].code=(code<<1)|htree->leaf.bit;
83 printf("I am a leaf: %d %d\n",
84 codelength[htree->leaf.idict].length,
85 codelength[htree->leaf.idict].code);
93 code|=htree->node.bit;
97 printf("I am a node length: %d\n",length);
98 printf("I am a node code: %d\n",code);
100 assign_codes(htree->node.n1,codelength,code,length,0);
101 assign_codes(htree->node.n2,codelength,code,length,0);
105 static void free_nodes(union htree_nodeleaf *htree, int top)
107 if (htree->nodeleaf==htree_leaf)
114 free_nodes(htree->node.n1,0);
115 free_nodes(htree->node.n2,0);
121 static void flush_8bits(unsigned int *combine, unsigned char **output, int *bitptr)
125 unsigned int mask=~(0xFFU<<((*bitptr)-8));
126 unsigned char out=(unsigned char)((*combine)>>((*bitptr)-8));
134 static void writebits(unsigned int value, int length, unsigned char **output, int *bitptr)
137 unsigned int combine=(unsigned int)**output;
139 mask=0xFFU<<(length-8);
141 mask=0xFFU>>(8-length);
144 /* Make room for the bits. */
147 combine|=(value&mask)>>(length-8);
148 flush_8bits(&combine,output,bitptr);
154 /* Make room for the bits. */
158 flush_8bits(&combine,output,bitptr);
160 **output=(unsigned char)combine;
163 static unsigned int readbits(int length, unsigned char **input, int *bitptr)
166 unsigned int extract_mask=0x80U>>*bitptr;
167 unsigned char thisval=**input;
171 val|=((extract_mask & thisval)!=0);
185 static int comp_codes(const void *codeptr1, const void *codeptr2, const void *private)
187 const struct codelength *code1=(struct codelength *)codeptr1;
188 const struct codelength *code2=(struct codelength *)codeptr2;
189 int rval=0; /* It shouldn't be possible to get equal here, though. */
191 if (code1->length>code2->length)
193 else if (code1->length<code2->length)
195 else if (code1->dict>code2->dict)
202 static int comp_codes_value(const void *codeptr1, const void *codeptr2, const void *private)
204 const struct codelength *code1=(struct codelength *)codeptr1;
205 const struct codelength *code2=(struct codelength *)codeptr2;
207 int rval=0; /* It shouldn't be possible to get equal here, though. */
209 if (code1->dict>code2->dict)
216 /* The huffman_dict array should be 131077 (0x20005) long. The
217 huffman_dict_unpacked array should be 131077 long (note five longer than
219 void Ptngc_comp_conv_to_huffman(unsigned int *vals, const int nvals,
220 unsigned int *dict, const int ndict,
222 unsigned char *huffman,
224 unsigned char *huffman_dict,
225 int *huffman_dictlen,
226 unsigned int *huffman_dict_unpacked,
227 int *huffman_dict_unpackedlen)
231 union htree_nodeleaf *htree;
232 struct codelength *codelength;
234 unsigned char *huffman_ptr;
239 /* Create array of leafs (will be array of nodes/trees during
241 htree=warnmalloc(ndict*sizeof *htree);
242 codelength=warnmalloc(ndict*sizeof *codelength);
245 for (i=0; i<ndict; i++)
247 htree[i].nodeleaf=htree_leaf;
248 htree[i].leaf.idict=i;
249 htree[i].leaf.prob=prob[i];
251 /* Sort the leafs wrt probability. */
252 Ptngc_merge_sort(htree,ndict,sizeof *htree,comp_htree,NULL);
255 for (i=0; i<ndict; i++)
257 printf("%d %d\n",dict[htree[i].leaf.idict],htree[i].leaf.prob);
264 codelength[0].code=1;
265 codelength[0].length=1;
269 /* Nodes and leafs left. */
272 /* Take the two least probable symbols (which are at the end of the
273 array and combine them until there is nothing left. */
276 union htree_nodeleaf *n1=warnmalloc(sizeof *n1);
277 union htree_nodeleaf *n2=warnmalloc(sizeof *n2);
282 if (n1->nodeleaf==htree_leaf)
292 if (n2->nodeleaf==htree_leaf)
303 /* Create a new node */
304 htree[nleft-1].nodeleaf=htree_node;
305 htree[nleft-1].node.n1=n1;
306 htree[nleft-1].node.n2=n2;
308 htree[nleft-1].node.prob=new_prob;
309 /* Use insertion sort to place this in the correct place in the
311 /* Where should it be inserted? */
316 if (htree[new_place-1].nodeleaf==htree_node)
317 pc=htree[new_place-1].node.prob;
319 pc=htree[new_place-1].leaf.prob;
325 if (new_place!=nleft)
327 /* Shift array (overlapping regions!) */
328 union htree_nodeleaf nodecopy=htree[nleft-1];
329 memmove(htree+new_place+1,
331 (nleft-1-new_place)*sizeof *htree);
332 htree[new_place]=nodecopy;
336 /* Create codes from tree */
337 assign_codes(htree,codelength,0,0,1);
339 /* First put values into to the codelength array for sorting. */
340 for (i=0; i<ndict; i++)
342 codelength[i].dict=dict[i];
343 codelength[i].prob=prob[i];
345 /* Sort codes wrt length/value */
346 Ptngc_merge_sort(codelength,ndict,sizeof *codelength,comp_codes,NULL);
347 /* Canonicalize codes. */
349 for (i=0; i<ndict; i++)
351 codelength[i].code=code;
353 code=(code+1)<<(codelength[i+1].length-codelength[i].length);
355 /* Free all nodes / leaves. */
357 /* Free array that held nodes/leaves. */
361 /* Check if there is a too long huffman code. */
362 for (i=0; i<ndict; i++)
363 if (codelength[i].length>MAX_HUFFMAN_LEN)
366 /* If the codes are too long alter the probabilities. */
369 for (i=0; i<ndict; i++)
376 /* Free codelength. We will compute a new one. */
383 for (i=0; i<ndict; i++)
385 printf("%d %d\t\t %d %d ",codelength[i].dict,codelength[i].prob,codelength[i].length,codelength[i].code);
387 unsigned int c=codelength[i].code;
389 unsigned int mask=1<<(codelength[i].length-1);
390 for (j=codelength[i].length-1; j>=0; j--)
405 /* Simply do compression by writing out the bits. */
406 for (i=0; i<nvals; i++)
409 for (r=0; r<ndict; r++)
410 if (codelength[r].dict==vals[i])
412 writebits(codelength[r].code,codelength[r].length,&huffman_ptr,&bitptr);
415 writebits(0,8-bitptr,&huffman_ptr,&bitptr);
416 *huffman_len=(int)(huffman_ptr-huffman);
417 /* Output dictionary. */
418 /* First the largest symbol value is written in 16 bits. No bits are
419 encoded for symbols larger than this. Then one bit signifies if
420 there is a used symbol: 1 If unused entry: 0 If used symbol the 5
421 following bits encode the length of the symbol. Worst case is
422 thus 6*65538 bits used for the dictionary. That won't happen
423 unless there's really that many values in use. If that is so,
424 well, either we compress well, or we have many values anyway. */
425 /* First sort the dictionary wrt symbol */
426 Ptngc_merge_sort(codelength,ndict,sizeof *codelength,comp_codes_value,NULL);
428 huffman_ptr=huffman_dict;
429 *huffman_ptr++=(unsigned char)(codelength[ndict-1].dict&0xFFU);
430 *huffman_ptr++=(unsigned char)((codelength[ndict-1].dict>>8)&0xFFU);
431 *huffman_ptr++=(unsigned char)((codelength[ndict-1].dict>>16)&0xFFU);
432 huffman_dict_unpacked[0]=(unsigned char)(codelength[ndict-1].dict&0xFFU);
433 huffman_dict_unpacked[1]=(unsigned char)((codelength[ndict-1].dict>>8)&0xFFU);
434 huffman_dict_unpacked[2]=(unsigned char)((codelength[ndict-1].dict>>16)&0xFFU);
435 for (i=0; i<=(int)codelength[ndict-1].dict; i++)
437 /* Do I have this value? */
440 for (j=0; j<ndict; j++)
441 if (codelength[j].dict==(unsigned int)i)
445 writebits(1,1,&huffman_ptr,&bitptr);
446 writebits(codelength[j].length,5,&huffman_ptr,&bitptr);
447 huffman_dict_unpacked[3+i]=codelength[j].length;
452 writebits(0,1,&huffman_ptr,&bitptr);
453 huffman_dict_unpacked[3+i]=0;
457 writebits(0,8-bitptr,&huffman_ptr,&bitptr);
458 *huffman_dictlen=(int)(huffman_ptr-huffman_dict);
459 *huffman_dict_unpackedlen=3+codelength[ndict-1].dict+1;
461 /* Free info about codes and length. */
465 void Ptngc_comp_conv_from_huffman(unsigned char *huffman,
466 unsigned int *vals, const int nvals,
468 unsigned char *huffman_dict,
469 const int huffman_dictlen,
470 unsigned int *huffman_dict_unpacked,
471 const int huffman_dict_unpackedlen)
473 struct codelength *codelength=warnmalloc(ndict*sizeof *codelength);
477 unsigned char *huffman_ptr;
479 (void)huffman_dictlen;
480 (void)huffman_dict_unpackedlen;
481 if (huffman_dict_unpacked)
483 maxdict=huffman_dict_unpacked[0]|(huffman_dict_unpacked[1]<<8)|(huffman_dict_unpacked[2]<<16);
485 for(i=0; i<=maxdict; i++)
487 if (huffman_dict_unpacked[3+i]!=0)
489 codelength[j].length=huffman_dict_unpacked[3+i];
490 codelength[j].dict=i;
493 codelength[j].length,
502 huffman_ptr=huffman_dict;
503 maxdict=((unsigned int)huffman_ptr[0])|(((unsigned int)huffman_ptr[1])<<8)|(((unsigned int)huffman_ptr[2])<<16);
507 for(i=0; i<=maxdict; i++)
509 int bit=readbits(1,&huffman_ptr,&bitptr);
512 codelength[j].length=readbits(5,&huffman_ptr,&bitptr);
513 codelength[j].dict=i;
516 codelength[j].length,
523 /* Sort codes wrt length/value. */
524 Ptngc_merge_sort(codelength,ndict,sizeof *codelength,comp_codes,NULL);
525 /* Canonicalize codes. */
527 for (i=0; i<ndict; i++)
529 codelength[i].code=code;
531 code=(code+1)<<(codelength[i+1].length-codelength[i].length);
535 for (i=0; i<ndict; i++)
537 printf("%d -\t\t %d %d ",codelength[i].dict,codelength[i].length,codelength[i].code);
539 unsigned int c=codelength[i].code;
541 unsigned int mask=1<<(codelength[i].length-1);
542 for (j=codelength[i].length-1; j>=0; j--)
556 /* Decompress data. */
559 for (i=0; i<nvals; i++)
562 int len=codelength[0].length;
563 symbol=readbits(len,&huffman_ptr,&bitptr);
565 while (symbol!=codelength[j].code)
569 newlen=codelength[j].length;
572 symbol<<=(newlen-len);
573 symbol|=readbits(newlen-len,&huffman_ptr,&bitptr);
577 vals[i]=codelength[j].dict;
579 /* Free info about codes and length. */