TNG version 1.7.3
[alexxy/gromacs.git] / src / external / tng_io / src / compression / xtc3.c
1 /* This code is part of the tng compression routines.
2  *
3  * Written by Daniel Spangberg and Magnus Lundborg
4  * Copyright (c) 2010, 2013-2014 The GROMACS development team.
5  *
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the Revised BSD License.
9  */
10
11 /* This code is heavily influenced by
12    http://hpcv100.rc.rug.nl/xdrf.html
13    Based on coordinate compression (c) by Frans van Hoesel.
14    and GROMACS xtc files (http://www.gromacs.org)
15    (c) Copyright (c) Erik Lindahl, David van der Spoel
16 */
17
18 /* The cost estimates are ripped right out of xtc2.c, so take these
19    with a grain (truckload) of salt. */
20
21 #include <limits.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <math.h>
26 #include "../../include/compression/warnmalloc.h"
27 #include "../../include/compression/widemuldiv.h"
28 #include "../../include/compression/bwlzh.h"
29
30 static const double iflipgaincheck=0.89089871814033927; /*  1./(2**(1./6)) */
31
32 #define MAX_LARGE_RLE 1024 /* Maximum number of large atoms for large RLE. */
33 #define MAX_SMALL_RLE 12 /* Maximum number of small atoms in one group. */
34
35 #define TRESHOLD_INTRA_INTER_DIRECT 1.5 /* How much larger can the direct
36                                            frame deltas for the small
37                                            triplets be and be accepted anyway
38                                            as better than the intra/inter frame
39                                            deltas. For better instructions/RLEs. */
40
41 #define TRESHOLD_INTER_INTRA 5.0 /* How much larger can the intra
42                                     frame deltas for the small
43                                     triplets be and be accepted anyway
44                                     as better than the inter frame
45                                     deltas. */
46
47 /* Difference in indices used for determining whether to store as
48    large or small. A fun detail in this compression algorithm is that
49    if everything works fine, large can often be smaller than small, or
50    at least not as large as is large in magic.c. This is a key idea of
51    xtc3. */
52 #define QUITE_LARGE 3
53 #define IS_LARGE 6
54
55 #if 0
56 #define SHOWIT
57 #endif
58
59 #if 0
60 #define SHOWIT_LIGHT
61 #endif
62
63 #ifdef USE_WINDOWS
64 #define TNG_INLINE __inline
65 #else
66 #define TNG_INLINE inline
67 #endif
68
69 /* These routines are in xtc2.c */
70 int Ptngc_magic(unsigned int i);
71 int Ptngc_find_magic_index(const unsigned int maxval);
72
73 static TNG_INLINE unsigned int positive_int(const int item)
74 {
75   int s=0;
76   if (item>0)
77     s=1+(item-1)*2;
78   else if (item<0)
79     s=2+(-item-1)*2;
80   return s;
81 }
82
83 static TNG_INLINE int unpositive_int(const int val)
84 {
85   int s=(val+1)/2;
86   if ((val%2)==0)
87     s=-s;
88   return s;
89 }
90
91
92 /* Sequence instructions */
93 #define INSTR_DEFAULT 0U
94 #define INSTR_SMALL_RUNLENGTH 1U
95 #define INSTR_ONLY_LARGE 2U
96 #define INSTR_ONLY_SMALL 3U
97 #define INSTR_FLIP 4U
98 #define INSTR_LARGE_RLE 5U
99 #define INSTR_LARGE_DIRECT 6U
100 #define INSTR_LARGE_INTRA_DELTA 7U
101 #define INSTR_LARGE_INTER_DELTA 8U
102
103 #define MAXINSTR 9
104
105 struct xtc3_context
106 {
107   unsigned int *instructions;
108   int ninstr, ninstr_alloc;
109   unsigned int *rle;
110   int nrle, nrle_alloc;
111   unsigned int *large_direct;
112   int nlargedir, nlargedir_alloc;
113   unsigned int *large_intra_delta;
114   int nlargeintra, nlargeintra_alloc;
115   unsigned int *large_inter_delta;
116   int nlargeinter, nlargeinter_alloc;
117   unsigned int *smallintra;
118   int nsmallintra, nsmallintra_alloc;
119   int minint[3],maxint[3];
120   int has_large;
121   int has_large_ints[MAX_LARGE_RLE*3]; /* Large cache. */
122   int has_large_type[MAX_LARGE_RLE]; /* What kind of type this large
123                                         int is. */
124   int current_large_type;
125 };
126
127 static void init_xtc3_context(struct xtc3_context *xtc3_context)
128 {
129   xtc3_context->instructions=NULL;
130   xtc3_context->ninstr=0;
131   xtc3_context->ninstr_alloc=0;
132   xtc3_context->rle=NULL;
133   xtc3_context->nrle=0;
134   xtc3_context->nrle_alloc=0;
135   xtc3_context->large_direct=NULL;
136   xtc3_context->nlargedir=0;
137   xtc3_context->nlargedir_alloc=0;
138   xtc3_context->large_intra_delta=NULL;
139   xtc3_context->nlargeintra=0;
140   xtc3_context->nlargeintra_alloc=0;
141   xtc3_context->large_inter_delta=NULL;
142   xtc3_context->nlargeinter=0;
143   xtc3_context->nlargeinter_alloc=0;
144   xtc3_context->smallintra=NULL;
145   xtc3_context->nsmallintra=0;
146   xtc3_context->nsmallintra_alloc=0;
147   xtc3_context->has_large=0;
148   xtc3_context->current_large_type=0;
149 }
150
151 static void free_xtc3_context(struct xtc3_context *xtc3_context)
152 {
153   free(xtc3_context->instructions);
154   free(xtc3_context->rle);
155   free(xtc3_context->large_direct);
156   free(xtc3_context->large_intra_delta);
157   free(xtc3_context->large_inter_delta);
158   free(xtc3_context->smallintra);
159 }
160
161 /* Modifies three integer values for better compression of water */
162 static void swap_ints(const int *in, int *out)
163 {
164   out[0]=in[0]+in[1];
165   out[1]=-in[1];
166   out[2]=in[1]+in[2];
167 }
168
169 static void swap_is_better(const int *input, const int *minint, int *sum_normal, int *sum_swapped)
170 {
171   int normal_max=0;
172   int swapped_max=0;
173   int i,j;
174   int normal[3];
175   int swapped[3];
176   for (i=0; i<3; i++)
177     {
178       normal[0]=input[i]-minint[i];
179       normal[1]=input[3+i]-input[i]; /* minint[i]-minint[i] cancels out */
180       normal[2]=input[6+i]-input[3+i]; /* minint[i]-minint[i] cancels out */
181       swap_ints(normal,swapped);
182       for (j=1; j<3; j++)
183         {
184           if (positive_int(normal[j])>(unsigned int)normal_max)
185             normal_max=positive_int(normal[j]);
186           if (positive_int(swapped[j])>(unsigned int)swapped_max)
187             swapped_max=positive_int(swapped[j]);
188         }
189     }
190   if (normal_max==0)
191     normal_max=1;
192   if (swapped_max==0)
193     swapped_max=1;
194   *sum_normal=normal_max;
195   *sum_swapped=swapped_max;
196 }
197
198 static void allocate_enough_memory(unsigned int **ptr, int *nele, int *nele_alloc)
199 {
200   (*nele)++;
201   if (*nele>*nele_alloc)
202     {
203       *nele_alloc=*nele + *nele/2;
204       *ptr=warnrealloc(*ptr,*nele_alloc*sizeof **ptr);
205     }
206 }
207
208 static void insert_value_in_array(unsigned int **ptr, int *nele, int *nele_alloc,
209                                   const unsigned int value,
210                                   char *arrayname)
211 {
212 #ifndef SHOWIT
213   (void)arrayname;
214 #endif
215   allocate_enough_memory(ptr,nele,nele_alloc);
216 #ifdef SHOWIT
217   fprintf(stderr,"Inserting value %u into array %s @ %d\n",value,arrayname,(*nele)-1);
218 #endif
219   (*ptr)[(*nele)-1]=value;
220 }
221
222
223
224 static void swapdecide(struct xtc3_context *xtc3_context, int *input,int *swapatoms, int *large_index, int *minint)
225 {
226   int didswap=0;
227   int normal,swapped;
228   (void)large_index;
229   swap_is_better(input,minint,&normal,&swapped);
230   /* We have to determine if it is worth to change the behaviour.
231      If diff is positive it means that it is worth something to
232      swap. But it costs 4 bits to do the change. If we assume that
233      we gain 0.17 bit by the swap per value, and the runlength>2
234      for four molecules in a row, we gain something. So check if we
235      gain at least 0.17 bits to even attempt the swap.
236   */
237 #ifdef SHOWIT
238   fprintf(stderr,"Trying Flip: %g %g\n",(double)swapped/normal, (double)normal/swapped);
239 #endif
240   if (((swapped<normal) && (fabs((double)swapped/normal)<iflipgaincheck)) ||
241       ((normal<swapped) && (fabs((double)normal/swapped)<iflipgaincheck)))
242     {
243       if (swapped<normal)
244         {
245           if (!*swapatoms)
246             {
247               *swapatoms=1;
248               didswap=1;
249             }
250         }
251       else
252         {
253           if (*swapatoms)
254             {
255               *swapatoms=0;
256               didswap=1;
257             }
258         }
259     }
260   if (didswap)
261     {
262 #ifdef SHOWIT
263       fprintf(stderr,"Flip. Swapatoms is now %d\n",*swapatoms);
264 #endif
265       insert_value_in_array(&xtc3_context->instructions,
266                             &xtc3_context->ninstr,
267                             &xtc3_context->ninstr_alloc,
268                             INSTR_FLIP,"instr");
269     }
270 }
271
272 /* It is "large" if we have to increase the small index quite a
273    bit. Not so much to be rejected by the not very large check
274    later. */
275 static int is_quite_large(const int *input, const int small_index, const int max_large_index)
276 {
277   int is=0;
278   int i;
279   if (small_index+QUITE_LARGE>=max_large_index)
280     is=1;
281   else
282     {
283       for (i=0; i<3; i++)
284         if (positive_int(input[i])>(unsigned int)Ptngc_magic(small_index+QUITE_LARGE))
285           {
286             is=1;
287             break;
288           }
289     }
290   return is;
291 }
292
293 #ifdef SHOWIT
294 int nbits_sum;
295 int nvalues_sum;
296 #endif
297
298 static void insert_batch(const int *input_ptr, const int ntriplets_left, const int *prevcoord, int *encode_ints, const int startenc, int *nenc)
299 {
300   int nencode=startenc*3;
301   int tmp_prevcoord[3];
302
303   tmp_prevcoord[0]=prevcoord[0];
304   tmp_prevcoord[1]=prevcoord[1];
305   tmp_prevcoord[2]=prevcoord[2];
306
307   if (startenc)
308     {
309       int i;
310       for (i=0; i<startenc; i++)
311         {
312           tmp_prevcoord[0]+=encode_ints[i*3];
313           tmp_prevcoord[1]+=encode_ints[i*3+1];
314           tmp_prevcoord[2]+=encode_ints[i*3+2];
315 #ifdef SHOWIT
316           fprintf(stderr,"%6d: %6d %6d %6d\t\t%6d %6d %6d\t\t%6d %6d %6d\n",i*3,
317                   tmp_prevcoord[0],tmp_prevcoord[1],tmp_prevcoord[2],
318                   encode_ints[i*3],
319                   encode_ints[i*3+1],
320                   encode_ints[i*3+2],
321                   positive_int(encode_ints[i*3]),
322                   positive_int(encode_ints[i*3+1]),
323                   positive_int(encode_ints[i*3+2]));
324 #endif
325         }
326     }
327
328 #ifdef SHOWIT
329   fprintf(stderr,"New batch\n");
330 #endif
331   while ((nencode<3+MAX_SMALL_RLE*3) && (nencode<ntriplets_left*3))
332     {
333       encode_ints[nencode]=input_ptr[nencode]-tmp_prevcoord[0];
334       encode_ints[nencode+1]=input_ptr[nencode+1]-tmp_prevcoord[1];
335       encode_ints[nencode+2]=input_ptr[nencode+2]-tmp_prevcoord[2];
336 #ifdef SHOWIT
337       fprintf(stderr,"%6d: %6d %6d %6d\t\t%6d %6d %6d\t\t%6d %6d %6d\n",nencode,
338               input_ptr[nencode],
339               input_ptr[nencode+1],
340               input_ptr[nencode+2],
341               encode_ints[nencode],
342               encode_ints[nencode+1],
343               encode_ints[nencode+2],
344               positive_int(encode_ints[nencode]),
345               positive_int(encode_ints[nencode+1]),
346               positive_int(encode_ints[nencode+2]));
347 #endif
348       tmp_prevcoord[0]=input_ptr[nencode];
349       tmp_prevcoord[1]=input_ptr[nencode+1];
350       tmp_prevcoord[2]=input_ptr[nencode+2];
351       nencode+=3;
352     }
353   *nenc=nencode;
354 }
355
356 static void large_instruction_change(struct xtc3_context *xtc3_context, const int i)
357 {
358   /* If the first large is of a different kind than the currently used we must
359      emit an "instruction" to change the large type. */
360   if (xtc3_context->has_large_type[i]!=xtc3_context->current_large_type)
361     {
362       unsigned int instr;
363       xtc3_context->current_large_type=xtc3_context->has_large_type[i];
364       if (xtc3_context->current_large_type==0)
365         instr=INSTR_LARGE_DIRECT;
366       else if (xtc3_context->current_large_type==1)
367         instr=INSTR_LARGE_INTRA_DELTA;
368       else
369         instr=INSTR_LARGE_INTER_DELTA;
370       insert_value_in_array(&xtc3_context->instructions,
371                             &xtc3_context->ninstr,
372                             &xtc3_context->ninstr_alloc,
373                             instr,"instr");
374     }
375 }
376
377 static void write_three_large(struct xtc3_context *xtc3_context,
378                               const int i)
379 {
380   int m;
381   if (xtc3_context->current_large_type==0)
382     {
383       for (m=0; m<3; m++)
384         insert_value_in_array(&xtc3_context->large_direct,
385                               &xtc3_context->nlargedir,
386                               &xtc3_context->nlargedir_alloc,
387                               xtc3_context->has_large_ints[i*3+m],"large direct");
388     }
389   else if (xtc3_context->current_large_type==1)
390     {
391       for (m=0; m<3; m++)
392         insert_value_in_array(&xtc3_context->large_intra_delta,
393                               &xtc3_context->nlargeintra,
394                               &xtc3_context->nlargeintra_alloc,
395                               xtc3_context->has_large_ints[i*3+m],"large intra");
396     }
397   else
398     {
399       for (m=0; m<3; m++)
400         insert_value_in_array(&xtc3_context->large_inter_delta,
401                               &xtc3_context->nlargeinter,
402                               &xtc3_context->nlargeinter_alloc,
403                               xtc3_context->has_large_ints[i*3+m],"large inter");
404     }
405 }
406
407 static void flush_large(struct xtc3_context *xtc3_context,
408                         const int n) /* How many to flush. */
409 {
410   int i;
411   i=0;
412   while (i<n)
413     {
414       int j,k;
415       /* If the first large is of a different kind than the currently used we must
416          emit an "instruction" to change the large type. */
417       large_instruction_change(xtc3_context,i);
418       /* How many large of the same kind in a row? */
419       for (j=0;
420            (i+j<n) &&
421              (xtc3_context->has_large_type[i+j]==xtc3_context->has_large_type[i]);
422            j++);
423       if (j<3)
424         {
425           for (k=0; k<j; k++)
426             {
427               insert_value_in_array(&xtc3_context->instructions,
428                                     &xtc3_context->ninstr,
429                                     &xtc3_context->ninstr_alloc,
430                                     INSTR_ONLY_LARGE,"instr");
431               write_three_large(xtc3_context,i+k);
432             }
433         }
434       else
435         {
436           insert_value_in_array(&xtc3_context->instructions,
437                                 &xtc3_context->ninstr,
438                                 &xtc3_context->ninstr_alloc,
439                                 INSTR_LARGE_RLE,"instr");
440           insert_value_in_array(&xtc3_context->rle,
441                                 &xtc3_context->nrle,
442                                 &xtc3_context->nrle_alloc,
443                                 (unsigned int)j,"rle (large)");
444           for (k=0; k<j; k++)
445             write_three_large(xtc3_context,i+k);
446         }
447       i+=j;
448     }
449   if ((xtc3_context->has_large-n)!=0)
450     {
451       int j;
452       for (i=0; i<xtc3_context->has_large-n; i++)
453         {
454           xtc3_context->has_large_type[i]=xtc3_context->has_large_type[i+n];
455           for (j=0; j<3; j++)
456             xtc3_context->has_large_ints[i*3+j]=xtc3_context->has_large_ints[(i+n)*3+j];
457         }
458     }
459   xtc3_context->has_large-=n; /* Number of remaining large atoms in buffer */
460 }
461
462 static double compute_intlen(unsigned int *ints)
463 {
464   /* The largest value. */
465   unsigned int m=ints[0];
466   if (ints[1]>m)
467     m=ints[1];
468   if (ints[2]>m)
469     m=ints[2];
470   return (double)m;
471 }
472
473 static void buffer_large(struct xtc3_context *xtc3_context, int *input, const int inpdata,
474                          const int natoms, const int intradelta_ok)
475 {
476   unsigned int direct[3], intradelta[3]={0,}, interdelta[3]={0,};
477   double minlen;
478   int best_type;
479   int frame=inpdata/(natoms*3);
480   int atomframe=inpdata%(natoms*3);
481   /* If it is full we must write them all. */
482   if (xtc3_context->has_large==MAX_LARGE_RLE)
483     flush_large(xtc3_context,xtc3_context->has_large); /* Flush all. */
484   /* Find out which is the best choice for the large integer. Direct coding, or some
485      kind of delta coding? */
486   /* First create direct coding. */
487   direct[0]=(unsigned int)(input[inpdata]-xtc3_context->minint[0]);
488   direct[1]=(unsigned int)(input[inpdata+1]-xtc3_context->minint[1]);
489   direct[2]=(unsigned int)(input[inpdata+2]-xtc3_context->minint[2]);
490   minlen=compute_intlen(direct);
491   best_type=0; /* Direct. */
492 #if 1
493   /* Then try intra coding if we can. */
494   if ((intradelta_ok) && (atomframe>=3))
495     {
496       double thislen;
497       intradelta[0]=positive_int(input[inpdata]-input[inpdata-3]);
498       intradelta[1]=positive_int(input[inpdata+1]-input[inpdata-2]);
499       intradelta[2]=positive_int(input[inpdata+2]-input[inpdata-1]);
500       thislen=compute_intlen(intradelta);
501       if (thislen*TRESHOLD_INTRA_INTER_DIRECT<minlen)
502         {
503           minlen=thislen;
504           best_type=1; /* Intra delta */
505         }
506     }
507 #endif
508 #if 1
509   /* Then try inter coding if we can. */
510   if (frame>0)
511     {
512       double thislen;
513       interdelta[0]=positive_int(input[inpdata]-input[inpdata-natoms*3]);
514       interdelta[1]=positive_int(input[inpdata+1]-input[inpdata-natoms*3+1]);
515       interdelta[2]=positive_int(input[inpdata+2]-input[inpdata-natoms*3+2]);
516       thislen=compute_intlen(interdelta);
517       if (thislen*TRESHOLD_INTRA_INTER_DIRECT<minlen)
518         {
519           best_type=2; /* Inter delta */
520         }
521     }
522 #endif
523   xtc3_context->has_large_type[xtc3_context->has_large]=best_type;
524   if (best_type==0)
525     {
526       xtc3_context->has_large_ints[xtc3_context->has_large*3]=direct[0];
527       xtc3_context->has_large_ints[xtc3_context->has_large*3+1]=direct[1];
528       xtc3_context->has_large_ints[xtc3_context->has_large*3+2]=direct[2];
529     }
530   else if (best_type==1)
531     {
532       xtc3_context->has_large_ints[xtc3_context->has_large*3]=intradelta[0];
533       xtc3_context->has_large_ints[xtc3_context->has_large*3+1]=intradelta[1];
534       xtc3_context->has_large_ints[xtc3_context->has_large*3+2]=intradelta[2];
535     }
536   else if (best_type==2)
537     {
538       xtc3_context->has_large_ints[xtc3_context->has_large*3]=interdelta[0];
539       xtc3_context->has_large_ints[xtc3_context->has_large*3+1]=interdelta[1];
540       xtc3_context->has_large_ints[xtc3_context->has_large*3+2]=interdelta[2];
541     }
542   xtc3_context->has_large++;
543 }
544
545 static void output_int(unsigned char *output,int *outdata, const unsigned int n)
546 {
547   output[(*outdata)++]=((unsigned int)n)&0xFFU;
548   output[(*outdata)++]=(((unsigned int)n)>>8)&0xFFU;
549   output[(*outdata)++]=(((unsigned int)n)>>16)&0xFFU;
550   output[(*outdata)++]=(((unsigned int)n)>>24)&0xFFU;
551 }
552
553 #if 0
554 static void printarray(unsigned int *a, int n, char *name)
555 {
556   int i;
557   for (i=0; i<n; i++)
558     fprintf(stderr,"%u %s\n",a[i],name);
559 }
560 #endif
561
562 /* The base_compress routine first compresses all x coordinates, then
563    y and finally z. The bases used for each can be different. The
564    MAXBASEVALS value determines how many coordinates are compressed
565    into a single number. Only resulting whole bytes are dealt with for
566    simplicity. MAXMAXBASEVALS is the insanely large value to accept
567    files written with that value. BASEINTERVAL determines how often a
568    new base is actually computed and stored in the output
569    file. MAXBASEVALS*BASEINTERVAL values are stored using the same
570    base in BASEINTERVAL different integers. Note that the primarily
571    the decompression using a large MAXBASEVALS becomes very slow. */
572 #define MAXMAXBASEVALS 16384U
573 #define MAXBASEVALS 24U
574 #define BASEINTERVAL 8
575
576 /* How many bytes are needed to store n values in base base */
577 static int base_bytes(const unsigned int base, const int n)
578 {
579   int i,j;
580   unsigned int largeint[MAXMAXBASEVALS+1];
581   unsigned int largeint_tmp[MAXMAXBASEVALS+1];
582   int numbytes=0;
583
584   memset(largeint, 0U, sizeof(unsigned int) * (n+1));
585
586   for (i=0; i<n; i++)
587     {
588       if (i!=0)
589         {
590           Ptngc_largeint_mul(base,largeint,largeint_tmp,n+1);
591           memcpy(largeint, largeint_tmp, (n+1)*sizeof *largeint);
592         }
593       Ptngc_largeint_add(base-1U,largeint,n+1);
594     }
595   for (i=0; i<n; i++)
596     if (largeint[i])
597       for (j=0; j<4; j++)
598         if ((largeint[i]>>(j*8))&0xFFU)
599           numbytes=i*4+j+1;
600   return numbytes;
601 }
602
603 static void base_compress(unsigned int *data, const int len, unsigned char *output, int *outlen)
604 {
605   unsigned int largeint[MAXBASEVALS+1];
606   unsigned int largeint_tmp[MAXBASEVALS+1];
607   int ixyz, i;
608   unsigned int j;
609   int nwrittenout=0;
610   unsigned int numbytes=0;
611   /* Store the MAXBASEVALS value in the output. */
612   output[nwrittenout++]=(unsigned char)(MAXBASEVALS&0xFFU);
613   output[nwrittenout++]=(unsigned char)((MAXBASEVALS>>8)&0xFFU);
614   /* Store the BASEINTERVAL value in the output. */
615   output[nwrittenout++]=(unsigned char)(BASEINTERVAL&0xFFU);
616   for (ixyz=0; ixyz<3; ixyz++)
617     {
618       unsigned int base=0U;
619       int nvals=0;
620       int basegiven=0;
621
622       memset(largeint, 0U, sizeof(unsigned int) * (MAXBASEVALS+1));
623
624       for (i=ixyz; i<len; i+=3)
625         {
626          if (nvals==0)
627            {
628              int basecheckvals=0;
629              int k;
630              if (basegiven==0)
631                {
632                  base=0U;
633                  /* Find the largest value for this particular coordinate. */
634                  for (k=i; k<len; k+=3)
635                    {
636                      if (data[k]>base)
637                        base=data[k];
638                      basecheckvals++;
639                      if (basecheckvals==MAXBASEVALS*BASEINTERVAL)
640                        break;
641                    }
642                  /* The base is one larger than the largest values. */
643                  base++;
644                  if (base<2)
645                    base=2;
646                  /* Store the base in the output. */
647                  output[nwrittenout++]=(unsigned char)(base&0xFFU);
648                  output[nwrittenout++]=(unsigned char)((base>>8)&0xFFU);
649                  output[nwrittenout++]=(unsigned char)((base>>16)&0xFFU);
650                  output[nwrittenout++]=(unsigned char)((base>>24)&0xFFU);
651                  basegiven=BASEINTERVAL;
652                  /* How many bytes is needed to store MAXBASEVALS values using this base? */
653                  numbytes=base_bytes(base,MAXBASEVALS);
654                }
655              basegiven--;
656 #ifdef SHOWIT
657              fprintf(stderr,"Base for %d is %u. I need %d bytes for %d values.\n",ixyz,base,numbytes,MAXBASEVALS);
658 #endif
659            }
660           if (nvals!=0)
661             {
662               Ptngc_largeint_mul(base,largeint,largeint_tmp,MAXBASEVALS+1);
663               for (j=0; j<MAXBASEVALS+1; j++)
664                 largeint[j]=largeint_tmp[j];
665             }
666           Ptngc_largeint_add(data[i],largeint,MAXBASEVALS+1);
667 #ifdef SHOWIT
668           fprintf(stderr,"outputting value %u\n",data[i]);
669 #endif
670           nvals++;
671           if (nvals==MAXBASEVALS)
672             {
673 #ifdef SHOWIT
674               fprintf(stderr,"Writing largeint: ");
675 #endif
676               for (j=0; j<numbytes; j++)
677                 {
678                   int ilarge=j/4;
679                   int ibyte=j%4;
680                   output[nwrittenout++]=(unsigned char)((largeint[ilarge]>>(ibyte*8))&(0xFFU));
681 #ifdef SHOWIT
682                   fprintf(stderr,"%02x",(unsigned int)output[nwrittenout-1]);
683 #endif
684                 }
685 #ifdef SHOWIT
686               fprintf(stderr,"\n");
687 #endif
688               nvals=0;
689
690               memset(largeint, 0U, sizeof(unsigned int) * (MAXBASEVALS+1));
691             }
692         }
693       if (nvals)
694         {
695           numbytes=base_bytes(base,nvals);
696 #ifdef SHOWIT
697           fprintf(stderr,"Base for %d is %u. I need %d bytes for %d values.\n",ixyz,base,numbytes,nvals);
698 #endif
699           for (j=0; j<numbytes; j++)
700             {
701               int ilarge=j/4;
702               int ibyte=j%4;
703               output[nwrittenout++]=(unsigned char)((largeint[ilarge]>>(ibyte*8))&(0xFFU));
704             }
705         }
706     }
707   *outlen=nwrittenout;
708 }
709
710 static void base_decompress(unsigned char *input, const int len, unsigned int *output)
711 {
712   unsigned int largeint[MAXMAXBASEVALS+1];
713   unsigned int largeint_tmp[MAXMAXBASEVALS+1];
714   int ixyz, i, j;
715   int maxbasevals=(int)((unsigned int)(input[0])|(((unsigned int)(input[1]))<<8));
716   int baseinterval=(int)input[2];
717   if (maxbasevals>(int)MAXMAXBASEVALS)
718     {
719       fprintf(stderr,"Read a larger maxbasevals value from the file than I can handle. Fix"
720               " by increasing MAXMAXBASEVALS to at least %d. Although, this is"
721               " probably a bug in TRAJNG, since MAXMAXBASEVALS should already be insanely large enough.\n",maxbasevals);
722       exit(EXIT_FAILURE);
723     }
724   input+=3;
725   for (ixyz=0; ixyz<3; ixyz++)
726     {
727       int numbytes=0;
728       int nvals_left=len/3;
729       int outvals=ixyz;
730       int basegiven=0;
731       unsigned int base=0U;
732 #ifdef SHOWIT
733       fprintf(stderr,"Base for %d is %u. I need %d bytes for %d values.\n",ixyz,base,numbytes,maxbasevals);
734 #endif
735       while (nvals_left)
736         {
737           int n;
738           if (basegiven==0)
739             {
740               base=(unsigned int)(input[0])|
741                 (((unsigned int)(input[1]))<<8)|
742                 (((unsigned int)(input[2]))<<16)|
743                 (((unsigned int)(input[3]))<<24);
744               input+=4;
745               basegiven=baseinterval;
746               /* How many bytes is needed to store maxbasevals values using this base? */
747               numbytes=base_bytes(base,maxbasevals);
748             }
749           basegiven--;
750           if (nvals_left<maxbasevals)
751             {
752               numbytes=base_bytes(base,nvals_left);
753 #ifdef SHOWIT
754               fprintf(stderr,"Base for %d is %u. I need %d bytes for %d values.\n",ixyz,base,numbytes,nvals_left);
755 #endif
756             }
757           memset(largeint, 0U, sizeof(unsigned int) * (maxbasevals+1));
758 #ifdef SHOWIT
759           fprintf(stderr,"Reading largeint: ");
760 #endif
761           if (numbytes/4 < maxbasevals+1)
762             {
763               for (j=0; j<numbytes; j++)
764                 {
765                   int ilarge=j/4;
766                   int ibyte=j%4;
767                   largeint[ilarge]|=((unsigned int)input[j])<<(ibyte*8);
768 #ifdef SHOWIT
769                   fprintf(stderr,"%02x",(unsigned int)input[j]);
770 #endif
771                 }
772             }
773 #ifdef SHOWIT
774           fprintf(stderr,"\n");
775 #endif
776           input+=numbytes;
777           /* Do the long division required to get the output values. */
778           n=maxbasevals;
779           if (n>nvals_left)
780             n=nvals_left;
781           for (i=n-1; i>=0; i--)
782             {
783               output[outvals+i*3]=Ptngc_largeint_div(base,largeint,largeint_tmp,maxbasevals+1);
784               for (j=0; j<maxbasevals+1; j++)
785                 largeint[j]=largeint_tmp[j];
786             }
787 #ifdef SHOWIT
788           for (i=0; i<n; i++)
789             fprintf(stderr,"outputting value %u\n",output[outvals+i*3]);
790 #endif
791           outvals+=n*3;
792           nvals_left-=n;
793         }
794     }
795 }
796
797 /* If a large proportion of the integers are large (More than 10\% are >14 bits) we return 0, otherwise 1 */
798 static int heuristic_bwlzh(unsigned int *ints, const int nints)
799 {
800   int i,num;
801   num=0;
802   for (i=0; i<nints; i++)
803     if (ints[i]>=16384)
804       num++;
805   if (num>nints/10)
806     return 0;
807   else
808     return 1;
809 }
810
811 /* Speed selects how careful to try to find the most efficient compression. The BWLZH algo is expensive!
812    Speed <=2 always avoids BWLZH everywhere it is possible.
813    Speed 3 and 4 and 5 use heuristics (check proportion of large value). This should mostly be safe.
814    Speed 5 enables the LZ77 component of BWLZH.
815    Speed 6 always tests if BWLZH is better and if it is uses it. This can be very slow.
816  */
817 unsigned char *Ptngc_pack_array_xtc3(int *input, int *length, const int natoms, int speed)
818 {
819   unsigned char *output=NULL;
820   int i,ienc,j;
821   int outdata=0;
822   /* Pack triplets. */
823   int ntriplets=*length/3;
824   int intmax;
825   int max_small;
826   int small_index;
827   int max_large_index;
828   int large_index[3];
829   int prevcoord[3];
830   int runlength=0; /* Initial runlength. "Stupidly" set to zero for
831                       simplicity and explicity */
832   int swapatoms=0; /* Initial guess is that we should not swap the
833                       first two atoms in each large+small
834                       transition */
835   int didswap; /* Whether swapping was actually done. */
836   int inpdata=0;
837   int encode_ints[3+MAX_SMALL_RLE*3]; /* Up to 3 large + 24 small ints can be encoded at once */
838   int nencode;
839   int ntriplets_left=ntriplets;
840   int refused=0;
841   unsigned char *bwlzh_buf=NULL;
842   int bwlzh_buf_len;
843   unsigned char *base_buf=NULL;
844   int base_buf_len;
845
846   struct xtc3_context xtc3_context;
847   init_xtc3_context(&xtc3_context);
848
849   memcpy(xtc3_context.maxint, input, 3*sizeof *xtc3_context.maxint);
850   memcpy(xtc3_context.minint, input, 3*sizeof *xtc3_context.maxint);
851
852   /* Values of speed should be sane. */
853   if (speed<1)
854     speed=1;
855   if (speed>6)
856     speed=6;
857
858 #ifdef SHOWIT
859   nbits_sum=0;
860   nvalues_sum=0;
861 #endif
862   /* Allocate enough memory for output */
863   if (*length < 48)
864     output=warnmalloc(8*48*sizeof *output);
865   else
866     output=warnmalloc(8* *length*sizeof *output);
867
868
869   for (i=1; i<ntriplets; i++)
870     for (j=0; j<3; j++)
871       {
872         if (input[i*3+j]>xtc3_context.maxint[j])
873           xtc3_context.maxint[j]=input[i*3+j];
874         if (input[i*3+j]<xtc3_context.minint[j])
875           xtc3_context.minint[j]=input[i*3+j];
876       }
877
878   large_index[0]=Ptngc_find_magic_index(xtc3_context.maxint[0]-xtc3_context.minint[0]+1);
879   large_index[1]=Ptngc_find_magic_index(xtc3_context.maxint[1]-xtc3_context.minint[1]+1);
880   large_index[2]=Ptngc_find_magic_index(xtc3_context.maxint[2]-xtc3_context.minint[2]+1);
881   max_large_index=large_index[0];
882   if (large_index[1]>max_large_index)
883     max_large_index=large_index[1];
884   if (large_index[2]>max_large_index)
885     max_large_index=large_index[2];
886
887 #ifdef SHOWIT
888   for (j=0; j<3; j++)
889     fprintf(stderr,"minint[%d]=%d. maxint[%d]=%d large_index[%d]=%d value=%d\n",j,xtc3_context.minint[j],j,xtc3_context.maxint[j],
890             j,large_index[j],Ptngc_magic(large_index[j]));
891 #endif
892
893   /* Guess initial small index */
894   small_index=max_large_index/2;
895
896   /* Find the largest value that is not large. Not large is half index of
897      large. */
898   max_small=Ptngc_magic(small_index);
899   intmax=0;
900   for (i=0; i<*length; i++)
901     {
902       int item=input[i];
903       int s=positive_int(item);
904       if (s>intmax)
905         if (s<max_small)
906           intmax=s;
907     }
908   /* This value is not critical, since if I guess wrong, the code will
909      just insert instructions to increase this value. */
910   small_index=Ptngc_find_magic_index(intmax);
911 #ifdef SHOWIT
912   fprintf(stderr,"initial small_index=%d value=%d\n",small_index,Ptngc_magic(small_index));
913 #endif
914
915   output_int(output,&outdata,positive_int(xtc3_context.minint[0]));
916   output_int(output,&outdata,positive_int(xtc3_context.minint[1]));
917   output_int(output,&outdata,positive_int(xtc3_context.minint[2]));
918
919 #if 0
920 #ifdef SHOWIT
921   for (i=0; i<ntriplets_left; i++)
922     fprintf(stderr,"VALUE:%d %6d %6d %6d\n",
923             i,
924             input[inpdata+i*3],
925             input[inpdata+i*3+1],
926             input[inpdata+i*3+2]);
927 #endif
928 #endif
929
930   /* Initial prevcoord is the minimum integers. */
931   memcpy(prevcoord, xtc3_context.minint, 3*sizeof *prevcoord);
932   prevcoord[0]=xtc3_context.minint[0];
933   prevcoord[1]=xtc3_context.minint[1];
934   prevcoord[2]=xtc3_context.minint[2];
935
936   while (ntriplets_left)
937     {
938       if (ntriplets_left<0)
939         {
940           fprintf(stderr,"TRAJNG: BUG! ntriplets_left<0!\n");
941           exit(EXIT_FAILURE);
942         }
943       /* If only less than three atoms left we just write them all as large integers. Here no swapping is done! */
944       if (ntriplets_left<3)
945         {
946           for (ienc=0; ienc<ntriplets_left; ienc++)
947             {
948               buffer_large(&xtc3_context,input,inpdata,natoms,1);
949               inpdata+=3;
950               ntriplets_left--;
951             }
952           flush_large(&xtc3_context,xtc3_context.has_large); /* Flush all */
953         }
954       else
955         {
956           int min_runlength=0;
957           int largest_required_base;
958           int largest_runlength_base;
959           int largest_required_index;
960           int largest_runlength_index;
961           int new_runlength;
962           int new_small_index;
963           int iter_runlength;
964           int iter_small_index;
965           int rle_index_dep;
966           didswap=0;
967           /* Insert the next batch of integers to be encoded into the buffer */
968 #ifdef SHOWIT
969           fprintf(stderr,"Initial batch\n");
970 #endif
971           insert_batch(input+inpdata,ntriplets_left,prevcoord,encode_ints,0,&nencode);
972
973           /* First we must decide if the next value is large (does not reasonably fit in current small encoding)
974              Also, if we have not written any values yet, we must begin by writing a large atom. */
975           if ((inpdata==0) || (is_quite_large(encode_ints,small_index,max_large_index)) || (refused))
976             {
977               /* If any of the next two atoms are large we should probably write them as large and not swap them */
978               int no_swap=0;
979               if ((is_quite_large(encode_ints+3,small_index,max_large_index)) || (is_quite_large(encode_ints+6,small_index,max_large_index)))
980                 no_swap=1;
981 #if 1
982               if (!no_swap)
983                 {
984                   /* If doing inter-frame coding results in smaller values we should not do any swapping either. */
985                   int frame=inpdata/(natoms*3);
986                   if (frame>0)
987                     {
988                       unsigned int delta[3], delta2[3];
989                       delta[0]=positive_int(input[inpdata+3]-input[inpdata-natoms*3+3]);
990                       delta[1]=positive_int(input[inpdata+4]-input[inpdata-natoms*3+4]);
991                       delta[2]=positive_int(input[inpdata+5]-input[inpdata-natoms*3+5]);
992                       delta2[0]=positive_int(encode_ints[3]);
993                       delta2[1]=positive_int(encode_ints[4]);
994                       delta2[2]=positive_int(encode_ints[5]);
995 #ifdef SHOWIT
996                       fprintf(stderr,"A1: inter delta: %u %u %u. intra delta=%u %u %u\n",
997                               delta[0],delta[1],delta[2],
998                               delta2[0],delta2[1],delta2[2]);
999 #endif
1000                       if (compute_intlen(delta)*TRESHOLD_INTER_INTRA<compute_intlen(delta2))
1001                         {
1002                           delta[0]=positive_int(input[inpdata+6]-input[inpdata-natoms*3+6]);
1003                           delta[1]=positive_int(input[inpdata+7]-input[inpdata-natoms*3+7]);
1004                           delta[2]=positive_int(input[inpdata+8]-input[inpdata-natoms*3+8]);
1005                           delta2[0]=positive_int(encode_ints[6]);
1006                           delta2[1]=positive_int(encode_ints[7]);
1007                           delta2[2]=positive_int(encode_ints[8]);
1008 #ifdef SHOWIT
1009                           fprintf(stderr,"A2: inter delta: %u %u %u. intra delta=%u %u %u\n",
1010                                   delta[0],delta[1],delta[2],
1011                                   delta2[0],delta2[1],delta2[2]);
1012 #endif
1013                           if (compute_intlen(delta)*TRESHOLD_INTER_INTRA<compute_intlen(delta2))
1014                             {
1015                               no_swap=1;
1016 #ifdef SHOWIT
1017                               fprintf(stderr,"SETTING NO SWAP!\n");
1018 #endif
1019                             }
1020                         }
1021                     }
1022                 }
1023 #endif
1024               if (!no_swap)
1025                 {
1026                   /* Next we must decide if we should swap the first
1027                      two values. */
1028 #if 1
1029                   swapdecide(&xtc3_context,input+inpdata,&swapatoms,large_index,xtc3_context.minint);
1030 #else
1031                   swapatoms=0;
1032 #endif
1033                   /* If we should do the integer swapping manipulation we should do it now */
1034                   if (swapatoms)
1035                     {
1036                       didswap=1;
1037                       for (i=0; i<3; i++)
1038                         {
1039                           int in[3], out[3];
1040                           in[0]=input[inpdata+i];
1041                           in[1]=input[inpdata+3+i]-input[inpdata+i];
1042                           in[2]=input[inpdata+6+i]-input[inpdata+3+i];
1043                           swap_ints(in,out);
1044                           encode_ints[i]=out[0];
1045                           encode_ints[3+i]=out[1];
1046                           encode_ints[6+i]=out[2];
1047                         }
1048                       /* We have swapped atoms, so the minimum run-length is 2 */
1049 #ifdef SHOWIT
1050                       fprintf(stderr,"Swap atoms results in:\n");
1051                       for (i=0; i<3; i++)
1052                         fprintf(stderr,"%d: %6d %6d %6d\t\t%6d %6d %6d\n",i*3,
1053                                 encode_ints[i*3],
1054                                 encode_ints[i*3+1],
1055                                 encode_ints[i*3+2],
1056                                 positive_int(encode_ints[i*3]),
1057                                 positive_int(encode_ints[i*3+1]),
1058                                 positive_int(encode_ints[i*3+2]));
1059
1060 #endif
1061                       min_runlength=2;
1062                     }
1063                 }
1064               /* Cache large value for later possible combination with
1065                  a sequence of small integers. */
1066               if ((swapatoms) && (didswap))
1067                 {
1068                   buffer_large(&xtc3_context,input,inpdata+3,natoms,0); /* This is a swapped integer, so inpdata is one atom later and
1069                                                                            intra coding is not ok. */
1070                   for (ienc=0; ienc<3; ienc++)
1071                     prevcoord[ienc]=input[inpdata+3+ienc];
1072                 }
1073               else
1074                 {
1075                   buffer_large(&xtc3_context,input,inpdata,natoms,1);
1076                   for (ienc=0; ienc<3; ienc++)
1077                     prevcoord[ienc]=input[inpdata+ienc];
1078                 }
1079
1080
1081 #ifdef SHOWIT
1082               fprintf(stderr,"Prevcoord after packing of large: %d %d %d\n",
1083                       prevcoord[0],prevcoord[1],prevcoord[2]);
1084 #endif
1085
1086               /* We have written a large integer so we have one less atoms to worry about */
1087               inpdata+=3;
1088               ntriplets_left--;
1089
1090               refused=0;
1091
1092               /* Insert the next batch of integers to be encoded into the buffer */
1093 #ifdef SHOWIT
1094               fprintf(stderr,"Update batch due to large int.\n");
1095 #endif
1096               if ((swapatoms) && (didswap))
1097                 {
1098                   /* Keep swapped values. */
1099                   for (i=0; i<2; i++)
1100                     for (ienc=0; ienc<3; ienc++)
1101                       encode_ints[i*3+ienc]=encode_ints[(i+1)*3+ienc];
1102                 }
1103               insert_batch(input+inpdata,ntriplets_left,prevcoord,encode_ints,min_runlength,&nencode);
1104             }
1105           /* Here we should only have differences for the atom coordinates. */
1106           /* Convert the ints to positive ints */
1107           for (ienc=0; ienc<nencode; ienc++)
1108             {
1109               int pint=positive_int(encode_ints[ienc]);
1110               encode_ints[ienc]=pint;
1111             }
1112           /* Now we must decide what base and runlength to do. If we have swapped atoms it will be at least 2.
1113              If even the next atom is large, we will not do anything. */
1114           largest_required_base=0;
1115           /* Determine required base */
1116           for (ienc=0; ienc<min_runlength*3; ienc++)
1117             if (encode_ints[ienc]>largest_required_base)
1118               largest_required_base=encode_ints[ienc];
1119           /* Also compute what the largest base is for the current runlength setting! */
1120           largest_runlength_base=0;
1121           for (ienc=0; (ienc<runlength*3) && (ienc<nencode); ienc++)
1122             if (encode_ints[ienc]>largest_runlength_base)
1123               largest_runlength_base=encode_ints[ienc];
1124
1125           largest_required_index=Ptngc_find_magic_index(largest_required_base);
1126           largest_runlength_index=Ptngc_find_magic_index(largest_runlength_base);
1127
1128           if (largest_required_index<largest_runlength_index)
1129             {
1130               new_runlength=min_runlength;
1131               new_small_index=largest_required_index;
1132             }
1133           else
1134             {
1135               new_runlength=runlength;
1136               new_small_index=largest_runlength_index;
1137             }
1138
1139           /* Only allow increase of runlength wrt min_runlength */
1140           if (new_runlength<min_runlength)
1141             new_runlength=min_runlength;
1142
1143           /* If the current runlength is longer than the number of
1144              triplets left stop it from being so. */
1145           if (new_runlength>ntriplets_left)
1146             new_runlength=ntriplets_left;
1147
1148           /* We must at least try to get some small integers going. */
1149           if (new_runlength==0)
1150             {
1151               new_runlength=1;
1152               new_small_index=small_index;
1153             }
1154
1155           iter_runlength=new_runlength;
1156           iter_small_index=new_small_index;
1157
1158           /* Iterate to find optimal encoding and runlength */
1159 #ifdef SHOWIT
1160           fprintf(stderr,"Entering iterative loop.\n");
1161           fflush(stderr);
1162 #endif
1163
1164           do {
1165             new_runlength=iter_runlength;
1166             new_small_index=iter_small_index;
1167
1168 #ifdef SHOWIT
1169             fprintf(stderr,"Test new_small_index=%d Base=%d\n",new_small_index,Ptngc_magic(new_small_index));
1170 #endif
1171             /* What is the largest runlength
1172                we can do with the currently
1173                selected encoding? Also the max supported runlength is MAX_SMALL_RLE triplets! */
1174             for (ienc=0; ienc<nencode && ienc<MAX_SMALL_RLE*3; ienc++)
1175               {
1176                 int test_index=Ptngc_find_magic_index(encode_ints[ienc]);
1177                 if (test_index>new_small_index)
1178                   break;
1179               }
1180             if (ienc/3>new_runlength)
1181               {
1182                 iter_runlength=ienc/3;
1183 #ifdef SHOWIT
1184                 fprintf(stderr,"I found a new possible runlength: %d\n",iter_runlength);
1185 #endif
1186               }
1187
1188             /* How large encoding do we have to use? */
1189             largest_runlength_base=0;
1190             for (ienc=0; ienc<iter_runlength*3; ienc++)
1191               if (encode_ints[ienc]>largest_runlength_base)
1192                 largest_runlength_base=encode_ints[ienc];
1193             largest_runlength_index=Ptngc_find_magic_index(largest_runlength_base);
1194             if (largest_runlength_index!=new_small_index)
1195               {
1196                 iter_small_index=largest_runlength_index;
1197 #ifdef SHOWIT
1198                 fprintf(stderr,"I found a new possible small index: %d Base=%d\n",iter_small_index,Ptngc_magic(iter_small_index));
1199 #endif
1200               }
1201           } while ((new_runlength!=iter_runlength) ||
1202                    (new_small_index!=iter_small_index));
1203
1204 #ifdef SHOWIT
1205           fprintf(stderr,"Exit iterative loop.\n");
1206           fflush(stderr);
1207 #endif
1208
1209           /* Verify that we got something good. We may have caught a
1210              substantially larger atom. If so we should just bail
1211              out and let the loop get on another lap. We may have a
1212              minimum runlength though and then we have to fulfill
1213              the request to write out these atoms! */
1214           rle_index_dep=0;
1215           if (new_runlength<3)
1216             rle_index_dep=IS_LARGE;
1217           else if (new_runlength<6)
1218             rle_index_dep=QUITE_LARGE;
1219           if ((min_runlength)
1220               || ((new_small_index<small_index+IS_LARGE) && (new_small_index+rle_index_dep<max_large_index))
1221 #if 1
1222               || (new_small_index+IS_LARGE<max_large_index)
1223 #endif
1224 )
1225             {
1226               /* If doing inter-frame coding of large integers results
1227                  in smaller values than the small value we should not
1228                  produce a sequence of small values here. */
1229               int frame=inpdata/(natoms*3);
1230               int numsmaller=0;
1231 #if 1
1232               if ((!swapatoms) && (frame>0))
1233                 {
1234                   for (i=0; i<new_runlength; i++)
1235                     {
1236                       unsigned int delta[3];
1237                       unsigned int delta2[3];
1238                       delta[0]=positive_int(input[inpdata+i*3]-input[inpdata-natoms*3+i*3]);
1239                       delta[1]=positive_int(input[inpdata+i*3+1]-input[inpdata-natoms*3+i*3+1]);
1240                       delta[2]=positive_int(input[inpdata+i*3+2]-input[inpdata-natoms*3+i*3+2]);
1241                       delta2[0]=positive_int(encode_ints[i*3]);
1242                       delta2[1]=positive_int(encode_ints[i*3+1]);
1243                       delta2[2]=positive_int(encode_ints[i*3+2]);
1244                       if (compute_intlen(delta)*TRESHOLD_INTER_INTRA<compute_intlen(delta2))
1245                         numsmaller++;
1246                     }
1247                 }
1248 #endif
1249               /* Most of the values should become smaller, otherwise
1250                  we should encode them with intra coding. */
1251               if ((!swapatoms) && (numsmaller>=2*new_runlength/3))
1252                 {
1253                   /* Put all the values in large arrays, instead of the small array */
1254                   if (new_runlength)
1255                     {
1256                       for (i=0; i<new_runlength; i++)
1257                         buffer_large(&xtc3_context,input,inpdata+i*3,natoms,1);
1258                       for (i=0; i<3; i++)
1259                         prevcoord[i]=input[inpdata+(new_runlength-1)*3+i];
1260                       inpdata+=3*new_runlength;
1261                       ntriplets_left-=new_runlength;
1262                     }
1263                 }
1264               else
1265                 {
1266                   if ((new_runlength!=runlength) || (new_small_index!=small_index))
1267                     {
1268                       int change=new_small_index-small_index;
1269
1270                       if (new_small_index<=0)
1271                         change=0;
1272
1273                       if (change<0)
1274                         {
1275                           int ixx;
1276                           for (ixx=0; ixx<new_runlength; ixx++)
1277                             {
1278                               int rejected;
1279                               do {
1280                                 int ixyz;
1281                                 double isum=0.; /* ints can be almost 32 bit so multiplication will overflow. So do doubles. */
1282                                 for (ixyz=0; ixyz<3; ixyz++)
1283                                   {
1284                                     /* encode_ints is already positive (and multiplied by 2 versus the original, just as magic ints) */
1285                                     double id=encode_ints[ixx*3+ixyz];
1286                                     isum+=id*id;
1287                                   }
1288                                 rejected=0;
1289 #ifdef SHOWIT
1290                                 fprintf(stderr,"Tested decrease %d of index: %g>=%g?\n",change,isum,(double)Ptngc_magic(small_index+change)*(double)Ptngc_magic(small_index+change));
1291 #endif
1292                                 if (isum>(double)Ptngc_magic(small_index+change)*(double)Ptngc_magic(small_index+change))
1293                                   {
1294 #ifdef SHOWIT
1295                                     fprintf(stderr,"Rejected decrease %d of index due to length of vector: %g>=%g\n",change,isum,(double)Ptngc_magic(small_index+change)*(double)Ptngc_magic(small_index+change));
1296 #endif
1297                                     rejected=1;
1298                                     change++;
1299                                   }
1300                               } while ((change<0) && (rejected));
1301                               if (change==0)
1302                                 break;
1303                             }
1304                         }
1305
1306                       /* Always accept the new small indices here. */
1307                       small_index=new_small_index;
1308                       /* If we have a new runlength emit it */
1309                       if (runlength!=new_runlength)
1310                         {
1311                           runlength=new_runlength;
1312                           insert_value_in_array(&xtc3_context.instructions,
1313                                                 &xtc3_context.ninstr,
1314                                                 &xtc3_context.ninstr_alloc,
1315                                                 INSTR_SMALL_RUNLENGTH,"instr");
1316                           insert_value_in_array(&xtc3_context.rle,
1317                                                 &xtc3_context.nrle,
1318                                                 &xtc3_context.nrle_alloc,
1319                                                 (unsigned int)runlength,"rle (small)");
1320                         }
1321 #ifdef SHOWIT
1322                       fprintf(stderr,"Current small index: %d Base=%d\n",small_index,Ptngc_magic(small_index));
1323 #endif
1324                     }
1325                   /* If we have a large previous integer we can combine it with a sequence of small ints. */
1326                   if (xtc3_context.has_large)
1327                     {
1328                       /* If swapatoms is set to 1 but we did actually not
1329                          do any swapping, we must first write out the
1330                          large atom and then the small. If swapatoms is 1
1331                          and we did swapping we can use the efficient
1332                          encoding. */
1333                       if ((swapatoms) && (!didswap))
1334                         {
1335 #ifdef SHOWIT
1336                           fprintf(stderr,"Swapatoms was set to 1 but we did not do swapping!\n");
1337                           fprintf(stderr,"Only one large integer.\n");
1338 #endif
1339                           /* Flush all large atoms. */
1340                           flush_large(&xtc3_context,xtc3_context.has_large);
1341 #ifdef SHOWIT
1342                           fprintf(stderr,"Sequence of only small integers.\n");
1343 #endif
1344                           insert_value_in_array(&xtc3_context.instructions,
1345                                                 &xtc3_context.ninstr,
1346                                                 &xtc3_context.ninstr_alloc,
1347                                                 INSTR_ONLY_SMALL,"instr");
1348                         }
1349                       else
1350                         {
1351
1352 #ifdef SHOWIT
1353                           fprintf(stderr,"Sequence of one large and small integers (good compression).\n");
1354 #endif
1355                           /* Flush all large atoms but one! */
1356                           if (xtc3_context.has_large>1)
1357                             flush_large(&xtc3_context,xtc3_context.has_large-1);
1358
1359                           /* Here we must check if we should emit a large
1360                              type change instruction. */
1361                           large_instruction_change(&xtc3_context,0);
1362
1363                           insert_value_in_array(&xtc3_context.instructions,
1364                                                 &xtc3_context.ninstr,
1365                                                 &xtc3_context.ninstr_alloc,
1366                                                 INSTR_DEFAULT,"instr");
1367
1368                           write_three_large(&xtc3_context,0);
1369                           xtc3_context.has_large=0;
1370                         }
1371                     }
1372                   else
1373                     {
1374 #ifdef SHOWIT
1375                       fprintf(stderr,"Sequence of only small integers.\n");
1376 #endif
1377                       insert_value_in_array(&xtc3_context.instructions,
1378                                             &xtc3_context.ninstr,
1379                                             &xtc3_context.ninstr_alloc,
1380                                             INSTR_ONLY_SMALL,"instr");
1381                     }
1382                   /* Insert the small integers into the small integer array. */
1383                   for (ienc=0; ienc<runlength*3; ienc++)
1384                     insert_value_in_array(&xtc3_context.smallintra,
1385                                           &xtc3_context.nsmallintra,
1386                                           &xtc3_context.nsmallintra_alloc,
1387                                           (unsigned int)encode_ints[ienc],"smallintra");
1388
1389 #ifdef SHOWIT
1390                   for (ienc=0; ienc<runlength; ienc++)
1391                     fprintf(stderr,"Small: %d %d %d\n",
1392                             encode_ints[ienc*3],
1393                             encode_ints[ienc*3+1],
1394                             encode_ints[ienc*3+2]);
1395 #endif
1396                   /* Update prevcoord. */
1397                   for (ienc=0; ienc<runlength; ienc++)
1398                     {
1399 #ifdef SHOWIT
1400                       fprintf(stderr,"Prevcoord in packing: %d %d %d\n",
1401                               prevcoord[0],prevcoord[1],prevcoord[2]);
1402 #endif
1403                       prevcoord[0]+=unpositive_int(encode_ints[ienc*3]);
1404                       prevcoord[1]+=unpositive_int(encode_ints[ienc*3+1]);
1405                       prevcoord[2]+=unpositive_int(encode_ints[ienc*3+2]);
1406                     }
1407 #ifdef SHOWIT
1408                   fprintf(stderr,"Prevcoord in packing: %d %d %d\n",
1409                           prevcoord[0],prevcoord[1],prevcoord[2]);
1410 #endif
1411
1412                   inpdata+=3*runlength;
1413                   ntriplets_left-=runlength;
1414 #if 1
1415                 }
1416 #endif
1417             }
1418           else
1419             {
1420 #ifdef SHOWIT
1421               fprintf(stderr,"Refused value: %d old is %d max is %d\n",new_small_index,small_index,max_large_index);
1422               fflush(stderr);
1423 #endif
1424               refused=1;
1425             }
1426         }
1427 #ifdef SHOWIT
1428       fprintf(stderr,"Number of triplets left is %d\n",ntriplets_left);
1429 #endif
1430     }
1431
1432   /* If we have large previous integers we must flush them now. */
1433   if (xtc3_context.has_large)
1434     flush_large(&xtc3_context,xtc3_context.has_large);
1435
1436   /* Now it is time to compress all the data in the buffers with the bwlzh or base algo. */
1437
1438 #if 0
1439   /* Inspect the data. */
1440   printarray(xtc3_context.instructions,xtc3_context.ninstr,"A instr");
1441   printarray(xtc3_context.rle,xtc3_context.nrle,"A rle");
1442   printarray(xtc3_context.large_direct,xtc3_context.nlargedir,"A largedir");
1443   printarray(xtc3_context.large_intra_delta,xtc3_context.nlargeintra,"A largeintra");
1444   printarray(xtc3_context.large_inter_delta,xtc3_context.nlargeinter,"A largeinter");
1445   printarray(xtc3_context.smallintra,xtc3_context.nsmallintra,"A smallintra");
1446   exit(0);
1447 #endif
1448
1449 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1450   fprintf(stderr,"instructions: %d\n",xtc3_context.ninstr);
1451 #endif
1452
1453 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1454 #define bwlzh_compress bwlzh_compress_verbose
1455 #define bwlzh_compress_no_lz77 bwlzh_compress_no_lz77_verbose
1456 #endif
1457
1458   output_int(output,&outdata,(unsigned int)xtc3_context.ninstr);
1459   if (xtc3_context.ninstr)
1460     {
1461       bwlzh_buf=warnmalloc(bwlzh_get_buflen(xtc3_context.ninstr));
1462       if (speed>=5)
1463         bwlzh_compress(xtc3_context.instructions,xtc3_context.ninstr,bwlzh_buf,&bwlzh_buf_len);
1464       else
1465         bwlzh_compress_no_lz77(xtc3_context.instructions,xtc3_context.ninstr,bwlzh_buf,&bwlzh_buf_len);
1466       output_int(output,&outdata,(unsigned int)bwlzh_buf_len);
1467       memcpy(output+outdata,bwlzh_buf,bwlzh_buf_len);
1468       outdata+=bwlzh_buf_len;
1469       free(bwlzh_buf);
1470     }
1471
1472 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1473   fprintf(stderr,"rle: %d\n",xtc3_context.nrle);
1474 #endif
1475
1476   output_int(output,&outdata,(unsigned int)xtc3_context.nrle);
1477   if (xtc3_context.nrle)
1478     {
1479       bwlzh_buf=warnmalloc(bwlzh_get_buflen(xtc3_context.nrle));
1480       if (speed>=5)
1481         bwlzh_compress(xtc3_context.rle,xtc3_context.nrle,bwlzh_buf,&bwlzh_buf_len);
1482       else
1483         bwlzh_compress_no_lz77(xtc3_context.rle,xtc3_context.nrle,bwlzh_buf,&bwlzh_buf_len);
1484       output_int(output,&outdata,(unsigned int)bwlzh_buf_len);
1485       memcpy(output+outdata,bwlzh_buf,bwlzh_buf_len);
1486       outdata+=bwlzh_buf_len;
1487       free(bwlzh_buf);
1488     }
1489
1490 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1491   fprintf(stderr,"large direct: %d\n",xtc3_context.nlargedir);
1492 #endif
1493
1494   output_int(output,&outdata,(unsigned int)xtc3_context.nlargedir);
1495   if (xtc3_context.nlargedir)
1496     {
1497       if ((speed<=2) || ((speed<=5) && (!heuristic_bwlzh(xtc3_context.large_direct,xtc3_context.nlargedir))))
1498         {
1499           bwlzh_buf=NULL;
1500           bwlzh_buf_len=INT_MAX;
1501         }
1502       else
1503         {
1504           bwlzh_buf=warnmalloc(bwlzh_get_buflen(xtc3_context.nlargedir));
1505           if (speed>=5)
1506             bwlzh_compress(xtc3_context.large_direct,xtc3_context.nlargedir,bwlzh_buf,&bwlzh_buf_len);
1507           else
1508             bwlzh_compress_no_lz77(xtc3_context.large_direct,xtc3_context.nlargedir,bwlzh_buf,&bwlzh_buf_len);
1509         }
1510       /* If this can be written smaller using base compression we should do that. */
1511       base_buf=warnmalloc((xtc3_context.nlargedir+3)*sizeof(int));
1512       base_compress(xtc3_context.large_direct,xtc3_context.nlargedir,base_buf,&base_buf_len);
1513 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1514       fprintf(stderr,"Large direct: Base len=%d. BWLZH len=%d\n",base_buf_len,bwlzh_buf_len);
1515 #endif
1516       if (base_buf_len<bwlzh_buf_len)
1517         {
1518           output[outdata++]=0U;
1519           output_int(output,&outdata,(unsigned int)base_buf_len);
1520           memcpy(output+outdata,base_buf,base_buf_len);
1521           outdata+=base_buf_len;
1522         }
1523       else
1524         {
1525           output[outdata++]=1U;
1526           output_int(output,&outdata,(unsigned int)bwlzh_buf_len);
1527           memcpy(output+outdata,bwlzh_buf,bwlzh_buf_len);
1528           outdata+=bwlzh_buf_len;
1529         }
1530       free(bwlzh_buf);
1531       free(base_buf);
1532     }
1533
1534 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1535   fprintf(stderr,"large intra: %d\n",xtc3_context.nlargeintra);
1536 #endif
1537
1538   output_int(output,&outdata,(unsigned int)xtc3_context.nlargeintra);
1539   if (xtc3_context.nlargeintra)
1540     {
1541       if ((speed<=2) || ((speed<=5) && (!heuristic_bwlzh(xtc3_context.large_intra_delta,xtc3_context.nlargeintra))))
1542         {
1543           bwlzh_buf=NULL;
1544           bwlzh_buf_len=INT_MAX;
1545         }
1546       else
1547         {
1548           bwlzh_buf=warnmalloc(bwlzh_get_buflen(xtc3_context.nlargeintra));
1549           if (speed>=5)
1550             bwlzh_compress(xtc3_context.large_intra_delta,xtc3_context.nlargeintra,bwlzh_buf,&bwlzh_buf_len);
1551           else
1552             bwlzh_compress_no_lz77(xtc3_context.large_intra_delta,xtc3_context.nlargeintra,bwlzh_buf,&bwlzh_buf_len);
1553         }
1554       /* If this can be written smaller using base compression we should do that. */
1555       base_buf=warnmalloc((xtc3_context.nlargeintra+3)*sizeof(int));
1556       base_compress(xtc3_context.large_intra_delta,xtc3_context.nlargeintra,base_buf,&base_buf_len);
1557 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1558       fprintf(stderr,"Large intra: Base len=%d. BWLZH len=%d\n",base_buf_len,bwlzh_buf_len);
1559 #endif
1560       if (base_buf_len<bwlzh_buf_len)
1561         {
1562           output[outdata++]=0U;
1563           output_int(output,&outdata,(unsigned int)base_buf_len);
1564           memcpy(output+outdata,base_buf,base_buf_len);
1565           outdata+=base_buf_len;
1566         }
1567       else
1568         {
1569           output[outdata++]=1U;
1570           output_int(output,&outdata,(unsigned int)bwlzh_buf_len);
1571           memcpy(output+outdata,bwlzh_buf,bwlzh_buf_len);
1572           outdata+=bwlzh_buf_len;
1573         }
1574       free(bwlzh_buf);
1575       free(base_buf);
1576     }
1577
1578 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1579   fprintf(stderr,"large inter: %d\n",xtc3_context.nlargeinter);
1580 #endif
1581
1582   output_int(output,&outdata,(unsigned int)xtc3_context.nlargeinter);
1583   if (xtc3_context.nlargeinter)
1584     {
1585       if ((speed<=2) || ((speed<=5) && (!heuristic_bwlzh(xtc3_context.large_inter_delta,xtc3_context.nlargeinter))))
1586         {
1587           bwlzh_buf=NULL;
1588           bwlzh_buf_len=INT_MAX;
1589         }
1590       else
1591         {
1592           bwlzh_buf=warnmalloc(bwlzh_get_buflen(xtc3_context.nlargeinter));
1593           if (speed>=5)
1594             bwlzh_compress(xtc3_context.large_inter_delta,xtc3_context.nlargeinter,bwlzh_buf,&bwlzh_buf_len);
1595           else
1596             bwlzh_compress_no_lz77(xtc3_context.large_inter_delta,xtc3_context.nlargeinter,bwlzh_buf,&bwlzh_buf_len);
1597         }
1598       /* If this can be written smaller using base compression we should do that. */
1599       base_buf=warnmalloc((xtc3_context.nlargeinter+3)*sizeof(int));
1600       base_compress(xtc3_context.large_inter_delta,xtc3_context.nlargeinter,base_buf,&base_buf_len);
1601 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1602       fprintf(stderr,"Large inter: Base len=%d. BWLZH len=%d\n",base_buf_len,bwlzh_buf_len);
1603 #endif
1604       if (base_buf_len<bwlzh_buf_len)
1605         {
1606           output[outdata++]=0U;
1607           output_int(output,&outdata,(unsigned int)base_buf_len);
1608           memcpy(output+outdata,base_buf,base_buf_len);
1609           outdata+=base_buf_len;
1610         }
1611       else
1612         {
1613           output[outdata++]=1U;
1614           output_int(output,&outdata,(unsigned int)bwlzh_buf_len);
1615           memcpy(output+outdata,bwlzh_buf,bwlzh_buf_len);
1616           outdata+=bwlzh_buf_len;
1617         }
1618       free(bwlzh_buf);
1619       free(base_buf);
1620     }
1621
1622 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1623   fprintf(stderr,"small intra: %d\n",xtc3_context.nsmallintra);
1624 #endif
1625
1626   output_int(output,&outdata,(unsigned int)xtc3_context.nsmallintra);
1627   if (xtc3_context.nsmallintra)
1628     {
1629       if ((speed<=2) || ((speed<=5) && (!heuristic_bwlzh(xtc3_context.smallintra,xtc3_context.nsmallintra))))
1630         {
1631           bwlzh_buf=NULL;
1632           bwlzh_buf_len=INT_MAX;
1633         }
1634       else
1635         {
1636           bwlzh_buf=warnmalloc(bwlzh_get_buflen(xtc3_context.nsmallintra));
1637           if (speed>=5)
1638             bwlzh_compress(xtc3_context.smallintra,xtc3_context.nsmallintra,bwlzh_buf,&bwlzh_buf_len);
1639           else
1640             bwlzh_compress_no_lz77(xtc3_context.smallintra,xtc3_context.nsmallintra,bwlzh_buf,&bwlzh_buf_len);
1641         }
1642       /* If this can be written smaller using base compression we should do that. */
1643       base_buf=warnmalloc((xtc3_context.nsmallintra+3)*sizeof(int));
1644       base_compress(xtc3_context.smallintra,xtc3_context.nsmallintra,base_buf,&base_buf_len);
1645 #if defined(SHOWIT) || defined(SHOWIT_LIGHT)
1646       fprintf(stderr,"Small intra: Base len=%d. BWLZH len=%d\n",base_buf_len,bwlzh_buf_len);
1647 #endif
1648       if (base_buf_len<bwlzh_buf_len)
1649         {
1650           output[outdata++]=0U;
1651           output_int(output,&outdata,(unsigned int)base_buf_len);
1652           memcpy(output+outdata,base_buf,base_buf_len);
1653           outdata+=base_buf_len;
1654         }
1655       else
1656         {
1657           output[outdata++]=1U;
1658           output_int(output,&outdata,(unsigned int)bwlzh_buf_len);
1659           memcpy(output+outdata,bwlzh_buf,bwlzh_buf_len);
1660           outdata+=bwlzh_buf_len;
1661         }
1662       free(bwlzh_buf);
1663       free(base_buf);
1664     }
1665   *length=outdata;
1666
1667   free_xtc3_context(&xtc3_context);
1668   return output;
1669 }
1670
1671 static void decompress_bwlzh_block(unsigned char **ptr,
1672                                    const int nvals,
1673                                    unsigned int **vals)
1674 {
1675   int bwlzh_buf_len=(int)(((unsigned int)(*ptr)[0]) |
1676                           (((unsigned int)(*ptr)[1])<<8) |
1677                           (((unsigned int)(*ptr)[2])<<16) |
1678                           (((unsigned int)(*ptr)[3])<<24));
1679   (*ptr)+=4;
1680   *vals=warnmalloc(nvals*sizeof (**vals));
1681   bwlzh_decompress(*ptr,nvals,*vals);
1682   (*ptr)+=bwlzh_buf_len;
1683 }
1684
1685 static void decompress_base_block(unsigned char **ptr,
1686                                   const int nvals,
1687                                   unsigned int **vals)
1688 {
1689   int base_buf_len=(int)(((unsigned int)(*ptr)[0]) |
1690                          (((unsigned int)(*ptr)[1])<<8) |
1691                          (((unsigned int)(*ptr)[2])<<16) |
1692                          (((unsigned int)(*ptr)[3])<<24));
1693   (*ptr)+=4;
1694   *vals=warnmalloc(nvals*sizeof (**vals));
1695   base_decompress(*ptr,nvals,*vals);
1696   (*ptr)+=base_buf_len;
1697 }
1698
1699 static void unpack_one_large(struct xtc3_context *xtc3_context,
1700                              int *ilargedir, int *ilargeintra,
1701                              int *ilargeinter, int *prevcoord,
1702                              int *minint, int *output,
1703                              const int outdata, const int didswap,
1704                              const int natoms, const int current_large_type)
1705 {
1706   int large_ints[3]={0,0,0};
1707   if (current_large_type==0 && xtc3_context->large_direct)
1708     {
1709       large_ints[0]=(int)xtc3_context->large_direct[(*ilargedir)]+minint[0];
1710       large_ints[1]=(int)xtc3_context->large_direct[(*ilargedir)+1]+minint[1];
1711       large_ints[2]=(int)xtc3_context->large_direct[(*ilargedir)+2]+minint[2];
1712       (*ilargedir)+=3;
1713     }
1714   else if (current_large_type==1 && xtc3_context->large_intra_delta)
1715     {
1716       large_ints[0]=unpositive_int(xtc3_context->large_intra_delta[(*ilargeintra)])+prevcoord[0];
1717       large_ints[1]=unpositive_int(xtc3_context->large_intra_delta[(*ilargeintra)+1])+prevcoord[1];
1718       large_ints[2]=unpositive_int(xtc3_context->large_intra_delta[(*ilargeintra)+2])+prevcoord[2];
1719       (*ilargeintra)+=3;
1720     }
1721   else if (xtc3_context->large_inter_delta)
1722     {
1723       large_ints[0]=unpositive_int(xtc3_context->large_inter_delta[(*ilargeinter)])
1724         +output[outdata-natoms*3+didswap*3];
1725       large_ints[1]=unpositive_int(xtc3_context->large_inter_delta[(*ilargeinter)+1])
1726         +output[outdata-natoms*3+1+didswap*3];
1727       large_ints[2]=unpositive_int(xtc3_context->large_inter_delta[(*ilargeinter)+2])
1728         +output[outdata-natoms*3+2+didswap*3];
1729       (*ilargeinter)+=3;
1730     }
1731   memcpy(prevcoord, large_ints, 3*sizeof *prevcoord);
1732   output[outdata]=large_ints[0];
1733   output[outdata+1]=large_ints[1];
1734   output[outdata+2]=large_ints[2];
1735 #ifdef SHOWIT
1736   fprintf(stderr,"Unpack one large: %d %d %d\n",prevcoord[0],prevcoord[1],prevcoord[2]);
1737 #endif
1738 }
1739
1740
1741 int Ptngc_unpack_array_xtc3(unsigned char *packed,int *output, const int length, const int natoms)
1742 {
1743   int i;
1744   int minint[3];
1745   unsigned char *ptr=packed;
1746   int prevcoord[3];
1747   int outdata=0;
1748   int ntriplets_left=length/3;
1749   int swapatoms=0;
1750   int runlength=0;
1751   int current_large_type=0;
1752   int iinstr=0;
1753   int irle=0;
1754   int ilargedir=0;
1755   int ilargeintra=0;
1756   int ilargeinter=0;
1757   int ismallintra=0;
1758
1759   struct xtc3_context xtc3_context;
1760   init_xtc3_context(&xtc3_context);
1761
1762   for (i=0; i<3; i++)
1763     {
1764       minint[i]=unpositive_int((int)(((unsigned int)ptr[0]) |
1765                                      (((unsigned int)ptr[1])<<8) |
1766                                      (((unsigned int)ptr[2])<<16) |
1767                                      (((unsigned int)ptr[3])<<24)));
1768       ptr+=4;
1769     }
1770
1771   xtc3_context.ninstr=(int)(((unsigned int)ptr[0]) |
1772                             (((unsigned int)ptr[1])<<8) |
1773                             (((unsigned int)ptr[2])<<16) |
1774                             (((unsigned int)ptr[3])<<24));
1775   ptr+=4;
1776   if (xtc3_context.ninstr)
1777     decompress_bwlzh_block(&ptr,xtc3_context.ninstr,&xtc3_context.instructions);
1778
1779   xtc3_context.nrle=(int)(((unsigned int)ptr[0]) |
1780                           (((unsigned int)ptr[1])<<8) |
1781                           (((unsigned int)ptr[2])<<16) |
1782                           (((unsigned int)ptr[3])<<24));
1783   ptr+=4;
1784   if (xtc3_context.nrle)
1785     decompress_bwlzh_block(&ptr,xtc3_context.nrle,&xtc3_context.rle);
1786
1787   xtc3_context.nlargedir=(int)(((unsigned int)ptr[0]) |
1788                                (((unsigned int)ptr[1])<<8) |
1789                                (((unsigned int)ptr[2])<<16) |
1790                                (((unsigned int)ptr[3])<<24));
1791   ptr+=4;
1792   if (xtc3_context.nlargedir)
1793     {
1794       if (*ptr++==1)
1795         decompress_bwlzh_block(&ptr,xtc3_context.nlargedir,&xtc3_context.large_direct);
1796       else
1797         decompress_base_block(&ptr,xtc3_context.nlargedir,&xtc3_context.large_direct);
1798     }
1799
1800   xtc3_context.nlargeintra=(int)(((unsigned int)ptr[0]) |
1801                                  (((unsigned int)ptr[1])<<8) |
1802                                  (((unsigned int)ptr[2])<<16) |
1803                                  (((unsigned int)ptr[3])<<24));
1804   ptr+=4;
1805   if (xtc3_context.nlargeintra)
1806     {
1807       if (*ptr++==1)
1808         decompress_bwlzh_block(&ptr,xtc3_context.nlargeintra,&xtc3_context.large_intra_delta);
1809       else
1810         decompress_base_block(&ptr,xtc3_context.nlargeintra,&xtc3_context.large_intra_delta);
1811     }
1812
1813   xtc3_context.nlargeinter=(int)(((unsigned int)ptr[0]) |
1814                                  (((unsigned int)ptr[1])<<8) |
1815                                  (((unsigned int)ptr[2])<<16) |
1816                                  (((unsigned int)ptr[3])<<24));
1817   ptr+=4;
1818   if (xtc3_context.nlargeinter)
1819     {
1820       if (*ptr++==1)
1821         decompress_bwlzh_block(&ptr,xtc3_context.nlargeinter,&xtc3_context.large_inter_delta);
1822       else
1823         decompress_base_block(&ptr,xtc3_context.nlargeinter,&xtc3_context.large_inter_delta);
1824     }
1825
1826   xtc3_context.nsmallintra=(int)(((unsigned int)ptr[0]) |
1827                                  (((unsigned int)ptr[1])<<8) |
1828                                  (((unsigned int)ptr[2])<<16) |
1829                                  (((unsigned int)ptr[3])<<24));
1830   ptr+=4;
1831   if (xtc3_context.nsmallintra)
1832     {
1833       if (*ptr++==1)
1834         decompress_bwlzh_block(&ptr,xtc3_context.nsmallintra,&xtc3_context.smallintra);
1835       else
1836         decompress_base_block(&ptr,xtc3_context.nsmallintra,&xtc3_context.smallintra);
1837     }
1838
1839   /* Initial prevcoord is the minimum integers. */
1840   memcpy(prevcoord, minint, 3*sizeof *prevcoord);
1841
1842   while (ntriplets_left>0 && iinstr<xtc3_context.ninstr)
1843     {
1844       int instr=xtc3_context.instructions[iinstr++];
1845 #ifdef SHOWIT
1846       fprintf(stderr,"instr=%d @ %d\n",instr,iinstr-1);
1847 #endif
1848 #ifdef SHOWIT
1849       fprintf(stderr,"ntriplets left=%d\n",ntriplets_left);
1850 #endif
1851       if ((instr==INSTR_DEFAULT) /* large+small */
1852           || (instr==INSTR_ONLY_LARGE) /* only large */
1853           || (instr==INSTR_ONLY_SMALL)) /* only small */
1854         {
1855           if (instr!=INSTR_ONLY_SMALL)
1856             {
1857               int didswap=0;
1858               if ((instr==INSTR_DEFAULT) && (swapatoms))
1859                 didswap=1;
1860               unpack_one_large(&xtc3_context,&ilargedir, &ilargeintra, &ilargeinter,
1861                                prevcoord, minint, output, outdata, didswap,
1862                                natoms, current_large_type);
1863               ntriplets_left--;
1864               outdata+=3;
1865             }
1866           if (instr!=INSTR_ONLY_LARGE)
1867             {
1868               for (i=0; i<runlength; i++)
1869                 {
1870                   prevcoord[0]+=unpositive_int(xtc3_context.smallintra[ismallintra]);
1871                   prevcoord[1]+=unpositive_int(xtc3_context.smallintra[ismallintra+1]);
1872                   prevcoord[2]+=unpositive_int(xtc3_context.smallintra[ismallintra+2]);
1873                   ismallintra+=3;
1874                   output[outdata+i*3]=prevcoord[0];
1875                   output[outdata+i*3+1]=prevcoord[1];
1876                   output[outdata+i*3+2]=prevcoord[2];
1877 #ifdef SHOWIT
1878                   fprintf(stderr,"Unpack small: %d %d %d\n",prevcoord[0],prevcoord[1],prevcoord[2]);
1879 #endif
1880                 }
1881               if ((instr==INSTR_DEFAULT) && (swapatoms))
1882                 {
1883                   for (i=0; i<3; i++)
1884                     {
1885                       int tmp=output[outdata-3+i];
1886                       output[outdata-3+i]=output[outdata+i];
1887                       output[outdata+i]=tmp;
1888                     }
1889 #ifdef SHOWIT
1890                   fprintf(stderr,"Unswap results in\n");
1891                   for (i=0; i<3; i++)
1892                     fprintf(stderr,"%d %d %d\n",output[outdata-3+i*3],output[outdata-2+i*3],output[outdata-1+i*3]);
1893 #endif
1894                 }
1895               ntriplets_left-=runlength;
1896               outdata+=runlength*3;
1897             }
1898         }
1899       else if (instr==INSTR_LARGE_RLE && irle<xtc3_context.nrle)
1900         {
1901           int large_rle=xtc3_context.rle[irle++];
1902 #ifdef SHOWIT
1903           fprintf(stderr,"large_rle=%d @ %d\n",large_rle,irle-1);
1904 #endif
1905           for (i=0; i<large_rle; i++)
1906             {
1907               unpack_one_large(&xtc3_context,&ilargedir, &ilargeintra, &ilargeinter,
1908                                prevcoord, minint, output, outdata, 0,
1909                                natoms, current_large_type);
1910               ntriplets_left--;
1911               outdata+=3;
1912             }
1913         }
1914       else if (instr==INSTR_SMALL_RUNLENGTH && irle<xtc3_context.nrle)
1915         {
1916           runlength=xtc3_context.rle[irle++];
1917 #ifdef SHOWIT
1918           fprintf(stderr,"small_rle=%d @ %d\n",runlength,irle-1);
1919 #endif
1920         }
1921       else if (instr==INSTR_FLIP)
1922         {
1923           swapatoms=1-swapatoms;
1924 #ifdef SHOWIT
1925           fprintf(stderr,"new flip=%d\n",swapatoms);
1926 #endif
1927         }
1928       else if (instr==INSTR_LARGE_DIRECT)
1929         {
1930           current_large_type=0;
1931 #ifdef SHOWIT
1932           fprintf(stderr,"large direct\n");
1933 #endif
1934         }
1935       else if (instr==INSTR_LARGE_INTRA_DELTA)
1936         {
1937           current_large_type=1;
1938 #ifdef SHOWIT
1939           fprintf(stderr,"large intra delta\n");
1940 #endif
1941         }
1942       else if (instr==INSTR_LARGE_INTER_DELTA)
1943         {
1944           current_large_type=2;
1945 #ifdef SHOWIT
1946           fprintf(stderr,"large inter delta\n");
1947 #endif
1948         }
1949     }
1950   if (ntriplets_left<0)
1951     {
1952       fprintf(stderr,"TRAJNG XTC3: A bug has been found. At end ntriplets_left<0\n");
1953       exit(EXIT_FAILURE);
1954     }
1955   free_xtc3_context(&xtc3_context);
1956   return 0;
1957 }