ae95462fcb52c986465fdbf00296f65bb3b802e3
[alexxy/gromacs.git] / src / mdlib / genborn.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2008, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #include <math.h>
44 #include <string.h>
45
46 #include "typedefs.h"
47 #include "smalloc.h"
48 #include "genborn.h"
49 #include "vec.h"
50 #include "grompp.h"
51 #include "pdbio.h"
52 #include "names.h"
53 #include "physics.h"
54 #include "partdec.h"
55 #include "domdec.h"
56 #include "network.h"
57 #include "gmx_fatal.h"
58 #include "mtop_util.h"
59 #include "pbc.h"
60 #include "nrnb.h"
61 #include "bondf.h"
62
63 #ifdef GMX_LIB_MPI
64 #include <mpi.h>
65 #endif
66 #ifdef GMX_THREAD_MPI
67 #include "tmpi.h"
68 #endif
69
70 #ifdef GMX_X86_SSE2
71 #  ifdef GMX_DOUBLE
72 #    include "genborn_sse2_double.h"
73 #    include "genborn_allvsall_sse2_double.h"
74 #  else
75 #    include "genborn_sse2_single.h"
76 #    include "genborn_allvsall_sse2_single.h"
77 #  endif /* GMX_DOUBLE */
78 #endif /* SSE or AVX present */
79
80 #include "genborn_allvsall.h"
81
82 /*#define DISABLE_SSE*/
83
84 typedef struct {
85     int shift;
86     int naj;
87     int *aj;
88     int aj_nalloc;
89 } gbtmpnbl_t;
90
91 typedef struct gbtmpnbls {
92     int nlist;
93     gbtmpnbl_t *list;
94     int list_nalloc;
95 } t_gbtmpnbls;
96
97 /* This function is exactly the same as the one in bondfree.c. The reason
98  * it is copied here is that the bonded gb-interactions are evaluated
99  * not in calc_bonds, but rather in calc_gb_forces
100  */
101 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
102 {
103         if (pbc) {
104                 return pbc_dx_aiuc(pbc,xi,xj,dx);
105         }
106         else {
107                 rvec_sub(xi,xj,dx);
108                 return CENTRAL;
109         }
110 }
111
112 int init_gb_nblist(int natoms, t_nblist *nl)
113 {
114     nl->maxnri      = natoms*4;
115     nl->maxnrj      = 0;
116     nl->maxlen      = 0;
117     nl->nri         = 0;
118     nl->nrj         = 0;
119     nl->iinr        = NULL;
120     nl->gid         = NULL;
121     nl->shift       = NULL;
122     nl->jindex      = NULL;
123     nl->jjnr        = NULL;
124     /*nl->nltype      = nltype;*/
125     
126     srenew(nl->iinr,   nl->maxnri);
127     srenew(nl->gid,    nl->maxnri);
128     srenew(nl->shift,  nl->maxnri);
129     srenew(nl->jindex, nl->maxnri+1);
130     
131     nl->jindex[0] = 0;
132     
133     return 0;
134 }
135
136 void gb_pd_send(t_commrec *cr, real *send_data, int nr)
137 {
138 #ifdef GMX_MPI    
139     int i,cur;
140     int *index,*sendc,*disp;
141     
142     snew(sendc,cr->nnodes);
143     snew(disp,cr->nnodes);
144     
145     index = pd_index(cr);
146     cur   = cr->nodeid;
147     
148     /* Setup count/index arrays */
149     for(i=0;i<cr->nnodes;i++)
150     {
151         sendc[i]  = index[i+1]-index[i];
152         disp[i]   = index[i];    
153     }
154     
155     /* Do communication */
156     MPI_Gatherv(send_data+index[cur],sendc[cur],GMX_MPI_REAL,send_data,sendc,
157                 disp,GMX_MPI_REAL,0,cr->mpi_comm_mygroup);
158     MPI_Bcast(send_data,nr,GMX_MPI_REAL,0,cr->mpi_comm_mygroup);
159     
160 #endif
161 }
162
163
164 int init_gb_plist(t_params *p_list)
165 {
166     p_list->nr    = 0;
167     p_list->param = NULL;
168     
169     return 0;
170 }
171
172
173
174 int init_gb_still(const t_commrec *cr, t_forcerec  *fr, 
175                   const t_atomtypes *atype, t_idef *idef, t_atoms *atoms, 
176                   gmx_genborn_t *born,int natoms)
177 {
178     
179     int i,j,i1,i2,k,m,nbond,nang,ia,ib,ic,id,nb,idx,idx2,at;
180     int iam,ibm;
181     int at0,at1;
182     real length,angle;
183     real r,ri,rj,ri2,ri3,rj2,r2,r3,r4,rk,ratio,term,h,doffset;
184     real p1,p2,p3,factor,cosine,rab,rbc;
185     
186     real *vsol;
187     real *gp;
188     
189     snew(vsol,natoms);
190     snew(gp,natoms);
191     snew(born->gpol_still_work,natoms+3);
192     
193     if(PAR(cr))
194     {
195         if(PARTDECOMP(cr))
196         {
197             pd_at_range(cr,&at0,&at1);
198             
199             for(i=0;i<natoms;i++)
200             {
201                 vsol[i] = gp[i] = 0;
202             }
203         }
204         else
205         {
206             at0 = 0;
207             at1 = natoms;
208         }
209     }
210     else
211     {
212         at0 = 0;
213         at1 = natoms;
214     }
215     
216     doffset = born->gb_doffset;
217     
218     for(i=0;i<natoms;i++)
219     {
220         born->gpol_globalindex[i]=born->vsolv_globalindex[i]=
221             born->gb_radius_globalindex[i]=0;     
222     }
223     
224     /* Compute atomic solvation volumes for Still method */
225     for(i=0;i<natoms;i++)
226     {    
227         ri=atype->gb_radius[atoms->atom[i].type];
228         born->gb_radius_globalindex[i] = ri;
229         r3=ri*ri*ri;
230         born->vsolv_globalindex[i]=(4*M_PI/3)*r3;        
231     }
232
233     for(j=0;j<idef->il[F_GB12].nr;j+=3)
234     {
235         m=idef->il[F_GB12].iatoms[j];
236         ia=idef->il[F_GB12].iatoms[j+1];
237         ib=idef->il[F_GB12].iatoms[j+2];
238         
239         r=1.01*idef->iparams[m].gb.st;
240         
241         ri   = atype->gb_radius[atoms->atom[ia].type];
242         rj   = atype->gb_radius[atoms->atom[ib].type];
243         
244         ri2  = ri*ri;
245         ri3  = ri2*ri;
246         rj2  = rj*rj;
247         
248         ratio  = (rj2-ri2-r*r)/(2*ri*r);
249         h      = ri*(1+ratio);
250         term   = (M_PI/3.0)*h*h*(3.0*ri-h);
251
252         if(PARTDECOMP(cr))
253         {
254             vsol[ia]+=term;
255         }
256         else
257         {
258             born->vsolv_globalindex[ia] -= term;
259         }
260         
261         ratio  = (ri2-rj2-r*r)/(2*rj*r);
262         h      = rj*(1+ratio);
263         term   = (M_PI/3.0)*h*h*(3.0*rj-h);
264         
265         if(PARTDECOMP(cr))
266         {
267             vsol[ib]+=term;
268         }
269         else
270         {
271             born->vsolv_globalindex[ib] -= term;
272         }        
273     }
274     
275     if(PARTDECOMP(cr))
276     {
277         gmx_sum(natoms,vsol,cr);
278         
279         for(i=0;i<natoms;i++)
280         {
281             born->vsolv_globalindex[i]=born->vsolv_globalindex[i]-vsol[i];
282         }
283     }
284   
285     /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still 
286        method */
287     /* Self */
288     for(j=0;j<natoms;j++)
289     {
290         if(born->use_globalindex[j]==1)
291         {
292             born->gpol_globalindex[j]=-0.5*ONE_4PI_EPS0/
293                 (atype->gb_radius[atoms->atom[j].type]-doffset+STILL_P1);
294         }
295     }
296     
297     /* 1-2 */
298     for(j=0;j<idef->il[F_GB12].nr;j+=3)
299     {
300         m=idef->il[F_GB12].iatoms[j];
301         ia=idef->il[F_GB12].iatoms[j+1];
302         ib=idef->il[F_GB12].iatoms[j+2];
303         
304         r=idef->iparams[m].gb.st;
305         
306         r4=r*r*r*r;
307
308         if(PARTDECOMP(cr))
309         {
310             gp[ia]+=STILL_P2*born->vsolv_globalindex[ib]/r4;
311             gp[ib]+=STILL_P2*born->vsolv_globalindex[ia]/r4;
312         }
313         else
314         {
315             born->gpol_globalindex[ia]=born->gpol_globalindex[ia]+
316                 STILL_P2*born->vsolv_globalindex[ib]/r4;
317             born->gpol_globalindex[ib]=born->gpol_globalindex[ib]+
318                 STILL_P2*born->vsolv_globalindex[ia]/r4;
319         }
320     }
321
322     /* 1-3 */
323     for(j=0;j<idef->il[F_GB13].nr;j+=3)
324     {
325         m=idef->il[F_GB13].iatoms[j];
326         ia=idef->il[F_GB13].iatoms[j+1];
327         ib=idef->il[F_GB13].iatoms[j+2];
328     
329         r=idef->iparams[m].gb.st;
330         r4=r*r*r*r;
331     
332         if(PARTDECOMP(cr))
333         {
334             gp[ia]+=STILL_P3*born->vsolv[ib]/r4;
335             gp[ib]+=STILL_P3*born->vsolv[ia]/r4;
336         }
337         else
338         {
339             born->gpol_globalindex[ia]=born->gpol_globalindex[ia]+
340                 STILL_P3*born->vsolv_globalindex[ib]/r4;
341             born->gpol_globalindex[ib]=born->gpol_globalindex[ib]+
342                 STILL_P3*born->vsolv_globalindex[ia]/r4;
343         }        
344     }
345     
346     if(PARTDECOMP(cr))
347     {
348         gmx_sum(natoms,gp,cr);
349         
350         for(i=0;i<natoms;i++)
351         {
352             born->gpol_globalindex[i]=born->gpol_globalindex[i]+gp[i];
353         }    
354     }
355     
356     sfree(vsol);
357     sfree(gp);
358         
359     return 0;
360 }
361
362 /* Initialize all GB datastructs and compute polarization energies */
363 int init_gb(gmx_genborn_t **p_born,
364             const t_commrec *cr, t_forcerec *fr, const t_inputrec *ir,
365             const gmx_mtop_t *mtop, real rgbradii, int gb_algorithm)
366 {
367     int i,j,m,ai,aj,jj,natoms,nalloc;
368     real rai,sk,p,doffset;
369     
370     t_atoms        atoms;
371     gmx_genborn_t  *born;
372     gmx_localtop_t *localtop;
373
374     natoms   = mtop->natoms;
375         
376     atoms    = gmx_mtop_global_atoms(mtop);
377     localtop = gmx_mtop_generate_local_top(mtop,ir);
378     
379     snew(born,1);
380     *p_born = born;
381
382     born->nr  = natoms;
383     
384     snew(born->drobc, natoms);
385     snew(born->bRad,  natoms);
386     
387     /* Allocate memory for the global data arrays */
388     snew(born->param_globalindex, natoms+3);
389     snew(born->gpol_globalindex,  natoms+3);
390     snew(born->vsolv_globalindex, natoms+3);
391     snew(born->gb_radius_globalindex, natoms+3);
392     snew(born->use_globalindex,    natoms+3);
393     
394     snew(fr->invsqrta, natoms);
395     snew(fr->dvda,     natoms);
396     
397     fr->dadx              = NULL;
398     fr->dadx_rawptr       = NULL;
399     fr->nalloc_dadx       = 0;
400     born->gpol_still_work = NULL;
401     born->gpol_hct_work   = NULL;
402     
403     /* snew(born->asurf,natoms); */
404     /* snew(born->dasurf,natoms); */
405
406     /* Initialize the gb neighbourlist */
407     init_gb_nblist(natoms,&(fr->gblist));
408     
409     /* Do the Vsites exclusions (if any) */
410     for(i=0;i<natoms;i++)
411     {
412         jj = atoms.atom[i].type;
413         if (mtop->atomtypes.gb_radius[atoms.atom[i].type] > 0)
414         {
415             born->use_globalindex[i] = 1;
416         }
417         else
418         {
419             born->use_globalindex[i] = 0;
420         }
421                 
422         /* If we have a Vsite, put vs_globalindex[i]=0 */
423         if (C6 (fr->nbfp,fr->ntype,jj,jj) == 0 &&
424             C12(fr->nbfp,fr->ntype,jj,jj) == 0 &&
425             atoms.atom[i].q == 0)
426         {
427             born->use_globalindex[i]=0;
428         }
429     }
430     
431     /* Copy algorithm parameters from inputrecord to local structure */
432     born->obc_alpha  = ir->gb_obc_alpha;
433     born->obc_beta   = ir->gb_obc_beta;
434     born->obc_gamma  = ir->gb_obc_gamma;
435     born->gb_doffset = ir->gb_dielectric_offset;
436     born->gb_epsilon_solvent = ir->gb_epsilon_solvent;
437     born->epsilon_r = ir->epsilon_r;
438     
439     doffset = born->gb_doffset;
440   
441     /* Set the surface tension */
442     born->sa_surface_tension = ir->sa_surface_tension;
443    
444     /* If Still model, initialise the polarisation energies */
445     if(gb_algorithm==egbSTILL)    
446     {
447         init_gb_still(cr, fr,&(mtop->atomtypes), &(localtop->idef), &atoms, 
448                       born, natoms);    
449     }
450
451     
452     /* If HCT/OBC,  precalculate the sk*atype->S_hct factors */
453     else if(gb_algorithm==egbHCT || gb_algorithm==egbOBC)
454     {
455         
456         snew(born->gpol_hct_work, natoms+3);
457         
458         for(i=0;i<natoms;i++)
459         {    
460             if(born->use_globalindex[i]==1)
461             {
462                 rai = mtop->atomtypes.gb_radius[atoms.atom[i].type]-doffset; 
463                 sk  = rai * mtop->atomtypes.S_hct[atoms.atom[i].type];
464                 born->param_globalindex[i] = sk;
465                 born->gb_radius_globalindex[i] = rai;
466             }
467             else
468             {
469                 born->param_globalindex[i] = 0;
470                 born->gb_radius_globalindex[i] = 0;
471             }
472         }
473     }
474         
475     /* Allocate memory for work arrays for temporary use */
476     snew(born->work,natoms+4);
477     snew(born->count,natoms);
478     snew(born->nblist_work,natoms);
479     
480     /* Domain decomposition specific stuff */
481     born->nalloc = 0;
482     
483     return 0;
484 }
485
486
487
488 static int
489 calc_gb_rad_still(t_commrec *cr, t_forcerec *fr,int natoms, gmx_localtop_t *top,
490                   const t_atomtypes *atype, rvec x[], t_nblist *nl, 
491                   gmx_genborn_t *born,t_mdatoms *md)
492 {    
493     int i,k,n,nj0,nj1,ai,aj,type;
494     int shift;
495     real shX,shY,shZ;
496     real gpi,dr,dr2,dr4,idr4,rvdw,ratio,ccf,theta,term,rai,raj;
497     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11;
498     real rinv,idr2,idr6,vaj,dccf,cosq,sinq,prod,gpi2;
499     real factor;
500     real vai, prod_ai, icf4,icf6;
501     
502     factor  = 0.5*ONE_4PI_EPS0;
503     n       = 0;
504     
505     for(i=0;i<born->nr;i++)
506     {
507         born->gpol_still_work[i]=0;
508     }
509      
510         for(i=0;i<nl->nri;i++ )
511     {
512         ai      = nl->iinr[i];
513         
514         nj0     = nl->jindex[i];            
515         nj1     = nl->jindex[i+1];
516     
517         /* Load shifts for this list */
518         shift   = nl->shift[i];
519         shX     = fr->shift_vec[shift][0];
520         shY     = fr->shift_vec[shift][1];
521         shZ     = fr->shift_vec[shift][2];
522         
523         gpi     = 0;
524         
525         rai     = top->atomtypes.gb_radius[md->typeA[ai]];
526         vai     = born->vsolv[ai];
527         prod_ai = STILL_P4*vai;
528         
529         /* Load atom i coordinates, add shift vectors */
530         ix1     = shX + x[ai][0];
531         iy1     = shY + x[ai][1];
532         iz1     = shZ + x[ai][2];
533                         
534         for(k=nj0;k<nj1;k++)
535         {
536             aj    = nl->jjnr[k];
537             jx1   = x[aj][0];
538             jy1   = x[aj][1];
539             jz1   = x[aj][2];
540             
541             dx11  = ix1-jx1;
542             dy11  = iy1-jy1;
543             dz11  = iz1-jz1;
544             
545             dr2   = dx11*dx11+dy11*dy11+dz11*dz11; 
546             rinv  = gmx_invsqrt(dr2);
547             idr2  = rinv*rinv;
548             idr4  = idr2*idr2;
549             idr6  = idr4*idr2;
550             
551             raj = top->atomtypes.gb_radius[md->typeA[aj]];
552             
553             rvdw  = rai + raj;
554             
555             ratio = dr2 / (rvdw * rvdw);
556             vaj   = born->vsolv[aj];
557             
558             if(ratio>STILL_P5INV) 
559             {
560                 ccf=1.0;
561                 dccf=0.0;
562             }
563             else
564             {
565                 theta = ratio*STILL_PIP5;
566                 cosq  = cos(theta);
567                 term  = 0.5*(1.0-cosq);
568                 ccf   = term*term;
569                 sinq  = 1.0 - cosq*cosq;
570                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
571             }
572             
573             prod          = STILL_P4*vaj;
574             icf4          = ccf*idr4;
575             icf6          = (4*ccf-dccf)*idr6;
576
577             born->gpol_still_work[aj] += prod_ai*icf4;
578             gpi             = gpi+prod*icf4;
579             
580             /* Save ai->aj and aj->ai chain rule terms */
581             fr->dadx[n++]   = prod*icf6;
582             fr->dadx[n++]   = prod_ai*icf6;
583         }
584         born->gpol_still_work[ai]+=gpi;
585     }
586
587     /* Parallel summations */
588     if(PARTDECOMP(cr))
589     {
590         gmx_sum(natoms, born->gpol_still_work, cr);
591     }
592     else if(DOMAINDECOMP(cr))
593     {
594         dd_atom_sum_real(cr->dd, born->gpol_still_work);
595     }
596         
597     /* Calculate the radii */
598         for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
599     {
600                 if(born->use[i] != 0)
601         {
602                 
603             gpi  = born->gpol[i]+born->gpol_still_work[i];
604             gpi2 = gpi * gpi;
605             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
606             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
607                 }
608     }
609
610     /* Extra communication required for DD */
611     if(DOMAINDECOMP(cr))
612     {
613         dd_atom_spread_real(cr->dd, born->bRad);
614         dd_atom_spread_real(cr->dd, fr->invsqrta);
615     }
616     
617     return 0;
618     
619 }
620     
621
622 static int 
623 calc_gb_rad_hct(t_commrec *cr,t_forcerec *fr,int natoms, gmx_localtop_t *top,
624                 const t_atomtypes *atype, rvec x[], t_nblist *nl, 
625                 gmx_genborn_t *born,t_mdatoms *md)
626 {
627     int i,k,n,ai,aj,nj0,nj1,at0,at1;
628     int shift;
629     real shX,shY,shZ;
630     real rai,raj,gpi,dr2,dr,sk,sk_ai,sk2,sk2_ai,lij,uij,diff2,tmp,sum_ai;
631     real rad,min_rad,rinv,rai_inv;
632     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11;
633     real lij2, uij2, lij3, uij3, t1,t2,t3;
634     real lij_inv,dlij,duij,sk2_rinv,prod,log_term;
635     real doffset,raj_inv,dadx_val;
636     real *gb_radius;
637     
638     doffset = born->gb_doffset;
639     gb_radius = born->gb_radius;
640
641     for(i=0;i<born->nr;i++)
642     {
643         born->gpol_hct_work[i] = 0;
644     }
645     
646     /* Keep the compiler happy */
647     n    = 0;
648     prod = 0;
649         
650     for(i=0;i<nl->nri;i++)
651     {
652         ai     = nl->iinr[i];
653             
654         nj0    = nl->jindex[i];            
655         nj1    = nl->jindex[i+1];
656         
657         /* Load shifts for this list */
658         shift   = nl->shift[i];
659         shX     = fr->shift_vec[shift][0];
660         shY     = fr->shift_vec[shift][1];
661         shZ     = fr->shift_vec[shift][2];
662         
663         rai     = gb_radius[ai];
664         rai_inv = 1.0/rai;
665         
666         sk_ai   = born->param[ai];
667         sk2_ai  = sk_ai*sk_ai;
668         
669         /* Load atom i coordinates, add shift vectors */
670         ix1     = shX + x[ai][0];
671         iy1     = shY + x[ai][1];
672         iz1     = shZ + x[ai][2];
673         
674         sum_ai  = 0;
675         
676         for(k=nj0;k<nj1;k++)
677         {
678             aj    = nl->jjnr[k];
679             
680             jx1   = x[aj][0];
681             jy1   = x[aj][1];
682             jz1   = x[aj][2];
683             
684             dx11  = ix1 - jx1;
685             dy11  = iy1 - jy1;
686             dz11  = iz1 - jz1;
687             
688             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
689             rinv  = gmx_invsqrt(dr2);
690             dr    = rinv*dr2;
691             
692             sk    = born->param[aj];
693             raj   = gb_radius[aj];
694             
695             /* aj -> ai interaction */
696             if(rai < dr+sk)
697             {
698                 lij     = 1.0/(dr-sk);
699                 dlij    = 1.0;
700                 
701                 if(rai>dr-sk) 
702                 {
703                     lij  = rai_inv;
704                     dlij = 0.0;
705                 }
706                             
707                 lij2     = lij*lij;
708                 lij3     = lij2*lij;
709                 
710                 uij      = 1.0/(dr+sk);
711                 uij2     = uij*uij;
712                 uij3     = uij2*uij;
713                 
714                 diff2    = uij2-lij2;
715                 
716                 lij_inv  = gmx_invsqrt(lij2);
717                 sk2      = sk*sk;
718                 sk2_rinv = sk2*rinv;
719                 prod     = 0.25*sk2_rinv;
720                 
721                 log_term = log(uij*lij_inv);
722                 
723                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
724                     prod*(-diff2);
725                                 
726                 if(rai<sk-dr)
727                 {
728                     tmp = tmp + 2.0 * (rai_inv-lij);
729                 }
730                     
731                 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
732                 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
733                 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
734                 
735                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
736                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ 
737                 /* rb2 is moved to chainrule    */
738
739                 sum_ai += 0.5*tmp;
740             }
741             else
742             {
743                 dadx_val = 0.0;
744             }
745             fr->dadx[n++] = dadx_val;
746
747             
748             /* ai -> aj interaction */
749             if(raj < dr + sk_ai)
750             {
751                 lij     = 1.0/(dr-sk_ai);
752                 dlij    = 1.0;
753                 raj_inv = 1.0/raj;
754                 
755                 if(raj>dr-sk_ai)
756                 {
757                     lij = raj_inv;
758                     dlij = 0.0;
759                 }
760                 
761                 lij2     = lij  * lij;
762                 lij3     = lij2 * lij;
763                 
764                 uij      = 1.0/(dr+sk_ai);
765                 uij2     = uij  * uij;
766                 uij3     = uij2 * uij;
767                 
768                 diff2    = uij2-lij2;
769                 
770                 lij_inv  = gmx_invsqrt(lij2);
771                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
772                 sk2_rinv = sk2*rinv;
773                 prod     = 0.25 * sk2_rinv;
774                 
775                 /* log_term = table_log(uij*lij_inv,born->log_table,
776                    LOG_TABLE_ACCURACY); */
777                 log_term = log(uij*lij_inv);
778                 
779                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
780                            prod*(-diff2);
781                 
782                 if(raj<sk_ai-dr)
783                 {
784                     tmp     = tmp + 2.0 * (raj_inv-lij);
785                 }
786                 
787                 /* duij = 1.0 */
788                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
789                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
790                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
791                 
792                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
793                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule    */
794                 
795                 born->gpol_hct_work[aj] += 0.5*tmp;
796             }
797             else
798             {
799                 dadx_val = 0.0;
800             }
801             fr->dadx[n++] = dadx_val;
802         }
803         
804         born->gpol_hct_work[ai] += sum_ai;
805     }
806     
807     /* Parallel summations */
808     if(PARTDECOMP(cr))
809     {
810         gmx_sum(natoms, born->gpol_hct_work, cr);
811     }
812     else if(DOMAINDECOMP(cr))
813     {
814         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
815     }
816     
817     for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
818     {
819                 if(born->use[i] != 0)
820         {
821             rai     = top->atomtypes.gb_radius[md->typeA[i]]-doffset; 
822             sum_ai  = 1.0/rai - born->gpol_hct_work[i];
823             min_rad = rai + doffset;
824             rad     = 1.0/sum_ai; 
825             
826             born->bRad[i]   = rad > min_rad ? rad : min_rad;
827             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
828         }
829     }
830     
831     /* Extra communication required for DD */
832     if(DOMAINDECOMP(cr))
833     {
834         dd_atom_spread_real(cr->dd, born->bRad);
835         dd_atom_spread_real(cr->dd, fr->invsqrta);
836     }
837     
838     
839     return 0;
840 }
841
842 static int 
843 calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
844                     const t_atomtypes *atype, rvec x[], t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md)
845 {
846     int i,k,ai,aj,nj0,nj1,n,at0,at1;
847     int shift;
848     real shX,shY,shZ;
849     real rai,raj,gpi,dr2,dr,sk,sk2,lij,uij,diff2,tmp,sum_ai;
850     real rad, min_rad,sum_ai2,sum_ai3,tsum,tchain,rinv,rai_inv,lij_inv,rai_inv2;
851     real log_term,prod,sk2_rinv,sk_ai,sk2_ai;
852     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11;
853     real lij2,uij2,lij3,uij3,dlij,duij,t1,t2,t3;
854     real doffset,raj_inv,dadx_val;
855     real *gb_radius;
856     
857     /* Keep the compiler happy */
858     n    = 0;
859     prod = 0;
860     raj  = 0;
861     
862     doffset = born->gb_doffset;
863     gb_radius = born->gb_radius;
864     
865     for(i=0;i<born->nr;i++)
866     {
867         born->gpol_hct_work[i] = 0;
868     }
869     
870     for(i=0;i<nl->nri;i++)
871     {
872         ai      = nl->iinr[i];
873     
874         nj0     = nl->jindex[i];
875         nj1     = nl->jindex[i+1];
876         
877         /* Load shifts for this list */
878         shift   = nl->shift[i];
879         shX     = fr->shift_vec[shift][0];
880         shY     = fr->shift_vec[shift][1];
881         shZ     = fr->shift_vec[shift][2];
882         
883         rai      = gb_radius[ai];
884         rai_inv  = 1.0/rai;
885         
886         sk_ai    = born->param[ai];
887         sk2_ai   = sk_ai*sk_ai;
888         
889         /* Load atom i coordinates, add shift vectors */
890         ix1      = shX + x[ai][0];
891         iy1      = shY + x[ai][1];
892         iz1      = shZ + x[ai][2];
893         
894         sum_ai   = 0;
895         
896         for(k=nj0;k<nj1;k++)
897         {
898             aj    = nl->jjnr[k];
899             
900             jx1   = x[aj][0];
901             jy1   = x[aj][1];
902             jz1   = x[aj][2];
903             
904             dx11  = ix1 - jx1;
905             dy11  = iy1 - jy1;
906             dz11  = iz1 - jz1;
907             
908             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
909             rinv  = gmx_invsqrt(dr2);
910             dr    = dr2*rinv;
911         
912             /* sk is precalculated in init_gb() */
913             sk    = born->param[aj];
914             raj   = gb_radius[aj];
915             
916             /* aj -> ai interaction */
917             if(rai < dr+sk)
918             {
919                 lij       = 1.0/(dr-sk);
920                 dlij      = 1.0; 
921                                 
922                 if(rai>dr-sk)
923                 {
924                     lij  = rai_inv;
925                     dlij = 0.0;
926                 }
927                 
928                 uij      = 1.0/(dr+sk);
929                 lij2     = lij  * lij;
930                 lij3     = lij2 * lij;
931                 uij2     = uij  * uij;
932                 uij3     = uij2 * uij;
933                 
934                 diff2    = uij2-lij2;
935                 
936                 lij_inv  = gmx_invsqrt(lij2);
937                 sk2      = sk*sk;
938                 sk2_rinv = sk2*rinv;    
939                 prod     = 0.25*sk2_rinv;
940                 
941                 log_term = log(uij*lij_inv);
942                 
943                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
944                 
945                 if(rai < sk-dr)
946                 {
947                     tmp = tmp + 2.0 * (rai_inv-lij);
948                 }
949                 
950                 /* duij    = 1.0; */
951                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr); 
952                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr); 
953                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv; 
954                     
955                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
956                 
957                 sum_ai += 0.5*tmp;
958             }
959             else
960             {
961                 dadx_val = 0.0;
962             }
963             fr->dadx[n++] = dadx_val;
964           
965             /* ai -> aj interaction */
966             if(raj < dr + sk_ai)
967             {
968                 lij     = 1.0/(dr-sk_ai);
969                 dlij    = 1.0;
970                 raj_inv = 1.0/raj;
971                 
972                 if(raj>dr-sk_ai)
973                 {
974                     lij = raj_inv;
975                     dlij = 0.0;
976                 }
977                 
978                 lij2     = lij  * lij;
979                 lij3     = lij2 * lij;
980                 
981                 uij      = 1.0/(dr+sk_ai);
982                 uij2     = uij  * uij;
983                 uij3     = uij2 * uij;
984                 
985                 diff2    = uij2-lij2;
986                 
987                 lij_inv  = gmx_invsqrt(lij2);
988                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
989                 sk2_rinv = sk2*rinv;
990                 prod     = 0.25 * sk2_rinv;
991                 
992                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
993                 log_term = log(uij*lij_inv);
994                 
995                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
996                 
997                 if(raj<sk_ai-dr)
998                 {
999                     tmp     = tmp + 2.0 * (raj_inv-lij);
1000                 }
1001                 
1002                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
1003                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
1004                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
1005                 
1006                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
1007                 
1008                 born->gpol_hct_work[aj] += 0.5*tmp;
1009                 
1010             }
1011             else
1012             {
1013                 dadx_val = 0.0;
1014             }
1015             fr->dadx[n++] = dadx_val;
1016
1017         }        
1018         born->gpol_hct_work[ai] += sum_ai;
1019       
1020     }
1021     
1022     /* Parallel summations */
1023     if(PARTDECOMP(cr))
1024     {
1025         gmx_sum(natoms, born->gpol_hct_work, cr);
1026     }
1027     else if(DOMAINDECOMP(cr))
1028     {
1029         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
1030     }
1031     
1032     for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
1033     {
1034                 if(born->use[i] != 0)
1035         {
1036             rai        = top->atomtypes.gb_radius[md->typeA[i]];
1037             rai_inv2   = 1.0/rai;
1038             rai        = rai-doffset; 
1039             rai_inv    = 1.0/rai;
1040             sum_ai     = rai * born->gpol_hct_work[i];
1041             sum_ai2    = sum_ai  * sum_ai;
1042             sum_ai3    = sum_ai2 * sum_ai;
1043             
1044             tsum    = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
1045             born->bRad[i] = rai_inv - tsum*rai_inv2;
1046             born->bRad[i] = 1.0 / born->bRad[i];
1047             
1048             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
1049             
1050             tchain  = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
1051             born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
1052         }
1053     }
1054     
1055     /* Extra (local) communication required for DD */
1056     if(DOMAINDECOMP(cr))
1057     {
1058         dd_atom_spread_real(cr->dd, born->bRad);
1059         dd_atom_spread_real(cr->dd, fr->invsqrta);
1060         dd_atom_spread_real(cr->dd, born->drobc);
1061     }
1062     
1063     return 0;
1064     
1065 }
1066
1067
1068
1069 int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *top,
1070                 const t_atomtypes *atype, rvec x[], t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md,t_nrnb     *nrnb)
1071 {    
1072     real *p;
1073     int   cnt;
1074     int ndadx;
1075
1076     if(fr->bAllvsAll && fr->dadx==NULL)
1077     {
1078         /* We might need up to 8 atoms of padding before and after, 
1079          * and another 4 units to guarantee SSE alignment.
1080          */
1081         fr->nalloc_dadx = 2*(md->homenr+12)*(md->nr/2+1+12);
1082         snew(fr->dadx_rawptr,fr->nalloc_dadx);
1083         fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
1084     }
1085     else
1086     {
1087         /* In the SSE-enabled gb-loops, when writing to dadx, we
1088          * always write 2*4 elements at a time, even in the case with only
1089          * 1-3 j particles, where we only really need to write 2*(1-3)
1090          * elements. This is because we want dadx to be aligned to a 16-
1091          * byte boundary, and being able to use _mm_store/load_ps
1092          */
1093         ndadx = 2 * (nl->nrj + 3*nl->nri);
1094
1095         /* First, reallocate the dadx array, we need 3 extra for SSE */
1096         if (ndadx + 3 > fr->nalloc_dadx)
1097         {
1098             fr->nalloc_dadx = over_alloc_large(ndadx) + 3;
1099             srenew(fr->dadx_rawptr,fr->nalloc_dadx);
1100             fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));            
1101         }
1102     }
1103
1104     if(fr->bAllvsAll)
1105     {
1106         cnt = md->homenr*(md->nr/2+1);
1107         
1108         if(ir->gb_algorithm==egbSTILL)
1109         {
1110 #if 0 && defined (GMX_X86_SSE2)
1111             if(fr->use_acceleration)
1112             {
1113 #  ifdef GMX_DOUBLE
1114                 genborn_allvsall_calc_still_radii_sse2_double(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
1115 #  else
1116                 genborn_allvsall_calc_still_radii_sse2_single(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
1117 #  endif
1118             }
1119             else
1120             {
1121                 genborn_allvsall_calc_still_radii(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
1122             }
1123 #else
1124             genborn_allvsall_calc_still_radii(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
1125 #endif
1126             /* 13 flops in outer loop, 47 flops in inner loop */
1127             inc_nrnb(nrnb,eNR_BORN_AVA_RADII_STILL,md->homenr*13+cnt*47);
1128         }
1129         else if(ir->gb_algorithm==egbHCT || ir->gb_algorithm==egbOBC)
1130         {
1131 #if 0 && defined (GMX_X86_SSE2)
1132             if(fr->use_acceleration)
1133             {
1134 #  ifdef GMX_DOUBLE
1135                 genborn_allvsall_calc_hct_obc_radii_sse2_double(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
1136 #  else
1137                 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
1138 #  endif
1139             }
1140             else
1141             {
1142                 genborn_allvsall_calc_hct_obc_radii(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
1143             }
1144 #else
1145             genborn_allvsall_calc_hct_obc_radii(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
1146 #endif
1147             /* 24 flops in outer loop, 183 in inner */
1148             inc_nrnb(nrnb,eNR_BORN_AVA_RADII_HCT_OBC,md->homenr*24+cnt*183);
1149         }
1150         else
1151         {
1152             gmx_fatal(FARGS,"Bad gb algorithm for all-vs-all interactions");
1153         }
1154         return 0;
1155     }
1156     
1157     /* Switch for determining which algorithm to use for Born radii calculation */
1158 #ifdef GMX_DOUBLE
1159     
1160 #if 0 && defined (GMX_X86_SSE2)
1161     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1162     switch(ir->gb_algorithm)
1163     {
1164         case egbSTILL:
1165             if(fr->use_acceleration)
1166             {            
1167                 calc_gb_rad_still_sse2_double(cr,fr,born->nr,top, atype, x[0], nl, born);
1168             }
1169             else
1170             {
1171                 calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md); 
1172             }   
1173             break;
1174         case egbHCT:
1175             if(fr->use_acceleration)
1176             {
1177                 calc_gb_rad_hct_obc_sse2_double(cr,fr,born->nr,top, atype, x[0], nl, born, md, ir->gb_algorithm);
1178             }
1179             else
1180             {
1181                 calc_gb_rad_hct(cr,fr,born->nr,top,atype,x,nl,born,md); 
1182             }
1183             break;
1184         case egbOBC:
1185             if(fr->use_acceleration)
1186             {
1187                 calc_gb_rad_hct_obc_sse2_double(cr,fr,born->nr,top, atype, x[0], nl, born, md, ir->gb_algorithm);
1188             }
1189             else
1190             {
1191                 calc_gb_rad_obc(cr,fr,born->nr,top,atype,x,nl,born,md); 
1192             }
1193             break;
1194             
1195         default:
1196             gmx_fatal(FARGS, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d",ir->gb_algorithm);
1197     }
1198 #else
1199     switch(ir->gb_algorithm)
1200     {
1201         case egbSTILL:
1202             calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md); 
1203             break;
1204         case egbHCT:
1205             calc_gb_rad_hct(cr,fr,born->nr,top,atype,x,nl,born,md); 
1206             break;
1207         case egbOBC:
1208             calc_gb_rad_obc(cr,fr,born->nr,top,atype,x,nl,born,md); 
1209             break;
1210             
1211         default:
1212             gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d",ir->gb_algorithm);
1213     }
1214             
1215 #endif
1216                         
1217 #else                
1218             
1219 #if 0 && defined (GMX_X86_SSE2)
1220     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1221     switch(ir->gb_algorithm)
1222     {
1223         case egbSTILL:
1224             if(fr->use_acceleration)
1225             {
1226             calc_gb_rad_still_sse2_single(cr,fr,born->nr,top, atype, x[0], nl, born);
1227             }
1228             else
1229             {
1230                 calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md); 
1231             }
1232             break;
1233         case egbHCT:
1234                 if(fr->use_acceleration)
1235                 {
1236                     calc_gb_rad_hct_obc_sse2_single(cr,fr,born->nr,top, atype, x[0], nl, born, md, ir->gb_algorithm);
1237                 }
1238                 else
1239                 {
1240                     calc_gb_rad_hct(cr,fr,born->nr,top,atype,x,nl,born,md); 
1241                 }
1242             break;
1243             
1244         case egbOBC:
1245             if(fr->use_acceleration)
1246             {
1247                 calc_gb_rad_hct_obc_sse2_single(cr,fr,born->nr,top, atype, x[0], nl, born, md, ir->gb_algorithm);
1248             }
1249             else
1250             {
1251                 calc_gb_rad_obc(cr,fr,born->nr,top,atype,x,nl,born,md); 
1252             }
1253             break;
1254             
1255         default:
1256             gmx_fatal(FARGS, "Unknown sse-enabled algorithm for Born radii calculation: %d",ir->gb_algorithm);
1257     }
1258     
1259 #else
1260     switch(ir->gb_algorithm)
1261     {
1262         case egbSTILL:
1263             calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md); 
1264             break;
1265         case egbHCT:
1266             calc_gb_rad_hct(cr,fr,born->nr,top,atype,x,nl,born,md); 
1267             break;
1268         case egbOBC:
1269             calc_gb_rad_obc(cr,fr,born->nr,top,atype,x,nl,born,md); 
1270             break;
1271             
1272         default:
1273             gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d",ir->gb_algorithm);
1274     }
1275     
1276 #endif /* Single precision sse */
1277             
1278 #endif /* Double or single precision */
1279     
1280     if(fr->bAllvsAll==FALSE)
1281     {
1282         switch(ir->gb_algorithm)
1283         {
1284             case egbSTILL:
1285                 /* 17 flops per outer loop iteration, 47 flops per inner loop */
1286                 inc_nrnb(nrnb,eNR_BORN_RADII_STILL,nl->nri*17+nl->nrj*47);
1287                 break;
1288             case egbHCT:
1289             case egbOBC:
1290                 /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
1291                 inc_nrnb(nrnb,eNR_BORN_RADII_HCT_OBC,nl->nri*61+nl->nrj*183);
1292                 break;
1293                 
1294             default:
1295                 break;
1296         }
1297     }
1298     
1299     return 0;        
1300 }
1301
1302
1303
1304 real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale,
1305                   real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
1306                   real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph)
1307 {
1308     int i,j,n0,m,nnn,type,ai,aj;
1309         int ki;
1310  
1311         real isai,isaj;
1312     real r,rsq11;
1313     real rinv11,iq;
1314     real isaprod,qq,gbscale,gbtabscale,Y,F,Geps,Heps2,Fp,VV,FF,rt,eps,eps2;
1315     real vgb,fgb,vcoul,fijC,dvdatmp,fscal,dvdaj;
1316     real vctot;
1317     
1318         rvec dx;
1319         ivec dt;
1320         
1321     t_iatom *forceatoms;
1322
1323     /* Scale the electrostatics by gb_epsilon_solvent */
1324     facel = facel * ((1.0/epsilon_r) - 1.0/gb_epsilon_solvent);
1325     
1326     gbtabscale=*p_gbtabscale;
1327     vctot = 0.0;
1328     
1329     for(j=F_GB12;j<=F_GB14;j++)
1330     {
1331         forceatoms = idef->il[j].iatoms;
1332         
1333         for(i=0;i<idef->il[j].nr; )
1334         {
1335             /* To avoid reading in the interaction type, we just increment i to pass over
1336              * the types in the forceatoms array, this saves some memory accesses
1337              */
1338             i++;
1339             ai            = forceatoms[i++];
1340             aj            = forceatoms[i++];
1341                         
1342                         ki            = pbc_rvec_sub(pbc,x[ai],x[aj],dx);
1343                         rsq11         = iprod(dx,dx);
1344                         
1345                         isai          = invsqrta[ai];
1346                         iq            = (-1)*facel*charge[ai];
1347                         
1348             rinv11        = gmx_invsqrt(rsq11);
1349             isaj          = invsqrta[aj];
1350             isaprod       = isai*isaj;
1351             qq            = isaprod*iq*charge[aj];
1352             gbscale       = isaprod*gbtabscale;
1353             r             = rsq11*rinv11;
1354             rt            = r*gbscale;
1355             n0            = rt;
1356             eps           = rt-n0;
1357             eps2          = eps*eps;
1358             nnn           = 4*n0;
1359             Y             = GBtab[nnn];
1360             F             = GBtab[nnn+1];
1361             Geps          = eps*GBtab[nnn+2];
1362             Heps2         = eps2*GBtab[nnn+3];
1363             Fp            = F+Geps+Heps2;
1364             VV            = Y+eps*Fp;
1365             FF            = Fp+Geps+2.0*Heps2;
1366             vgb           = qq*VV;
1367             fijC          = qq*FF*gbscale;
1368             dvdatmp       = -(vgb+fijC*r)*0.5;
1369             dvda[aj]      = dvda[aj] + dvdatmp*isaj*isaj;
1370             dvda[ai]      = dvda[ai] + dvdatmp*isai*isai;
1371             vctot         = vctot + vgb;
1372             fgb           = -(fijC)*rinv11;
1373                         
1374                         if (graph) {
1375                                 ivec_sub(SHIFT_IVEC(graph,ai),SHIFT_IVEC(graph,aj),dt);
1376                                 ki=IVEC2IS(dt);
1377                         }
1378                         
1379                         for (m=0; (m<DIM); m++) {                       /*  15          */
1380                                 fscal=fgb*dx[m];
1381                                 f[ai][m]+=fscal;
1382                                 f[aj][m]-=fscal;
1383                                 fshift[ki][m]+=fscal;
1384                                 fshift[CENTRAL][m]-=fscal;
1385                         }
1386         }
1387     }
1388     
1389     return vctot;
1390 }
1391
1392 real calc_gb_selfcorrections(t_commrec *cr, int natoms, 
1393                  real *charge, gmx_genborn_t *born, real *dvda, t_mdatoms *md, double facel)
1394 {    
1395     int i,ai,at0,at1;
1396     real rai,e,derb,q,q2,fi,rai_inv,vtot;
1397
1398     if(PARTDECOMP(cr))
1399     {
1400         pd_at_range(cr,&at0,&at1);
1401     }
1402     else if(DOMAINDECOMP(cr))
1403     {
1404         at0=0;
1405         at1=cr->dd->nat_home;
1406     }
1407     else
1408     {
1409         at0=0;
1410         at1=natoms;
1411         
1412     }
1413         
1414     /* Scale the electrostatics by gb_epsilon_solvent */
1415     facel = facel * ((1.0/born->epsilon_r) - 1.0/born->gb_epsilon_solvent);
1416     
1417     vtot=0.0;
1418     
1419     /* Apply self corrections */
1420     for(i=at0;i<at1;i++)
1421     {
1422         ai       = i;
1423         
1424         if(born->use[ai]==1)
1425         {
1426             rai      = born->bRad[ai];
1427             rai_inv  = 1.0/rai;
1428             q        = charge[ai];
1429             q2       = q*q;
1430             fi       = facel*q2;
1431             e        = fi*rai_inv;
1432             derb     = 0.5*e*rai_inv*rai_inv;
1433             dvda[ai] += derb*rai;
1434             vtot     -= 0.5*e;
1435         }
1436     }
1437     
1438    return vtot;    
1439     
1440 }
1441
1442 real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr,int natoms,gmx_genborn_t *born, gmx_localtop_t *top, 
1443                       const t_atomtypes *atype, real *dvda,int gb_algorithm, t_mdatoms *md)
1444 {
1445     int ai,i,at0,at1;
1446     real e,es,rai,rbi,term,probe,tmp,factor;
1447     real rbi_inv,rbi_inv2;
1448     
1449     /* To keep the compiler happy */
1450     factor=0;
1451     
1452     if(PARTDECOMP(cr))
1453     {
1454         pd_at_range(cr,&at0,&at1);
1455     }
1456     else if(DOMAINDECOMP(cr))
1457     {
1458         at0 = 0;
1459         at1 = cr->dd->nat_home;
1460     }
1461     else
1462     {
1463         at0=0;
1464         at1=natoms;
1465     }
1466     
1467   /* factor is the surface tension */
1468   factor = born->sa_surface_tension;
1469   /*
1470   
1471     // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
1472     if(gb_algorithm==egbSTILL)
1473     {
1474         factor=0.0049*100*CAL2JOULE;
1475     }
1476     else    
1477     {
1478         factor=0.0054*100*CAL2JOULE;    
1479     }
1480     */
1481     /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1482     
1483     es    = 0;
1484     probe = 0.14;
1485     term  = M_PI*4;
1486     
1487     for(i=at0;i<at1;i++)
1488     {
1489         ai        = i;
1490         
1491         if(born->use[ai]==1)
1492         {
1493             rai       = top->atomtypes.gb_radius[md->typeA[ai]];
1494             rbi_inv   = fr->invsqrta[ai];
1495             rbi_inv2  = rbi_inv * rbi_inv;
1496             tmp       = (rai*rbi_inv2)*(rai*rbi_inv2);
1497             tmp       = tmp*tmp*tmp;
1498             e         = factor*term*(rai+probe)*(rai+probe)*tmp;
1499             dvda[ai]  = dvda[ai] - 6*e*rbi_inv2;    
1500             es        = es + e;
1501         }
1502     }    
1503
1504     return es;
1505 }
1506
1507
1508
1509 real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[], rvec t[], rvec fshift[], 
1510                        rvec shift_vec[], int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
1511 {    
1512     int i,k,n,ai,aj,nj0,nj1,n0,n1;
1513     int shift;
1514     real shX,shY,shZ;
1515     real fgb,fij,rb2,rbi,fix1,fiy1,fiz1;
1516     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11,rsq11;
1517     real rinv11,tx,ty,tz,rbai,rbaj,fgb_ai;
1518     real *rb;
1519     volatile int idx;
1520         
1521     n  = 0;    
1522     rb = born->work;
1523         
1524   n0 = 0;
1525   n1 = natoms;
1526   
1527     if(gb_algorithm==egbSTILL) 
1528     {
1529         for(i=n0;i<n1;i++)
1530         {
1531           rbi   = born->bRad[i];
1532           rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
1533         }
1534     }
1535     else if(gb_algorithm==egbHCT) 
1536     {
1537         for(i=n0;i<n1;i++)
1538         {
1539           rbi   = born->bRad[i];
1540           rb[i] = rbi * rbi * dvda[i];
1541         }
1542     }
1543     else if(gb_algorithm==egbOBC) 
1544     {
1545         for(i=n0;i<n1;i++)
1546         {
1547           rbi   = born->bRad[i];
1548           rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
1549         }
1550     }
1551     
1552     for(i=0;i<nl->nri;i++)
1553     {
1554         ai   = nl->iinr[i];
1555         
1556         nj0  = nl->jindex[i];
1557         nj1  = nl->jindex[i+1];
1558         
1559         /* Load shifts for this list */
1560         shift   = nl->shift[i];
1561         shX     = shift_vec[shift][0];
1562         shY     = shift_vec[shift][1];
1563         shZ     = shift_vec[shift][2];
1564         
1565         /* Load atom i coordinates, add shift vectors */
1566         ix1  = shX + x[ai][0];
1567         iy1  = shY + x[ai][1];
1568         iz1  = shZ + x[ai][2];
1569         
1570         fix1 = 0;
1571         fiy1 = 0;
1572         fiz1 = 0;
1573         
1574         rbai = rb[ai];
1575         
1576         for(k=nj0;k<nj1;k++)
1577         {
1578             aj = nl->jjnr[k];
1579             
1580             jx1     = x[aj][0];
1581             jy1     = x[aj][1];
1582             jz1     = x[aj][2];
1583             
1584             dx11    = ix1 - jx1;
1585             dy11    = iy1 - jy1;
1586             dz11    = iz1 - jz1;
1587             
1588             rbaj    = rb[aj];
1589             
1590             fgb     = rbai*dadx[n++]; 
1591             fgb_ai  = rbaj*dadx[n++];
1592             
1593             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1594             fgb     = fgb + fgb_ai;
1595             
1596             tx      = fgb * dx11;
1597             ty      = fgb * dy11;
1598             tz      = fgb * dz11;
1599                         
1600             fix1    = fix1 + tx;
1601             fiy1    = fiy1 + ty;
1602             fiz1    = fiz1 + tz;
1603             
1604             /* Update force on atom aj */
1605             t[aj][0] = t[aj][0] - tx;
1606             t[aj][1] = t[aj][1] - ty;
1607             t[aj][2] = t[aj][2] - tz;
1608         }
1609                 
1610         /* Update force and shift forces on atom ai */
1611         t[ai][0] = t[ai][0] + fix1;
1612         t[ai][1] = t[ai][1] + fiy1;
1613         t[ai][2] = t[ai][2] + fiz1;
1614         
1615         fshift[shift][0] = fshift[shift][0] + fix1;
1616         fshift[shift][1] = fshift[shift][1] + fiy1;
1617         fshift[shift][2] = fshift[shift][2] + fiz1;
1618         
1619     }
1620
1621     return 0;    
1622 }
1623
1624
1625 void
1626 calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t *top, const t_atomtypes *atype, 
1627                rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb, gmx_bool bRad,
1628                const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd)
1629 {
1630     real v=0;
1631     int  cnt;
1632     int i;
1633     
1634         /* PBC or not? */
1635         const t_pbc *pbc_null;
1636         
1637         if (fr->bMolPBC)
1638                 pbc_null = pbc;
1639         else
1640                 pbc_null = NULL;
1641   
1642   if(sa_algorithm == esaAPPROX)
1643   {
1644     /* Do a simple ACE type approximation for the non-polar solvation */
1645     enerd->term[F_NPSOLVATION] += calc_gb_nonpolar(cr, fr,born->nr, born, top, atype, fr->dvda, gb_algorithm,md);
1646   }
1647   
1648   /* Calculate the bonded GB-interactions using either table or analytical formula */
1649     enerd->term[F_GBPOL]       += gb_bonds_tab(x,f,fr->fshift, md->chargeA,&(fr->gbtabscale),
1650                                      fr->invsqrta,fr->dvda,fr->gbtab.data,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
1651     
1652     /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1653     enerd->term[F_GBPOL]       += calc_gb_selfcorrections(cr,born->nr,md->chargeA, born, fr->dvda, md, fr->epsfac);         
1654
1655     /* If parallel, sum the derivative of the potential w.r.t the born radii */
1656     if(PARTDECOMP(cr))
1657     {
1658         gmx_sum(md->nr,fr->dvda, cr);
1659     }
1660     else if(DOMAINDECOMP(cr))
1661     {
1662         dd_atom_sum_real(cr->dd,fr->dvda);
1663         dd_atom_spread_real(cr->dd,fr->dvda);
1664     }
1665
1666     if(fr->bAllvsAll)
1667     {
1668 #if 0 && defined (GMX_X86_SSE2)
1669         if(fr->use_acceleration)
1670         {
1671 #  ifdef GMX_DOUBLE
1672             genborn_allvsall_calc_chainrule_sse2_double(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
1673 #  else
1674             genborn_allvsall_calc_chainrule_sse2_single(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
1675 #  endif
1676         }
1677         else
1678         {
1679             genborn_allvsall_calc_chainrule(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
1680         }
1681 #else
1682         genborn_allvsall_calc_chainrule(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
1683 #endif
1684         cnt = md->homenr*(md->nr/2+1);
1685         /* 9 flops for outer loop, 15 for inner */
1686         inc_nrnb(nrnb,eNR_BORN_AVA_CHAINRULE,md->homenr*9+cnt*15);
1687         return;
1688     }
1689     
1690 #if 0 && defined (GMX_X86_SSE2)
1691     if(fr->use_acceleration)
1692     {
1693 #  ifdef GMX_DOUBLE
1694         calc_gb_chainrule_sse2_double(fr->natoms_force, &(fr->gblist),fr->dadx,fr->dvda,x[0], 
1695                                       f[0],fr->fshift[0],fr->shift_vec[0],gb_algorithm,born,md);
1696 #  else
1697         calc_gb_chainrule_sse2_single(fr->natoms_force, &(fr->gblist),fr->dadx,fr->dvda,x[0], 
1698                                       f[0],fr->fshift[0],fr->shift_vec[0],gb_algorithm,born,md);
1699 #  endif
1700     }
1701     else
1702     {
1703         calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
1704                           x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);
1705     }
1706 #else
1707     calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, 
1708                       x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);
1709 #endif
1710
1711     if(!fr->bAllvsAll)
1712     {
1713         /* 9 flops for outer loop, 15 for inner */
1714         inc_nrnb(nrnb,eNR_BORN_CHAINRULE,fr->gblist.nri*9+fr->gblist.nrj*15);
1715     }
1716 }
1717
1718 static void add_j_to_gblist(gbtmpnbl_t *list,int aj)
1719 {
1720     if (list->naj >= list->aj_nalloc)
1721     {
1722         list->aj_nalloc = over_alloc_large(list->naj+1);
1723         srenew(list->aj,list->aj_nalloc);
1724     }
1725
1726     list->aj[list->naj++] = aj;
1727 }
1728
1729 static gbtmpnbl_t *find_gbtmplist(struct gbtmpnbls *lists,int shift)
1730 {
1731     int ind,i;
1732
1733     /* Search the list with the same shift, if there is one */
1734     ind = 0;
1735     while (ind < lists->nlist && shift != lists->list[ind].shift)
1736     {
1737         ind++;
1738     }
1739     if (ind == lists->nlist)
1740     {
1741         if (lists->nlist == lists->list_nalloc)
1742         {
1743             lists->list_nalloc++;
1744             srenew(lists->list,lists->list_nalloc);
1745             for(i=lists->nlist; i<lists->list_nalloc; i++)
1746             {
1747                 lists->list[i].aj        = NULL;
1748                 lists->list[i].aj_nalloc = 0;
1749             }
1750
1751         }
1752         
1753         lists->list[lists->nlist].shift = shift;
1754         lists->list[lists->nlist].naj   = 0;
1755         lists->nlist++;
1756     }
1757
1758     return &lists->list[ind];
1759 }
1760
1761 static void add_bondeds_to_gblist(t_ilist *il,
1762                                   gmx_bool bMolPBC,t_pbc *pbc,t_graph *g,rvec *x,
1763                                   struct gbtmpnbls *nls)
1764 {
1765     int  ind,j,ai,aj,shift,found;
1766     rvec dx;
1767     ivec dt;
1768     gbtmpnbl_t *list;
1769
1770     shift = CENTRAL;
1771     for(ind=0; ind<il->nr; ind+=3)
1772     {
1773         ai = il->iatoms[ind+1];
1774         aj = il->iatoms[ind+2];
1775                 
1776         shift = CENTRAL;
1777         if (g != NULL)
1778         {
1779           rvec_sub(x[ai],x[aj],dx);
1780           ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
1781           shift = IVEC2IS(dt);
1782         }
1783         else if (bMolPBC)
1784         {
1785           shift = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
1786         }
1787
1788         /* Find the list for this shift or create one */
1789         list = find_gbtmplist(&nls[ai],shift);
1790         
1791         found=0;
1792         
1793         /* So that we do not add the same bond twice.
1794          * This happens with some constraints between 1-3 atoms
1795          * that are in the bond-list but should not be in the GB nb-list */
1796         for(j=0;j<list->naj;j++)
1797         {
1798             if (list->aj[j] == aj)
1799             {
1800                 found = 1;
1801             }
1802         }    
1803         
1804         if (found == 0)
1805         {
1806                         if(ai == aj)
1807                         {
1808                                 gmx_incons("ai == aj");
1809                         }
1810                         
1811             add_j_to_gblist(list,aj);
1812         }
1813     }
1814 }
1815
1816 static int
1817 compare_int (const void * a, const void * b)
1818 {
1819     return ( *(int*)a - *(int*)b );
1820 }
1821
1822
1823
1824 int make_gb_nblist(t_commrec *cr, int gb_algorithm, real gbcut,
1825                    rvec x[], matrix box,
1826                    t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born)
1827 {
1828     int i,l,ii,j,k,n,nj0,nj1,ai,aj,at0,at1,found,shift,s;
1829     int apa;
1830     t_nblist *nblist;
1831     t_pbc pbc;
1832     
1833     struct gbtmpnbls *nls;
1834     gbtmpnbl_t *list =NULL;
1835     
1836     set_pbc(&pbc,fr->ePBC,box);
1837     nls   = born->nblist_work;
1838     
1839     for(i=0;i<born->nr;i++)
1840     {
1841         nls[i].nlist = 0;
1842     }
1843
1844     if (fr->bMolPBC)
1845     {
1846         set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
1847     }
1848
1849     switch (gb_algorithm)
1850     {
1851     case egbHCT:
1852     case egbOBC:
1853         /* Loop over 1-2, 1-3 and 1-4 interactions */
1854         for(j=F_GB12;j<=F_GB14;j++)
1855         {
1856             add_bondeds_to_gblist(&idef->il[j],fr->bMolPBC,&pbc,graph,x,nls);
1857         }
1858         break;
1859     case egbSTILL:
1860         /* Loop over 1-4 interactions */
1861         add_bondeds_to_gblist(&idef->il[F_GB14],fr->bMolPBC,&pbc,graph,x,nls);
1862         break;
1863     default:
1864         gmx_incons("Unknown GB algorithm");
1865     }
1866     
1867     /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1868     for(n=0; (n<fr->nnblists); n++)
1869     {
1870         for(i=0; (i<eNL_NR); i++)
1871         {
1872             nblist=&(fr->nblists[n].nlist_sr[i]);
1873             
1874             if (nblist->nri > 0 && (i==eNL_VDWQQ || i==eNL_QQ))
1875             {
1876                 for(j=0;j<nblist->nri;j++)
1877                 {
1878                     ai    = nblist->iinr[j];
1879                     shift = nblist->shift[j];
1880
1881                     /* Find the list for this shift or create one */
1882                     list = find_gbtmplist(&nls[ai],shift);
1883
1884                     nj0 = nblist->jindex[j];
1885                     nj1 = nblist->jindex[j+1];
1886                     
1887                     /* Add all the j-atoms in the non-bonded list to the GB list */
1888                     for(k=nj0;k<nj1;k++)
1889                     {
1890                         add_j_to_gblist(list,nblist->jjnr[k]);
1891                     }
1892                 }
1893             }
1894         }
1895     }
1896         
1897     /* Zero out some counters */
1898         fr->gblist.nri=0;
1899     fr->gblist.nrj=0;
1900     
1901         fr->gblist.jindex[0] = fr->gblist.nri;
1902         
1903         for(i=0;i<fr->natoms_force;i++)
1904     {
1905         for(s=0; s<nls[i].nlist; s++)
1906         {
1907             list = &nls[i].list[s];
1908
1909             /* Only add those atoms that actually have neighbours */
1910             if (born->use[i] != 0)
1911             {
1912                 fr->gblist.iinr[fr->gblist.nri]  = i;
1913                 fr->gblist.shift[fr->gblist.nri] = list->shift;
1914                 fr->gblist.nri++;
1915             
1916                 for(k=0; k<list->naj; k++)
1917                 {
1918                     /* Memory allocation for jjnr */
1919                     if(fr->gblist.nrj >= fr->gblist.maxnrj)
1920                     {
1921                         fr->gblist.maxnrj += over_alloc_large(fr->gblist.maxnrj);
1922                         
1923                         if (debug)
1924                         {
1925                             fprintf(debug,"Increasing GB neighbourlist j size to %d\n",fr->gblist.maxnrj);
1926                         }
1927                         
1928                         srenew(fr->gblist.jjnr,fr->gblist.maxnrj);
1929                     }
1930             
1931                     /* Put in list */
1932                                         if(i == list->aj[k])
1933                                         {
1934                                                 gmx_incons("i == list->aj[k]");
1935                                         }
1936                     fr->gblist.jjnr[fr->gblist.nrj++] = list->aj[k];
1937                 }
1938                                 
1939                                 fr->gblist.jindex[fr->gblist.nri] = fr->gblist.nrj;  
1940             }
1941                 }
1942         }
1943
1944         
1945 #ifdef SORT_GB_LIST
1946     for(i=0;i<fr->gblist.nri;i++)
1947     {
1948         nj0 = fr->gblist.jindex[i];
1949         nj1 = fr->gblist.jindex[i+1];
1950         ai  = fr->gblist.iinr[i];
1951         
1952         /* Temporary fix */
1953                 for(j=nj0;j<nj1;j++)
1954                 {
1955             if(fr->gblist.jjnr[j]<ai)
1956                 fr->gblist.jjnr[j]+=fr->natoms_force;
1957         }
1958         qsort(fr->gblist.jjnr+nj0,nj1-nj0,sizeof(int),compare_int);
1959         /* Fix back */
1960         for(j=nj0;j<nj1;j++)
1961         {
1962             if(fr->gblist.jjnr[j]>=fr->natoms_force)
1963                 fr->gblist.jjnr[j]-=fr->natoms_force;
1964         }
1965         
1966     }
1967 #endif
1968         
1969     return 0;
1970 }
1971
1972 void make_local_gb(const t_commrec *cr, gmx_genborn_t *born, int gb_algorithm)
1973 {
1974     int i,at0,at1;
1975     gmx_domdec_t *dd=NULL;
1976     
1977     if(DOMAINDECOMP(cr))
1978     {
1979         dd = cr->dd;
1980         at0 = 0;
1981         at1 = dd->nat_tot;
1982     }
1983     else
1984     {
1985         /* Single node or particle decomp (global==local), just copy pointers and return */
1986         if(gb_algorithm==egbSTILL)
1987         {
1988             born->gpol      = born->gpol_globalindex;
1989             born->vsolv     = born->vsolv_globalindex; 
1990             born->gb_radius = born->gb_radius_globalindex; 
1991         }
1992         else
1993         {
1994             born->param     = born->param_globalindex;
1995             born->gb_radius = born->gb_radius_globalindex; 
1996         }
1997         
1998         born->use = born->use_globalindex;
1999         
2000         return;
2001     }
2002     
2003     /* Reallocation of local arrays if necessary */
2004     /* fr->natoms_force is equal to dd->nat_tot */
2005     if (DOMAINDECOMP(cr) && dd->nat_tot > born->nalloc)
2006     {
2007         int nalloc;
2008
2009         nalloc = dd->nat_tot;
2010
2011         /* Arrays specific to different gb algorithms */
2012         if (gb_algorithm == egbSTILL)
2013         {
2014             srenew(born->gpol,  nalloc+3);
2015             srenew(born->vsolv, nalloc+3);
2016             srenew(born->gb_radius, nalloc+3);
2017             for(i=born->nalloc; (i<nalloc+3); i++) 
2018             {
2019                 born->gpol[i] = 0;
2020                 born->vsolv[i] = 0;
2021                 born->gb_radius[i] = 0;
2022             }
2023         }
2024         else
2025         {
2026             srenew(born->param, nalloc+3);
2027             srenew(born->gb_radius, nalloc+3);
2028             for(i=born->nalloc; (i<nalloc+3); i++) 
2029             {
2030                 born->param[i] = 0;
2031                 born->gb_radius[i] = 0;
2032             }
2033         }
2034         
2035         /* All gb-algorithms use the array for vsites exclusions */
2036         srenew(born->use,    nalloc+3);
2037         for(i=born->nalloc; (i<nalloc+3); i++) 
2038         {
2039             born->use[i] = 0;
2040         }
2041
2042         born->nalloc = nalloc;
2043     }
2044     
2045     /* With dd, copy algorithm specific arrays */
2046     if(gb_algorithm==egbSTILL)
2047     {
2048         for(i=at0;i<at1;i++)
2049         {
2050             born->gpol[i]  = born->gpol_globalindex[dd->gatindex[i]];
2051             born->vsolv[i] = born->vsolv_globalindex[dd->gatindex[i]];
2052             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2053             born->use[i]   = born->use_globalindex[dd->gatindex[i]];
2054         }
2055     }
2056     else
2057     {
2058         for(i=at0;i<at1;i++)
2059         {
2060             born->param[i]     = born->param_globalindex[dd->gatindex[i]];
2061             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2062             born->use[i]       = born->use_globalindex[dd->gatindex[i]];
2063         }
2064     }
2065 }
2066