src/gromacs/mdlib/genborn.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2008, The GROMACS development team.
   6  * Copyright (c) 2013, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <math.h>
  43 #include <string.h>
  44
  45 #include "typedefs.h"
  46 #include "smalloc.h"
  47 #include "genborn.h"
  48 #include "vec.h"
  49 #include "grompp.h"
  50 #include "gromacs/fileio/pdbio.h"
  51 #include "names.h"
  52 #include "physics.h"
  53 #include "partdec.h"
  54 #include "domdec.h"
  55 #include "network.h"
  56 #include "gmx_fatal.h"
  57 #include "mtop_util.h"
  58 #include "pbc.h"
  59 #include "nrnb.h"
  60 #include "bondf.h"
  61
  62 #include "gromacs/utility/gmxmpi.h"
  63
  64 #ifdef GMX_X86_SSE2
  65 #  ifdef GMX_DOUBLE
  66 #    include "genborn_sse2_double.h"
  67 #    include "genborn_allvsall_sse2_double.h"
  68 #  else
  69 #    include "genborn_sse2_single.h"
  70 #    include "genborn_allvsall_sse2_single.h"
  71 #  endif /* GMX_DOUBLE */
  72 #endif   /* SSE or AVX present */
  73
  74 #include "genborn_allvsall.h"
  75
  76 /*#define DISABLE_SSE*/
  77
  78 typedef struct {
  79     int  shift;
  80     int  naj;
  81     int *aj;
  82     int  aj_nalloc;
  83 } gbtmpnbl_t;
  84
  85 typedef struct gbtmpnbls {
  86     int         nlist;
  87     gbtmpnbl_t *list;
  88     int         list_nalloc;
  89 } t_gbtmpnbls;
  90
  91 /* This function is exactly the same as the one in bondfree.c. The reason
  92  * it is copied here is that the bonded gb-interactions are evaluated
  93  * not in calc_bonds, but rather in calc_gb_forces
  94  */
  95 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
  96 {
  97     if (pbc)
  98     {
  99         return pbc_dx_aiuc(pbc, xi, xj, dx);
 100     }
 101     else
 102     {
 103         rvec_sub(xi, xj, dx);
 104         return CENTRAL;
 105     }
 106 }
 107
 108 int init_gb_nblist(int natoms, t_nblist *nl)
 109 {
 110     nl->maxnri      = natoms*4;
 111     nl->maxnrj      = 0;
 112     nl->maxlen      = 0;
 113     nl->nri         = 0;
 114     nl->nrj         = 0;
 115     nl->iinr        = NULL;
 116     nl->gid         = NULL;
 117     nl->shift       = NULL;
 118     nl->jindex      = NULL;
 119     nl->jjnr        = NULL;
 120     /*nl->nltype      = nltype;*/
 121
 122     srenew(nl->iinr,   nl->maxnri);
 123     srenew(nl->gid,    nl->maxnri);
 124     srenew(nl->shift,  nl->maxnri);
 125     srenew(nl->jindex, nl->maxnri+1);
 126
 127     nl->jindex[0] = 0;
 128
 129     return 0;
 130 }
 131
 132 void gb_pd_send(t_commrec gmx_unused *cr, real gmx_unused *send_data, int gmx_unused nr)
 133 {
 134 #ifdef GMX_MPI
 135     int  i, cur;
 136     int *index, *sendc, *disp;
 137
 138     snew(sendc, cr->nnodes);
 139     snew(disp, cr->nnodes);
 140
 141     index = pd_index(cr);
 142     cur   = cr->nodeid;
 143
 144     /* Setup count/index arrays */
 145     for (i = 0; i < cr->nnodes; i++)
 146     {
 147         sendc[i]  = index[i+1]-index[i];
 148         disp[i]   = index[i];
 149     }
 150
 151     /* Do communication */
 152     MPI_Gatherv(send_data+index[cur], sendc[cur], GMX_MPI_REAL, send_data, sendc,
 153                 disp, GMX_MPI_REAL, 0, cr->mpi_comm_mygroup);
 154     MPI_Bcast(send_data, nr, GMX_MPI_REAL, 0, cr->mpi_comm_mygroup);
 155
 156 #endif
 157 }
 158
 159
 160 int init_gb_plist(t_params *p_list)
 161 {
 162     p_list->nr    = 0;
 163     p_list->param = NULL;
 164
 165     return 0;
 166 }
 167
 168
 169
 170 int init_gb_still(const t_commrec *cr,
 171                   const t_atomtypes *atype, t_idef *idef, t_atoms *atoms,
 172                   gmx_genborn_t *born, int natoms)
 173 {
 174
 175     int   i, j, i1, i2, k, m, nbond, nang, ia, ib, ic, id, nb, idx, idx2, at;
 176     int   iam, ibm;
 177     int   at0, at1;
 178     real  length, angle;
 179     real  r, ri, rj, ri2, ri3, rj2, r2, r3, r4, rk, ratio, term, h, doffset;
 180     real  p1, p2, p3, factor, cosine, rab, rbc;
 181
 182     real *vsol;
 183     real *gp;
 184
 185     snew(vsol, natoms);
 186     snew(gp, natoms);
 187     snew(born->gpol_still_work, natoms+3);
 188
 189     if (PAR(cr))
 190     {
 191         if (PARTDECOMP(cr))
 192         {
 193             pd_at_range(cr, &at0, &at1);
 194
 195             for (i = 0; i < natoms; i++)
 196             {
 197                 vsol[i] = gp[i] = 0;
 198             }
 199         }
 200         else
 201         {
 202             at0 = 0;
 203             at1 = natoms;
 204         }
 205     }
 206     else
 207     {
 208         at0 = 0;
 209         at1 = natoms;
 210     }
 211
 212     doffset = born->gb_doffset;
 213
 214     for (i = 0; i < natoms; i++)
 215     {
 216         born->gpol_globalindex[i]              = born->vsolv_globalindex[i] =
 217                 born->gb_radius_globalindex[i] = 0;
 218     }
 219
 220     /* Compute atomic solvation volumes for Still method */
 221     for (i = 0; i < natoms; i++)
 222     {
 223         ri = atype->gb_radius[atoms->atom[i].type];
 224         born->gb_radius_globalindex[i] = ri;
 225         r3 = ri*ri*ri;
 226         born->vsolv_globalindex[i] = (4*M_PI/3)*r3;
 227     }
 228
 229     for (j = 0; j < idef->il[F_GB12].nr; j += 3)
 230     {
 231         m  = idef->il[F_GB12].iatoms[j];
 232         ia = idef->il[F_GB12].iatoms[j+1];
 233         ib = idef->il[F_GB12].iatoms[j+2];
 234
 235         r = 1.01*idef->iparams[m].gb.st;
 236
 237         ri   = atype->gb_radius[atoms->atom[ia].type];
 238         rj   = atype->gb_radius[atoms->atom[ib].type];
 239
 240         ri2  = ri*ri;
 241         ri3  = ri2*ri;
 242         rj2  = rj*rj;
 243
 244         ratio  = (rj2-ri2-r*r)/(2*ri*r);
 245         h      = ri*(1+ratio);
 246         term   = (M_PI/3.0)*h*h*(3.0*ri-h);
 247
 248         if (PARTDECOMP(cr))
 249         {
 250             vsol[ia] += term;
 251         }
 252         else
 253         {
 254             born->vsolv_globalindex[ia] -= term;
 255         }
 256
 257         ratio  = (ri2-rj2-r*r)/(2*rj*r);
 258         h      = rj*(1+ratio);
 259         term   = (M_PI/3.0)*h*h*(3.0*rj-h);
 260
 261         if (PARTDECOMP(cr))
 262         {
 263             vsol[ib] += term;
 264         }
 265         else
 266         {
 267             born->vsolv_globalindex[ib] -= term;
 268         }
 269     }
 270
 271     if (PARTDECOMP(cr))
 272     {
 273         gmx_sum(natoms, vsol, cr);
 274
 275         for (i = 0; i < natoms; i++)
 276         {
 277             born->vsolv_globalindex[i] = born->vsolv_globalindex[i]-vsol[i];
 278         }
 279     }
 280
 281     /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
 282        method */
 283     /* Self */
 284     for (j = 0; j < natoms; j++)
 285     {
 286         if (born->use_globalindex[j] == 1)
 287         {
 288             born->gpol_globalindex[j] = -0.5*ONE_4PI_EPS0/
 289                 (atype->gb_radius[atoms->atom[j].type]-doffset+STILL_P1);
 290         }
 291     }
 292
 293     /* 1-2 */
 294     for (j = 0; j < idef->il[F_GB12].nr; j += 3)
 295     {
 296         m  = idef->il[F_GB12].iatoms[j];
 297         ia = idef->il[F_GB12].iatoms[j+1];
 298         ib = idef->il[F_GB12].iatoms[j+2];
 299
 300         r = idef->iparams[m].gb.st;
 301
 302         r4 = r*r*r*r;
 303
 304         if (PARTDECOMP(cr))
 305         {
 306             gp[ia] += STILL_P2*born->vsolv_globalindex[ib]/r4;
 307             gp[ib] += STILL_P2*born->vsolv_globalindex[ia]/r4;
 308         }
 309         else
 310         {
 311             born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+
 312                 STILL_P2*born->vsolv_globalindex[ib]/r4;
 313             born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+
 314                 STILL_P2*born->vsolv_globalindex[ia]/r4;
 315         }
 316     }
 317
 318     /* 1-3 */
 319     for (j = 0; j < idef->il[F_GB13].nr; j += 3)
 320     {
 321         m  = idef->il[F_GB13].iatoms[j];
 322         ia = idef->il[F_GB13].iatoms[j+1];
 323         ib = idef->il[F_GB13].iatoms[j+2];
 324
 325         r  = idef->iparams[m].gb.st;
 326         r4 = r*r*r*r;
 327
 328         if (PARTDECOMP(cr))
 329         {
 330             gp[ia] += STILL_P3*born->vsolv[ib]/r4;
 331             gp[ib] += STILL_P3*born->vsolv[ia]/r4;
 332         }
 333         else
 334         {
 335             born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+
 336                 STILL_P3*born->vsolv_globalindex[ib]/r4;
 337             born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+
 338                 STILL_P3*born->vsolv_globalindex[ia]/r4;
 339         }
 340     }
 341
 342     if (PARTDECOMP(cr))
 343     {
 344         gmx_sum(natoms, gp, cr);
 345
 346         for (i = 0; i < natoms; i++)
 347         {
 348             born->gpol_globalindex[i] = born->gpol_globalindex[i]+gp[i];
 349         }
 350     }
 351
 352     sfree(vsol);
 353     sfree(gp);
 354
 355     return 0;
 356 }
 357
 358 /* Initialize all GB datastructs and compute polarization energies */
 359 int init_gb(gmx_genborn_t **p_born,
 360             const t_commrec *cr, t_forcerec *fr, const t_inputrec *ir,
 361             const gmx_mtop_t *mtop, int gb_algorithm)
 362 {
 363     int             i, j, m, ai, aj, jj, natoms, nalloc;
 364     real            rai, sk, p, doffset;
 365
 366     t_atoms         atoms;
 367     gmx_genborn_t  *born;
 368     gmx_localtop_t *localtop;
 369
 370     natoms   = mtop->natoms;
 371
 372     atoms    = gmx_mtop_global_atoms(mtop);
 373     localtop = gmx_mtop_generate_local_top(mtop, ir);
 374
 375     snew(born, 1);
 376     *p_born = born;
 377
 378     born->nr  = natoms;
 379
 380     snew(born->drobc, natoms);
 381     snew(born->bRad,  natoms);
 382
 383     /* Allocate memory for the global data arrays */
 384     snew(born->param_globalindex, natoms+3);
 385     snew(born->gpol_globalindex,  natoms+3);
 386     snew(born->vsolv_globalindex, natoms+3);
 387     snew(born->gb_radius_globalindex, natoms+3);
 388     snew(born->use_globalindex,    natoms+3);
 389
 390     snew(fr->invsqrta, natoms);
 391     snew(fr->dvda,     natoms);
 392
 393     fr->dadx              = NULL;
 394     fr->dadx_rawptr       = NULL;
 395     fr->nalloc_dadx       = 0;
 396     born->gpol_still_work = NULL;
 397     born->gpol_hct_work   = NULL;
 398
 399     /* snew(born->asurf,natoms); */
 400     /* snew(born->dasurf,natoms); */
 401
 402     /* Initialize the gb neighbourlist */
 403     init_gb_nblist(natoms, &(fr->gblist));
 404
 405     /* Do the Vsites exclusions (if any) */
 406     for (i = 0; i < natoms; i++)
 407     {
 408         jj = atoms.atom[i].type;
 409         if (mtop->atomtypes.gb_radius[atoms.atom[i].type] > 0)
 410         {
 411             born->use_globalindex[i] = 1;
 412         }
 413         else
 414         {
 415             born->use_globalindex[i] = 0;
 416         }
 417
 418         /* If we have a Vsite, put vs_globalindex[i]=0 */
 419         if (C6 (fr->nbfp, fr->ntype, jj, jj) == 0 &&
 420             C12(fr->nbfp, fr->ntype, jj, jj) == 0 &&
 421             atoms.atom[i].q == 0)
 422         {
 423             born->use_globalindex[i] = 0;
 424         }
 425     }
 426
 427     /* Copy algorithm parameters from inputrecord to local structure */
 428     born->obc_alpha          = ir->gb_obc_alpha;
 429     born->obc_beta           = ir->gb_obc_beta;
 430     born->obc_gamma          = ir->gb_obc_gamma;
 431     born->gb_doffset         = ir->gb_dielectric_offset;
 432     born->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 433     born->epsilon_r          = ir->epsilon_r;
 434
 435     doffset = born->gb_doffset;
 436
 437     /* Set the surface tension */
 438     born->sa_surface_tension = ir->sa_surface_tension;
 439
 440     /* If Still model, initialise the polarisation energies */
 441     if (gb_algorithm == egbSTILL)
 442     {
 443         init_gb_still(cr, &(mtop->atomtypes), &(localtop->idef), &atoms,
 444                       born, natoms);
 445     }
 446
 447
 448     /* If HCT/OBC,  precalculate the sk*atype->S_hct factors */
 449     else if (gb_algorithm == egbHCT || gb_algorithm == egbOBC)
 450     {
 451
 452         snew(born->gpol_hct_work, natoms+3);
 453
 454         for (i = 0; i < natoms; i++)
 455         {
 456             if (born->use_globalindex[i] == 1)
 457             {
 458                 rai = mtop->atomtypes.gb_radius[atoms.atom[i].type]-doffset;
 459                 sk  = rai * mtop->atomtypes.S_hct[atoms.atom[i].type];
 460                 born->param_globalindex[i]     = sk;
 461                 born->gb_radius_globalindex[i] = rai;
 462             }
 463             else
 464             {
 465                 born->param_globalindex[i]     = 0;
 466                 born->gb_radius_globalindex[i] = 0;
 467             }
 468         }
 469     }
 470
 471     /* Allocate memory for work arrays for temporary use */
 472     snew(born->work, natoms+4);
 473     snew(born->count, natoms);
 474     snew(born->nblist_work, natoms);
 475
 476     /* Domain decomposition specific stuff */
 477     born->nalloc = 0;
 478
 479     return 0;
 480 }
 481
 482
 483
 484 static int
 485 calc_gb_rad_still(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
 486                   rvec x[], t_nblist *nl,
 487                   gmx_genborn_t *born, t_mdatoms *md)
 488 {
 489     int  i, k, n, nj0, nj1, ai, aj, type;
 490     int  shift;
 491     real shX, shY, shZ;
 492     real gpi, dr, dr2, dr4, idr4, rvdw, ratio, ccf, theta, term, rai, raj;
 493     real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
 494     real rinv, idr2, idr6, vaj, dccf, cosq, sinq, prod, gpi2;
 495     real factor;
 496     real vai, prod_ai, icf4, icf6;
 497
 498     factor  = 0.5*ONE_4PI_EPS0;
 499     n       = 0;
 500
 501     for (i = 0; i < born->nr; i++)
 502     {
 503         born->gpol_still_work[i] = 0;
 504     }
 505
 506     for (i = 0; i < nl->nri; i++)
 507     {
 508         ai      = nl->iinr[i];
 509
 510         nj0     = nl->jindex[i];
 511         nj1     = nl->jindex[i+1];
 512
 513         /* Load shifts for this list */
 514         shift   = nl->shift[i];
 515         shX     = fr->shift_vec[shift][0];
 516         shY     = fr->shift_vec[shift][1];
 517         shZ     = fr->shift_vec[shift][2];
 518
 519         gpi     = 0;
 520
 521         rai     = top->atomtypes.gb_radius[md->typeA[ai]];
 522         vai     = born->vsolv[ai];
 523         prod_ai = STILL_P4*vai;
 524
 525         /* Load atom i coordinates, add shift vectors */
 526         ix1     = shX + x[ai][0];
 527         iy1     = shY + x[ai][1];
 528         iz1     = shZ + x[ai][2];
 529
 530         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
 531         {
 532             aj    = nl->jjnr[k];
 533             jx1   = x[aj][0];
 534             jy1   = x[aj][1];
 535             jz1   = x[aj][2];
 536
 537             dx11  = ix1-jx1;
 538             dy11  = iy1-jy1;
 539             dz11  = iz1-jz1;
 540
 541             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 542             rinv  = gmx_invsqrt(dr2);
 543             idr2  = rinv*rinv;
 544             idr4  = idr2*idr2;
 545             idr6  = idr4*idr2;
 546
 547             raj = top->atomtypes.gb_radius[md->typeA[aj]];
 548
 549             rvdw  = rai + raj;
 550
 551             ratio = dr2 / (rvdw * rvdw);
 552             vaj   = born->vsolv[aj];
 553
 554             if (ratio > STILL_P5INV)
 555             {
 556                 ccf  = 1.0;
 557                 dccf = 0.0;
 558             }
 559             else
 560             {
 561                 theta = ratio*STILL_PIP5;
 562                 cosq  = cos(theta);
 563                 term  = 0.5*(1.0-cosq);
 564                 ccf   = term*term;
 565                 sinq  = 1.0 - cosq*cosq;
 566                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 567             }
 568
 569             prod                       = STILL_P4*vaj;
 570             icf4                       = ccf*idr4;
 571             icf6                       = (4*ccf-dccf)*idr6;
 572             born->gpol_still_work[aj] += prod_ai*icf4;
 573             gpi                        = gpi+prod*icf4;
 574
 575             /* Save ai->aj and aj->ai chain rule terms */
 576             fr->dadx[n++]   = prod*icf6;
 577             fr->dadx[n++]   = prod_ai*icf6;
 578         }
 579         born->gpol_still_work[ai] += gpi;
 580     }
 581
 582     /* Parallel summations */
 583     if (PARTDECOMP(cr))
 584     {
 585         gmx_sum(natoms, born->gpol_still_work, cr);
 586     }
 587     else if (DOMAINDECOMP(cr))
 588     {
 589         dd_atom_sum_real(cr->dd, born->gpol_still_work);
 590     }
 591
 592     /* Calculate the radii */
 593     for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
 594     {
 595         if (born->use[i] != 0)
 596         {
 597             gpi             = born->gpol[i]+born->gpol_still_work[i];
 598             gpi2            = gpi * gpi;
 599             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
 600             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 601         }
 602     }
 603
 604     /* Extra communication required for DD */
 605     if (DOMAINDECOMP(cr))
 606     {
 607         dd_atom_spread_real(cr->dd, born->bRad);
 608         dd_atom_spread_real(cr->dd, fr->invsqrta);
 609     }
 610
 611     return 0;
 612
 613 }
 614
 615
 616 static int
 617 calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
 618                 rvec x[], t_nblist *nl,
 619                 gmx_genborn_t *born, t_mdatoms *md)
 620 {
 621     int   i, k, n, ai, aj, nj0, nj1, at0, at1;
 622     int   shift;
 623     real  shX, shY, shZ;
 624     real  rai, raj, gpi, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai;
 625     real  rad, min_rad, rinv, rai_inv;
 626     real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
 627     real  lij2, uij2, lij3, uij3, t1, t2, t3;
 628     real  lij_inv, dlij, duij, sk2_rinv, prod, log_term;
 629     real  doffset, raj_inv, dadx_val;
 630     real *gb_radius;
 631
 632     doffset   = born->gb_doffset;
 633     gb_radius = born->gb_radius;
 634
 635     for (i = 0; i < born->nr; i++)
 636     {
 637         born->gpol_hct_work[i] = 0;
 638     }
 639
 640     /* Keep the compiler happy */
 641     n    = 0;
 642     prod = 0;
 643
 644     for (i = 0; i < nl->nri; i++)
 645     {
 646         ai     = nl->iinr[i];
 647
 648         nj0    = nl->jindex[i];
 649         nj1    = nl->jindex[i+1];
 650
 651         /* Load shifts for this list */
 652         shift   = nl->shift[i];
 653         shX     = fr->shift_vec[shift][0];
 654         shY     = fr->shift_vec[shift][1];
 655         shZ     = fr->shift_vec[shift][2];
 656
 657         rai     = gb_radius[ai];
 658         rai_inv = 1.0/rai;
 659
 660         sk_ai   = born->param[ai];
 661         sk2_ai  = sk_ai*sk_ai;
 662
 663         /* Load atom i coordinates, add shift vectors */
 664         ix1     = shX + x[ai][0];
 665         iy1     = shY + x[ai][1];
 666         iz1     = shZ + x[ai][2];
 667
 668         sum_ai  = 0;
 669
 670         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
 671         {
 672             aj    = nl->jjnr[k];
 673
 674             jx1   = x[aj][0];
 675             jy1   = x[aj][1];
 676             jz1   = x[aj][2];
 677
 678             dx11  = ix1 - jx1;
 679             dy11  = iy1 - jy1;
 680             dz11  = iz1 - jz1;
 681
 682             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 683             rinv  = gmx_invsqrt(dr2);
 684             dr    = rinv*dr2;
 685
 686             sk    = born->param[aj];
 687             raj   = gb_radius[aj];
 688
 689             /* aj -> ai interaction */
 690             if (rai < dr+sk)
 691             {
 692                 lij     = 1.0/(dr-sk);
 693                 dlij    = 1.0;
 694
 695                 if (rai > dr-sk)
 696                 {
 697                     lij  = rai_inv;
 698                     dlij = 0.0;
 699                 }
 700
 701                 lij2     = lij*lij;
 702                 lij3     = lij2*lij;
 703
 704                 uij      = 1.0/(dr+sk);
 705                 uij2     = uij*uij;
 706                 uij3     = uij2*uij;
 707
 708                 diff2    = uij2-lij2;
 709
 710                 lij_inv  = gmx_invsqrt(lij2);
 711                 sk2      = sk*sk;
 712                 sk2_rinv = sk2*rinv;
 713                 prod     = 0.25*sk2_rinv;
 714
 715                 log_term = log(uij*lij_inv);
 716
 717                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
 718                     prod*(-diff2);
 719
 720                 if (rai < sk-dr)
 721                 {
 722                     tmp = tmp + 2.0 * (rai_inv-lij);
 723                 }
 724
 725                 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 726                 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 727                 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 728
 729                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
 730                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
 731                 /* rb2 is moved to chainrule    */
 732
 733                 sum_ai += 0.5*tmp;
 734             }
 735             else
 736             {
 737                 dadx_val = 0.0;
 738             }
 739             fr->dadx[n++] = dadx_val;
 740
 741
 742             /* ai -> aj interaction */
 743             if (raj < dr + sk_ai)
 744             {
 745                 lij     = 1.0/(dr-sk_ai);
 746                 dlij    = 1.0;
 747                 raj_inv = 1.0/raj;
 748
 749                 if (raj > dr-sk_ai)
 750                 {
 751                     lij  = raj_inv;
 752                     dlij = 0.0;
 753                 }
 754
 755                 lij2     = lij  * lij;
 756                 lij3     = lij2 * lij;
 757
 758                 uij      = 1.0/(dr+sk_ai);
 759                 uij2     = uij  * uij;
 760                 uij3     = uij2 * uij;
 761
 762                 diff2    = uij2-lij2;
 763
 764                 lij_inv  = gmx_invsqrt(lij2);
 765                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 766                 sk2_rinv = sk2*rinv;
 767                 prod     = 0.25 * sk2_rinv;
 768
 769                 /* log_term = table_log(uij*lij_inv,born->log_table,
 770                    LOG_TABLE_ACCURACY); */
 771                 log_term = log(uij*lij_inv);
 772
 773                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
 774                     prod*(-diff2);
 775
 776                 if (raj < sk_ai-dr)
 777                 {
 778                     tmp     = tmp + 2.0 * (raj_inv-lij);
 779                 }
 780
 781                 /* duij = 1.0 */
 782                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 783                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 784                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 785
 786                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
 787                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule    */
 788
 789                 born->gpol_hct_work[aj] += 0.5*tmp;
 790             }
 791             else
 792             {
 793                 dadx_val = 0.0;
 794             }
 795             fr->dadx[n++] = dadx_val;
 796         }
 797
 798         born->gpol_hct_work[ai] += sum_ai;
 799     }
 800
 801     /* Parallel summations */
 802     if (PARTDECOMP(cr))
 803     {
 804         gmx_sum(natoms, born->gpol_hct_work, cr);
 805     }
 806     else if (DOMAINDECOMP(cr))
 807     {
 808         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
 809     }
 810
 811     for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
 812     {
 813         if (born->use[i] != 0)
 814         {
 815             rai     = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
 816             sum_ai  = 1.0/rai - born->gpol_hct_work[i];
 817             min_rad = rai + doffset;
 818             rad     = 1.0/sum_ai;
 819
 820             born->bRad[i]   = rad > min_rad ? rad : min_rad;
 821             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 822         }
 823     }
 824
 825     /* Extra communication required for DD */
 826     if (DOMAINDECOMP(cr))
 827     {
 828         dd_atom_spread_real(cr->dd, born->bRad);
 829         dd_atom_spread_real(cr->dd, fr->invsqrta);
 830     }
 831
 832
 833     return 0;
 834 }
 835
 836 static int
 837 calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
 838                 rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md)
 839 {
 840     int   i, k, ai, aj, nj0, nj1, n, at0, at1;
 841     int   shift;
 842     real  shX, shY, shZ;
 843     real  rai, raj, gpi, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai;
 844     real  rad, min_rad, sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2;
 845     real  log_term, prod, sk2_rinv, sk_ai, sk2_ai;
 846     real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
 847     real  lij2, uij2, lij3, uij3, dlij, duij, t1, t2, t3;
 848     real  doffset, raj_inv, dadx_val;
 849     real *gb_radius;
 850
 851     /* Keep the compiler happy */
 852     n    = 0;
 853     prod = 0;
 854     raj  = 0;
 855
 856     doffset   = born->gb_doffset;
 857     gb_radius = born->gb_radius;
 858
 859     for (i = 0; i < born->nr; i++)
 860     {
 861         born->gpol_hct_work[i] = 0;
 862     }
 863
 864     for (i = 0; i < nl->nri; i++)
 865     {
 866         ai      = nl->iinr[i];
 867
 868         nj0     = nl->jindex[i];
 869         nj1     = nl->jindex[i+1];
 870
 871         /* Load shifts for this list */
 872         shift   = nl->shift[i];
 873         shX     = fr->shift_vec[shift][0];
 874         shY     = fr->shift_vec[shift][1];
 875         shZ     = fr->shift_vec[shift][2];
 876
 877         rai      = gb_radius[ai];
 878         rai_inv  = 1.0/rai;
 879
 880         sk_ai    = born->param[ai];
 881         sk2_ai   = sk_ai*sk_ai;
 882
 883         /* Load atom i coordinates, add shift vectors */
 884         ix1      = shX + x[ai][0];
 885         iy1      = shY + x[ai][1];
 886         iz1      = shZ + x[ai][2];
 887
 888         sum_ai   = 0;
 889
 890         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
 891         {
 892             aj    = nl->jjnr[k];
 893
 894             jx1   = x[aj][0];
 895             jy1   = x[aj][1];
 896             jz1   = x[aj][2];
 897
 898             dx11  = ix1 - jx1;
 899             dy11  = iy1 - jy1;
 900             dz11  = iz1 - jz1;
 901
 902             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 903             rinv  = gmx_invsqrt(dr2);
 904             dr    = dr2*rinv;
 905
 906             /* sk is precalculated in init_gb() */
 907             sk    = born->param[aj];
 908             raj   = gb_radius[aj];
 909
 910             /* aj -> ai interaction */
 911             if (rai < dr+sk)
 912             {
 913                 lij       = 1.0/(dr-sk);
 914                 dlij      = 1.0;
 915
 916                 if (rai > dr-sk)
 917                 {
 918                     lij  = rai_inv;
 919                     dlij = 0.0;
 920                 }
 921
 922                 uij      = 1.0/(dr+sk);
 923                 lij2     = lij  * lij;
 924                 lij3     = lij2 * lij;
 925                 uij2     = uij  * uij;
 926                 uij3     = uij2 * uij;
 927
 928                 diff2    = uij2-lij2;
 929
 930                 lij_inv  = gmx_invsqrt(lij2);
 931                 sk2      = sk*sk;
 932                 sk2_rinv = sk2*rinv;
 933                 prod     = 0.25*sk2_rinv;
 934
 935                 log_term = log(uij*lij_inv);
 936
 937                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 938
 939                 if (rai < sk-dr)
 940                 {
 941                     tmp = tmp + 2.0 * (rai_inv-lij);
 942                 }
 943
 944                 /* duij    = 1.0; */
 945                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 946                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 947                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 948
 949                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
 950
 951                 sum_ai += 0.5*tmp;
 952             }
 953             else
 954             {
 955                 dadx_val = 0.0;
 956             }
 957             fr->dadx[n++] = dadx_val;
 958
 959             /* ai -> aj interaction */
 960             if (raj < dr + sk_ai)
 961             {
 962                 lij     = 1.0/(dr-sk_ai);
 963                 dlij    = 1.0;
 964                 raj_inv = 1.0/raj;
 965
 966                 if (raj > dr-sk_ai)
 967                 {
 968                     lij  = raj_inv;
 969                     dlij = 0.0;
 970                 }
 971
 972                 lij2     = lij  * lij;
 973                 lij3     = lij2 * lij;
 974
 975                 uij      = 1.0/(dr+sk_ai);
 976                 uij2     = uij  * uij;
 977                 uij3     = uij2 * uij;
 978
 979                 diff2    = uij2-lij2;
 980
 981                 lij_inv  = gmx_invsqrt(lij2);
 982                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 983                 sk2_rinv = sk2*rinv;
 984                 prod     = 0.25 * sk2_rinv;
 985
 986                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 987                 log_term = log(uij*lij_inv);
 988
 989                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 990
 991                 if (raj < sk_ai-dr)
 992                 {
 993                     tmp     = tmp + 2.0 * (raj_inv-lij);
 994                 }
 995
 996                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 997                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 998                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 999
1000                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
1001
1002                 born->gpol_hct_work[aj] += 0.5*tmp;
1003
1004             }
1005             else
1006             {
1007                 dadx_val = 0.0;
1008             }
1009             fr->dadx[n++] = dadx_val;
1010
1011         }
1012         born->gpol_hct_work[ai] += sum_ai;
1013
1014     }
1015
1016     /* Parallel summations */
1017     if (PARTDECOMP(cr))
1018     {
1019         gmx_sum(natoms, born->gpol_hct_work, cr);
1020     }
1021     else if (DOMAINDECOMP(cr))
1022     {
1023         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
1024     }
1025
1026     for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
1027     {
1028         if (born->use[i] != 0)
1029         {
1030             rai        = top->atomtypes.gb_radius[md->typeA[i]];
1031             rai_inv2   = 1.0/rai;
1032             rai        = rai-doffset;
1033             rai_inv    = 1.0/rai;
1034             sum_ai     = rai * born->gpol_hct_work[i];
1035             sum_ai2    = sum_ai  * sum_ai;
1036             sum_ai3    = sum_ai2 * sum_ai;
1037
1038             tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
1039             born->bRad[i] = rai_inv - tsum*rai_inv2;
1040             born->bRad[i] = 1.0 / born->bRad[i];
1041
1042             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
1043
1044             tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
1045             born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
1046         }
1047     }
1048
1049     /* Extra (local) communication required for DD */
1050     if (DOMAINDECOMP(cr))
1051     {
1052         dd_atom_spread_real(cr->dd, born->bRad);
1053         dd_atom_spread_real(cr->dd, fr->invsqrta);
1054         dd_atom_spread_real(cr->dd, born->drobc);
1055     }
1056
1057     return 0;
1058
1059 }
1060
1061
1062
1063 int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *top,
1064                 rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, t_nrnb     *nrnb)
1065 {
1066     real *p;
1067     int   cnt;
1068     int   ndadx;
1069
1070     if (fr->bAllvsAll && fr->dadx == NULL)
1071     {
1072         /* We might need up to 8 atoms of padding before and after,
1073          * and another 4 units to guarantee SSE alignment.
1074          */
1075         fr->nalloc_dadx = 2*(md->homenr+12)*(md->nr/2+1+12);
1076         snew(fr->dadx_rawptr, fr->nalloc_dadx);
1077         fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
1078     }
1079     else
1080     {
1081         /* In the SSE-enabled gb-loops, when writing to dadx, we
1082          * always write 2*4 elements at a time, even in the case with only
1083          * 1-3 j particles, where we only really need to write 2*(1-3)
1084          * elements. This is because we want dadx to be aligned to a 16-
1085          * byte boundary, and being able to use _mm_store/load_ps
1086          */
1087         ndadx = 2 * (nl->nrj + 3*nl->nri);
1088
1089         /* First, reallocate the dadx array, we need 3 extra for SSE */
1090         if (ndadx + 3 > fr->nalloc_dadx)
1091         {
1092             fr->nalloc_dadx = over_alloc_large(ndadx) + 3;
1093             srenew(fr->dadx_rawptr, fr->nalloc_dadx);
1094             fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
1095         }
1096     }
1097
1098     if (fr->bAllvsAll)
1099     {
1100         cnt = md->homenr*(md->nr/2+1);
1101
1102         if (ir->gb_algorithm == egbSTILL)
1103         {
1104 #if 0 && defined (GMX_X86_SSE2)
1105             if (fr->use_acceleration)
1106             {
1107 #  ifdef GMX_DOUBLE
1108                 genborn_allvsall_calc_still_radii_sse2_double(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1109 #  else
1110                 genborn_allvsall_calc_still_radii_sse2_single(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1111 #  endif
1112             }
1113             else
1114             {
1115                 genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1116             }
1117 #else
1118             genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1119 #endif
1120             /* 13 flops in outer loop, 47 flops in inner loop */
1121             inc_nrnb(nrnb, eNR_BORN_AVA_RADII_STILL, md->homenr*13+cnt*47);
1122         }
1123         else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC)
1124         {
1125 #if 0 && defined (GMX_X86_SSE2)
1126             if (fr->use_acceleration)
1127             {
1128 #  ifdef GMX_DOUBLE
1129                 genborn_allvsall_calc_hct_obc_radii_sse2_double(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1130 #  else
1131                 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1132 #  endif
1133             }
1134             else
1135             {
1136                 genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1137             }
1138 #else
1139             genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1140 #endif
1141             /* 24 flops in outer loop, 183 in inner */
1142             inc_nrnb(nrnb, eNR_BORN_AVA_RADII_HCT_OBC, md->homenr*24+cnt*183);
1143         }
1144         else
1145         {
1146             gmx_fatal(FARGS, "Bad gb algorithm for all-vs-all interactions");
1147         }
1148         return 0;
1149     }
1150
1151     /* Switch for determining which algorithm to use for Born radii calculation */
1152 #ifdef GMX_DOUBLE
1153
1154 #if 0 && defined (GMX_X86_SSE2)
1155     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1156     switch (ir->gb_algorithm)
1157     {
1158         case egbSTILL:
1159             if (fr->use_acceleration)
1160             {
1161                 calc_gb_rad_still_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born);
1162             }
1163             else
1164             {
1165                 calc_gb_rad_still(cr, fr, born->nr, top, atype, x, nl, born, md);
1166             }
1167             break;
1168         case egbHCT:
1169             if (fr->use_acceleration)
1170             {
1171                 calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
1172             }
1173             else
1174             {
1175                 calc_gb_rad_hct(cr, fr, born->nr, top, atype, x, nl, born, md);
1176             }
1177             break;
1178         case egbOBC:
1179             if (fr->use_acceleration)
1180             {
1181                 calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
1182             }
1183             else
1184             {
1185                 calc_gb_rad_obc(cr, fr, born->nr, top, atype, x, nl, born, md);
1186             }
1187             break;
1188
1189         default:
1190             gmx_fatal(FARGS, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
1191     }
1192 #else
1193     switch (ir->gb_algorithm)
1194     {
1195         case egbSTILL:
1196             calc_gb_rad_still(cr, fr, born->nr, top, x, nl, born, md);
1197             break;
1198         case egbHCT:
1199             calc_gb_rad_hct(cr, fr, born->nr, top, x, nl, born, md);
1200             break;
1201         case egbOBC:
1202             calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
1203             break;
1204
1205         default:
1206             gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d", ir->gb_algorithm);
1207     }
1208
1209 #endif
1210
1211 #else
1212
1213 #if 0 && defined (GMX_X86_SSE2)
1214     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1215     switch (ir->gb_algorithm)
1216     {
1217         case egbSTILL:
1218             if (fr->use_acceleration)
1219             {
1220                 calc_gb_rad_still_sse2_single(cr, fr, born->nr, top, x[0], nl, born);
1221             }
1222             else
1223             {
1224                 calc_gb_rad_still(cr, fr, born->nr, top, x, nl, born, md);
1225             }
1226             break;
1227         case egbHCT:
1228             if (fr->use_acceleration)
1229             {
1230                 calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
1231             }
1232             else
1233             {
1234                 calc_gb_rad_hct(cr, fr, born->nr, top, x, nl, born, md);
1235             }
1236             break;
1237
1238         case egbOBC:
1239             if (fr->use_acceleration)
1240             {
1241                 calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
1242             }
1243             else
1244             {
1245                 calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
1246             }
1247             break;
1248
1249         default:
1250             gmx_fatal(FARGS, "Unknown sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
1251     }
1252
1253 #else
1254     switch (ir->gb_algorithm)
1255     {
1256         case egbSTILL:
1257             calc_gb_rad_still(cr, fr, born->nr, top, x, nl, born, md);
1258             break;
1259         case egbHCT:
1260             calc_gb_rad_hct(cr, fr, born->nr, top, x, nl, born, md);
1261             break;
1262         case egbOBC:
1263             calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
1264             break;
1265
1266         default:
1267             gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d", ir->gb_algorithm);
1268     }
1269
1270 #endif /* Single precision sse */
1271
1272 #endif /* Double or single precision */
1273
1274     if (fr->bAllvsAll == FALSE)
1275     {
1276         switch (ir->gb_algorithm)
1277         {
1278             case egbSTILL:
1279                 /* 17 flops per outer loop iteration, 47 flops per inner loop */
1280                 inc_nrnb(nrnb, eNR_BORN_RADII_STILL, nl->nri*17+nl->nrj*47);
1281                 break;
1282             case egbHCT:
1283             case egbOBC:
1284                 /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
1285                 inc_nrnb(nrnb, eNR_BORN_RADII_HCT_OBC, nl->nri*61+nl->nrj*183);
1286                 break;
1287
1288             default:
1289                 break;
1290         }
1291     }
1292
1293     return 0;
1294 }
1295
1296
1297
1298 real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale,
1299                   real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
1300                   real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph)
1301 {
1302     int      i, j, n0, m, nnn, type, ai, aj;
1303     int      ki;
1304
1305     real     isai, isaj;
1306     real     r, rsq11;
1307     real     rinv11, iq;
1308     real     isaprod, qq, gbscale, gbtabscale, Y, F, Geps, Heps2, Fp, VV, FF, rt, eps, eps2;
1309     real     vgb, fgb, vcoul, fijC, dvdatmp, fscal, dvdaj;
1310     real     vctot;
1311
1312     rvec     dx;
1313     ivec     dt;
1314
1315     t_iatom *forceatoms;
1316
1317     /* Scale the electrostatics by gb_epsilon_solvent */
1318     facel = facel * ((1.0/epsilon_r) - 1.0/gb_epsilon_solvent);
1319
1320     gbtabscale = *p_gbtabscale;
1321     vctot      = 0.0;
1322
1323     for (j = F_GB12; j <= F_GB14; j++)
1324     {
1325         forceatoms = idef->il[j].iatoms;
1326
1327         for (i = 0; i < idef->il[j].nr; )
1328         {
1329             /* To avoid reading in the interaction type, we just increment i to pass over
1330              * the types in the forceatoms array, this saves some memory accesses
1331              */
1332             i++;
1333             ai            = forceatoms[i++];
1334             aj            = forceatoms[i++];
1335
1336             ki            = pbc_rvec_sub(pbc, x[ai], x[aj], dx);
1337             rsq11         = iprod(dx, dx);
1338
1339             isai          = invsqrta[ai];
1340             iq            = (-1)*facel*charge[ai];
1341
1342             rinv11        = gmx_invsqrt(rsq11);
1343             isaj          = invsqrta[aj];
1344             isaprod       = isai*isaj;
1345             qq            = isaprod*iq*charge[aj];
1346             gbscale       = isaprod*gbtabscale;
1347             r             = rsq11*rinv11;
1348             rt            = r*gbscale;
1349             n0            = rt;
1350             eps           = rt-n0;
1351             eps2          = eps*eps;
1352             nnn           = 4*n0;
1353             Y             = GBtab[nnn];
1354             F             = GBtab[nnn+1];
1355             Geps          = eps*GBtab[nnn+2];
1356             Heps2         = eps2*GBtab[nnn+3];
1357             Fp            = F+Geps+Heps2;
1358             VV            = Y+eps*Fp;
1359             FF            = Fp+Geps+2.0*Heps2;
1360             vgb           = qq*VV;
1361             fijC          = qq*FF*gbscale;
1362             dvdatmp       = -(vgb+fijC*r)*0.5;
1363             dvda[aj]      = dvda[aj] + dvdatmp*isaj*isaj;
1364             dvda[ai]      = dvda[ai] + dvdatmp*isai*isai;
1365             vctot         = vctot + vgb;
1366             fgb           = -(fijC)*rinv11;
1367
1368             if (graph)
1369             {
1370                 ivec_sub(SHIFT_IVEC(graph, ai), SHIFT_IVEC(graph, aj), dt);
1371                 ki = IVEC2IS(dt);
1372             }
1373
1374             for (m = 0; (m < DIM); m++)             /*  15              */
1375             {
1376                 fscal               = fgb*dx[m];
1377                 f[ai][m]           += fscal;
1378                 f[aj][m]           -= fscal;
1379                 fshift[ki][m]      += fscal;
1380                 fshift[CENTRAL][m] -= fscal;
1381             }
1382         }
1383     }
1384
1385     return vctot;
1386 }
1387
1388 real calc_gb_selfcorrections(t_commrec *cr, int natoms,
1389                              real *charge, gmx_genborn_t *born, real *dvda, double facel)
1390 {
1391     int  i, ai, at0, at1;
1392     real rai, e, derb, q, q2, fi, rai_inv, vtot;
1393
1394     if (PARTDECOMP(cr))
1395     {
1396         pd_at_range(cr, &at0, &at1);
1397     }
1398     else if (DOMAINDECOMP(cr))
1399     {
1400         at0 = 0;
1401         at1 = cr->dd->nat_home;
1402     }
1403     else
1404     {
1405         at0 = 0;
1406         at1 = natoms;
1407
1408     }
1409
1410     /* Scale the electrostatics by gb_epsilon_solvent */
1411     facel = facel * ((1.0/born->epsilon_r) - 1.0/born->gb_epsilon_solvent);
1412
1413     vtot = 0.0;
1414
1415     /* Apply self corrections */
1416     for (i = at0; i < at1; i++)
1417     {
1418         ai       = i;
1419
1420         if (born->use[ai] == 1)
1421         {
1422             rai       = born->bRad[ai];
1423             rai_inv   = 1.0/rai;
1424             q         = charge[ai];
1425             q2        = q*q;
1426             fi        = facel*q2;
1427             e         = fi*rai_inv;
1428             derb      = 0.5*e*rai_inv*rai_inv;
1429             dvda[ai] += derb*rai;
1430             vtot     -= 0.5*e;
1431         }
1432     }
1433
1434     return vtot;
1435
1436 }
1437
1438 real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr, int natoms, gmx_genborn_t *born, gmx_localtop_t *top,
1439                       real *dvda, t_mdatoms *md)
1440 {
1441     int  ai, i, at0, at1;
1442     real e, es, rai, rbi, term, probe, tmp, factor;
1443     real rbi_inv, rbi_inv2;
1444
1445     /* To keep the compiler happy */
1446     factor = 0;
1447
1448     if (PARTDECOMP(cr))
1449     {
1450         pd_at_range(cr, &at0, &at1);
1451     }
1452     else if (DOMAINDECOMP(cr))
1453     {
1454         at0 = 0;
1455         at1 = cr->dd->nat_home;
1456     }
1457     else
1458     {
1459         at0 = 0;
1460         at1 = natoms;
1461     }
1462
1463     /* factor is the surface tension */
1464     factor = born->sa_surface_tension;
1465     /*
1466
1467        // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
1468        if(gb_algorithm==egbSTILL)
1469        {
1470           factor=0.0049*100*CAL2JOULE;
1471        }
1472        else
1473        {
1474           factor=0.0054*100*CAL2JOULE;
1475        }
1476      */
1477     /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1478
1479     es    = 0;
1480     probe = 0.14;
1481     term  = M_PI*4;
1482
1483     for (i = at0; i < at1; i++)
1484     {
1485         ai        = i;
1486
1487         if (born->use[ai] == 1)
1488         {
1489             rai       = top->atomtypes.gb_radius[md->typeA[ai]];
1490             rbi_inv   = fr->invsqrta[ai];
1491             rbi_inv2  = rbi_inv * rbi_inv;
1492             tmp       = (rai*rbi_inv2)*(rai*rbi_inv2);
1493             tmp       = tmp*tmp*tmp;
1494             e         = factor*term*(rai+probe)*(rai+probe)*tmp;
1495             dvda[ai]  = dvda[ai] - 6*e*rbi_inv2;
1496             es        = es + e;
1497         }
1498     }
1499
1500     return es;
1501 }
1502
1503
1504
1505 real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[], rvec t[], rvec fshift[],
1506                        rvec shift_vec[], int gb_algorithm, gmx_genborn_t *born)
1507 {
1508     int          i, k, n, ai, aj, nj0, nj1, n0, n1;
1509     int          shift;
1510     real         shX, shY, shZ;
1511     real         fgb, fij, rb2, rbi, fix1, fiy1, fiz1;
1512     real         ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11, rsq11;
1513     real         rinv11, tx, ty, tz, rbai, rbaj, fgb_ai;
1514     real        *rb;
1515     volatile int idx;
1516
1517     n  = 0;
1518     rb = born->work;
1519
1520     n0 = 0;
1521     n1 = natoms;
1522
1523     if (gb_algorithm == egbSTILL)
1524     {
1525         for (i = n0; i < n1; i++)
1526         {
1527             rbi   = born->bRad[i];
1528             rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
1529         }
1530     }
1531     else if (gb_algorithm == egbHCT)
1532     {
1533         for (i = n0; i < n1; i++)
1534         {
1535             rbi   = born->bRad[i];
1536             rb[i] = rbi * rbi * dvda[i];
1537         }
1538     }
1539     else if (gb_algorithm == egbOBC)
1540     {
1541         for (i = n0; i < n1; i++)
1542         {
1543             rbi   = born->bRad[i];
1544             rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
1545         }
1546     }
1547
1548     for (i = 0; i < nl->nri; i++)
1549     {
1550         ai   = nl->iinr[i];
1551
1552         nj0  = nl->jindex[i];
1553         nj1  = nl->jindex[i+1];
1554
1555         /* Load shifts for this list */
1556         shift   = nl->shift[i];
1557         shX     = shift_vec[shift][0];
1558         shY     = shift_vec[shift][1];
1559         shZ     = shift_vec[shift][2];
1560
1561         /* Load atom i coordinates, add shift vectors */
1562         ix1  = shX + x[ai][0];
1563         iy1  = shY + x[ai][1];
1564         iz1  = shZ + x[ai][2];
1565
1566         fix1 = 0;
1567         fiy1 = 0;
1568         fiz1 = 0;
1569
1570         rbai = rb[ai];
1571
1572         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
1573         {
1574             aj = nl->jjnr[k];
1575
1576             jx1     = x[aj][0];
1577             jy1     = x[aj][1];
1578             jz1     = x[aj][2];
1579
1580             dx11    = ix1 - jx1;
1581             dy11    = iy1 - jy1;
1582             dz11    = iz1 - jz1;
1583
1584             rbaj    = rb[aj];
1585
1586             fgb     = rbai*dadx[n++];
1587             fgb_ai  = rbaj*dadx[n++];
1588
1589             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1590             fgb     = fgb + fgb_ai;
1591
1592             tx      = fgb * dx11;
1593             ty      = fgb * dy11;
1594             tz      = fgb * dz11;
1595
1596             fix1    = fix1 + tx;
1597             fiy1    = fiy1 + ty;
1598             fiz1    = fiz1 + tz;
1599
1600             /* Update force on atom aj */
1601             t[aj][0] = t[aj][0] - tx;
1602             t[aj][1] = t[aj][1] - ty;
1603             t[aj][2] = t[aj][2] - tz;
1604         }
1605
1606         /* Update force and shift forces on atom ai */
1607         t[ai][0] = t[ai][0] + fix1;
1608         t[ai][1] = t[ai][1] + fiy1;
1609         t[ai][2] = t[ai][2] + fiz1;
1610
1611         fshift[shift][0] = fshift[shift][0] + fix1;
1612         fshift[shift][1] = fshift[shift][1] + fiy1;
1613         fshift[shift][2] = fshift[shift][2] + fiz1;
1614
1615     }
1616
1617     return 0;
1618 }
1619
1620
1621 void
1622 calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t *top,
1623                rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb,
1624                const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd)
1625 {
1626     real v = 0;
1627     int  cnt;
1628     int  i;
1629
1630     /* PBC or not? */
1631     const t_pbc *pbc_null;
1632
1633     if (fr->bMolPBC)
1634     {
1635         pbc_null = pbc;
1636     }
1637     else
1638     {
1639         pbc_null = NULL;
1640     }
1641
1642     if (sa_algorithm == esaAPPROX)
1643     {
1644         /* Do a simple ACE type approximation for the non-polar solvation */
1645         enerd->term[F_NPSOLVATION] += calc_gb_nonpolar(cr, fr, born->nr, born, top, fr->dvda, md);
1646     }
1647
1648     /* Calculate the bonded GB-interactions using either table or analytical formula */
1649     enerd->term[F_GBPOL]       += gb_bonds_tab(x, f, fr->fshift, md->chargeA, &(fr->gbtabscale),
1650                                                fr->invsqrta, fr->dvda, fr->gbtab.data, idef, born->epsilon_r, born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
1651
1652     /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1653     enerd->term[F_GBPOL]       += calc_gb_selfcorrections(cr, born->nr, md->chargeA, born, fr->dvda, fr->epsfac);
1654
1655     /* If parallel, sum the derivative of the potential w.r.t the born radii */
1656     if (PARTDECOMP(cr))
1657     {
1658         gmx_sum(md->nr, fr->dvda, cr);
1659     }
1660     else if (DOMAINDECOMP(cr))
1661     {
1662         dd_atom_sum_real(cr->dd, fr->dvda);
1663         dd_atom_spread_real(cr->dd, fr->dvda);
1664     }
1665
1666     if (fr->bAllvsAll)
1667     {
1668 #if 0 && defined (GMX_X86_SSE2)
1669         if (fr->use_acceleration)
1670         {
1671 #  ifdef GMX_DOUBLE
1672             genborn_allvsall_calc_chainrule_sse2_double(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1673 #  else
1674             genborn_allvsall_calc_chainrule_sse2_single(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1675 #  endif
1676         }
1677         else
1678         {
1679             genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1680         }
1681 #else
1682         genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1683 #endif
1684         cnt = md->homenr*(md->nr/2+1);
1685         /* 9 flops for outer loop, 15 for inner */
1686         inc_nrnb(nrnb, eNR_BORN_AVA_CHAINRULE, md->homenr*9+cnt*15);
1687         return;
1688     }
1689
1690 #if 0 && defined (GMX_X86_SSE2)
1691     if (fr->use_acceleration)
1692     {
1693 #  ifdef GMX_DOUBLE
1694         calc_gb_chainrule_sse2_double(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
1695                                       f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
1696 #  else
1697         calc_gb_chainrule_sse2_single(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
1698                                       f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
1699 #  endif
1700     }
1701     else
1702     {
1703         calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
1704                           x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);
1705     }
1706 #else
1707     calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
1708                       x, f, fr->fshift, fr->shift_vec, gb_algorithm, born);
1709 #endif
1710
1711     if (!fr->bAllvsAll)
1712     {
1713         /* 9 flops for outer loop, 15 for inner */
1714         inc_nrnb(nrnb, eNR_BORN_CHAINRULE, fr->gblist.nri*9+fr->gblist.nrj*15);
1715     }
1716 }
1717
1718 static void add_j_to_gblist(gbtmpnbl_t *list, int aj)
1719 {
1720     if (list->naj >= list->aj_nalloc)
1721     {
1722         list->aj_nalloc = over_alloc_large(list->naj+1);
1723         srenew(list->aj, list->aj_nalloc);
1724     }
1725
1726     list->aj[list->naj++] = aj;
1727 }
1728
1729 static gbtmpnbl_t *find_gbtmplist(struct gbtmpnbls *lists, int shift)
1730 {
1731     int ind, i;
1732
1733     /* Search the list with the same shift, if there is one */
1734     ind = 0;
1735     while (ind < lists->nlist && shift != lists->list[ind].shift)
1736     {
1737         ind++;
1738     }
1739     if (ind == lists->nlist)
1740     {
1741         if (lists->nlist == lists->list_nalloc)
1742         {
1743             lists->list_nalloc++;
1744             srenew(lists->list, lists->list_nalloc);
1745             for (i = lists->nlist; i < lists->list_nalloc; i++)
1746             {
1747                 lists->list[i].aj        = NULL;
1748                 lists->list[i].aj_nalloc = 0;
1749             }
1750
1751         }
1752
1753         lists->list[lists->nlist].shift = shift;
1754         lists->list[lists->nlist].naj   = 0;
1755         lists->nlist++;
1756     }
1757
1758     return &lists->list[ind];
1759 }
1760
1761 static void add_bondeds_to_gblist(t_ilist *il,
1762                                   gmx_bool bMolPBC, t_pbc *pbc, t_graph *g, rvec *x,
1763                                   struct gbtmpnbls *nls)
1764 {
1765     int         ind, j, ai, aj, shift, found;
1766     rvec        dx;
1767     ivec        dt;
1768     gbtmpnbl_t *list;
1769
1770     shift = CENTRAL;
1771     for (ind = 0; ind < il->nr; ind += 3)
1772     {
1773         ai = il->iatoms[ind+1];
1774         aj = il->iatoms[ind+2];
1775
1776         shift = CENTRAL;
1777         if (g != NULL)
1778         {
1779             rvec_sub(x[ai], x[aj], dx);
1780             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
1781             shift = IVEC2IS(dt);
1782         }
1783         else if (bMolPBC)
1784         {
1785             shift = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
1786         }
1787
1788         /* Find the list for this shift or create one */
1789         list = find_gbtmplist(&nls[ai], shift);
1790
1791         found = 0;
1792
1793         /* So that we do not add the same bond twice.
1794          * This happens with some constraints between 1-3 atoms
1795          * that are in the bond-list but should not be in the GB nb-list */
1796         for (j = 0; j < list->naj; j++)
1797         {
1798             if (list->aj[j] == aj)
1799             {
1800                 found = 1;
1801             }
1802         }
1803
1804         if (found == 0)
1805         {
1806             if (ai == aj)
1807             {
1808                 gmx_incons("ai == aj");
1809             }
1810
1811             add_j_to_gblist(list, aj);
1812         }
1813     }
1814 }
1815
1816 static int
1817 compare_int (const void * a, const void * b)
1818 {
1819     return ( *(int*)a - *(int*)b );
1820 }
1821
1822
1823
1824 int make_gb_nblist(t_commrec *cr, int gb_algorithm,
1825                    rvec x[], matrix box,
1826                    t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born)
1827 {
1828     int               i, l, ii, j, k, n, nj0, nj1, ai, aj, at0, at1, found, shift, s;
1829     int               apa;
1830     t_nblist         *nblist;
1831     t_pbc             pbc;
1832
1833     struct gbtmpnbls *nls;
1834     gbtmpnbl_t       *list = NULL;
1835
1836     set_pbc(&pbc, fr->ePBC, box);
1837     nls   = born->nblist_work;
1838
1839     for (i = 0; i < born->nr; i++)
1840     {
1841         nls[i].nlist = 0;
1842     }
1843
1844     if (fr->bMolPBC)
1845     {
1846         set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
1847     }
1848
1849     switch (gb_algorithm)
1850     {
1851         case egbHCT:
1852         case egbOBC:
1853             /* Loop over 1-2, 1-3 and 1-4 interactions */
1854             for (j = F_GB12; j <= F_GB14; j++)
1855             {
1856                 add_bondeds_to_gblist(&idef->il[j], fr->bMolPBC, &pbc, graph, x, nls);
1857             }
1858             break;
1859         case egbSTILL:
1860             /* Loop over 1-4 interactions */
1861             add_bondeds_to_gblist(&idef->il[F_GB14], fr->bMolPBC, &pbc, graph, x, nls);
1862             break;
1863         default:
1864             gmx_incons("Unknown GB algorithm");
1865     }
1866
1867     /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1868     for (n = 0; (n < fr->nnblists); n++)
1869     {
1870         for (i = 0; (i < eNL_NR); i++)
1871         {
1872             nblist = &(fr->nblists[n].nlist_sr[i]);
1873
1874             if (nblist->nri > 0 && (i == eNL_VDWQQ || i == eNL_QQ))
1875             {
1876                 for (j = 0; j < nblist->nri; j++)
1877                 {
1878                     ai    = nblist->iinr[j];
1879                     shift = nblist->shift[j];
1880
1881                     /* Find the list for this shift or create one */
1882                     list = find_gbtmplist(&nls[ai], shift);
1883
1884                     nj0 = nblist->jindex[j];
1885                     nj1 = nblist->jindex[j+1];
1886
1887                     /* Add all the j-atoms in the non-bonded list to the GB list */
1888                     for (k = nj0; k < nj1; k++)
1889                     {
1890                         add_j_to_gblist(list, nblist->jjnr[k]);
1891                     }
1892                 }
1893             }
1894         }
1895     }
1896
1897     /* Zero out some counters */
1898     fr->gblist.nri = 0;
1899     fr->gblist.nrj = 0;
1900
1901     fr->gblist.jindex[0] = fr->gblist.nri;
1902
1903     for (i = 0; i < fr->natoms_force; i++)
1904     {
1905         for (s = 0; s < nls[i].nlist; s++)
1906         {
1907             list = &nls[i].list[s];
1908
1909             /* Only add those atoms that actually have neighbours */
1910             if (born->use[i] != 0)
1911             {
1912                 fr->gblist.iinr[fr->gblist.nri]  = i;
1913                 fr->gblist.shift[fr->gblist.nri] = list->shift;
1914                 fr->gblist.nri++;
1915
1916                 for (k = 0; k < list->naj; k++)
1917                 {
1918                     /* Memory allocation for jjnr */
1919                     if (fr->gblist.nrj >= fr->gblist.maxnrj)
1920                     {
1921                         fr->gblist.maxnrj += over_alloc_large(fr->gblist.maxnrj);
1922
1923                         if (debug)
1924                         {
1925                             fprintf(debug, "Increasing GB neighbourlist j size to %d\n", fr->gblist.maxnrj);
1926                         }
1927
1928                         srenew(fr->gblist.jjnr, fr->gblist.maxnrj);
1929                     }
1930
1931                     /* Put in list */
1932                     if (i == list->aj[k])
1933                     {
1934                         gmx_incons("i == list->aj[k]");
1935                     }
1936                     fr->gblist.jjnr[fr->gblist.nrj++] = list->aj[k];
1937                 }
1938
1939                 fr->gblist.jindex[fr->gblist.nri] = fr->gblist.nrj;
1940             }
1941         }
1942     }
1943
1944
1945 #ifdef SORT_GB_LIST
1946     for (i = 0; i < fr->gblist.nri; i++)
1947     {
1948         nj0 = fr->gblist.jindex[i];
1949         nj1 = fr->gblist.jindex[i+1];
1950         ai  = fr->gblist.iinr[i];
1951
1952         /* Temporary fix */
1953         for (j = nj0; j < nj1; j++)
1954         {
1955             if (fr->gblist.jjnr[j] < ai)
1956             {
1957                 fr->gblist.jjnr[j] += fr->natoms_force;
1958             }
1959         }
1960         qsort(fr->gblist.jjnr+nj0, nj1-nj0, sizeof(int), compare_int);
1961         /* Fix back */
1962         for (j = nj0; j < nj1; j++)
1963         {
1964             if (fr->gblist.jjnr[j] >= fr->natoms_force)
1965             {
1966                 fr->gblist.jjnr[j] -= fr->natoms_force;
1967             }
1968         }
1969
1970     }
1971 #endif
1972
1973     return 0;
1974 }
1975
1976 void make_local_gb(const t_commrec *cr, gmx_genborn_t *born, int gb_algorithm)
1977 {
1978     int           i, at0, at1;
1979     gmx_domdec_t *dd = NULL;
1980
1981     if (DOMAINDECOMP(cr))
1982     {
1983         dd  = cr->dd;
1984         at0 = 0;
1985         at1 = dd->nat_tot;
1986     }
1987     else
1988     {
1989         /* Single node or particle decomp (global==local), just copy pointers and return */
1990         if (gb_algorithm == egbSTILL)
1991         {
1992             born->gpol      = born->gpol_globalindex;
1993             born->vsolv     = born->vsolv_globalindex;
1994             born->gb_radius = born->gb_radius_globalindex;
1995         }
1996         else
1997         {
1998             born->param     = born->param_globalindex;
1999             born->gb_radius = born->gb_radius_globalindex;
2000         }
2001
2002         born->use = born->use_globalindex;
2003
2004         return;
2005     }
2006
2007     /* Reallocation of local arrays if necessary */
2008     /* fr->natoms_force is equal to dd->nat_tot */
2009     if (DOMAINDECOMP(cr) && dd->nat_tot > born->nalloc)
2010     {
2011         int nalloc;
2012
2013         nalloc = dd->nat_tot;
2014
2015         /* Arrays specific to different gb algorithms */
2016         if (gb_algorithm == egbSTILL)
2017         {
2018             srenew(born->gpol,  nalloc+3);
2019             srenew(born->vsolv, nalloc+3);
2020             srenew(born->gb_radius, nalloc+3);
2021             for (i = born->nalloc; (i < nalloc+3); i++)
2022             {
2023                 born->gpol[i]      = 0;
2024                 born->vsolv[i]     = 0;
2025                 born->gb_radius[i] = 0;
2026             }
2027         }
2028         else
2029         {
2030             srenew(born->param, nalloc+3);
2031             srenew(born->gb_radius, nalloc+3);
2032             for (i = born->nalloc; (i < nalloc+3); i++)
2033             {
2034                 born->param[i]     = 0;
2035                 born->gb_radius[i] = 0;
2036             }
2037         }
2038
2039         /* All gb-algorithms use the array for vsites exclusions */
2040         srenew(born->use,    nalloc+3);
2041         for (i = born->nalloc; (i < nalloc+3); i++)
2042         {
2043             born->use[i] = 0;
2044         }
2045
2046         born->nalloc = nalloc;
2047     }
2048
2049     /* With dd, copy algorithm specific arrays */
2050     if (gb_algorithm == egbSTILL)
2051     {
2052         for (i = at0; i < at1; i++)
2053         {
2054             born->gpol[i]      = born->gpol_globalindex[dd->gatindex[i]];
2055             born->vsolv[i]     = born->vsolv_globalindex[dd->gatindex[i]];
2056             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2057             born->use[i]       = born->use_globalindex[dd->gatindex[i]];
2058         }
2059     }
2060     else
2061     {
2062         for (i = at0; i < at1; i++)
2063         {
2064             born->param[i]     = born->param_globalindex[dd->gatindex[i]];
2065             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2066             born->use[i]       = born->use_globalindex[dd->gatindex[i]];
2067         }
2068     }
2069 }