src/gromacs/mdlib/genborn.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2008, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <math.h>
  43 #include <string.h>
  44
  45 #include "typedefs.h"
  46 #include "smalloc.h"
  47 #include "genborn.h"
  48 #include "vec.h"
  49 #include "gromacs/fileio/pdbio.h"
  50 #include "names.h"
  51 #include "physics.h"
  52 #include "partdec.h"
  53 #include "domdec.h"
  54 #include "network.h"
  55 #include "gmx_fatal.h"
  56 #include "mtop_util.h"
  57 #include "pbc.h"
  58 #include "nrnb.h"
  59 #include "bondf.h"
  60
  61 #include "gromacs/utility/gmxmpi.h"
  62
  63 #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
  64 #  ifdef GMX_DOUBLE
  65 #    include "genborn_sse2_double.h"
  66 #    include "genborn_allvsall_sse2_double.h"
  67 #  else
  68 #    include "genborn_sse2_single.h"
  69 #    include "genborn_allvsall_sse2_single.h"
  70 #  endif /* GMX_DOUBLE */
  71 #endif   /* SSE or AVX present */
  72
  73 #include "genborn_allvsall.h"
  74
  75 /*#define DISABLE_SSE*/
  76
  77 typedef struct {
  78     int  shift;
  79     int  naj;
  80     int *aj;
  81     int  aj_nalloc;
  82 } gbtmpnbl_t;
  83
  84 typedef struct gbtmpnbls {
  85     int         nlist;
  86     gbtmpnbl_t *list;
  87     int         list_nalloc;
  88 } t_gbtmpnbls;
  89
  90 /* This function is exactly the same as the one in bondfree.c. The reason
  91  * it is copied here is that the bonded gb-interactions are evaluated
  92  * not in calc_bonds, but rather in calc_gb_forces
  93  */
  94 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
  95 {
  96     if (pbc)
  97     {
  98         return pbc_dx_aiuc(pbc, xi, xj, dx);
  99     }
 100     else
 101     {
 102         rvec_sub(xi, xj, dx);
 103         return CENTRAL;
 104     }
 105 }
 106
 107 int init_gb_nblist(int natoms, t_nblist *nl)
 108 {
 109     nl->maxnri      = natoms*4;
 110     nl->maxnrj      = 0;
 111     nl->maxlen      = 0;
 112     nl->nri         = 0;
 113     nl->nrj         = 0;
 114     nl->iinr        = NULL;
 115     nl->gid         = NULL;
 116     nl->shift       = NULL;
 117     nl->jindex      = NULL;
 118     nl->jjnr        = NULL;
 119     /*nl->nltype      = nltype;*/
 120
 121     srenew(nl->iinr,   nl->maxnri);
 122     srenew(nl->gid,    nl->maxnri);
 123     srenew(nl->shift,  nl->maxnri);
 124     srenew(nl->jindex, nl->maxnri+1);
 125
 126     nl->jindex[0] = 0;
 127
 128     return 0;
 129 }
 130
 131 void gb_pd_send(t_commrec gmx_unused *cr, real gmx_unused *send_data, int gmx_unused nr)
 132 {
 133 #ifdef GMX_MPI
 134     int  i, cur;
 135     int *index, *sendc, *disp;
 136
 137     snew(sendc, cr->nnodes);
 138     snew(disp, cr->nnodes);
 139
 140     index = pd_index(cr);
 141     cur   = cr->nodeid;
 142
 143     /* Setup count/index arrays */
 144     for (i = 0; i < cr->nnodes; i++)
 145     {
 146         sendc[i]  = index[i+1]-index[i];
 147         disp[i]   = index[i];
 148     }
 149
 150     /* Do communication */
 151     MPI_Gatherv(send_data+index[cur], sendc[cur], GMX_MPI_REAL, send_data, sendc,
 152                 disp, GMX_MPI_REAL, 0, cr->mpi_comm_mygroup);
 153     MPI_Bcast(send_data, nr, GMX_MPI_REAL, 0, cr->mpi_comm_mygroup);
 154
 155 #endif
 156 }
 157
 158
 159 int init_gb_still(const t_commrec *cr,
 160                   const t_atomtypes *atype, t_idef *idef, t_atoms *atoms,
 161                   gmx_genborn_t *born, int natoms)
 162 {
 163
 164     int   i, j, i1, i2, k, m, nbond, nang, ia, ib, ic, id, nb, idx, idx2, at;
 165     int   iam, ibm;
 166     int   at0, at1;
 167     real  length, angle;
 168     real  r, ri, rj, ri2, ri3, rj2, r2, r3, r4, rk, ratio, term, h, doffset;
 169     real  p1, p2, p3, factor, cosine, rab, rbc;
 170
 171     real *vsol;
 172     real *gp;
 173
 174     snew(vsol, natoms);
 175     snew(gp, natoms);
 176     snew(born->gpol_still_work, natoms+3);
 177
 178     if (PAR(cr))
 179     {
 180         if (PARTDECOMP(cr))
 181         {
 182             pd_at_range(cr, &at0, &at1);
 183
 184             for (i = 0; i < natoms; i++)
 185             {
 186                 vsol[i] = gp[i] = 0;
 187             }
 188         }
 189         else
 190         {
 191             at0 = 0;
 192             at1 = natoms;
 193         }
 194     }
 195     else
 196     {
 197         at0 = 0;
 198         at1 = natoms;
 199     }
 200
 201     doffset = born->gb_doffset;
 202
 203     for (i = 0; i < natoms; i++)
 204     {
 205         born->gpol_globalindex[i]              = born->vsolv_globalindex[i] =
 206                 born->gb_radius_globalindex[i] = 0;
 207     }
 208
 209     /* Compute atomic solvation volumes for Still method */
 210     for (i = 0; i < natoms; i++)
 211     {
 212         ri = atype->gb_radius[atoms->atom[i].type];
 213         born->gb_radius_globalindex[i] = ri;
 214         r3 = ri*ri*ri;
 215         born->vsolv_globalindex[i] = (4*M_PI/3)*r3;
 216     }
 217
 218     for (j = 0; j < idef->il[F_GB12].nr; j += 3)
 219     {
 220         m  = idef->il[F_GB12].iatoms[j];
 221         ia = idef->il[F_GB12].iatoms[j+1];
 222         ib = idef->il[F_GB12].iatoms[j+2];
 223
 224         r = 1.01*idef->iparams[m].gb.st;
 225
 226         ri   = atype->gb_radius[atoms->atom[ia].type];
 227         rj   = atype->gb_radius[atoms->atom[ib].type];
 228
 229         ri2  = ri*ri;
 230         ri3  = ri2*ri;
 231         rj2  = rj*rj;
 232
 233         ratio  = (rj2-ri2-r*r)/(2*ri*r);
 234         h      = ri*(1+ratio);
 235         term   = (M_PI/3.0)*h*h*(3.0*ri-h);
 236
 237         if (PARTDECOMP(cr))
 238         {
 239             vsol[ia] += term;
 240         }
 241         else
 242         {
 243             born->vsolv_globalindex[ia] -= term;
 244         }
 245
 246         ratio  = (ri2-rj2-r*r)/(2*rj*r);
 247         h      = rj*(1+ratio);
 248         term   = (M_PI/3.0)*h*h*(3.0*rj-h);
 249
 250         if (PARTDECOMP(cr))
 251         {
 252             vsol[ib] += term;
 253         }
 254         else
 255         {
 256             born->vsolv_globalindex[ib] -= term;
 257         }
 258     }
 259
 260     if (PARTDECOMP(cr))
 261     {
 262         gmx_sum(natoms, vsol, cr);
 263
 264         for (i = 0; i < natoms; i++)
 265         {
 266             born->vsolv_globalindex[i] = born->vsolv_globalindex[i]-vsol[i];
 267         }
 268     }
 269
 270     /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
 271        method */
 272     /* Self */
 273     for (j = 0; j < natoms; j++)
 274     {
 275         if (born->use_globalindex[j] == 1)
 276         {
 277             born->gpol_globalindex[j] = -0.5*ONE_4PI_EPS0/
 278                 (atype->gb_radius[atoms->atom[j].type]-doffset+STILL_P1);
 279         }
 280     }
 281
 282     /* 1-2 */
 283     for (j = 0; j < idef->il[F_GB12].nr; j += 3)
 284     {
 285         m  = idef->il[F_GB12].iatoms[j];
 286         ia = idef->il[F_GB12].iatoms[j+1];
 287         ib = idef->il[F_GB12].iatoms[j+2];
 288
 289         r = idef->iparams[m].gb.st;
 290
 291         r4 = r*r*r*r;
 292
 293         if (PARTDECOMP(cr))
 294         {
 295             gp[ia] += STILL_P2*born->vsolv_globalindex[ib]/r4;
 296             gp[ib] += STILL_P2*born->vsolv_globalindex[ia]/r4;
 297         }
 298         else
 299         {
 300             born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+
 301                 STILL_P2*born->vsolv_globalindex[ib]/r4;
 302             born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+
 303                 STILL_P2*born->vsolv_globalindex[ia]/r4;
 304         }
 305     }
 306
 307     /* 1-3 */
 308     for (j = 0; j < idef->il[F_GB13].nr; j += 3)
 309     {
 310         m  = idef->il[F_GB13].iatoms[j];
 311         ia = idef->il[F_GB13].iatoms[j+1];
 312         ib = idef->il[F_GB13].iatoms[j+2];
 313
 314         r  = idef->iparams[m].gb.st;
 315         r4 = r*r*r*r;
 316
 317         if (PARTDECOMP(cr))
 318         {
 319             gp[ia] += STILL_P3*born->vsolv[ib]/r4;
 320             gp[ib] += STILL_P3*born->vsolv[ia]/r4;
 321         }
 322         else
 323         {
 324             born->gpol_globalindex[ia] = born->gpol_globalindex[ia]+
 325                 STILL_P3*born->vsolv_globalindex[ib]/r4;
 326             born->gpol_globalindex[ib] = born->gpol_globalindex[ib]+
 327                 STILL_P3*born->vsolv_globalindex[ia]/r4;
 328         }
 329     }
 330
 331     if (PARTDECOMP(cr))
 332     {
 333         gmx_sum(natoms, gp, cr);
 334
 335         for (i = 0; i < natoms; i++)
 336         {
 337             born->gpol_globalindex[i] = born->gpol_globalindex[i]+gp[i];
 338         }
 339     }
 340
 341     sfree(vsol);
 342     sfree(gp);
 343
 344     return 0;
 345 }
 346
 347 /* Initialize all GB datastructs and compute polarization energies */
 348 int init_gb(gmx_genborn_t **p_born,
 349             const t_commrec *cr, t_forcerec *fr, const t_inputrec *ir,
 350             const gmx_mtop_t *mtop, int gb_algorithm)
 351 {
 352     int             i, j, m, ai, aj, jj, natoms, nalloc;
 353     real            rai, sk, p, doffset;
 354
 355     t_atoms         atoms;
 356     gmx_genborn_t  *born;
 357     gmx_localtop_t *localtop;
 358
 359     natoms   = mtop->natoms;
 360
 361     atoms    = gmx_mtop_global_atoms(mtop);
 362     localtop = gmx_mtop_generate_local_top(mtop, ir);
 363
 364     snew(born, 1);
 365     *p_born = born;
 366
 367     born->nr  = natoms;
 368
 369     snew(born->drobc, natoms);
 370     snew(born->bRad,  natoms);
 371
 372     /* Allocate memory for the global data arrays */
 373     snew(born->param_globalindex, natoms+3);
 374     snew(born->gpol_globalindex,  natoms+3);
 375     snew(born->vsolv_globalindex, natoms+3);
 376     snew(born->gb_radius_globalindex, natoms+3);
 377     snew(born->use_globalindex,    natoms+3);
 378
 379     snew(fr->invsqrta, natoms);
 380     snew(fr->dvda,     natoms);
 381
 382     fr->dadx              = NULL;
 383     fr->dadx_rawptr       = NULL;
 384     fr->nalloc_dadx       = 0;
 385     born->gpol_still_work = NULL;
 386     born->gpol_hct_work   = NULL;
 387
 388     /* snew(born->asurf,natoms); */
 389     /* snew(born->dasurf,natoms); */
 390
 391     /* Initialize the gb neighbourlist */
 392     init_gb_nblist(natoms, &(fr->gblist));
 393
 394     /* Do the Vsites exclusions (if any) */
 395     for (i = 0; i < natoms; i++)
 396     {
 397         jj = atoms.atom[i].type;
 398         if (mtop->atomtypes.gb_radius[atoms.atom[i].type] > 0)
 399         {
 400             born->use_globalindex[i] = 1;
 401         }
 402         else
 403         {
 404             born->use_globalindex[i] = 0;
 405         }
 406
 407         /* If we have a Vsite, put vs_globalindex[i]=0 */
 408         if (C6 (fr->nbfp, fr->ntype, jj, jj) == 0 &&
 409             C12(fr->nbfp, fr->ntype, jj, jj) == 0 &&
 410             atoms.atom[i].q == 0)
 411         {
 412             born->use_globalindex[i] = 0;
 413         }
 414     }
 415
 416     /* Copy algorithm parameters from inputrecord to local structure */
 417     born->obc_alpha          = ir->gb_obc_alpha;
 418     born->obc_beta           = ir->gb_obc_beta;
 419     born->obc_gamma          = ir->gb_obc_gamma;
 420     born->gb_doffset         = ir->gb_dielectric_offset;
 421     born->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 422     born->epsilon_r          = ir->epsilon_r;
 423
 424     doffset = born->gb_doffset;
 425
 426     /* Set the surface tension */
 427     born->sa_surface_tension = ir->sa_surface_tension;
 428
 429     /* If Still model, initialise the polarisation energies */
 430     if (gb_algorithm == egbSTILL)
 431     {
 432         init_gb_still(cr, &(mtop->atomtypes), &(localtop->idef), &atoms,
 433                       born, natoms);
 434     }
 435
 436
 437     /* If HCT/OBC,  precalculate the sk*atype->S_hct factors */
 438     else if (gb_algorithm == egbHCT || gb_algorithm == egbOBC)
 439     {
 440
 441         snew(born->gpol_hct_work, natoms+3);
 442
 443         for (i = 0; i < natoms; i++)
 444         {
 445             if (born->use_globalindex[i] == 1)
 446             {
 447                 rai = mtop->atomtypes.gb_radius[atoms.atom[i].type]-doffset;
 448                 sk  = rai * mtop->atomtypes.S_hct[atoms.atom[i].type];
 449                 born->param_globalindex[i]     = sk;
 450                 born->gb_radius_globalindex[i] = rai;
 451             }
 452             else
 453             {
 454                 born->param_globalindex[i]     = 0;
 455                 born->gb_radius_globalindex[i] = 0;
 456             }
 457         }
 458     }
 459
 460     /* Allocate memory for work arrays for temporary use */
 461     snew(born->work, natoms+4);
 462     snew(born->count, natoms);
 463     snew(born->nblist_work, natoms);
 464
 465     /* Domain decomposition specific stuff */
 466     born->nalloc = 0;
 467
 468     return 0;
 469 }
 470
 471
 472
 473 static int
 474 calc_gb_rad_still(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
 475                   rvec x[], t_nblist *nl,
 476                   gmx_genborn_t *born, t_mdatoms *md)
 477 {
 478     int  i, k, n, nj0, nj1, ai, aj, type;
 479     int  shift;
 480     real shX, shY, shZ;
 481     real gpi, dr, dr2, dr4, idr4, rvdw, ratio, ccf, theta, term, rai, raj;
 482     real ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
 483     real rinv, idr2, idr6, vaj, dccf, cosq, sinq, prod, gpi2;
 484     real factor;
 485     real vai, prod_ai, icf4, icf6;
 486
 487     factor  = 0.5*ONE_4PI_EPS0;
 488     n       = 0;
 489
 490     for (i = 0; i < born->nr; i++)
 491     {
 492         born->gpol_still_work[i] = 0;
 493     }
 494
 495     for (i = 0; i < nl->nri; i++)
 496     {
 497         ai      = nl->iinr[i];
 498
 499         nj0     = nl->jindex[i];
 500         nj1     = nl->jindex[i+1];
 501
 502         /* Load shifts for this list */
 503         shift   = nl->shift[i];
 504         shX     = fr->shift_vec[shift][0];
 505         shY     = fr->shift_vec[shift][1];
 506         shZ     = fr->shift_vec[shift][2];
 507
 508         gpi     = 0;
 509
 510         rai     = top->atomtypes.gb_radius[md->typeA[ai]];
 511         vai     = born->vsolv[ai];
 512         prod_ai = STILL_P4*vai;
 513
 514         /* Load atom i coordinates, add shift vectors */
 515         ix1     = shX + x[ai][0];
 516         iy1     = shY + x[ai][1];
 517         iz1     = shZ + x[ai][2];
 518
 519         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
 520         {
 521             aj    = nl->jjnr[k];
 522             jx1   = x[aj][0];
 523             jy1   = x[aj][1];
 524             jz1   = x[aj][2];
 525
 526             dx11  = ix1-jx1;
 527             dy11  = iy1-jy1;
 528             dz11  = iz1-jz1;
 529
 530             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 531             rinv  = gmx_invsqrt(dr2);
 532             idr2  = rinv*rinv;
 533             idr4  = idr2*idr2;
 534             idr6  = idr4*idr2;
 535
 536             raj = top->atomtypes.gb_radius[md->typeA[aj]];
 537
 538             rvdw  = rai + raj;
 539
 540             ratio = dr2 / (rvdw * rvdw);
 541             vaj   = born->vsolv[aj];
 542
 543             if (ratio > STILL_P5INV)
 544             {
 545                 ccf  = 1.0;
 546                 dccf = 0.0;
 547             }
 548             else
 549             {
 550                 theta = ratio*STILL_PIP5;
 551                 cosq  = cos(theta);
 552                 term  = 0.5*(1.0-cosq);
 553                 ccf   = term*term;
 554                 sinq  = 1.0 - cosq*cosq;
 555                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 556             }
 557
 558             prod                       = STILL_P4*vaj;
 559             icf4                       = ccf*idr4;
 560             icf6                       = (4*ccf-dccf)*idr6;
 561             born->gpol_still_work[aj] += prod_ai*icf4;
 562             gpi                        = gpi+prod*icf4;
 563
 564             /* Save ai->aj and aj->ai chain rule terms */
 565             fr->dadx[n++]   = prod*icf6;
 566             fr->dadx[n++]   = prod_ai*icf6;
 567         }
 568         born->gpol_still_work[ai] += gpi;
 569     }
 570
 571     /* Parallel summations */
 572     if (PARTDECOMP(cr))
 573     {
 574         gmx_sum(natoms, born->gpol_still_work, cr);
 575     }
 576     else if (DOMAINDECOMP(cr))
 577     {
 578         dd_atom_sum_real(cr->dd, born->gpol_still_work);
 579     }
 580
 581     /* Calculate the radii */
 582     for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
 583     {
 584         if (born->use[i] != 0)
 585         {
 586             gpi             = born->gpol[i]+born->gpol_still_work[i];
 587             gpi2            = gpi * gpi;
 588             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
 589             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 590         }
 591     }
 592
 593     /* Extra communication required for DD */
 594     if (DOMAINDECOMP(cr))
 595     {
 596         dd_atom_spread_real(cr->dd, born->bRad);
 597         dd_atom_spread_real(cr->dd, fr->invsqrta);
 598     }
 599
 600     return 0;
 601
 602 }
 603
 604
 605 static int
 606 calc_gb_rad_hct(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
 607                 rvec x[], t_nblist *nl,
 608                 gmx_genborn_t *born, t_mdatoms *md)
 609 {
 610     int   i, k, n, ai, aj, nj0, nj1, at0, at1;
 611     int   shift;
 612     real  shX, shY, shZ;
 613     real  rai, raj, gpi, dr2, dr, sk, sk_ai, sk2, sk2_ai, lij, uij, diff2, tmp, sum_ai;
 614     real  rad, min_rad, rinv, rai_inv;
 615     real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
 616     real  lij2, uij2, lij3, uij3, t1, t2, t3;
 617     real  lij_inv, dlij, duij, sk2_rinv, prod, log_term;
 618     real  doffset, raj_inv, dadx_val;
 619     real *gb_radius;
 620
 621     doffset   = born->gb_doffset;
 622     gb_radius = born->gb_radius;
 623
 624     for (i = 0; i < born->nr; i++)
 625     {
 626         born->gpol_hct_work[i] = 0;
 627     }
 628
 629     /* Keep the compiler happy */
 630     n    = 0;
 631     prod = 0;
 632
 633     for (i = 0; i < nl->nri; i++)
 634     {
 635         ai     = nl->iinr[i];
 636
 637         nj0    = nl->jindex[i];
 638         nj1    = nl->jindex[i+1];
 639
 640         /* Load shifts for this list */
 641         shift   = nl->shift[i];
 642         shX     = fr->shift_vec[shift][0];
 643         shY     = fr->shift_vec[shift][1];
 644         shZ     = fr->shift_vec[shift][2];
 645
 646         rai     = gb_radius[ai];
 647         rai_inv = 1.0/rai;
 648
 649         sk_ai   = born->param[ai];
 650         sk2_ai  = sk_ai*sk_ai;
 651
 652         /* Load atom i coordinates, add shift vectors */
 653         ix1     = shX + x[ai][0];
 654         iy1     = shY + x[ai][1];
 655         iz1     = shZ + x[ai][2];
 656
 657         sum_ai  = 0;
 658
 659         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
 660         {
 661             aj    = nl->jjnr[k];
 662
 663             jx1   = x[aj][0];
 664             jy1   = x[aj][1];
 665             jz1   = x[aj][2];
 666
 667             dx11  = ix1 - jx1;
 668             dy11  = iy1 - jy1;
 669             dz11  = iz1 - jz1;
 670
 671             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 672             rinv  = gmx_invsqrt(dr2);
 673             dr    = rinv*dr2;
 674
 675             sk    = born->param[aj];
 676             raj   = gb_radius[aj];
 677
 678             /* aj -> ai interaction */
 679             if (rai < dr+sk)
 680             {
 681                 lij     = 1.0/(dr-sk);
 682                 dlij    = 1.0;
 683
 684                 if (rai > dr-sk)
 685                 {
 686                     lij  = rai_inv;
 687                     dlij = 0.0;
 688                 }
 689
 690                 lij2     = lij*lij;
 691                 lij3     = lij2*lij;
 692
 693                 uij      = 1.0/(dr+sk);
 694                 uij2     = uij*uij;
 695                 uij3     = uij2*uij;
 696
 697                 diff2    = uij2-lij2;
 698
 699                 lij_inv  = gmx_invsqrt(lij2);
 700                 sk2      = sk*sk;
 701                 sk2_rinv = sk2*rinv;
 702                 prod     = 0.25*sk2_rinv;
 703
 704                 log_term = log(uij*lij_inv);
 705
 706                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
 707                     prod*(-diff2);
 708
 709                 if (rai < sk-dr)
 710                 {
 711                     tmp = tmp + 2.0 * (rai_inv-lij);
 712                 }
 713
 714                 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 715                 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 716                 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 717
 718                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
 719                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
 720                 /* rb2 is moved to chainrule    */
 721
 722                 sum_ai += 0.5*tmp;
 723             }
 724             else
 725             {
 726                 dadx_val = 0.0;
 727             }
 728             fr->dadx[n++] = dadx_val;
 729
 730
 731             /* ai -> aj interaction */
 732             if (raj < dr + sk_ai)
 733             {
 734                 lij     = 1.0/(dr-sk_ai);
 735                 dlij    = 1.0;
 736                 raj_inv = 1.0/raj;
 737
 738                 if (raj > dr-sk_ai)
 739                 {
 740                     lij  = raj_inv;
 741                     dlij = 0.0;
 742                 }
 743
 744                 lij2     = lij  * lij;
 745                 lij3     = lij2 * lij;
 746
 747                 uij      = 1.0/(dr+sk_ai);
 748                 uij2     = uij  * uij;
 749                 uij3     = uij2 * uij;
 750
 751                 diff2    = uij2-lij2;
 752
 753                 lij_inv  = gmx_invsqrt(lij2);
 754                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 755                 sk2_rinv = sk2*rinv;
 756                 prod     = 0.25 * sk2_rinv;
 757
 758                 /* log_term = table_log(uij*lij_inv,born->log_table,
 759                    LOG_TABLE_ACCURACY); */
 760                 log_term = log(uij*lij_inv);
 761
 762                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
 763                     prod*(-diff2);
 764
 765                 if (raj < sk_ai-dr)
 766                 {
 767                     tmp     = tmp + 2.0 * (raj_inv-lij);
 768                 }
 769
 770                 /* duij = 1.0 */
 771                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 772                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 773                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 774
 775                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
 776                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule    */
 777
 778                 born->gpol_hct_work[aj] += 0.5*tmp;
 779             }
 780             else
 781             {
 782                 dadx_val = 0.0;
 783             }
 784             fr->dadx[n++] = dadx_val;
 785         }
 786
 787         born->gpol_hct_work[ai] += sum_ai;
 788     }
 789
 790     /* Parallel summations */
 791     if (PARTDECOMP(cr))
 792     {
 793         gmx_sum(natoms, born->gpol_hct_work, cr);
 794     }
 795     else if (DOMAINDECOMP(cr))
 796     {
 797         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
 798     }
 799
 800     for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
 801     {
 802         if (born->use[i] != 0)
 803         {
 804             rai     = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
 805             sum_ai  = 1.0/rai - born->gpol_hct_work[i];
 806             min_rad = rai + doffset;
 807             rad     = 1.0/sum_ai;
 808
 809             born->bRad[i]   = rad > min_rad ? rad : min_rad;
 810             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 811         }
 812     }
 813
 814     /* Extra communication required for DD */
 815     if (DOMAINDECOMP(cr))
 816     {
 817         dd_atom_spread_real(cr->dd, born->bRad);
 818         dd_atom_spread_real(cr->dd, fr->invsqrta);
 819     }
 820
 821
 822     return 0;
 823 }
 824
 825 static int
 826 calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
 827                 rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md)
 828 {
 829     int   i, k, ai, aj, nj0, nj1, n, at0, at1;
 830     int   shift;
 831     real  shX, shY, shZ;
 832     real  rai, raj, gpi, dr2, dr, sk, sk2, lij, uij, diff2, tmp, sum_ai;
 833     real  rad, min_rad, sum_ai2, sum_ai3, tsum, tchain, rinv, rai_inv, lij_inv, rai_inv2;
 834     real  log_term, prod, sk2_rinv, sk_ai, sk2_ai;
 835     real  ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11;
 836     real  lij2, uij2, lij3, uij3, dlij, duij, t1, t2, t3;
 837     real  doffset, raj_inv, dadx_val;
 838     real *gb_radius;
 839
 840     /* Keep the compiler happy */
 841     n    = 0;
 842     prod = 0;
 843     raj  = 0;
 844
 845     doffset   = born->gb_doffset;
 846     gb_radius = born->gb_radius;
 847
 848     for (i = 0; i < born->nr; i++)
 849     {
 850         born->gpol_hct_work[i] = 0;
 851     }
 852
 853     for (i = 0; i < nl->nri; i++)
 854     {
 855         ai      = nl->iinr[i];
 856
 857         nj0     = nl->jindex[i];
 858         nj1     = nl->jindex[i+1];
 859
 860         /* Load shifts for this list */
 861         shift   = nl->shift[i];
 862         shX     = fr->shift_vec[shift][0];
 863         shY     = fr->shift_vec[shift][1];
 864         shZ     = fr->shift_vec[shift][2];
 865
 866         rai      = gb_radius[ai];
 867         rai_inv  = 1.0/rai;
 868
 869         sk_ai    = born->param[ai];
 870         sk2_ai   = sk_ai*sk_ai;
 871
 872         /* Load atom i coordinates, add shift vectors */
 873         ix1      = shX + x[ai][0];
 874         iy1      = shY + x[ai][1];
 875         iz1      = shZ + x[ai][2];
 876
 877         sum_ai   = 0;
 878
 879         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
 880         {
 881             aj    = nl->jjnr[k];
 882
 883             jx1   = x[aj][0];
 884             jy1   = x[aj][1];
 885             jz1   = x[aj][2];
 886
 887             dx11  = ix1 - jx1;
 888             dy11  = iy1 - jy1;
 889             dz11  = iz1 - jz1;
 890
 891             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 892             rinv  = gmx_invsqrt(dr2);
 893             dr    = dr2*rinv;
 894
 895             /* sk is precalculated in init_gb() */
 896             sk    = born->param[aj];
 897             raj   = gb_radius[aj];
 898
 899             /* aj -> ai interaction */
 900             if (rai < dr+sk)
 901             {
 902                 lij       = 1.0/(dr-sk);
 903                 dlij      = 1.0;
 904
 905                 if (rai > dr-sk)
 906                 {
 907                     lij  = rai_inv;
 908                     dlij = 0.0;
 909                 }
 910
 911                 uij      = 1.0/(dr+sk);
 912                 lij2     = lij  * lij;
 913                 lij3     = lij2 * lij;
 914                 uij2     = uij  * uij;
 915                 uij3     = uij2 * uij;
 916
 917                 diff2    = uij2-lij2;
 918
 919                 lij_inv  = gmx_invsqrt(lij2);
 920                 sk2      = sk*sk;
 921                 sk2_rinv = sk2*rinv;
 922                 prod     = 0.25*sk2_rinv;
 923
 924                 log_term = log(uij*lij_inv);
 925
 926                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 927
 928                 if (rai < sk-dr)
 929                 {
 930                     tmp = tmp + 2.0 * (rai_inv-lij);
 931                 }
 932
 933                 /* duij    = 1.0; */
 934                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 935                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 936                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 937
 938                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
 939
 940                 sum_ai += 0.5*tmp;
 941             }
 942             else
 943             {
 944                 dadx_val = 0.0;
 945             }
 946             fr->dadx[n++] = dadx_val;
 947
 948             /* ai -> aj interaction */
 949             if (raj < dr + sk_ai)
 950             {
 951                 lij     = 1.0/(dr-sk_ai);
 952                 dlij    = 1.0;
 953                 raj_inv = 1.0/raj;
 954
 955                 if (raj > dr-sk_ai)
 956                 {
 957                     lij  = raj_inv;
 958                     dlij = 0.0;
 959                 }
 960
 961                 lij2     = lij  * lij;
 962                 lij3     = lij2 * lij;
 963
 964                 uij      = 1.0/(dr+sk_ai);
 965                 uij2     = uij  * uij;
 966                 uij3     = uij2 * uij;
 967
 968                 diff2    = uij2-lij2;
 969
 970                 lij_inv  = gmx_invsqrt(lij2);
 971                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 972                 sk2_rinv = sk2*rinv;
 973                 prod     = 0.25 * sk2_rinv;
 974
 975                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 976                 log_term = log(uij*lij_inv);
 977
 978                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 979
 980                 if (raj < sk_ai-dr)
 981                 {
 982                     tmp     = tmp + 2.0 * (raj_inv-lij);
 983                 }
 984
 985                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 986                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 987                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 988
 989                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
 990
 991                 born->gpol_hct_work[aj] += 0.5*tmp;
 992
 993             }
 994             else
 995             {
 996                 dadx_val = 0.0;
 997             }
 998             fr->dadx[n++] = dadx_val;
 999
1000         }
1001         born->gpol_hct_work[ai] += sum_ai;
1002
1003     }
1004
1005     /* Parallel summations */
1006     if (PARTDECOMP(cr))
1007     {
1008         gmx_sum(natoms, born->gpol_hct_work, cr);
1009     }
1010     else if (DOMAINDECOMP(cr))
1011     {
1012         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
1013     }
1014
1015     for (i = 0; i < fr->natoms_force; i++) /* PELA born->nr */
1016     {
1017         if (born->use[i] != 0)
1018         {
1019             rai        = top->atomtypes.gb_radius[md->typeA[i]];
1020             rai_inv2   = 1.0/rai;
1021             rai        = rai-doffset;
1022             rai_inv    = 1.0/rai;
1023             sum_ai     = rai * born->gpol_hct_work[i];
1024             sum_ai2    = sum_ai  * sum_ai;
1025             sum_ai3    = sum_ai2 * sum_ai;
1026
1027             tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
1028             born->bRad[i] = rai_inv - tsum*rai_inv2;
1029             born->bRad[i] = 1.0 / born->bRad[i];
1030
1031             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
1032
1033             tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
1034             born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
1035         }
1036     }
1037
1038     /* Extra (local) communication required for DD */
1039     if (DOMAINDECOMP(cr))
1040     {
1041         dd_atom_spread_real(cr->dd, born->bRad);
1042         dd_atom_spread_real(cr->dd, fr->invsqrta);
1043         dd_atom_spread_real(cr->dd, born->drobc);
1044     }
1045
1046     return 0;
1047
1048 }
1049
1050
1051
1052 int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir, gmx_localtop_t *top,
1053                 rvec x[], t_nblist *nl, gmx_genborn_t *born, t_mdatoms *md, t_nrnb     *nrnb)
1054 {
1055     real *p;
1056     int   cnt;
1057     int   ndadx;
1058
1059     if (fr->bAllvsAll && fr->dadx == NULL)
1060     {
1061         /* We might need up to 8 atoms of padding before and after,
1062          * and another 4 units to guarantee SSE alignment.
1063          */
1064         fr->nalloc_dadx = 2*(md->homenr+12)*(md->nr/2+1+12);
1065         snew(fr->dadx_rawptr, fr->nalloc_dadx);
1066         fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
1067     }
1068     else
1069     {
1070         /* In the SSE-enabled gb-loops, when writing to dadx, we
1071          * always write 2*4 elements at a time, even in the case with only
1072          * 1-3 j particles, where we only really need to write 2*(1-3)
1073          * elements. This is because we want dadx to be aligned to a 16-
1074          * byte boundary, and being able to use _mm_store/load_ps
1075          */
1076         ndadx = 2 * (nl->nrj + 3*nl->nri);
1077
1078         /* First, reallocate the dadx array, we need 3 extra for SSE */
1079         if (ndadx + 3 > fr->nalloc_dadx)
1080         {
1081             fr->nalloc_dadx = over_alloc_large(ndadx) + 3;
1082             srenew(fr->dadx_rawptr, fr->nalloc_dadx);
1083             fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
1084         }
1085     }
1086
1087     if (fr->bAllvsAll)
1088     {
1089         cnt = md->homenr*(md->nr/2+1);
1090
1091         if (ir->gb_algorithm == egbSTILL)
1092         {
1093 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1094             if (fr->use_simd_kernels)
1095             {
1096 #  ifdef GMX_DOUBLE
1097                 genborn_allvsall_calc_still_radii_sse2_double(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1098 #  else
1099                 genborn_allvsall_calc_still_radii_sse2_single(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1100 #  endif
1101             }
1102             else
1103             {
1104                 genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1105             }
1106 #else
1107             genborn_allvsall_calc_still_radii(fr, md, born, top, x[0], cr, &fr->AllvsAll_workgb);
1108 #endif
1109             /* 13 flops in outer loop, 47 flops in inner loop */
1110             inc_nrnb(nrnb, eNR_BORN_AVA_RADII_STILL, md->homenr*13+cnt*47);
1111         }
1112         else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC)
1113         {
1114 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1115             if (fr->use_simd_kernels)
1116             {
1117 #  ifdef GMX_DOUBLE
1118                 genborn_allvsall_calc_hct_obc_radii_sse2_double(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1119 #  else
1120                 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1121 #  endif
1122             }
1123             else
1124             {
1125                 genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1126             }
1127 #else
1128             genborn_allvsall_calc_hct_obc_radii(fr, md, born, ir->gb_algorithm, top, x[0], cr, &fr->AllvsAll_workgb);
1129 #endif
1130             /* 24 flops in outer loop, 183 in inner */
1131             inc_nrnb(nrnb, eNR_BORN_AVA_RADII_HCT_OBC, md->homenr*24+cnt*183);
1132         }
1133         else
1134         {
1135             gmx_fatal(FARGS, "Bad gb algorithm for all-vs-all interactions");
1136         }
1137         return 0;
1138     }
1139
1140     /* Switch for determining which algorithm to use for Born radii calculation */
1141 #ifdef GMX_DOUBLE
1142
1143 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1144     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1145     switch (ir->gb_algorithm)
1146     {
1147         case egbSTILL:
1148             if (fr->use_simd_kernels)
1149             {
1150                 calc_gb_rad_still_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born);
1151             }
1152             else
1153             {
1154                 calc_gb_rad_still(cr, fr, born->nr, top, atype, x, nl, born, md);
1155             }
1156             break;
1157         case egbHCT:
1158             if (fr->use_simd_kernels)
1159             {
1160                 calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
1161             }
1162             else
1163             {
1164                 calc_gb_rad_hct(cr, fr, born->nr, top, atype, x, nl, born, md);
1165             }
1166             break;
1167         case egbOBC:
1168             if (fr->use_simd_kernels)
1169             {
1170                 calc_gb_rad_hct_obc_sse2_double(cr, fr, born->nr, top, atype, x[0], nl, born, md, ir->gb_algorithm);
1171             }
1172             else
1173             {
1174                 calc_gb_rad_obc(cr, fr, born->nr, top, atype, x, nl, born, md);
1175             }
1176             break;
1177
1178         default:
1179             gmx_fatal(FARGS, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
1180     }
1181 #else
1182     switch (ir->gb_algorithm)
1183     {
1184         case egbSTILL:
1185             calc_gb_rad_still(cr, fr, born->nr, top, x, nl, born, md);
1186             break;
1187         case egbHCT:
1188             calc_gb_rad_hct(cr, fr, born->nr, top, x, nl, born, md);
1189             break;
1190         case egbOBC:
1191             calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
1192             break;
1193
1194         default:
1195             gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d", ir->gb_algorithm);
1196     }
1197
1198 #endif
1199
1200 #else
1201
1202 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1203     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1204     switch (ir->gb_algorithm)
1205     {
1206         case egbSTILL:
1207             if (fr->use_simd_kernels)
1208             {
1209                 calc_gb_rad_still_sse2_single(cr, fr, born->nr, top, x[0], nl, born);
1210             }
1211             else
1212             {
1213                 calc_gb_rad_still(cr, fr, born->nr, top, x, nl, born, md);
1214             }
1215             break;
1216         case egbHCT:
1217             if (fr->use_simd_kernels)
1218             {
1219                 calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
1220             }
1221             else
1222             {
1223                 calc_gb_rad_hct(cr, fr, born->nr, top, x, nl, born, md);
1224             }
1225             break;
1226
1227         case egbOBC:
1228             if (fr->use_simd_kernels)
1229             {
1230                 calc_gb_rad_hct_obc_sse2_single(cr, fr, born->nr, top, x[0], nl, born, md, ir->gb_algorithm);
1231             }
1232             else
1233             {
1234                 calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
1235             }
1236             break;
1237
1238         default:
1239             gmx_fatal(FARGS, "Unknown sse-enabled algorithm for Born radii calculation: %d", ir->gb_algorithm);
1240     }
1241
1242 #else
1243     switch (ir->gb_algorithm)
1244     {
1245         case egbSTILL:
1246             calc_gb_rad_still(cr, fr, born->nr, top, x, nl, born, md);
1247             break;
1248         case egbHCT:
1249             calc_gb_rad_hct(cr, fr, born->nr, top, x, nl, born, md);
1250             break;
1251         case egbOBC:
1252             calc_gb_rad_obc(cr, fr, born->nr, top, x, nl, born, md);
1253             break;
1254
1255         default:
1256             gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d", ir->gb_algorithm);
1257     }
1258
1259 #endif /* Single precision sse */
1260
1261 #endif /* Double or single precision */
1262
1263     if (fr->bAllvsAll == FALSE)
1264     {
1265         switch (ir->gb_algorithm)
1266         {
1267             case egbSTILL:
1268                 /* 17 flops per outer loop iteration, 47 flops per inner loop */
1269                 inc_nrnb(nrnb, eNR_BORN_RADII_STILL, nl->nri*17+nl->nrj*47);
1270                 break;
1271             case egbHCT:
1272             case egbOBC:
1273                 /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
1274                 inc_nrnb(nrnb, eNR_BORN_RADII_HCT_OBC, nl->nri*61+nl->nrj*183);
1275                 break;
1276
1277             default:
1278                 break;
1279         }
1280     }
1281
1282     return 0;
1283 }
1284
1285
1286
1287 real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale,
1288                   real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
1289                   real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph)
1290 {
1291     int      i, j, n0, m, nnn, type, ai, aj;
1292     int      ki;
1293
1294     real     isai, isaj;
1295     real     r, rsq11;
1296     real     rinv11, iq;
1297     real     isaprod, qq, gbscale, gbtabscale, Y, F, Geps, Heps2, Fp, VV, FF, rt, eps, eps2;
1298     real     vgb, fgb, vcoul, fijC, dvdatmp, fscal, dvdaj;
1299     real     vctot;
1300
1301     rvec     dx;
1302     ivec     dt;
1303
1304     t_iatom *forceatoms;
1305
1306     /* Scale the electrostatics by gb_epsilon_solvent */
1307     facel = facel * ((1.0/epsilon_r) - 1.0/gb_epsilon_solvent);
1308
1309     gbtabscale = *p_gbtabscale;
1310     vctot      = 0.0;
1311
1312     for (j = F_GB12; j <= F_GB14; j++)
1313     {
1314         forceatoms = idef->il[j].iatoms;
1315
1316         for (i = 0; i < idef->il[j].nr; )
1317         {
1318             /* To avoid reading in the interaction type, we just increment i to pass over
1319              * the types in the forceatoms array, this saves some memory accesses
1320              */
1321             i++;
1322             ai            = forceatoms[i++];
1323             aj            = forceatoms[i++];
1324
1325             ki            = pbc_rvec_sub(pbc, x[ai], x[aj], dx);
1326             rsq11         = iprod(dx, dx);
1327
1328             isai          = invsqrta[ai];
1329             iq            = (-1)*facel*charge[ai];
1330
1331             rinv11        = gmx_invsqrt(rsq11);
1332             isaj          = invsqrta[aj];
1333             isaprod       = isai*isaj;
1334             qq            = isaprod*iq*charge[aj];
1335             gbscale       = isaprod*gbtabscale;
1336             r             = rsq11*rinv11;
1337             rt            = r*gbscale;
1338             n0            = rt;
1339             eps           = rt-n0;
1340             eps2          = eps*eps;
1341             nnn           = 4*n0;
1342             Y             = GBtab[nnn];
1343             F             = GBtab[nnn+1];
1344             Geps          = eps*GBtab[nnn+2];
1345             Heps2         = eps2*GBtab[nnn+3];
1346             Fp            = F+Geps+Heps2;
1347             VV            = Y+eps*Fp;
1348             FF            = Fp+Geps+2.0*Heps2;
1349             vgb           = qq*VV;
1350             fijC          = qq*FF*gbscale;
1351             dvdatmp       = -(vgb+fijC*r)*0.5;
1352             dvda[aj]      = dvda[aj] + dvdatmp*isaj*isaj;
1353             dvda[ai]      = dvda[ai] + dvdatmp*isai*isai;
1354             vctot         = vctot + vgb;
1355             fgb           = -(fijC)*rinv11;
1356
1357             if (graph)
1358             {
1359                 ivec_sub(SHIFT_IVEC(graph, ai), SHIFT_IVEC(graph, aj), dt);
1360                 ki = IVEC2IS(dt);
1361             }
1362
1363             for (m = 0; (m < DIM); m++)             /*  15              */
1364             {
1365                 fscal               = fgb*dx[m];
1366                 f[ai][m]           += fscal;
1367                 f[aj][m]           -= fscal;
1368                 fshift[ki][m]      += fscal;
1369                 fshift[CENTRAL][m] -= fscal;
1370             }
1371         }
1372     }
1373
1374     return vctot;
1375 }
1376
1377 real calc_gb_selfcorrections(t_commrec *cr, int natoms,
1378                              real *charge, gmx_genborn_t *born, real *dvda, double facel)
1379 {
1380     int  i, ai, at0, at1;
1381     real rai, e, derb, q, q2, fi, rai_inv, vtot;
1382
1383     if (PARTDECOMP(cr))
1384     {
1385         pd_at_range(cr, &at0, &at1);
1386     }
1387     else if (DOMAINDECOMP(cr))
1388     {
1389         at0 = 0;
1390         at1 = cr->dd->nat_home;
1391     }
1392     else
1393     {
1394         at0 = 0;
1395         at1 = natoms;
1396
1397     }
1398
1399     /* Scale the electrostatics by gb_epsilon_solvent */
1400     facel = facel * ((1.0/born->epsilon_r) - 1.0/born->gb_epsilon_solvent);
1401
1402     vtot = 0.0;
1403
1404     /* Apply self corrections */
1405     for (i = at0; i < at1; i++)
1406     {
1407         ai       = i;
1408
1409         if (born->use[ai] == 1)
1410         {
1411             rai       = born->bRad[ai];
1412             rai_inv   = 1.0/rai;
1413             q         = charge[ai];
1414             q2        = q*q;
1415             fi        = facel*q2;
1416             e         = fi*rai_inv;
1417             derb      = 0.5*e*rai_inv*rai_inv;
1418             dvda[ai] += derb*rai;
1419             vtot     -= 0.5*e;
1420         }
1421     }
1422
1423     return vtot;
1424
1425 }
1426
1427 real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr, int natoms, gmx_genborn_t *born, gmx_localtop_t *top,
1428                       real *dvda, t_mdatoms *md)
1429 {
1430     int  ai, i, at0, at1;
1431     real e, es, rai, rbi, term, probe, tmp, factor;
1432     real rbi_inv, rbi_inv2;
1433
1434     /* To keep the compiler happy */
1435     factor = 0;
1436
1437     if (PARTDECOMP(cr))
1438     {
1439         pd_at_range(cr, &at0, &at1);
1440     }
1441     else if (DOMAINDECOMP(cr))
1442     {
1443         at0 = 0;
1444         at1 = cr->dd->nat_home;
1445     }
1446     else
1447     {
1448         at0 = 0;
1449         at1 = natoms;
1450     }
1451
1452     /* factor is the surface tension */
1453     factor = born->sa_surface_tension;
1454     /*
1455
1456        // The surface tension factor is 0.0049 for Still model, 0.0054 for HCT/OBC
1457        if(gb_algorithm==egbSTILL)
1458        {
1459           factor=0.0049*100*CAL2JOULE;
1460        }
1461        else
1462        {
1463           factor=0.0054*100*CAL2JOULE;
1464        }
1465      */
1466     /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1467
1468     es    = 0;
1469     probe = 0.14;
1470     term  = M_PI*4;
1471
1472     for (i = at0; i < at1; i++)
1473     {
1474         ai        = i;
1475
1476         if (born->use[ai] == 1)
1477         {
1478             rai       = top->atomtypes.gb_radius[md->typeA[ai]];
1479             rbi_inv   = fr->invsqrta[ai];
1480             rbi_inv2  = rbi_inv * rbi_inv;
1481             tmp       = (rai*rbi_inv2)*(rai*rbi_inv2);
1482             tmp       = tmp*tmp*tmp;
1483             e         = factor*term*(rai+probe)*(rai+probe)*tmp;
1484             dvda[ai]  = dvda[ai] - 6*e*rbi_inv2;
1485             es        = es + e;
1486         }
1487     }
1488
1489     return es;
1490 }
1491
1492
1493
1494 real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[], rvec t[], rvec fshift[],
1495                        rvec shift_vec[], int gb_algorithm, gmx_genborn_t *born)
1496 {
1497     int          i, k, n, ai, aj, nj0, nj1, n0, n1;
1498     int          shift;
1499     real         shX, shY, shZ;
1500     real         fgb, fij, rb2, rbi, fix1, fiy1, fiz1;
1501     real         ix1, iy1, iz1, jx1, jy1, jz1, dx11, dy11, dz11, rsq11;
1502     real         rinv11, tx, ty, tz, rbai, rbaj, fgb_ai;
1503     real        *rb;
1504     volatile int idx;
1505
1506     n  = 0;
1507     rb = born->work;
1508
1509     n0 = 0;
1510     n1 = natoms;
1511
1512     if (gb_algorithm == egbSTILL)
1513     {
1514         for (i = n0; i < n1; i++)
1515         {
1516             rbi   = born->bRad[i];
1517             rb[i] = (2 * rbi * rbi * dvda[i])/ONE_4PI_EPS0;
1518         }
1519     }
1520     else if (gb_algorithm == egbHCT)
1521     {
1522         for (i = n0; i < n1; i++)
1523         {
1524             rbi   = born->bRad[i];
1525             rb[i] = rbi * rbi * dvda[i];
1526         }
1527     }
1528     else if (gb_algorithm == egbOBC)
1529     {
1530         for (i = n0; i < n1; i++)
1531         {
1532             rbi   = born->bRad[i];
1533             rb[i] = rbi * rbi * born->drobc[i] * dvda[i];
1534         }
1535     }
1536
1537     for (i = 0; i < nl->nri; i++)
1538     {
1539         ai   = nl->iinr[i];
1540
1541         nj0  = nl->jindex[i];
1542         nj1  = nl->jindex[i+1];
1543
1544         /* Load shifts for this list */
1545         shift   = nl->shift[i];
1546         shX     = shift_vec[shift][0];
1547         shY     = shift_vec[shift][1];
1548         shZ     = shift_vec[shift][2];
1549
1550         /* Load atom i coordinates, add shift vectors */
1551         ix1  = shX + x[ai][0];
1552         iy1  = shY + x[ai][1];
1553         iz1  = shZ + x[ai][2];
1554
1555         fix1 = 0;
1556         fiy1 = 0;
1557         fiz1 = 0;
1558
1559         rbai = rb[ai];
1560
1561         for (k = nj0; k < nj1 && nl->jjnr[k] >= 0; k++)
1562         {
1563             aj = nl->jjnr[k];
1564
1565             jx1     = x[aj][0];
1566             jy1     = x[aj][1];
1567             jz1     = x[aj][2];
1568
1569             dx11    = ix1 - jx1;
1570             dy11    = iy1 - jy1;
1571             dz11    = iz1 - jz1;
1572
1573             rbaj    = rb[aj];
1574
1575             fgb     = rbai*dadx[n++];
1576             fgb_ai  = rbaj*dadx[n++];
1577
1578             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1579             fgb     = fgb + fgb_ai;
1580
1581             tx      = fgb * dx11;
1582             ty      = fgb * dy11;
1583             tz      = fgb * dz11;
1584
1585             fix1    = fix1 + tx;
1586             fiy1    = fiy1 + ty;
1587             fiz1    = fiz1 + tz;
1588
1589             /* Update force on atom aj */
1590             t[aj][0] = t[aj][0] - tx;
1591             t[aj][1] = t[aj][1] - ty;
1592             t[aj][2] = t[aj][2] - tz;
1593         }
1594
1595         /* Update force and shift forces on atom ai */
1596         t[ai][0] = t[ai][0] + fix1;
1597         t[ai][1] = t[ai][1] + fiy1;
1598         t[ai][2] = t[ai][2] + fiz1;
1599
1600         fshift[shift][0] = fshift[shift][0] + fix1;
1601         fshift[shift][1] = fshift[shift][1] + fiy1;
1602         fshift[shift][2] = fshift[shift][2] + fiz1;
1603
1604     }
1605
1606     return 0;
1607 }
1608
1609
1610 void
1611 calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t *top,
1612                rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, int sa_algorithm, t_nrnb *nrnb,
1613                const t_pbc *pbc, const t_graph *graph, gmx_enerdata_t *enerd)
1614 {
1615     real v = 0;
1616     int  cnt;
1617     int  i;
1618
1619     /* PBC or not? */
1620     const t_pbc *pbc_null;
1621
1622     if (fr->bMolPBC)
1623     {
1624         pbc_null = pbc;
1625     }
1626     else
1627     {
1628         pbc_null = NULL;
1629     }
1630
1631     if (sa_algorithm == esaAPPROX)
1632     {
1633         /* Do a simple ACE type approximation for the non-polar solvation */
1634         enerd->term[F_NPSOLVATION] += calc_gb_nonpolar(cr, fr, born->nr, born, top, fr->dvda, md);
1635     }
1636
1637     /* Calculate the bonded GB-interactions using either table or analytical formula */
1638     enerd->term[F_GBPOL]       += gb_bonds_tab(x, f, fr->fshift, md->chargeA, &(fr->gbtabscale),
1639                                                fr->invsqrta, fr->dvda, fr->gbtab.data, idef, born->epsilon_r, born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
1640
1641     /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1642     enerd->term[F_GBPOL]       += calc_gb_selfcorrections(cr, born->nr, md->chargeA, born, fr->dvda, fr->epsfac);
1643
1644     /* If parallel, sum the derivative of the potential w.r.t the born radii */
1645     if (PARTDECOMP(cr))
1646     {
1647         gmx_sum(md->nr, fr->dvda, cr);
1648     }
1649     else if (DOMAINDECOMP(cr))
1650     {
1651         dd_atom_sum_real(cr->dd, fr->dvda);
1652         dd_atom_spread_real(cr->dd, fr->dvda);
1653     }
1654
1655     if (fr->bAllvsAll)
1656     {
1657 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1658         if (fr->use_simd_kernels)
1659         {
1660 #  ifdef GMX_DOUBLE
1661             genborn_allvsall_calc_chainrule_sse2_double(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1662 #  else
1663             genborn_allvsall_calc_chainrule_sse2_single(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1664 #  endif
1665         }
1666         else
1667         {
1668             genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1669         }
1670 #else
1671         genborn_allvsall_calc_chainrule(fr, md, born, x[0], f[0], gb_algorithm, fr->AllvsAll_workgb);
1672 #endif
1673         cnt = md->homenr*(md->nr/2+1);
1674         /* 9 flops for outer loop, 15 for inner */
1675         inc_nrnb(nrnb, eNR_BORN_AVA_CHAINRULE, md->homenr*9+cnt*15);
1676         return;
1677     }
1678
1679 #if 0 && defined (GMX_SIMD_X86_SSE2_OR_HIGHER)
1680     if (fr->use_simd_kernels)
1681     {
1682 #  ifdef GMX_DOUBLE
1683         calc_gb_chainrule_sse2_double(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
1684                                       f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
1685 #  else
1686         calc_gb_chainrule_sse2_single(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda, x[0],
1687                                       f[0], fr->fshift[0], fr->shift_vec[0], gb_algorithm, born, md);
1688 #  endif
1689     }
1690     else
1691     {
1692         calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
1693                           x, f, fr->fshift, fr->shift_vec, gb_algorithm, born, md);
1694     }
1695 #else
1696     calc_gb_chainrule(fr->natoms_force, &(fr->gblist), fr->dadx, fr->dvda,
1697                       x, f, fr->fshift, fr->shift_vec, gb_algorithm, born);
1698 #endif
1699
1700     if (!fr->bAllvsAll)
1701     {
1702         /* 9 flops for outer loop, 15 for inner */
1703         inc_nrnb(nrnb, eNR_BORN_CHAINRULE, fr->gblist.nri*9+fr->gblist.nrj*15);
1704     }
1705 }
1706
1707 static void add_j_to_gblist(gbtmpnbl_t *list, int aj)
1708 {
1709     if (list->naj >= list->aj_nalloc)
1710     {
1711         list->aj_nalloc = over_alloc_large(list->naj+1);
1712         srenew(list->aj, list->aj_nalloc);
1713     }
1714
1715     list->aj[list->naj++] = aj;
1716 }
1717
1718 static gbtmpnbl_t *find_gbtmplist(struct gbtmpnbls *lists, int shift)
1719 {
1720     int ind, i;
1721
1722     /* Search the list with the same shift, if there is one */
1723     ind = 0;
1724     while (ind < lists->nlist && shift != lists->list[ind].shift)
1725     {
1726         ind++;
1727     }
1728     if (ind == lists->nlist)
1729     {
1730         if (lists->nlist == lists->list_nalloc)
1731         {
1732             lists->list_nalloc++;
1733             srenew(lists->list, lists->list_nalloc);
1734             for (i = lists->nlist; i < lists->list_nalloc; i++)
1735             {
1736                 lists->list[i].aj        = NULL;
1737                 lists->list[i].aj_nalloc = 0;
1738             }
1739
1740         }
1741
1742         lists->list[lists->nlist].shift = shift;
1743         lists->list[lists->nlist].naj   = 0;
1744         lists->nlist++;
1745     }
1746
1747     return &lists->list[ind];
1748 }
1749
1750 static void add_bondeds_to_gblist(t_ilist *il,
1751                                   gmx_bool bMolPBC, t_pbc *pbc, t_graph *g, rvec *x,
1752                                   struct gbtmpnbls *nls)
1753 {
1754     int         ind, j, ai, aj, shift, found;
1755     rvec        dx;
1756     ivec        dt;
1757     gbtmpnbl_t *list;
1758
1759     shift = CENTRAL;
1760     for (ind = 0; ind < il->nr; ind += 3)
1761     {
1762         ai = il->iatoms[ind+1];
1763         aj = il->iatoms[ind+2];
1764
1765         shift = CENTRAL;
1766         if (g != NULL)
1767         {
1768             rvec_sub(x[ai], x[aj], dx);
1769             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
1770             shift = IVEC2IS(dt);
1771         }
1772         else if (bMolPBC)
1773         {
1774             shift = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
1775         }
1776
1777         /* Find the list for this shift or create one */
1778         list = find_gbtmplist(&nls[ai], shift);
1779
1780         found = 0;
1781
1782         /* So that we do not add the same bond twice.
1783          * This happens with some constraints between 1-3 atoms
1784          * that are in the bond-list but should not be in the GB nb-list */
1785         for (j = 0; j < list->naj; j++)
1786         {
1787             if (list->aj[j] == aj)
1788             {
1789                 found = 1;
1790             }
1791         }
1792
1793         if (found == 0)
1794         {
1795             if (ai == aj)
1796             {
1797                 gmx_incons("ai == aj");
1798             }
1799
1800             add_j_to_gblist(list, aj);
1801         }
1802     }
1803 }
1804
1805 static int
1806 compare_int (const void * a, const void * b)
1807 {
1808     return ( *(int*)a - *(int*)b );
1809 }
1810
1811
1812
1813 int make_gb_nblist(t_commrec *cr, int gb_algorithm,
1814                    rvec x[], matrix box,
1815                    t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born)
1816 {
1817     int               i, l, ii, j, k, n, nj0, nj1, ai, aj, at0, at1, found, shift, s;
1818     int               apa;
1819     t_nblist         *nblist;
1820     t_pbc             pbc;
1821
1822     struct gbtmpnbls *nls;
1823     gbtmpnbl_t       *list = NULL;
1824
1825     set_pbc(&pbc, fr->ePBC, box);
1826     nls   = born->nblist_work;
1827
1828     for (i = 0; i < born->nr; i++)
1829     {
1830         nls[i].nlist = 0;
1831     }
1832
1833     if (fr->bMolPBC)
1834     {
1835         set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
1836     }
1837
1838     switch (gb_algorithm)
1839     {
1840         case egbHCT:
1841         case egbOBC:
1842             /* Loop over 1-2, 1-3 and 1-4 interactions */
1843             for (j = F_GB12; j <= F_GB14; j++)
1844             {
1845                 add_bondeds_to_gblist(&idef->il[j], fr->bMolPBC, &pbc, graph, x, nls);
1846             }
1847             break;
1848         case egbSTILL:
1849             /* Loop over 1-4 interactions */
1850             add_bondeds_to_gblist(&idef->il[F_GB14], fr->bMolPBC, &pbc, graph, x, nls);
1851             break;
1852         default:
1853             gmx_incons("Unknown GB algorithm");
1854     }
1855
1856     /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1857     for (n = 0; (n < fr->nnblists); n++)
1858     {
1859         for (i = 0; (i < eNL_NR); i++)
1860         {
1861             nblist = &(fr->nblists[n].nlist_sr[i]);
1862
1863             if (nblist->nri > 0 && (i == eNL_VDWQQ || i == eNL_QQ))
1864             {
1865                 for (j = 0; j < nblist->nri; j++)
1866                 {
1867                     ai    = nblist->iinr[j];
1868                     shift = nblist->shift[j];
1869
1870                     /* Find the list for this shift or create one */
1871                     list = find_gbtmplist(&nls[ai], shift);
1872
1873                     nj0 = nblist->jindex[j];
1874                     nj1 = nblist->jindex[j+1];
1875
1876                     /* Add all the j-atoms in the non-bonded list to the GB list */
1877                     for (k = nj0; k < nj1; k++)
1878                     {
1879                         add_j_to_gblist(list, nblist->jjnr[k]);
1880                     }
1881                 }
1882             }
1883         }
1884     }
1885
1886     /* Zero out some counters */
1887     fr->gblist.nri = 0;
1888     fr->gblist.nrj = 0;
1889
1890     fr->gblist.jindex[0] = fr->gblist.nri;
1891
1892     for (i = 0; i < fr->natoms_force; i++)
1893     {
1894         for (s = 0; s < nls[i].nlist; s++)
1895         {
1896             list = &nls[i].list[s];
1897
1898             /* Only add those atoms that actually have neighbours */
1899             if (born->use[i] != 0)
1900             {
1901                 fr->gblist.iinr[fr->gblist.nri]  = i;
1902                 fr->gblist.shift[fr->gblist.nri] = list->shift;
1903                 fr->gblist.nri++;
1904
1905                 for (k = 0; k < list->naj; k++)
1906                 {
1907                     /* Memory allocation for jjnr */
1908                     if (fr->gblist.nrj >= fr->gblist.maxnrj)
1909                     {
1910                         fr->gblist.maxnrj += over_alloc_large(fr->gblist.maxnrj);
1911
1912                         if (debug)
1913                         {
1914                             fprintf(debug, "Increasing GB neighbourlist j size to %d\n", fr->gblist.maxnrj);
1915                         }
1916
1917                         srenew(fr->gblist.jjnr, fr->gblist.maxnrj);
1918                     }
1919
1920                     /* Put in list */
1921                     if (i == list->aj[k])
1922                     {
1923                         gmx_incons("i == list->aj[k]");
1924                     }
1925                     fr->gblist.jjnr[fr->gblist.nrj++] = list->aj[k];
1926                 }
1927
1928                 fr->gblist.jindex[fr->gblist.nri] = fr->gblist.nrj;
1929             }
1930         }
1931     }
1932
1933
1934 #ifdef SORT_GB_LIST
1935     for (i = 0; i < fr->gblist.nri; i++)
1936     {
1937         nj0 = fr->gblist.jindex[i];
1938         nj1 = fr->gblist.jindex[i+1];
1939         ai  = fr->gblist.iinr[i];
1940
1941         /* Temporary fix */
1942         for (j = nj0; j < nj1; j++)
1943         {
1944             if (fr->gblist.jjnr[j] < ai)
1945             {
1946                 fr->gblist.jjnr[j] += fr->natoms_force;
1947             }
1948         }
1949         qsort(fr->gblist.jjnr+nj0, nj1-nj0, sizeof(int), compare_int);
1950         /* Fix back */
1951         for (j = nj0; j < nj1; j++)
1952         {
1953             if (fr->gblist.jjnr[j] >= fr->natoms_force)
1954             {
1955                 fr->gblist.jjnr[j] -= fr->natoms_force;
1956             }
1957         }
1958
1959     }
1960 #endif
1961
1962     return 0;
1963 }
1964
1965 void make_local_gb(const t_commrec *cr, gmx_genborn_t *born, int gb_algorithm)
1966 {
1967     int           i, at0, at1;
1968     gmx_domdec_t *dd = NULL;
1969
1970     if (DOMAINDECOMP(cr))
1971     {
1972         dd  = cr->dd;
1973         at0 = 0;
1974         at1 = dd->nat_tot;
1975     }
1976     else
1977     {
1978         /* Single node or particle decomp (global==local), just copy pointers and return */
1979         if (gb_algorithm == egbSTILL)
1980         {
1981             born->gpol      = born->gpol_globalindex;
1982             born->vsolv     = born->vsolv_globalindex;
1983             born->gb_radius = born->gb_radius_globalindex;
1984         }
1985         else
1986         {
1987             born->param     = born->param_globalindex;
1988             born->gb_radius = born->gb_radius_globalindex;
1989         }
1990
1991         born->use = born->use_globalindex;
1992
1993         return;
1994     }
1995
1996     /* Reallocation of local arrays if necessary */
1997     /* fr->natoms_force is equal to dd->nat_tot */
1998     if (DOMAINDECOMP(cr) && dd->nat_tot > born->nalloc)
1999     {
2000         int nalloc;
2001
2002         nalloc = dd->nat_tot;
2003
2004         /* Arrays specific to different gb algorithms */
2005         if (gb_algorithm == egbSTILL)
2006         {
2007             srenew(born->gpol,  nalloc+3);
2008             srenew(born->vsolv, nalloc+3);
2009             srenew(born->gb_radius, nalloc+3);
2010             for (i = born->nalloc; (i < nalloc+3); i++)
2011             {
2012                 born->gpol[i]      = 0;
2013                 born->vsolv[i]     = 0;
2014                 born->gb_radius[i] = 0;
2015             }
2016         }
2017         else
2018         {
2019             srenew(born->param, nalloc+3);
2020             srenew(born->gb_radius, nalloc+3);
2021             for (i = born->nalloc; (i < nalloc+3); i++)
2022             {
2023                 born->param[i]     = 0;
2024                 born->gb_radius[i] = 0;
2025             }
2026         }
2027
2028         /* All gb-algorithms use the array for vsites exclusions */
2029         srenew(born->use,    nalloc+3);
2030         for (i = born->nalloc; (i < nalloc+3); i++)
2031         {
2032             born->use[i] = 0;
2033         }
2034
2035         born->nalloc = nalloc;
2036     }
2037
2038     /* With dd, copy algorithm specific arrays */
2039     if (gb_algorithm == egbSTILL)
2040     {
2041         for (i = at0; i < at1; i++)
2042         {
2043             born->gpol[i]      = born->gpol_globalindex[dd->gatindex[i]];
2044             born->vsolv[i]     = born->vsolv_globalindex[dd->gatindex[i]];
2045             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2046             born->use[i]       = born->use_globalindex[dd->gatindex[i]];
2047         }
2048     }
2049     else
2050     {
2051         for (i = at0; i < at1; i++)
2052         {
2053             born->param[i]     = born->param_globalindex[dd->gatindex[i]];
2054             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2055             born->use[i]       = born->use_globalindex[dd->gatindex[i]];
2056         }
2057     }
2058 }