src/gromacs/mdlib/genborn_allvsall.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2009, The GROMACS Development Team.
   6  * Copyright (c) 2010,2014,2015, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "genborn_allvsall.h"
  40
  41 #include <cmath>
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/legacyheaders/genborn.h"
  46 #include "gromacs/legacyheaders/network.h"
  47 #include "gromacs/legacyheaders/types/simple.h"
  48 #include "gromacs/math/units.h"
  49 #include "gromacs/math/vec.h"
  50 #include "gromacs/utility/smalloc.h"
  51
  52
  53 typedef struct
  54 {
  55     int *      jindex_gb;
  56     int **     exclusion_mask_gb;
  57 }
  58 gmx_allvsallgb2_data_t;
  59
  60 static int
  61 calc_maxoffset(int i, int natoms)
  62 {
  63     int maxoffset;
  64
  65     if ((natoms % 2) == 1)
  66     {
  67         /* Odd number of atoms, easy */
  68         maxoffset = natoms/2;
  69     }
  70     else if ((natoms % 4) == 0)
  71     {
  72         /* Multiple of four is hard */
  73         if (i < natoms/2)
  74         {
  75             if ((i % 2) == 0)
  76             {
  77                 maxoffset = natoms/2;
  78             }
  79             else
  80             {
  81                 maxoffset = natoms/2-1;
  82             }
  83         }
  84         else
  85         {
  86             if ((i % 2) == 1)
  87             {
  88                 maxoffset = natoms/2;
  89             }
  90             else
  91             {
  92                 maxoffset = natoms/2-1;
  93             }
  94         }
  95     }
  96     else
  97     {
  98         /* natoms/2 = odd */
  99         if ((i % 2) == 0)
 100         {
 101             maxoffset = natoms/2;
 102         }
 103         else
 104         {
 105             maxoffset = natoms/2-1;
 106         }
 107     }
 108
 109     return maxoffset;
 110 }
 111
 112 static void
 113 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
 114                                 t_ilist     *                  ilist,
 115                                 int                            natoms,
 116                                 gmx_bool                       bInclude12,
 117                                 gmx_bool                       bInclude13,
 118                                 gmx_bool                       bInclude14)
 119 {
 120     int i, j, k;
 121     int a1, a2;
 122     int max_offset;
 123     int max_excl_offset;
 124
 125     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
 126      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
 127      * whether they should interact or not.
 128      *
 129      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
 130      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
 131      * we create a jindex array with three elements per i atom: the starting point, the point to
 132      * which we need to check exclusions, and the end point.
 133      * This way we only have to allocate a short exclusion mask per i atom.
 134      */
 135
 136     /* Allocate memory for jindex arrays */
 137     snew(aadata->jindex_gb, 3*natoms);
 138
 139     /* Pointer to lists with exclusion masks */
 140     snew(aadata->exclusion_mask_gb, natoms);
 141
 142     for (i = 0; i < natoms; i++)
 143     {
 144         /* Start */
 145         aadata->jindex_gb[3*i]       = i+1;
 146         max_offset                   = calc_maxoffset(i, natoms);
 147
 148         /* first check the max range of atoms to EXCLUDE */
 149         max_excl_offset = 0;
 150         if (!bInclude12)
 151         {
 152             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 153             {
 154                 a1 = ilist[F_GB12].iatoms[j+1];
 155                 a2 = ilist[F_GB12].iatoms[j+2];
 156
 157                 if (a1 == i)
 158                 {
 159                     k = a2-a1;
 160                 }
 161                 else if (a2 == i)
 162                 {
 163                     k = a1+natoms-a2;
 164                 }
 165                 else
 166                 {
 167                     continue;
 168                 }
 169                 if (k > 0 && k <= max_offset)
 170                 {
 171                     max_excl_offset = std::max(k, max_excl_offset);
 172                 }
 173             }
 174         }
 175         if (!bInclude13)
 176         {
 177             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 178             {
 179                 a1 = ilist[F_GB13].iatoms[j+1];
 180                 a2 = ilist[F_GB13].iatoms[j+2];
 181
 182
 183                 if (a1 == i)
 184                 {
 185                     k = a2-a1;
 186                 }
 187                 else if (a2 == i)
 188                 {
 189                     k = a1+natoms-a2;
 190                 }
 191                 else
 192                 {
 193                     continue;
 194                 }
 195                 if (k > 0 && k <= max_offset)
 196                 {
 197                     max_excl_offset = std::max(k, max_excl_offset);
 198                 }
 199             }
 200         }
 201         if (!bInclude14)
 202         {
 203             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 204             {
 205                 a1 = ilist[F_GB14].iatoms[j+1];
 206                 a2 = ilist[F_GB14].iatoms[j+2];
 207
 208
 209                 if (a1 == i)
 210                 {
 211                     k = a2-a1;
 212                 }
 213                 else if (a2 == i)
 214                 {
 215                     k = a1+natoms-a2;
 216                 }
 217                 else
 218                 {
 219                     continue;
 220                 }
 221                 if (k > 0 && k <= max_offset)
 222                 {
 223                     max_excl_offset = std::max(k, max_excl_offset);
 224                 }
 225             }
 226         }
 227         max_excl_offset = std::min(max_offset, max_excl_offset);
 228
 229         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
 230
 231         snew(aadata->exclusion_mask_gb[i], max_excl_offset);
 232
 233         /* Include everything by default */
 234         for (j = 0; j < max_excl_offset; j++)
 235         {
 236             /* Use all-ones to mark interactions that should be present, compatible with SSE */
 237             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
 238         }
 239         /* Go through exclusions again */
 240         if (!bInclude12)
 241         {
 242             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 243             {
 244                 a1 = ilist[F_GB12].iatoms[j+1];
 245                 a2 = ilist[F_GB12].iatoms[j+2];
 246
 247                 if (a1 == i)
 248                 {
 249                     k = a2-a1;
 250                 }
 251                 else if (a2 == i)
 252                 {
 253                     k = a1+natoms-a2;
 254                 }
 255                 else
 256                 {
 257                     continue;
 258                 }
 259                 if (k > 0 && k <= max_offset)
 260                 {
 261                     aadata->exclusion_mask_gb[i][k-1] = 0;
 262                 }
 263             }
 264         }
 265         if (!bInclude13)
 266         {
 267             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 268             {
 269                 a1 = ilist[F_GB13].iatoms[j+1];
 270                 a2 = ilist[F_GB13].iatoms[j+2];
 271
 272                 if (a1 == i)
 273                 {
 274                     k = a2-a1;
 275                 }
 276                 else if (a2 == i)
 277                 {
 278                     k = a1+natoms-a2;
 279                 }
 280                 else
 281                 {
 282                     continue;
 283                 }
 284                 if (k > 0 && k <= max_offset)
 285                 {
 286                     aadata->exclusion_mask_gb[i][k-1] = 0;
 287                 }
 288             }
 289         }
 290         if (!bInclude14)
 291         {
 292             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 293             {
 294                 a1 = ilist[F_GB14].iatoms[j+1];
 295                 a2 = ilist[F_GB14].iatoms[j+2];
 296
 297                 if (a1 == i)
 298                 {
 299                     k = a2-a1;
 300                 }
 301                 else if (a2 == i)
 302                 {
 303                     k = a1+natoms-a2;
 304                 }
 305                 else
 306                 {
 307                     continue;
 308                 }
 309                 if (k > 0 && k <= max_offset)
 310                 {
 311                     aadata->exclusion_mask_gb[i][k-1] = 0;
 312                 }
 313             }
 314         }
 315
 316         /* End */
 317
 318         /* End */
 319         aadata->jindex_gb[3*i+2] = i+1+max_offset;
 320     }
 321 }
 322
 323
 324 static void
 325 genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
 326                        t_ilist     *                  ilist,
 327                        int                            natoms,
 328                        gmx_bool                       bInclude12,
 329                        gmx_bool                       bInclude13,
 330                        gmx_bool                       bInclude14)
 331 {
 332     gmx_allvsallgb2_data_t *aadata;
 333
 334     snew(aadata, 1);
 335     *p_aadata = aadata;
 336
 337     setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
 338 }
 339
 340
 341
 342 int
 343 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
 344                                   t_mdatoms *            mdatoms,
 345                                   gmx_genborn_t *        born,
 346                                   gmx_localtop_t *       top,
 347                                   real *                 x,
 348                                   void *                 work)
 349 {
 350     gmx_allvsallgb2_data_t *aadata;
 351     int                     natoms;
 352     int                     ni0, ni1;
 353     int                     nj0, nj1, nj2;
 354     int                     i, j, k, n;
 355     int              *      mask;
 356
 357     real                    ix, iy, iz;
 358     real                    jx, jy, jz;
 359     real                    dx, dy, dz;
 360     real                    rsq, rinv;
 361     real                    gpi, rai, vai;
 362     real                    prod_ai;
 363     real                    irsq, idr4, idr6;
 364     real                    raj, rvdw, ratio;
 365     real                    vaj, ccf, dccf, theta, cosq;
 366     real                    term, prod, icf4, icf6, gpi2, factor, sinq;
 367
 368     natoms              = mdatoms->nr;
 369     ni0                 = 0;
 370     ni1                 = mdatoms->homenr;
 371     factor              = 0.5*ONE_4PI_EPS0;
 372     n                   = 0;
 373
 374     aadata = *((gmx_allvsallgb2_data_t **)work);
 375
 376     if (aadata == NULL)
 377     {
 378         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 379                                FALSE, FALSE, TRUE);
 380         *((gmx_allvsallgb2_data_t **)work) = aadata;
 381     }
 382
 383
 384     for (i = 0; i < born->nr; i++)
 385     {
 386         born->gpol_still_work[i] = 0;
 387     }
 388
 389
 390     for (i = ni0; i < ni1; i++)
 391     {
 392         /* We assume shifts are NOT used for all-vs-all interactions */
 393
 394         /* Load i atom data */
 395         ix                = x[3*i];
 396         iy                = x[3*i+1];
 397         iz                = x[3*i+2];
 398
 399         gpi               = 0.0;
 400
 401         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 402         vai     = born->vsolv[i];
 403         prod_ai = STILL_P4*vai;
 404
 405         /* Load limits for loop over neighbors */
 406         nj0              = aadata->jindex_gb[3*i];
 407         nj1              = aadata->jindex_gb[3*i+1];
 408         nj2              = aadata->jindex_gb[3*i+2];
 409
 410         mask             = aadata->exclusion_mask_gb[i];
 411
 412         /* Prologue part, including exclusion mask */
 413         for (j = nj0; j < nj1; j++, mask++)
 414         {
 415             if (*mask != 0)
 416             {
 417                 k = j%natoms;
 418
 419                 /* load j atom coordinates */
 420                 jx                = x[3*k];
 421                 jy                = x[3*k+1];
 422                 jz                = x[3*k+2];
 423
 424                 /* Calculate distance */
 425                 dx                = ix - jx;
 426                 dy                = iy - jy;
 427                 dz                = iz - jz;
 428                 rsq               = dx*dx+dy*dy+dz*dz;
 429
 430                 /* Calculate 1/r and 1/r2 */
 431                 rinv              = gmx_invsqrt(rsq);
 432                 irsq              = rinv*rinv;
 433                 idr4              = irsq*irsq;
 434                 idr6              = idr4*irsq;
 435
 436                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 437
 438                 rvdw  = rai + raj;
 439
 440                 ratio = rsq / (rvdw * rvdw);
 441                 vaj   = born->vsolv[k];
 442
 443
 444                 if (ratio > STILL_P5INV)
 445                 {
 446                     ccf  = 1.0;
 447                     dccf = 0.0;
 448                 }
 449                 else
 450                 {
 451                     theta = ratio*STILL_PIP5;
 452                     cosq  = cos(theta);
 453                     term  = 0.5*(1.0-cosq);
 454                     ccf   = term*term;
 455                     sinq  = 1.0 - cosq*cosq;
 456                     dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 457                 }
 458
 459                 prod          = STILL_P4*vaj;
 460                 icf4          = ccf*idr4;
 461                 icf6          = (4*ccf-dccf)*idr6;
 462
 463                 born->gpol_still_work[k] += prod_ai*icf4;
 464                 gpi                       = gpi+prod*icf4;
 465
 466                 /* Save ai->aj and aj->ai chain rule terms */
 467                 fr->dadx[n++]   = prod*icf6;
 468                 fr->dadx[n++]   = prod_ai*icf6;
 469
 470                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
 471
 472             }
 473         }
 474
 475         /* Main part, no exclusions */
 476         for (j = nj1; j < nj2; j++)
 477         {
 478             k = j%natoms;
 479
 480             /* load j atom coordinates */
 481             jx                = x[3*k];
 482             jy                = x[3*k+1];
 483             jz                = x[3*k+2];
 484
 485             /* Calculate distance */
 486             dx                = ix - jx;
 487             dy                = iy - jy;
 488             dz                = iz - jz;
 489             rsq               = dx*dx+dy*dy+dz*dz;
 490
 491             /* Calculate 1/r and 1/r2 */
 492             rinv              = gmx_invsqrt(rsq);
 493             irsq              = rinv*rinv;
 494             idr4              = irsq*irsq;
 495             idr6              = idr4*irsq;
 496
 497             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 498
 499             rvdw  = rai + raj;
 500
 501             ratio = rsq / (rvdw * rvdw);
 502             vaj   = born->vsolv[k];
 503
 504             if (ratio > STILL_P5INV)
 505             {
 506                 ccf  = 1.0;
 507                 dccf = 0.0;
 508             }
 509             else
 510             {
 511                 theta = ratio*STILL_PIP5;
 512                 cosq  = cos(theta);
 513                 term  = 0.5*(1.0-cosq);
 514                 ccf   = term*term;
 515                 sinq  = 1.0 - cosq*cosq;
 516                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 517             }
 518
 519             prod          = STILL_P4*vaj;
 520             icf4          = ccf*idr4;
 521             icf6          = (4*ccf-dccf)*idr6;
 522
 523             born->gpol_still_work[k] += prod_ai*icf4;
 524             gpi                       = gpi+prod*icf4;
 525
 526             /* Save ai->aj and aj->ai chain rule terms */
 527             fr->dadx[n++]   = prod*icf6;
 528             fr->dadx[n++]   = prod_ai*icf6;
 529         }
 530         born->gpol_still_work[i] += gpi;
 531     }
 532
 533     /* Parallel summations would go here if ever implemented with DD */
 534
 535     /* Calculate the radii */
 536     for (i = 0; i < natoms; i++)
 537     {
 538         if (born->use[i] != 0)
 539         {
 540             gpi             = born->gpol[i]+born->gpol_still_work[i];
 541             gpi2            = gpi * gpi;
 542             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
 543             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 544         }
 545     }
 546
 547     return 0;
 548 }
 549
 550
 551
 552 int
 553 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
 554                                     t_mdatoms *            mdatoms,
 555                                     gmx_genborn_t *        born,
 556                                     int                    gb_algorithm,
 557                                     gmx_localtop_t *       top,
 558                                     real *                 x,
 559                                     void *                 work)
 560 {
 561     gmx_allvsallgb2_data_t *aadata;
 562     int                     natoms;
 563     int                     ni0, ni1;
 564     int                     nj0, nj1, nj2;
 565     int                     i, j, k, n;
 566     int              *      mask;
 567
 568     real                    ix, iy, iz;
 569     real                    jx, jy, jz;
 570     real                    dx, dy, dz;
 571     real                    rsq, rinv;
 572     real                    prod, raj;
 573     real                    rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
 574     real                    dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
 575     real                    lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
 576     real                    tchain;
 577     real                    dadxi, dadxj;
 578     real                    rad, min_rad;
 579
 580     natoms              = mdatoms->nr;
 581     ni0                 = 0;
 582     ni1                 = mdatoms->homenr;
 583
 584     n       = 0;
 585     doffset = born->gb_doffset;
 586
 587     aadata = *((gmx_allvsallgb2_data_t **)work);
 588
 589     if (aadata == NULL)
 590     {
 591         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 592                                TRUE, TRUE, TRUE);
 593         *((gmx_allvsallgb2_data_t **)work) = aadata;
 594     }
 595
 596     for (i = 0; i < born->nr; i++)
 597     {
 598         born->gpol_hct_work[i] = 0;
 599     }
 600
 601     for (i = ni0; i < ni1; i++)
 602     {
 603         /* We assume shifts are NOT used for all-vs-all interactions */
 604
 605         /* Load i atom data */
 606         ix                = x[3*i];
 607         iy                = x[3*i+1];
 608         iz                = x[3*i+2];
 609
 610         rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
 611         rai_inv  = 1.0/rai;
 612
 613         sk_ai    = born->param[i];
 614         sk2_ai   = sk_ai*sk_ai;
 615
 616         sum_ai   = 0;
 617
 618         /* Load limits for loop over neighbors */
 619         nj0              = aadata->jindex_gb[3*i];
 620         nj1              = aadata->jindex_gb[3*i+1];
 621         nj2              = aadata->jindex_gb[3*i+2];
 622
 623         mask             = aadata->exclusion_mask_gb[i];
 624
 625         /* Prologue part, including exclusion mask */
 626         for (j = nj0; j < nj1; j++, mask++)
 627         {
 628             if (*mask != 0)
 629             {
 630                 k = j%natoms;
 631
 632                 /* load j atom coordinates */
 633                 jx                = x[3*k];
 634                 jy                = x[3*k+1];
 635                 jz                = x[3*k+2];
 636
 637                 /* Calculate distance */
 638                 dx                = ix - jx;
 639                 dy                = iy - jy;
 640                 dz                = iz - jz;
 641                 rsq               = dx*dx+dy*dy+dz*dz;
 642
 643                 /* Calculate 1/r and 1/r2 */
 644                 rinv              = gmx_invsqrt(rsq);
 645                 dr                = rsq*rinv;
 646
 647                 /* sk is precalculated in init_gb() */
 648                 sk    = born->param[k];
 649                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 650
 651                 /* aj -> ai interaction */
 652
 653
 654                 if (rai < dr+sk)
 655                 {
 656                     lij       = 1.0/(dr-sk);
 657                     dlij      = 1.0;
 658
 659                     if (rai > dr-sk)
 660                     {
 661                         lij  = rai_inv;
 662                         dlij = 0.0;
 663                     }
 664
 665                     uij      = 1.0/(dr+sk);
 666                     lij2     = lij  * lij;
 667                     lij3     = lij2 * lij;
 668                     uij2     = uij  * uij;
 669                     uij3     = uij2 * uij;
 670
 671                     diff2    = uij2-lij2;
 672
 673                     lij_inv  = gmx_invsqrt(lij2);
 674                     sk2      = sk*sk;
 675                     sk2_rinv = sk2*rinv;
 676                     prod     = 0.25*sk2_rinv;
 677
 678                     log_term = std::log(uij*lij_inv);
 679                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 680                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 681
 682                     if (rai < sk-dr)
 683                     {
 684                         tmp = tmp + 2.0 * (rai_inv-lij);
 685                     }
 686
 687                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 688                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
 689
 690                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 691
 692                     dadxi = (dlij*t1+t2+t3)*rinv;
 693
 694                     sum_ai += 0.5*tmp;
 695                 }
 696                 else
 697                 {
 698                     dadxi = 0.0;
 699                 }
 700
 701                 /* ai -> aj interaction */
 702                 if (raj < dr + sk_ai)
 703                 {
 704                     lij     = 1.0/(dr-sk_ai);
 705                     dlij    = 1.0;
 706                     raj_inv = 1.0/raj;
 707
 708                     if (raj > dr-sk_ai)
 709                     {
 710                         lij  = raj_inv;
 711                         dlij = 0.0;
 712                     }
 713
 714                     lij2     = lij  * lij;
 715                     lij3     = lij2 * lij;
 716
 717                     uij      = 1.0/(dr+sk_ai);
 718                     uij2     = uij  * uij;
 719                     uij3     = uij2 * uij;
 720
 721                     diff2    = uij2-lij2;
 722
 723                     lij_inv  = gmx_invsqrt(lij2);
 724                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 725                     sk2_rinv = sk2*rinv;
 726                     prod     = 0.25 * sk2_rinv;
 727
 728                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 729                     log_term = std::log(uij*lij_inv);
 730
 731                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 732
 733                     if (raj < sk_ai-dr)
 734                     {
 735                         tmp     = tmp + 2.0 * (raj_inv-lij);
 736                     }
 737
 738                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 739                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 740                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 741
 742                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
 743
 744                     born->gpol_hct_work[k] += 0.5*tmp;
 745                 }
 746                 else
 747                 {
 748                     dadxj = 0.0;
 749                 }
 750                 fr->dadx[n++] = dadxi;
 751                 fr->dadx[n++] = dadxj;
 752
 753             }
 754         }
 755
 756         /* Main part, no exclusions */
 757         for (j = nj1; j < nj2; j++)
 758         {
 759             k = j%natoms;
 760
 761             /* load j atom coordinates */
 762             jx                = x[3*k];
 763             jy                = x[3*k+1];
 764             jz                = x[3*k+2];
 765
 766             /* Calculate distance */
 767             dx                = ix - jx;
 768             dy                = iy - jy;
 769             dz                = iz - jz;
 770             rsq               = dx*dx+dy*dy+dz*dz;
 771
 772             /* Calculate 1/r and 1/r2 */
 773             rinv              = gmx_invsqrt(rsq);
 774             dr                = rsq*rinv;
 775
 776             /* sk is precalculated in init_gb() */
 777             sk    = born->param[k];
 778             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 779
 780             /* aj -> ai interaction */
 781             if (rai < dr+sk)
 782             {
 783                 lij       = 1.0/(dr-sk);
 784                 dlij      = 1.0;
 785
 786                 if (rai > dr-sk)
 787                 {
 788                     lij  = rai_inv;
 789                     dlij = 0.0;
 790                 }
 791
 792                 uij      = 1.0/(dr+sk);
 793                 lij2     = lij  * lij;
 794                 lij3     = lij2 * lij;
 795                 uij2     = uij  * uij;
 796                 uij3     = uij2 * uij;
 797
 798                 diff2    = uij2-lij2;
 799
 800                 lij_inv  = gmx_invsqrt(lij2);
 801                 sk2      = sk*sk;
 802                 sk2_rinv = sk2*rinv;
 803                 prod     = 0.25*sk2_rinv;
 804
 805                 log_term = std::log(uij*lij_inv);
 806                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 807                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 808
 809                 if (rai < sk-dr)
 810                 {
 811                     tmp = tmp + 2.0 * (rai_inv-lij);
 812                 }
 813
 814                 /* duij    = 1.0; */
 815                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 816                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 817                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 818
 819                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 820
 821                 sum_ai += 0.5*tmp;
 822             }
 823             else
 824             {
 825                 dadxi = 0.0;
 826             }
 827
 828             /* ai -> aj interaction */
 829             if (raj < dr + sk_ai)
 830             {
 831                 lij     = 1.0/(dr-sk_ai);
 832                 dlij    = 1.0;
 833                 raj_inv = 1.0/raj;
 834
 835                 if (raj > dr-sk_ai)
 836                 {
 837                     lij  = raj_inv;
 838                     dlij = 0.0;
 839                 }
 840
 841                 lij2     = lij  * lij;
 842                 lij3     = lij2 * lij;
 843
 844                 uij      = 1.0/(dr+sk_ai);
 845                 uij2     = uij  * uij;
 846                 uij3     = uij2 * uij;
 847
 848                 diff2    = uij2-lij2;
 849
 850                 lij_inv  = gmx_invsqrt(lij2);
 851                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 852                 sk2_rinv = sk2*rinv;
 853                 prod     = 0.25 * sk2_rinv;
 854
 855                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 856                 log_term = std::log(uij*lij_inv);
 857
 858                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 859
 860                 if (raj < sk_ai-dr)
 861                 {
 862                     tmp     = tmp + 2.0 * (raj_inv-lij);
 863                 }
 864
 865                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 866                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 867                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 868
 869                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 870
 871                 born->gpol_hct_work[k] += 0.5*tmp;
 872             }
 873             else
 874             {
 875                 dadxj = 0.0;
 876             }
 877             fr->dadx[n++] = dadxi;
 878             fr->dadx[n++] = dadxj;
 879         }
 880         born->gpol_hct_work[i] += sum_ai;
 881     }
 882
 883     /* Parallel summations would go here if ever implemented with DD */
 884
 885     if (gb_algorithm == egbHCT)
 886     {
 887         /* HCT */
 888         for (i = 0; i < natoms; i++)
 889         {
 890             if (born->use[i] != 0)
 891             {
 892                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
 893                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
 894                 min_rad = rai + born->gb_doffset;
 895                 rad     = 1.0/sum_ai;
 896
 897                 born->bRad[i]   = std::max(rad, min_rad);
 898                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 899             }
 900         }
 901
 902     }
 903     else
 904     {
 905         /* OBC */
 906         /* Calculate the radii */
 907         for (i = 0; i < natoms; i++)
 908         {
 909             if (born->use[i] != 0)
 910             {
 911                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 912                 rai_inv2   = 1.0/rai;
 913                 rai        = rai-doffset;
 914                 rai_inv    = 1.0/rai;
 915                 sum_ai     = rai * born->gpol_hct_work[i];
 916                 sum_ai2    = sum_ai  * sum_ai;
 917                 sum_ai3    = sum_ai2 * sum_ai;
 918
 919                 tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
 920                 born->bRad[i] = rai_inv - tsum*rai_inv2;
 921                 born->bRad[i] = 1.0 / born->bRad[i];
 922
 923                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 924
 925                 tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
 926                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
 927             }
 928         }
 929     }
 930     return 0;
 931 }
 932
 933
 934
 935
 936
 937 int
 938 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
 939                                 t_mdatoms *            mdatoms,
 940                                 gmx_genborn_t *        born,
 941                                 real *                 x,
 942                                 real *                 f,
 943                                 int                    gb_algorithm,
 944                                 void *                 work)
 945 {
 946     gmx_allvsallgb2_data_t *aadata;
 947     int                     natoms;
 948     int                     ni0, ni1;
 949     int                     nj0, nj1, nj2;
 950     int                     i, j, k, n;
 951     int                     idx;
 952     int              *      mask;
 953
 954     real                    ix, iy, iz;
 955     real                    fix, fiy, fiz;
 956     real                    jx, jy, jz;
 957     real                    dx, dy, dz;
 958     real                    tx, ty, tz;
 959     real                    rbai, rbaj, fgb, fgb_ai, rbi;
 960     real              *     rb;
 961     real              *     dadx;
 962
 963     natoms              = mdatoms->nr;
 964     ni0                 = 0;
 965     ni1                 = mdatoms->homenr;
 966     dadx                = fr->dadx;
 967
 968     aadata = (gmx_allvsallgb2_data_t *)work;
 969
 970     n  = 0;
 971     rb = born->work;
 972
 973     /* Loop to get the proper form for the Born radius term */
 974     if (gb_algorithm == egbSTILL)
 975     {
 976         for (i = 0; i < natoms; i++)
 977         {
 978             rbi   = born->bRad[i];
 979             rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
 980         }
 981     }
 982     else if (gb_algorithm == egbHCT)
 983     {
 984         for (i = 0; i < natoms; i++)
 985         {
 986             rbi   = born->bRad[i];
 987             rb[i] = rbi * rbi * fr->dvda[i];
 988         }
 989     }
 990     else if (gb_algorithm == egbOBC)
 991     {
 992         for (idx = 0; idx < natoms; idx++)
 993         {
 994             rbi     = born->bRad[idx];
 995             rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
 996         }
 997     }
 998
 999     for (i = ni0; i < ni1; i++)
1000     {
1001         /* We assume shifts are NOT used for all-vs-all interactions */
1002
1003         /* Load i atom data */
1004         ix                = x[3*i];
1005         iy                = x[3*i+1];
1006         iz                = x[3*i+2];
1007
1008         fix               = 0;
1009         fiy               = 0;
1010         fiz               = 0;
1011
1012         rbai              = rb[i];
1013
1014         /* Load limits for loop over neighbors */
1015         nj0              = aadata->jindex_gb[3*i];
1016         nj1              = aadata->jindex_gb[3*i+1];
1017         nj2              = aadata->jindex_gb[3*i+2];
1018
1019         mask             = aadata->exclusion_mask_gb[i];
1020
1021         /* Prologue part, including exclusion mask */
1022         for (j = nj0; j < nj1; j++, mask++)
1023         {
1024             if (*mask != 0)
1025             {
1026                 k = j%natoms;
1027
1028                 /* load j atom coordinates */
1029                 jx                = x[3*k];
1030                 jy                = x[3*k+1];
1031                 jz                = x[3*k+2];
1032
1033                 /* Calculate distance */
1034                 dx                = ix - jx;
1035                 dy                = iy - jy;
1036                 dz                = iz - jz;
1037
1038                 rbaj              = rb[k];
1039
1040                 fgb     = rbai*dadx[n++];
1041                 fgb_ai  = rbaj*dadx[n++];
1042
1043                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1044                 fgb     = fgb + fgb_ai;
1045
1046                 tx      = fgb * dx;
1047                 ty      = fgb * dy;
1048                 tz      = fgb * dz;
1049
1050                 fix     = fix + tx;
1051                 fiy     = fiy + ty;
1052                 fiz     = fiz + tz;
1053
1054                 /* Update force on atom aj */
1055                 f[3*k]   = f[3*k] - tx;
1056                 f[3*k+1] = f[3*k+1] - ty;
1057                 f[3*k+2] = f[3*k+2] - tz;
1058             }
1059         }
1060
1061         /* Main part, no exclusions */
1062         for (j = nj1; j < nj2; j++)
1063         {
1064             k = j%natoms;
1065
1066             /* load j atom coordinates */
1067             jx                = x[3*k];
1068             jy                = x[3*k+1];
1069             jz                = x[3*k+2];
1070
1071             /* Calculate distance */
1072             dx                = ix - jx;
1073             dy                = iy - jy;
1074             dz                = iz - jz;
1075
1076             rbaj              = rb[k];
1077
1078             fgb     = rbai*dadx[n++];
1079             fgb_ai  = rbaj*dadx[n++];
1080
1081             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1082             fgb     = fgb + fgb_ai;
1083
1084             tx      = fgb * dx;
1085             ty      = fgb * dy;
1086             tz      = fgb * dz;
1087
1088             fix     = fix + tx;
1089             fiy     = fiy + ty;
1090             fiz     = fiz + tz;
1091
1092             /* Update force on atom aj */
1093             f[3*k]   = f[3*k] - tx;
1094             f[3*k+1] = f[3*k+1] - ty;
1095             f[3*k+2] = f[3*k+2] - tz;
1096         }
1097         /* Update force and shift forces on atom ai */
1098         f[3*i]   = f[3*i] + fix;
1099         f[3*i+1] = f[3*i+1] + fiy;
1100         f[3*i+2] = f[3*i+2] + fiz;
1101     }
1102
1103     return 0;
1104 }