src/gromacs/mdlib/genborn_allvsall.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2009, The GROMACS Development Team.
   6  * Copyright (c) 2010,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "genborn_allvsall.h"
  40
  41 #include <math.h>
  42
  43 #include "gromacs/legacyheaders/genborn.h"
  44 #include "gromacs/legacyheaders/network.h"
  45 #include "gromacs/legacyheaders/types/simple.h"
  46 #include "gromacs/math/units.h"
  47 #include "gromacs/math/vec.h"
  48 #include "gromacs/utility/smalloc.h"
  49
  50
  51 typedef struct
  52 {
  53     int *      jindex_gb;
  54     int **     exclusion_mask_gb;
  55 }
  56 gmx_allvsallgb2_data_t;
  57
  58 static int
  59 calc_maxoffset(int i, int natoms)
  60 {
  61     int maxoffset;
  62
  63     if ((natoms % 2) == 1)
  64     {
  65         /* Odd number of atoms, easy */
  66         maxoffset = natoms/2;
  67     }
  68     else if ((natoms % 4) == 0)
  69     {
  70         /* Multiple of four is hard */
  71         if (i < natoms/2)
  72         {
  73             if ((i % 2) == 0)
  74             {
  75                 maxoffset = natoms/2;
  76             }
  77             else
  78             {
  79                 maxoffset = natoms/2-1;
  80             }
  81         }
  82         else
  83         {
  84             if ((i % 2) == 1)
  85             {
  86                 maxoffset = natoms/2;
  87             }
  88             else
  89             {
  90                 maxoffset = natoms/2-1;
  91             }
  92         }
  93     }
  94     else
  95     {
  96         /* natoms/2 = odd */
  97         if ((i % 2) == 0)
  98         {
  99             maxoffset = natoms/2;
 100         }
 101         else
 102         {
 103             maxoffset = natoms/2-1;
 104         }
 105     }
 106
 107     return maxoffset;
 108 }
 109
 110 static void
 111 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
 112                                 t_ilist     *                  ilist,
 113                                 int                            natoms,
 114                                 gmx_bool                       bInclude12,
 115                                 gmx_bool                       bInclude13,
 116                                 gmx_bool                       bInclude14)
 117 {
 118     int i, j, k, tp;
 119     int a1, a2;
 120     int nj0, nj1;
 121     int max_offset;
 122     int max_excl_offset;
 123     int nj;
 124
 125     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
 126      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
 127      * whether they should interact or not.
 128      *
 129      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
 130      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
 131      * we create a jindex array with three elements per i atom: the starting point, the point to
 132      * which we need to check exclusions, and the end point.
 133      * This way we only have to allocate a short exclusion mask per i atom.
 134      */
 135
 136     /* Allocate memory for jindex arrays */
 137     snew(aadata->jindex_gb, 3*natoms);
 138
 139     /* Pointer to lists with exclusion masks */
 140     snew(aadata->exclusion_mask_gb, natoms);
 141
 142     for (i = 0; i < natoms; i++)
 143     {
 144         /* Start */
 145         aadata->jindex_gb[3*i]       = i+1;
 146         max_offset                   = calc_maxoffset(i, natoms);
 147
 148         /* first check the max range of atoms to EXCLUDE */
 149         max_excl_offset = 0;
 150         if (!bInclude12)
 151         {
 152             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 153             {
 154                 a1 = ilist[F_GB12].iatoms[j+1];
 155                 a2 = ilist[F_GB12].iatoms[j+2];
 156
 157                 if (a1 == i)
 158                 {
 159                     k = a2-a1;
 160                 }
 161                 else if (a2 == i)
 162                 {
 163                     k = a1+natoms-a2;
 164                 }
 165                 else
 166                 {
 167                     continue;
 168                 }
 169                 if (k > 0 && k <= max_offset)
 170                 {
 171                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 172                 }
 173             }
 174         }
 175         if (!bInclude13)
 176         {
 177             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 178             {
 179                 a1 = ilist[F_GB13].iatoms[j+1];
 180                 a2 = ilist[F_GB13].iatoms[j+2];
 181
 182
 183                 if (a1 == i)
 184                 {
 185                     k = a2-a1;
 186                 }
 187                 else if (a2 == i)
 188                 {
 189                     k = a1+natoms-a2;
 190                 }
 191                 else
 192                 {
 193                     continue;
 194                 }
 195                 if (k > 0 && k <= max_offset)
 196                 {
 197                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 198                 }
 199             }
 200         }
 201         if (!bInclude14)
 202         {
 203             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 204             {
 205                 a1 = ilist[F_GB14].iatoms[j+1];
 206                 a2 = ilist[F_GB14].iatoms[j+2];
 207
 208
 209                 if (a1 == i)
 210                 {
 211                     k = a2-a1;
 212                 }
 213                 else if (a2 == i)
 214                 {
 215                     k = a1+natoms-a2;
 216                 }
 217                 else
 218                 {
 219                     continue;
 220                 }
 221                 if (k > 0 && k <= max_offset)
 222                 {
 223                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 224                 }
 225             }
 226         }
 227         max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
 228
 229         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
 230
 231         snew(aadata->exclusion_mask_gb[i], max_excl_offset);
 232
 233         /* Include everything by default */
 234         for (j = 0; j < max_excl_offset; j++)
 235         {
 236             /* Use all-ones to mark interactions that should be present, compatible with SSE */
 237             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
 238         }
 239         /* Go through exclusions again */
 240         if (!bInclude12)
 241         {
 242             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 243             {
 244                 a1 = ilist[F_GB12].iatoms[j+1];
 245                 a2 = ilist[F_GB12].iatoms[j+2];
 246
 247                 if (a1 == i)
 248                 {
 249                     k = a2-a1;
 250                 }
 251                 else if (a2 == i)
 252                 {
 253                     k = a1+natoms-a2;
 254                 }
 255                 else
 256                 {
 257                     continue;
 258                 }
 259                 if (k > 0 && k <= max_offset)
 260                 {
 261                     aadata->exclusion_mask_gb[i][k-1] = 0;
 262                 }
 263             }
 264         }
 265         if (!bInclude13)
 266         {
 267             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 268             {
 269                 a1 = ilist[F_GB13].iatoms[j+1];
 270                 a2 = ilist[F_GB13].iatoms[j+2];
 271
 272                 if (a1 == i)
 273                 {
 274                     k = a2-a1;
 275                 }
 276                 else if (a2 == i)
 277                 {
 278                     k = a1+natoms-a2;
 279                 }
 280                 else
 281                 {
 282                     continue;
 283                 }
 284                 if (k > 0 && k <= max_offset)
 285                 {
 286                     aadata->exclusion_mask_gb[i][k-1] = 0;
 287                 }
 288             }
 289         }
 290         if (!bInclude14)
 291         {
 292             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 293             {
 294                 a1 = ilist[F_GB14].iatoms[j+1];
 295                 a2 = ilist[F_GB14].iatoms[j+2];
 296
 297                 if (a1 == i)
 298                 {
 299                     k = a2-a1;
 300                 }
 301                 else if (a2 == i)
 302                 {
 303                     k = a1+natoms-a2;
 304                 }
 305                 else
 306                 {
 307                     continue;
 308                 }
 309                 if (k > 0 && k <= max_offset)
 310                 {
 311                     aadata->exclusion_mask_gb[i][k-1] = 0;
 312                 }
 313             }
 314         }
 315
 316         /* End */
 317
 318         /* End */
 319         aadata->jindex_gb[3*i+2] = i+1+max_offset;
 320     }
 321 }
 322
 323
 324 static void
 325 genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
 326                        t_ilist     *                  ilist,
 327                        int                            natoms,
 328                        gmx_bool                       bInclude12,
 329                        gmx_bool                       bInclude13,
 330                        gmx_bool                       bInclude14)
 331 {
 332     int                     i, j, idx;
 333     gmx_allvsallgb2_data_t *aadata;
 334     real                   *p;
 335
 336     snew(aadata, 1);
 337     *p_aadata = aadata;
 338
 339     setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
 340 }
 341
 342
 343
 344 int
 345 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
 346                                   t_mdatoms *            mdatoms,
 347                                   gmx_genborn_t *        born,
 348                                   gmx_localtop_t *       top,
 349                                   real *                 x,
 350                                   void *                 work)
 351 {
 352     gmx_allvsallgb2_data_t *aadata;
 353     int                     natoms;
 354     int                     ni0, ni1;
 355     int                     nj0, nj1, nj2;
 356     int                     i, j, k, n;
 357     int              *      mask;
 358
 359     real                    ix, iy, iz;
 360     real                    jx, jy, jz;
 361     real                    dx, dy, dz;
 362     real                    rsq, rinv;
 363     real                    gpi, rai, vai;
 364     real                    prod_ai;
 365     real                    irsq, idr4, idr6;
 366     real                    raj, rvdw, ratio;
 367     real                    vaj, ccf, dccf, theta, cosq;
 368     real                    term, prod, icf4, icf6, gpi2, factor, sinq;
 369
 370     natoms              = mdatoms->nr;
 371     ni0                 = 0;
 372     ni1                 = mdatoms->homenr;
 373     factor              = 0.5*ONE_4PI_EPS0;
 374     n                   = 0;
 375
 376     aadata = *((gmx_allvsallgb2_data_t **)work);
 377
 378     if (aadata == NULL)
 379     {
 380         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 381                                FALSE, FALSE, TRUE);
 382         *((gmx_allvsallgb2_data_t **)work) = aadata;
 383     }
 384
 385
 386     for (i = 0; i < born->nr; i++)
 387     {
 388         born->gpol_still_work[i] = 0;
 389     }
 390
 391
 392     for (i = ni0; i < ni1; i++)
 393     {
 394         /* We assume shifts are NOT used for all-vs-all interactions */
 395
 396         /* Load i atom data */
 397         ix                = x[3*i];
 398         iy                = x[3*i+1];
 399         iz                = x[3*i+2];
 400
 401         gpi               = 0.0;
 402
 403         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 404         vai     = born->vsolv[i];
 405         prod_ai = STILL_P4*vai;
 406
 407         /* Load limits for loop over neighbors */
 408         nj0              = aadata->jindex_gb[3*i];
 409         nj1              = aadata->jindex_gb[3*i+1];
 410         nj2              = aadata->jindex_gb[3*i+2];
 411
 412         mask             = aadata->exclusion_mask_gb[i];
 413
 414         /* Prologue part, including exclusion mask */
 415         for (j = nj0; j < nj1; j++, mask++)
 416         {
 417             if (*mask != 0)
 418             {
 419                 k = j%natoms;
 420
 421                 /* load j atom coordinates */
 422                 jx                = x[3*k];
 423                 jy                = x[3*k+1];
 424                 jz                = x[3*k+2];
 425
 426                 /* Calculate distance */
 427                 dx                = ix - jx;
 428                 dy                = iy - jy;
 429                 dz                = iz - jz;
 430                 rsq               = dx*dx+dy*dy+dz*dz;
 431
 432                 /* Calculate 1/r and 1/r2 */
 433                 rinv              = gmx_invsqrt(rsq);
 434                 irsq              = rinv*rinv;
 435                 idr4              = irsq*irsq;
 436                 idr6              = idr4*irsq;
 437
 438                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 439
 440                 rvdw  = rai + raj;
 441
 442                 ratio = rsq / (rvdw * rvdw);
 443                 vaj   = born->vsolv[k];
 444
 445
 446                 if (ratio > STILL_P5INV)
 447                 {
 448                     ccf  = 1.0;
 449                     dccf = 0.0;
 450                 }
 451                 else
 452                 {
 453                     theta = ratio*STILL_PIP5;
 454                     cosq  = cos(theta);
 455                     term  = 0.5*(1.0-cosq);
 456                     ccf   = term*term;
 457                     sinq  = 1.0 - cosq*cosq;
 458                     dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 459                 }
 460
 461                 prod          = STILL_P4*vaj;
 462                 icf4          = ccf*idr4;
 463                 icf6          = (4*ccf-dccf)*idr6;
 464
 465                 born->gpol_still_work[k] += prod_ai*icf4;
 466                 gpi                       = gpi+prod*icf4;
 467
 468                 /* Save ai->aj and aj->ai chain rule terms */
 469                 fr->dadx[n++]   = prod*icf6;
 470                 fr->dadx[n++]   = prod_ai*icf6;
 471
 472                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
 473
 474             }
 475         }
 476
 477         /* Main part, no exclusions */
 478         for (j = nj1; j < nj2; j++)
 479         {
 480             k = j%natoms;
 481
 482             /* load j atom coordinates */
 483             jx                = x[3*k];
 484             jy                = x[3*k+1];
 485             jz                = x[3*k+2];
 486
 487             /* Calculate distance */
 488             dx                = ix - jx;
 489             dy                = iy - jy;
 490             dz                = iz - jz;
 491             rsq               = dx*dx+dy*dy+dz*dz;
 492
 493             /* Calculate 1/r and 1/r2 */
 494             rinv              = gmx_invsqrt(rsq);
 495             irsq              = rinv*rinv;
 496             idr4              = irsq*irsq;
 497             idr6              = idr4*irsq;
 498
 499             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 500
 501             rvdw  = rai + raj;
 502
 503             ratio = rsq / (rvdw * rvdw);
 504             vaj   = born->vsolv[k];
 505
 506             if (ratio > STILL_P5INV)
 507             {
 508                 ccf  = 1.0;
 509                 dccf = 0.0;
 510             }
 511             else
 512             {
 513                 theta = ratio*STILL_PIP5;
 514                 cosq  = cos(theta);
 515                 term  = 0.5*(1.0-cosq);
 516                 ccf   = term*term;
 517                 sinq  = 1.0 - cosq*cosq;
 518                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 519             }
 520
 521             prod          = STILL_P4*vaj;
 522             icf4          = ccf*idr4;
 523             icf6          = (4*ccf-dccf)*idr6;
 524
 525             born->gpol_still_work[k] += prod_ai*icf4;
 526             gpi                       = gpi+prod*icf4;
 527
 528             /* Save ai->aj and aj->ai chain rule terms */
 529             fr->dadx[n++]   = prod*icf6;
 530             fr->dadx[n++]   = prod_ai*icf6;
 531         }
 532         born->gpol_still_work[i] += gpi;
 533     }
 534
 535     /* Parallel summations would go here if ever implemented with DD */
 536
 537     /* Calculate the radii */
 538     for (i = 0; i < natoms; i++)
 539     {
 540         if (born->use[i] != 0)
 541         {
 542             gpi             = born->gpol[i]+born->gpol_still_work[i];
 543             gpi2            = gpi * gpi;
 544             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
 545             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 546         }
 547     }
 548
 549     return 0;
 550 }
 551
 552
 553
 554 int
 555 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
 556                                     t_mdatoms *            mdatoms,
 557                                     gmx_genborn_t *        born,
 558                                     int                    gb_algorithm,
 559                                     gmx_localtop_t *       top,
 560                                     real *                 x,
 561                                     void *                 work)
 562 {
 563     gmx_allvsallgb2_data_t *aadata;
 564     int                     natoms;
 565     int                     ni0, ni1;
 566     int                     nj0, nj1, nj2;
 567     int                     i, j, k, n;
 568     int              *      mask;
 569
 570     real                    ix, iy, iz;
 571     real                    jx, jy, jz;
 572     real                    dx, dy, dz;
 573     real                    rsq, rinv;
 574     real                    prod, raj;
 575     real                    rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
 576     real                    dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
 577     real                    lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
 578     real                    tchain;
 579     real                    dadxi, dadxj;
 580     real                    rad, min_rad;
 581
 582     natoms              = mdatoms->nr;
 583     ni0                 = 0;
 584     ni1                 = mdatoms->homenr;
 585
 586     n       = 0;
 587     prod    = 0;
 588     raj     = 0;
 589     doffset = born->gb_doffset;
 590
 591     aadata = *((gmx_allvsallgb2_data_t **)work);
 592
 593     if (aadata == NULL)
 594     {
 595         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 596                                TRUE, TRUE, TRUE);
 597         *((gmx_allvsallgb2_data_t **)work) = aadata;
 598     }
 599
 600     for (i = 0; i < born->nr; i++)
 601     {
 602         born->gpol_hct_work[i] = 0;
 603     }
 604
 605     for (i = ni0; i < ni1; i++)
 606     {
 607         /* We assume shifts are NOT used for all-vs-all interactions */
 608
 609         /* Load i atom data */
 610         ix                = x[3*i];
 611         iy                = x[3*i+1];
 612         iz                = x[3*i+2];
 613
 614         rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
 615         rai_inv  = 1.0/rai;
 616
 617         sk_ai    = born->param[i];
 618         sk2_ai   = sk_ai*sk_ai;
 619
 620         sum_ai   = 0;
 621
 622         /* Load limits for loop over neighbors */
 623         nj0              = aadata->jindex_gb[3*i];
 624         nj1              = aadata->jindex_gb[3*i+1];
 625         nj2              = aadata->jindex_gb[3*i+2];
 626
 627         mask             = aadata->exclusion_mask_gb[i];
 628
 629         /* Prologue part, including exclusion mask */
 630         for (j = nj0; j < nj1; j++, mask++)
 631         {
 632             if (*mask != 0)
 633             {
 634                 k = j%natoms;
 635
 636                 /* load j atom coordinates */
 637                 jx                = x[3*k];
 638                 jy                = x[3*k+1];
 639                 jz                = x[3*k+2];
 640
 641                 /* Calculate distance */
 642                 dx                = ix - jx;
 643                 dy                = iy - jy;
 644                 dz                = iz - jz;
 645                 rsq               = dx*dx+dy*dy+dz*dz;
 646
 647                 /* Calculate 1/r and 1/r2 */
 648                 rinv              = gmx_invsqrt(rsq);
 649                 dr                = rsq*rinv;
 650
 651                 /* sk is precalculated in init_gb() */
 652                 sk    = born->param[k];
 653                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 654
 655                 /* aj -> ai interaction */
 656
 657
 658                 if (rai < dr+sk)
 659                 {
 660                     lij       = 1.0/(dr-sk);
 661                     dlij      = 1.0;
 662
 663                     if (rai > dr-sk)
 664                     {
 665                         lij  = rai_inv;
 666                         dlij = 0.0;
 667                     }
 668
 669                     uij      = 1.0/(dr+sk);
 670                     lij2     = lij  * lij;
 671                     lij3     = lij2 * lij;
 672                     uij2     = uij  * uij;
 673                     uij3     = uij2 * uij;
 674
 675                     diff2    = uij2-lij2;
 676
 677                     lij_inv  = gmx_invsqrt(lij2);
 678                     sk2      = sk*sk;
 679                     sk2_rinv = sk2*rinv;
 680                     prod     = 0.25*sk2_rinv;
 681
 682                     log_term = log(uij*lij_inv);
 683                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 684                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 685
 686                     if (rai < sk-dr)
 687                     {
 688                         tmp = tmp + 2.0 * (rai_inv-lij);
 689                     }
 690
 691                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 692                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
 693
 694                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 695
 696                     dadxi = (dlij*t1+t2+t3)*rinv;
 697
 698                     sum_ai += 0.5*tmp;
 699                 }
 700                 else
 701                 {
 702                     dadxi = 0.0;
 703                 }
 704
 705                 /* ai -> aj interaction */
 706                 if (raj < dr + sk_ai)
 707                 {
 708                     lij     = 1.0/(dr-sk_ai);
 709                     dlij    = 1.0;
 710                     raj_inv = 1.0/raj;
 711
 712                     if (raj > dr-sk_ai)
 713                     {
 714                         lij  = raj_inv;
 715                         dlij = 0.0;
 716                     }
 717
 718                     lij2     = lij  * lij;
 719                     lij3     = lij2 * lij;
 720
 721                     uij      = 1.0/(dr+sk_ai);
 722                     uij2     = uij  * uij;
 723                     uij3     = uij2 * uij;
 724
 725                     diff2    = uij2-lij2;
 726
 727                     lij_inv  = gmx_invsqrt(lij2);
 728                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 729                     sk2_rinv = sk2*rinv;
 730                     prod     = 0.25 * sk2_rinv;
 731
 732                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 733                     log_term = log(uij*lij_inv);
 734
 735                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 736
 737                     if (raj < sk_ai-dr)
 738                     {
 739                         tmp     = tmp + 2.0 * (raj_inv-lij);
 740                     }
 741
 742                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 743                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 744                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 745
 746                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
 747
 748                     born->gpol_hct_work[k] += 0.5*tmp;
 749                 }
 750                 else
 751                 {
 752                     dadxj = 0.0;
 753                 }
 754                 fr->dadx[n++] = dadxi;
 755                 fr->dadx[n++] = dadxj;
 756
 757             }
 758         }
 759
 760         /* Main part, no exclusions */
 761         for (j = nj1; j < nj2; j++)
 762         {
 763             k = j%natoms;
 764
 765             /* load j atom coordinates */
 766             jx                = x[3*k];
 767             jy                = x[3*k+1];
 768             jz                = x[3*k+2];
 769
 770             /* Calculate distance */
 771             dx                = ix - jx;
 772             dy                = iy - jy;
 773             dz                = iz - jz;
 774             rsq               = dx*dx+dy*dy+dz*dz;
 775
 776             /* Calculate 1/r and 1/r2 */
 777             rinv              = gmx_invsqrt(rsq);
 778             dr                = rsq*rinv;
 779
 780             /* sk is precalculated in init_gb() */
 781             sk    = born->param[k];
 782             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 783
 784             /* aj -> ai interaction */
 785             if (rai < dr+sk)
 786             {
 787                 lij       = 1.0/(dr-sk);
 788                 dlij      = 1.0;
 789
 790                 if (rai > dr-sk)
 791                 {
 792                     lij  = rai_inv;
 793                     dlij = 0.0;
 794                 }
 795
 796                 uij      = 1.0/(dr+sk);
 797                 lij2     = lij  * lij;
 798                 lij3     = lij2 * lij;
 799                 uij2     = uij  * uij;
 800                 uij3     = uij2 * uij;
 801
 802                 diff2    = uij2-lij2;
 803
 804                 lij_inv  = gmx_invsqrt(lij2);
 805                 sk2      = sk*sk;
 806                 sk2_rinv = sk2*rinv;
 807                 prod     = 0.25*sk2_rinv;
 808
 809                 log_term = log(uij*lij_inv);
 810                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 811                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 812
 813                 if (rai < sk-dr)
 814                 {
 815                     tmp = tmp + 2.0 * (rai_inv-lij);
 816                 }
 817
 818                 /* duij    = 1.0; */
 819                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 820                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 821                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 822
 823                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 824
 825                 sum_ai += 0.5*tmp;
 826             }
 827             else
 828             {
 829                 dadxi = 0.0;
 830             }
 831
 832             /* ai -> aj interaction */
 833             if (raj < dr + sk_ai)
 834             {
 835                 lij     = 1.0/(dr-sk_ai);
 836                 dlij    = 1.0;
 837                 raj_inv = 1.0/raj;
 838
 839                 if (raj > dr-sk_ai)
 840                 {
 841                     lij  = raj_inv;
 842                     dlij = 0.0;
 843                 }
 844
 845                 lij2     = lij  * lij;
 846                 lij3     = lij2 * lij;
 847
 848                 uij      = 1.0/(dr+sk_ai);
 849                 uij2     = uij  * uij;
 850                 uij3     = uij2 * uij;
 851
 852                 diff2    = uij2-lij2;
 853
 854                 lij_inv  = gmx_invsqrt(lij2);
 855                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 856                 sk2_rinv = sk2*rinv;
 857                 prod     = 0.25 * sk2_rinv;
 858
 859                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 860                 log_term = log(uij*lij_inv);
 861
 862                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 863
 864                 if (raj < sk_ai-dr)
 865                 {
 866                     tmp     = tmp + 2.0 * (raj_inv-lij);
 867                 }
 868
 869                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 870                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 871                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 872
 873                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 874
 875                 born->gpol_hct_work[k] += 0.5*tmp;
 876             }
 877             else
 878             {
 879                 dadxj = 0.0;
 880             }
 881             fr->dadx[n++] = dadxi;
 882             fr->dadx[n++] = dadxj;
 883         }
 884         born->gpol_hct_work[i] += sum_ai;
 885     }
 886
 887     /* Parallel summations would go here if ever implemented with DD */
 888
 889     if (gb_algorithm == egbHCT)
 890     {
 891         /* HCT */
 892         for (i = 0; i < natoms; i++)
 893         {
 894             if (born->use[i] != 0)
 895             {
 896                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
 897                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
 898                 min_rad = rai + born->gb_doffset;
 899                 rad     = 1.0/sum_ai;
 900
 901                 born->bRad[i]   = rad > min_rad ? rad : min_rad;
 902                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 903             }
 904         }
 905
 906     }
 907     else
 908     {
 909         /* OBC */
 910         /* Calculate the radii */
 911         for (i = 0; i < natoms; i++)
 912         {
 913             if (born->use[i] != 0)
 914             {
 915                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 916                 rai_inv2   = 1.0/rai;
 917                 rai        = rai-doffset;
 918                 rai_inv    = 1.0/rai;
 919                 sum_ai     = rai * born->gpol_hct_work[i];
 920                 sum_ai2    = sum_ai  * sum_ai;
 921                 sum_ai3    = sum_ai2 * sum_ai;
 922
 923                 tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
 924                 born->bRad[i] = rai_inv - tsum*rai_inv2;
 925                 born->bRad[i] = 1.0 / born->bRad[i];
 926
 927                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 928
 929                 tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
 930                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
 931             }
 932         }
 933     }
 934     return 0;
 935 }
 936
 937
 938
 939
 940
 941 int
 942 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
 943                                 t_mdatoms *            mdatoms,
 944                                 gmx_genborn_t *        born,
 945                                 real *                 x,
 946                                 real *                 f,
 947                                 int                    gb_algorithm,
 948                                 void *                 work)
 949 {
 950     gmx_allvsallgb2_data_t *aadata;
 951     int                     natoms;
 952     int                     ni0, ni1;
 953     int                     nj0, nj1, nj2;
 954     int                     i, j, k, n;
 955     int                     idx;
 956     int              *      mask;
 957
 958     real                    ix, iy, iz;
 959     real                    fix, fiy, fiz;
 960     real                    jx, jy, jz;
 961     real                    dx, dy, dz;
 962     real                    tx, ty, tz;
 963     real                    rbai, rbaj, fgb, fgb_ai, rbi;
 964     real              *     rb;
 965     real              *     dadx;
 966
 967     natoms              = mdatoms->nr;
 968     ni0                 = 0;
 969     ni1                 = mdatoms->homenr;
 970     dadx                = fr->dadx;
 971
 972     aadata = (gmx_allvsallgb2_data_t *)work;
 973
 974     n  = 0;
 975     rb = born->work;
 976
 977     /* Loop to get the proper form for the Born radius term */
 978     if (gb_algorithm == egbSTILL)
 979     {
 980         for (i = 0; i < natoms; i++)
 981         {
 982             rbi   = born->bRad[i];
 983             rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
 984         }
 985     }
 986     else if (gb_algorithm == egbHCT)
 987     {
 988         for (i = 0; i < natoms; i++)
 989         {
 990             rbi   = born->bRad[i];
 991             rb[i] = rbi * rbi * fr->dvda[i];
 992         }
 993     }
 994     else if (gb_algorithm == egbOBC)
 995     {
 996         for (idx = 0; idx < natoms; idx++)
 997         {
 998             rbi     = born->bRad[idx];
 999             rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1000         }
1001     }
1002
1003     for (i = ni0; i < ni1; i++)
1004     {
1005         /* We assume shifts are NOT used for all-vs-all interactions */
1006
1007         /* Load i atom data */
1008         ix                = x[3*i];
1009         iy                = x[3*i+1];
1010         iz                = x[3*i+2];
1011
1012         fix               = 0;
1013         fiy               = 0;
1014         fiz               = 0;
1015
1016         rbai              = rb[i];
1017
1018         /* Load limits for loop over neighbors */
1019         nj0              = aadata->jindex_gb[3*i];
1020         nj1              = aadata->jindex_gb[3*i+1];
1021         nj2              = aadata->jindex_gb[3*i+2];
1022
1023         mask             = aadata->exclusion_mask_gb[i];
1024
1025         /* Prologue part, including exclusion mask */
1026         for (j = nj0; j < nj1; j++, mask++)
1027         {
1028             if (*mask != 0)
1029             {
1030                 k = j%natoms;
1031
1032                 /* load j atom coordinates */
1033                 jx                = x[3*k];
1034                 jy                = x[3*k+1];
1035                 jz                = x[3*k+2];
1036
1037                 /* Calculate distance */
1038                 dx                = ix - jx;
1039                 dy                = iy - jy;
1040                 dz                = iz - jz;
1041
1042                 rbaj              = rb[k];
1043
1044                 fgb     = rbai*dadx[n++];
1045                 fgb_ai  = rbaj*dadx[n++];
1046
1047                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1048                 fgb     = fgb + fgb_ai;
1049
1050                 tx      = fgb * dx;
1051                 ty      = fgb * dy;
1052                 tz      = fgb * dz;
1053
1054                 fix     = fix + tx;
1055                 fiy     = fiy + ty;
1056                 fiz     = fiz + tz;
1057
1058                 /* Update force on atom aj */
1059                 f[3*k]   = f[3*k] - tx;
1060                 f[3*k+1] = f[3*k+1] - ty;
1061                 f[3*k+2] = f[3*k+2] - tz;
1062             }
1063         }
1064
1065         /* Main part, no exclusions */
1066         for (j = nj1; j < nj2; j++)
1067         {
1068             k = j%natoms;
1069
1070             /* load j atom coordinates */
1071             jx                = x[3*k];
1072             jy                = x[3*k+1];
1073             jz                = x[3*k+2];
1074
1075             /* Calculate distance */
1076             dx                = ix - jx;
1077             dy                = iy - jy;
1078             dz                = iz - jz;
1079
1080             rbaj              = rb[k];
1081
1082             fgb     = rbai*dadx[n++];
1083             fgb_ai  = rbaj*dadx[n++];
1084
1085             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1086             fgb     = fgb + fgb_ai;
1087
1088             tx      = fgb * dx;
1089             ty      = fgb * dy;
1090             tz      = fgb * dz;
1091
1092             fix     = fix + tx;
1093             fiy     = fiy + ty;
1094             fiz     = fiz + tz;
1095
1096             /* Update force on atom aj */
1097             f[3*k]   = f[3*k] - tx;
1098             f[3*k+1] = f[3*k+1] - ty;
1099             f[3*k+2] = f[3*k+2] - tz;
1100         }
1101         /* Update force and shift forces on atom ai */
1102         f[3*i]   = f[3*i] + fix;
1103         f[3*i+1] = f[3*i+1] + fiy;
1104         f[3*i+2] = f[3*i+2] + fiz;
1105     }
1106
1107     return 0;
1108 }