src/gromacs/mdlib/genborn_allvsall.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2009, The GROMACS Development Team.
   6  * Copyright (c) 2010,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include "types/simple.h"
  43
  44 #include "gromacs/math/vec.h"
  45 #include "gromacs/utility/smalloc.h"
  46
  47 #include "network.h"
  48 #include "gromacs/math/units.h"
  49 #include "genborn.h"
  50 #include "genborn_allvsall.h"
  51
  52
  53 typedef struct
  54 {
  55     int *      jindex_gb;
  56     int **     exclusion_mask_gb;
  57 }
  58 gmx_allvsallgb2_data_t;
  59
  60 static int
  61 calc_maxoffset(int i, int natoms)
  62 {
  63     int maxoffset;
  64
  65     if ((natoms % 2) == 1)
  66     {
  67         /* Odd number of atoms, easy */
  68         maxoffset = natoms/2;
  69     }
  70     else if ((natoms % 4) == 0)
  71     {
  72         /* Multiple of four is hard */
  73         if (i < natoms/2)
  74         {
  75             if ((i % 2) == 0)
  76             {
  77                 maxoffset = natoms/2;
  78             }
  79             else
  80             {
  81                 maxoffset = natoms/2-1;
  82             }
  83         }
  84         else
  85         {
  86             if ((i % 2) == 1)
  87             {
  88                 maxoffset = natoms/2;
  89             }
  90             else
  91             {
  92                 maxoffset = natoms/2-1;
  93             }
  94         }
  95     }
  96     else
  97     {
  98         /* natoms/2 = odd */
  99         if ((i % 2) == 0)
 100         {
 101             maxoffset = natoms/2;
 102         }
 103         else
 104         {
 105             maxoffset = natoms/2-1;
 106         }
 107     }
 108
 109     return maxoffset;
 110 }
 111
 112 static void
 113 setup_gb_exclusions_and_indices(gmx_allvsallgb2_data_t     *   aadata,
 114                                 t_ilist     *                  ilist,
 115                                 int                            natoms,
 116                                 gmx_bool                       bInclude12,
 117                                 gmx_bool                       bInclude13,
 118                                 gmx_bool                       bInclude14)
 119 {
 120     int i, j, k, tp;
 121     int a1, a2;
 122     int nj0, nj1;
 123     int max_offset;
 124     int max_excl_offset;
 125     int nj;
 126
 127     /* This routine can appear to be a bit complex, but it is mostly book-keeping.
 128      * To enable the fast all-vs-all kernel we need to be able to stream through all coordinates
 129      * whether they should interact or not.
 130      *
 131      * To avoid looping over the exclusions, we create a simple mask that is 1 if the interaction
 132      * should be present, otherwise 0. Since exclusions typically only occur when i & j are close,
 133      * we create a jindex array with three elements per i atom: the starting point, the point to
 134      * which we need to check exclusions, and the end point.
 135      * This way we only have to allocate a short exclusion mask per i atom.
 136      */
 137
 138     /* Allocate memory for jindex arrays */
 139     snew(aadata->jindex_gb, 3*natoms);
 140
 141     /* Pointer to lists with exclusion masks */
 142     snew(aadata->exclusion_mask_gb, natoms);
 143
 144     for (i = 0; i < natoms; i++)
 145     {
 146         /* Start */
 147         aadata->jindex_gb[3*i]       = i+1;
 148         max_offset                   = calc_maxoffset(i, natoms);
 149
 150         /* first check the max range of atoms to EXCLUDE */
 151         max_excl_offset = 0;
 152         if (!bInclude12)
 153         {
 154             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 155             {
 156                 a1 = ilist[F_GB12].iatoms[j+1];
 157                 a2 = ilist[F_GB12].iatoms[j+2];
 158
 159                 if (a1 == i)
 160                 {
 161                     k = a2-a1;
 162                 }
 163                 else if (a2 == i)
 164                 {
 165                     k = a1+natoms-a2;
 166                 }
 167                 else
 168                 {
 169                     continue;
 170                 }
 171                 if (k > 0 && k <= max_offset)
 172                 {
 173                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 174                 }
 175             }
 176         }
 177         if (!bInclude13)
 178         {
 179             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 180             {
 181                 a1 = ilist[F_GB13].iatoms[j+1];
 182                 a2 = ilist[F_GB13].iatoms[j+2];
 183
 184
 185                 if (a1 == i)
 186                 {
 187                     k = a2-a1;
 188                 }
 189                 else if (a2 == i)
 190                 {
 191                     k = a1+natoms-a2;
 192                 }
 193                 else
 194                 {
 195                     continue;
 196                 }
 197                 if (k > 0 && k <= max_offset)
 198                 {
 199                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 200                 }
 201             }
 202         }
 203         if (!bInclude14)
 204         {
 205             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 206             {
 207                 a1 = ilist[F_GB14].iatoms[j+1];
 208                 a2 = ilist[F_GB14].iatoms[j+2];
 209
 210
 211                 if (a1 == i)
 212                 {
 213                     k = a2-a1;
 214                 }
 215                 else if (a2 == i)
 216                 {
 217                     k = a1+natoms-a2;
 218                 }
 219                 else
 220                 {
 221                     continue;
 222                 }
 223                 if (k > 0 && k <= max_offset)
 224                 {
 225                     max_excl_offset = (k > max_excl_offset) ? k : max_excl_offset;
 226                 }
 227             }
 228         }
 229         max_excl_offset = (max_offset < max_excl_offset) ? max_offset : max_excl_offset;
 230
 231         aadata->jindex_gb[3*i+1] = i+1+max_excl_offset;
 232
 233         snew(aadata->exclusion_mask_gb[i], max_excl_offset);
 234
 235         /* Include everything by default */
 236         for (j = 0; j < max_excl_offset; j++)
 237         {
 238             /* Use all-ones to mark interactions that should be present, compatible with SSE */
 239             aadata->exclusion_mask_gb[i][j] = 0xFFFFFFFF;
 240         }
 241         /* Go through exclusions again */
 242         if (!bInclude12)
 243         {
 244             for (j = 0; j < ilist[F_GB12].nr; j += 3)
 245             {
 246                 a1 = ilist[F_GB12].iatoms[j+1];
 247                 a2 = ilist[F_GB12].iatoms[j+2];
 248
 249                 if (a1 == i)
 250                 {
 251                     k = a2-a1;
 252                 }
 253                 else if (a2 == i)
 254                 {
 255                     k = a1+natoms-a2;
 256                 }
 257                 else
 258                 {
 259                     continue;
 260                 }
 261                 if (k > 0 && k <= max_offset)
 262                 {
 263                     aadata->exclusion_mask_gb[i][k-1] = 0;
 264                 }
 265             }
 266         }
 267         if (!bInclude13)
 268         {
 269             for (j = 0; j < ilist[F_GB13].nr; j += 3)
 270             {
 271                 a1 = ilist[F_GB13].iatoms[j+1];
 272                 a2 = ilist[F_GB13].iatoms[j+2];
 273
 274                 if (a1 == i)
 275                 {
 276                     k = a2-a1;
 277                 }
 278                 else if (a2 == i)
 279                 {
 280                     k = a1+natoms-a2;
 281                 }
 282                 else
 283                 {
 284                     continue;
 285                 }
 286                 if (k > 0 && k <= max_offset)
 287                 {
 288                     aadata->exclusion_mask_gb[i][k-1] = 0;
 289                 }
 290             }
 291         }
 292         if (!bInclude14)
 293         {
 294             for (j = 0; j < ilist[F_GB14].nr; j += 3)
 295             {
 296                 a1 = ilist[F_GB14].iatoms[j+1];
 297                 a2 = ilist[F_GB14].iatoms[j+2];
 298
 299                 if (a1 == i)
 300                 {
 301                     k = a2-a1;
 302                 }
 303                 else if (a2 == i)
 304                 {
 305                     k = a1+natoms-a2;
 306                 }
 307                 else
 308                 {
 309                     continue;
 310                 }
 311                 if (k > 0 && k <= max_offset)
 312                 {
 313                     aadata->exclusion_mask_gb[i][k-1] = 0;
 314                 }
 315             }
 316         }
 317
 318         /* End */
 319
 320         /* End */
 321         aadata->jindex_gb[3*i+2] = i+1+max_offset;
 322     }
 323 }
 324
 325
 326 static void
 327 genborn_allvsall_setup(gmx_allvsallgb2_data_t     **  p_aadata,
 328                        t_ilist     *                  ilist,
 329                        int                            natoms,
 330                        gmx_bool                       bInclude12,
 331                        gmx_bool                       bInclude13,
 332                        gmx_bool                       bInclude14)
 333 {
 334     int                     i, j, idx;
 335     gmx_allvsallgb2_data_t *aadata;
 336     real                   *p;
 337
 338     snew(aadata, 1);
 339     *p_aadata = aadata;
 340
 341     setup_gb_exclusions_and_indices(aadata, ilist, natoms, bInclude12, bInclude13, bInclude14);
 342 }
 343
 344
 345
 346 int
 347 genborn_allvsall_calc_still_radii(t_forcerec *           fr,
 348                                   t_mdatoms *            mdatoms,
 349                                   gmx_genborn_t *        born,
 350                                   gmx_localtop_t *       top,
 351                                   real *                 x,
 352                                   void *                 work)
 353 {
 354     gmx_allvsallgb2_data_t *aadata;
 355     int                     natoms;
 356     int                     ni0, ni1;
 357     int                     nj0, nj1, nj2;
 358     int                     i, j, k, n;
 359     int              *      mask;
 360
 361     real                    ix, iy, iz;
 362     real                    jx, jy, jz;
 363     real                    dx, dy, dz;
 364     real                    rsq, rinv;
 365     real                    gpi, rai, vai;
 366     real                    prod_ai;
 367     real                    irsq, idr4, idr6;
 368     real                    raj, rvdw, ratio;
 369     real                    vaj, ccf, dccf, theta, cosq;
 370     real                    term, prod, icf4, icf6, gpi2, factor, sinq;
 371
 372     natoms              = mdatoms->nr;
 373     ni0                 = 0;
 374     ni1                 = mdatoms->homenr;
 375     factor              = 0.5*ONE_4PI_EPS0;
 376     n                   = 0;
 377
 378     aadata = *((gmx_allvsallgb2_data_t **)work);
 379
 380     if (aadata == NULL)
 381     {
 382         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 383                                FALSE, FALSE, TRUE);
 384         *((gmx_allvsallgb2_data_t **)work) = aadata;
 385     }
 386
 387
 388     for (i = 0; i < born->nr; i++)
 389     {
 390         born->gpol_still_work[i] = 0;
 391     }
 392
 393
 394     for (i = ni0; i < ni1; i++)
 395     {
 396         /* We assume shifts are NOT used for all-vs-all interactions */
 397
 398         /* Load i atom data */
 399         ix                = x[3*i];
 400         iy                = x[3*i+1];
 401         iz                = x[3*i+2];
 402
 403         gpi               = 0.0;
 404
 405         rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 406         vai     = born->vsolv[i];
 407         prod_ai = STILL_P4*vai;
 408
 409         /* Load limits for loop over neighbors */
 410         nj0              = aadata->jindex_gb[3*i];
 411         nj1              = aadata->jindex_gb[3*i+1];
 412         nj2              = aadata->jindex_gb[3*i+2];
 413
 414         mask             = aadata->exclusion_mask_gb[i];
 415
 416         /* Prologue part, including exclusion mask */
 417         for (j = nj0; j < nj1; j++, mask++)
 418         {
 419             if (*mask != 0)
 420             {
 421                 k = j%natoms;
 422
 423                 /* load j atom coordinates */
 424                 jx                = x[3*k];
 425                 jy                = x[3*k+1];
 426                 jz                = x[3*k+2];
 427
 428                 /* Calculate distance */
 429                 dx                = ix - jx;
 430                 dy                = iy - jy;
 431                 dz                = iz - jz;
 432                 rsq               = dx*dx+dy*dy+dz*dz;
 433
 434                 /* Calculate 1/r and 1/r2 */
 435                 rinv              = gmx_invsqrt(rsq);
 436                 irsq              = rinv*rinv;
 437                 idr4              = irsq*irsq;
 438                 idr6              = idr4*irsq;
 439
 440                 raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 441
 442                 rvdw  = rai + raj;
 443
 444                 ratio = rsq / (rvdw * rvdw);
 445                 vaj   = born->vsolv[k];
 446
 447
 448                 if (ratio > STILL_P5INV)
 449                 {
 450                     ccf  = 1.0;
 451                     dccf = 0.0;
 452                 }
 453                 else
 454                 {
 455                     theta = ratio*STILL_PIP5;
 456                     cosq  = cos(theta);
 457                     term  = 0.5*(1.0-cosq);
 458                     ccf   = term*term;
 459                     sinq  = 1.0 - cosq*cosq;
 460                     dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 461                 }
 462
 463                 prod          = STILL_P4*vaj;
 464                 icf4          = ccf*idr4;
 465                 icf6          = (4*ccf-dccf)*idr6;
 466
 467                 born->gpol_still_work[k] += prod_ai*icf4;
 468                 gpi                       = gpi+prod*icf4;
 469
 470                 /* Save ai->aj and aj->ai chain rule terms */
 471                 fr->dadx[n++]   = prod*icf6;
 472                 fr->dadx[n++]   = prod_ai*icf6;
 473
 474                 /* 27 flops, plus one cos(x) - estimate at 20 flops  => 47 */
 475
 476             }
 477         }
 478
 479         /* Main part, no exclusions */
 480         for (j = nj1; j < nj2; j++)
 481         {
 482             k = j%natoms;
 483
 484             /* load j atom coordinates */
 485             jx                = x[3*k];
 486             jy                = x[3*k+1];
 487             jz                = x[3*k+2];
 488
 489             /* Calculate distance */
 490             dx                = ix - jx;
 491             dy                = iy - jy;
 492             dz                = iz - jz;
 493             rsq               = dx*dx+dy*dy+dz*dz;
 494
 495             /* Calculate 1/r and 1/r2 */
 496             rinv              = gmx_invsqrt(rsq);
 497             irsq              = rinv*rinv;
 498             idr4              = irsq*irsq;
 499             idr6              = idr4*irsq;
 500
 501             raj = top->atomtypes.gb_radius[mdatoms->typeA[k]];
 502
 503             rvdw  = rai + raj;
 504
 505             ratio = rsq / (rvdw * rvdw);
 506             vaj   = born->vsolv[k];
 507
 508             if (ratio > STILL_P5INV)
 509             {
 510                 ccf  = 1.0;
 511                 dccf = 0.0;
 512             }
 513             else
 514             {
 515                 theta = ratio*STILL_PIP5;
 516                 cosq  = cos(theta);
 517                 term  = 0.5*(1.0-cosq);
 518                 ccf   = term*term;
 519                 sinq  = 1.0 - cosq*cosq;
 520                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 521             }
 522
 523             prod          = STILL_P4*vaj;
 524             icf4          = ccf*idr4;
 525             icf6          = (4*ccf-dccf)*idr6;
 526
 527             born->gpol_still_work[k] += prod_ai*icf4;
 528             gpi                       = gpi+prod*icf4;
 529
 530             /* Save ai->aj and aj->ai chain rule terms */
 531             fr->dadx[n++]   = prod*icf6;
 532             fr->dadx[n++]   = prod_ai*icf6;
 533         }
 534         born->gpol_still_work[i] += gpi;
 535     }
 536
 537     /* Parallel summations would go here if ever implemented with DD */
 538
 539     /* Calculate the radii */
 540     for (i = 0; i < natoms; i++)
 541     {
 542         if (born->use[i] != 0)
 543         {
 544             gpi             = born->gpol[i]+born->gpol_still_work[i];
 545             gpi2            = gpi * gpi;
 546             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
 547             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 548         }
 549     }
 550
 551     return 0;
 552 }
 553
 554
 555
 556 int
 557 genborn_allvsall_calc_hct_obc_radii(t_forcerec *           fr,
 558                                     t_mdatoms *            mdatoms,
 559                                     gmx_genborn_t *        born,
 560                                     int                    gb_algorithm,
 561                                     gmx_localtop_t *       top,
 562                                     real *                 x,
 563                                     void *                 work)
 564 {
 565     gmx_allvsallgb2_data_t *aadata;
 566     int                     natoms;
 567     int                     ni0, ni1;
 568     int                     nj0, nj1, nj2;
 569     int                     i, j, k, n;
 570     int              *      mask;
 571
 572     real                    ix, iy, iz;
 573     real                    jx, jy, jz;
 574     real                    dx, dy, dz;
 575     real                    rsq, rinv;
 576     real                    prod, raj;
 577     real                    rai, doffset, rai_inv, rai_inv2, sk_ai, sk2_ai, sum_ai;
 578     real                    dr, sk, lij, dlij, lij2, lij3, uij2, uij3, diff2, uij, log_term;
 579     real                    lij_inv, sk2, sk2_rinv, tmp, t1, t2, t3, raj_inv, sum_ai2, sum_ai3, tsum;
 580     real                    tchain;
 581     real                    dadxi, dadxj;
 582     real                    rad, min_rad;
 583
 584     natoms              = mdatoms->nr;
 585     ni0                 = 0;
 586     ni1                 = mdatoms->homenr;
 587
 588     n       = 0;
 589     prod    = 0;
 590     raj     = 0;
 591     doffset = born->gb_doffset;
 592
 593     aadata = *((gmx_allvsallgb2_data_t **)work);
 594
 595     if (aadata == NULL)
 596     {
 597         genborn_allvsall_setup(&aadata, top->idef.il, mdatoms->nr,
 598                                TRUE, TRUE, TRUE);
 599         *((gmx_allvsallgb2_data_t **)work) = aadata;
 600     }
 601
 602     for (i = 0; i < born->nr; i++)
 603     {
 604         born->gpol_hct_work[i] = 0;
 605     }
 606
 607     for (i = ni0; i < ni1; i++)
 608     {
 609         /* We assume shifts are NOT used for all-vs-all interactions */
 610
 611         /* Load i atom data */
 612         ix                = x[3*i];
 613         iy                = x[3*i+1];
 614         iz                = x[3*i+2];
 615
 616         rai      = top->atomtypes.gb_radius[mdatoms->typeA[i]]-doffset;
 617         rai_inv  = 1.0/rai;
 618
 619         sk_ai    = born->param[i];
 620         sk2_ai   = sk_ai*sk_ai;
 621
 622         sum_ai   = 0;
 623
 624         /* Load limits for loop over neighbors */
 625         nj0              = aadata->jindex_gb[3*i];
 626         nj1              = aadata->jindex_gb[3*i+1];
 627         nj2              = aadata->jindex_gb[3*i+2];
 628
 629         mask             = aadata->exclusion_mask_gb[i];
 630
 631         /* Prologue part, including exclusion mask */
 632         for (j = nj0; j < nj1; j++, mask++)
 633         {
 634             if (*mask != 0)
 635             {
 636                 k = j%natoms;
 637
 638                 /* load j atom coordinates */
 639                 jx                = x[3*k];
 640                 jy                = x[3*k+1];
 641                 jz                = x[3*k+2];
 642
 643                 /* Calculate distance */
 644                 dx                = ix - jx;
 645                 dy                = iy - jy;
 646                 dz                = iz - jz;
 647                 rsq               = dx*dx+dy*dy+dz*dz;
 648
 649                 /* Calculate 1/r and 1/r2 */
 650                 rinv              = gmx_invsqrt(rsq);
 651                 dr                = rsq*rinv;
 652
 653                 /* sk is precalculated in init_gb() */
 654                 sk    = born->param[k];
 655                 raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 656
 657                 /* aj -> ai interaction */
 658
 659
 660                 if (rai < dr+sk)
 661                 {
 662                     lij       = 1.0/(dr-sk);
 663                     dlij      = 1.0;
 664
 665                     if (rai > dr-sk)
 666                     {
 667                         lij  = rai_inv;
 668                         dlij = 0.0;
 669                     }
 670
 671                     uij      = 1.0/(dr+sk);
 672                     lij2     = lij  * lij;
 673                     lij3     = lij2 * lij;
 674                     uij2     = uij  * uij;
 675                     uij3     = uij2 * uij;
 676
 677                     diff2    = uij2-lij2;
 678
 679                     lij_inv  = gmx_invsqrt(lij2);
 680                     sk2      = sk*sk;
 681                     sk2_rinv = sk2*rinv;
 682                     prod     = 0.25*sk2_rinv;
 683
 684                     log_term = log(uij*lij_inv);
 685                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 686                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 687
 688                     if (rai < sk-dr)
 689                     {
 690                         tmp = tmp + 2.0 * (rai_inv-lij);
 691                     }
 692
 693                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 694                     t2      = -0.5*uij2 - prod*uij3 + 0.25*(uij*rinv+uij3*dr);
 695
 696                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 697
 698                     dadxi = (dlij*t1+t2+t3)*rinv;
 699
 700                     sum_ai += 0.5*tmp;
 701                 }
 702                 else
 703                 {
 704                     dadxi = 0.0;
 705                 }
 706
 707                 /* ai -> aj interaction */
 708                 if (raj < dr + sk_ai)
 709                 {
 710                     lij     = 1.0/(dr-sk_ai);
 711                     dlij    = 1.0;
 712                     raj_inv = 1.0/raj;
 713
 714                     if (raj > dr-sk_ai)
 715                     {
 716                         lij  = raj_inv;
 717                         dlij = 0.0;
 718                     }
 719
 720                     lij2     = lij  * lij;
 721                     lij3     = lij2 * lij;
 722
 723                     uij      = 1.0/(dr+sk_ai);
 724                     uij2     = uij  * uij;
 725                     uij3     = uij2 * uij;
 726
 727                     diff2    = uij2-lij2;
 728
 729                     lij_inv  = gmx_invsqrt(lij2);
 730                     sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 731                     sk2_rinv = sk2*rinv;
 732                     prod     = 0.25 * sk2_rinv;
 733
 734                     /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 735                     log_term = log(uij*lij_inv);
 736
 737                     tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 738
 739                     if (raj < sk_ai-dr)
 740                     {
 741                         tmp     = tmp + 2.0 * (raj_inv-lij);
 742                     }
 743
 744                     t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 745                     t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 746                     t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 747
 748                     dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule  */
 749
 750                     born->gpol_hct_work[k] += 0.5*tmp;
 751                 }
 752                 else
 753                 {
 754                     dadxj = 0.0;
 755                 }
 756                 fr->dadx[n++] = dadxi;
 757                 fr->dadx[n++] = dadxj;
 758
 759             }
 760         }
 761
 762         /* Main part, no exclusions */
 763         for (j = nj1; j < nj2; j++)
 764         {
 765             k = j%natoms;
 766
 767             /* load j atom coordinates */
 768             jx                = x[3*k];
 769             jy                = x[3*k+1];
 770             jz                = x[3*k+2];
 771
 772             /* Calculate distance */
 773             dx                = ix - jx;
 774             dy                = iy - jy;
 775             dz                = iz - jz;
 776             rsq               = dx*dx+dy*dy+dz*dz;
 777
 778             /* Calculate 1/r and 1/r2 */
 779             rinv              = gmx_invsqrt(rsq);
 780             dr                = rsq*rinv;
 781
 782             /* sk is precalculated in init_gb() */
 783             sk    = born->param[k];
 784             raj   = top->atomtypes.gb_radius[mdatoms->typeA[k]]-doffset;
 785
 786             /* aj -> ai interaction */
 787             if (rai < dr+sk)
 788             {
 789                 lij       = 1.0/(dr-sk);
 790                 dlij      = 1.0;
 791
 792                 if (rai > dr-sk)
 793                 {
 794                     lij  = rai_inv;
 795                     dlij = 0.0;
 796                 }
 797
 798                 uij      = 1.0/(dr+sk);
 799                 lij2     = lij  * lij;
 800                 lij3     = lij2 * lij;
 801                 uij2     = uij  * uij;
 802                 uij3     = uij2 * uij;
 803
 804                 diff2    = uij2-lij2;
 805
 806                 lij_inv  = gmx_invsqrt(lij2);
 807                 sk2      = sk*sk;
 808                 sk2_rinv = sk2*rinv;
 809                 prod     = 0.25*sk2_rinv;
 810
 811                 log_term = log(uij*lij_inv);
 812                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 813                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 814
 815                 if (rai < sk-dr)
 816                 {
 817                     tmp = tmp + 2.0 * (rai_inv-lij);
 818                 }
 819
 820                 /* duij    = 1.0; */
 821                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 822                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 823                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 824
 825                 dadxi = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 826
 827                 sum_ai += 0.5*tmp;
 828             }
 829             else
 830             {
 831                 dadxi = 0.0;
 832             }
 833
 834             /* ai -> aj interaction */
 835             if (raj < dr + sk_ai)
 836             {
 837                 lij     = 1.0/(dr-sk_ai);
 838                 dlij    = 1.0;
 839                 raj_inv = 1.0/raj;
 840
 841                 if (raj > dr-sk_ai)
 842                 {
 843                     lij  = raj_inv;
 844                     dlij = 0.0;
 845                 }
 846
 847                 lij2     = lij  * lij;
 848                 lij3     = lij2 * lij;
 849
 850                 uij      = 1.0/(dr+sk_ai);
 851                 uij2     = uij  * uij;
 852                 uij3     = uij2 * uij;
 853
 854                 diff2    = uij2-lij2;
 855
 856                 lij_inv  = gmx_invsqrt(lij2);
 857                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 858                 sk2_rinv = sk2*rinv;
 859                 prod     = 0.25 * sk2_rinv;
 860
 861                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
 862                 log_term = log(uij*lij_inv);
 863
 864                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
 865
 866                 if (raj < sk_ai-dr)
 867                 {
 868                     tmp     = tmp + 2.0 * (raj_inv-lij);
 869                 }
 870
 871                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 872                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 873                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 874
 875                 dadxj = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule      */
 876
 877                 born->gpol_hct_work[k] += 0.5*tmp;
 878             }
 879             else
 880             {
 881                 dadxj = 0.0;
 882             }
 883             fr->dadx[n++] = dadxi;
 884             fr->dadx[n++] = dadxj;
 885         }
 886         born->gpol_hct_work[i] += sum_ai;
 887     }
 888
 889     /* Parallel summations would go here if ever implemented with DD */
 890
 891     if (gb_algorithm == egbHCT)
 892     {
 893         /* HCT */
 894         for (i = 0; i < natoms; i++)
 895         {
 896             if (born->use[i] != 0)
 897             {
 898                 rai     = top->atomtypes.gb_radius[mdatoms->typeA[i]]-born->gb_doffset;
 899                 sum_ai  = 1.0/rai - born->gpol_hct_work[i];
 900                 min_rad = rai + born->gb_doffset;
 901                 rad     = 1.0/sum_ai;
 902
 903                 born->bRad[i]   = rad > min_rad ? rad : min_rad;
 904                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 905             }
 906         }
 907
 908     }
 909     else
 910     {
 911         /* OBC */
 912         /* Calculate the radii */
 913         for (i = 0; i < natoms; i++)
 914         {
 915             if (born->use[i] != 0)
 916             {
 917                 rai        = top->atomtypes.gb_radius[mdatoms->typeA[i]];
 918                 rai_inv2   = 1.0/rai;
 919                 rai        = rai-doffset;
 920                 rai_inv    = 1.0/rai;
 921                 sum_ai     = rai * born->gpol_hct_work[i];
 922                 sum_ai2    = sum_ai  * sum_ai;
 923                 sum_ai3    = sum_ai2 * sum_ai;
 924
 925                 tsum          = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
 926                 born->bRad[i] = rai_inv - tsum*rai_inv2;
 927                 born->bRad[i] = 1.0 / born->bRad[i];
 928
 929                 fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 930
 931                 tchain         = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
 932                 born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
 933             }
 934         }
 935     }
 936     return 0;
 937 }
 938
 939
 940
 941
 942
 943 int
 944 genborn_allvsall_calc_chainrule(t_forcerec *           fr,
 945                                 t_mdatoms *            mdatoms,
 946                                 gmx_genborn_t *        born,
 947                                 real *                 x,
 948                                 real *                 f,
 949                                 int                    gb_algorithm,
 950                                 void *                 work)
 951 {
 952     gmx_allvsallgb2_data_t *aadata;
 953     int                     natoms;
 954     int                     ni0, ni1;
 955     int                     nj0, nj1, nj2;
 956     int                     i, j, k, n;
 957     int                     idx;
 958     int              *      mask;
 959
 960     real                    ix, iy, iz;
 961     real                    fix, fiy, fiz;
 962     real                    jx, jy, jz;
 963     real                    dx, dy, dz;
 964     real                    tx, ty, tz;
 965     real                    rbai, rbaj, fgb, fgb_ai, rbi;
 966     real              *     rb;
 967     real              *     dadx;
 968
 969     natoms              = mdatoms->nr;
 970     ni0                 = 0;
 971     ni1                 = mdatoms->homenr;
 972     dadx                = fr->dadx;
 973
 974     aadata = (gmx_allvsallgb2_data_t *)work;
 975
 976     n  = 0;
 977     rb = born->work;
 978
 979     /* Loop to get the proper form for the Born radius term */
 980     if (gb_algorithm == egbSTILL)
 981     {
 982         for (i = 0; i < natoms; i++)
 983         {
 984             rbi   = born->bRad[i];
 985             rb[i] = (2 * rbi * rbi * fr->dvda[i])/ONE_4PI_EPS0;
 986         }
 987     }
 988     else if (gb_algorithm == egbHCT)
 989     {
 990         for (i = 0; i < natoms; i++)
 991         {
 992             rbi   = born->bRad[i];
 993             rb[i] = rbi * rbi * fr->dvda[i];
 994         }
 995     }
 996     else if (gb_algorithm == egbOBC)
 997     {
 998         for (idx = 0; idx < natoms; idx++)
 999         {
1000             rbi     = born->bRad[idx];
1001             rb[idx] = rbi * rbi * born->drobc[idx] * fr->dvda[idx];
1002         }
1003     }
1004
1005     for (i = ni0; i < ni1; i++)
1006     {
1007         /* We assume shifts are NOT used for all-vs-all interactions */
1008
1009         /* Load i atom data */
1010         ix                = x[3*i];
1011         iy                = x[3*i+1];
1012         iz                = x[3*i+2];
1013
1014         fix               = 0;
1015         fiy               = 0;
1016         fiz               = 0;
1017
1018         rbai              = rb[i];
1019
1020         /* Load limits for loop over neighbors */
1021         nj0              = aadata->jindex_gb[3*i];
1022         nj1              = aadata->jindex_gb[3*i+1];
1023         nj2              = aadata->jindex_gb[3*i+2];
1024
1025         mask             = aadata->exclusion_mask_gb[i];
1026
1027         /* Prologue part, including exclusion mask */
1028         for (j = nj0; j < nj1; j++, mask++)
1029         {
1030             if (*mask != 0)
1031             {
1032                 k = j%natoms;
1033
1034                 /* load j atom coordinates */
1035                 jx                = x[3*k];
1036                 jy                = x[3*k+1];
1037                 jz                = x[3*k+2];
1038
1039                 /* Calculate distance */
1040                 dx                = ix - jx;
1041                 dy                = iy - jy;
1042                 dz                = iz - jz;
1043
1044                 rbaj              = rb[k];
1045
1046                 fgb     = rbai*dadx[n++];
1047                 fgb_ai  = rbaj*dadx[n++];
1048
1049                 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1050                 fgb     = fgb + fgb_ai;
1051
1052                 tx      = fgb * dx;
1053                 ty      = fgb * dy;
1054                 tz      = fgb * dz;
1055
1056                 fix     = fix + tx;
1057                 fiy     = fiy + ty;
1058                 fiz     = fiz + tz;
1059
1060                 /* Update force on atom aj */
1061                 f[3*k]   = f[3*k] - tx;
1062                 f[3*k+1] = f[3*k+1] - ty;
1063                 f[3*k+2] = f[3*k+2] - tz;
1064             }
1065         }
1066
1067         /* Main part, no exclusions */
1068         for (j = nj1; j < nj2; j++)
1069         {
1070             k = j%natoms;
1071
1072             /* load j atom coordinates */
1073             jx                = x[3*k];
1074             jy                = x[3*k+1];
1075             jz                = x[3*k+2];
1076
1077             /* Calculate distance */
1078             dx                = ix - jx;
1079             dy                = iy - jy;
1080             dz                = iz - jz;
1081
1082             rbaj              = rb[k];
1083
1084             fgb     = rbai*dadx[n++];
1085             fgb_ai  = rbaj*dadx[n++];
1086
1087             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1088             fgb     = fgb + fgb_ai;
1089
1090             tx      = fgb * dx;
1091             ty      = fgb * dy;
1092             tz      = fgb * dz;
1093
1094             fix     = fix + tx;
1095             fiy     = fiy + ty;
1096             fiz     = fiz + tz;
1097
1098             /* Update force on atom aj */
1099             f[3*k]   = f[3*k] - tx;
1100             f[3*k+1] = f[3*k+1] - ty;
1101             f[3*k+2] = f[3*k+2] - tz;
1102         }
1103         /* Update force and shift forces on atom ai */
1104         f[3*i]   = f[3*i] + fix;
1105         f[3*i+1] = f[3*i+1] + fiy;
1106         f[3*i+2] = f[3*i+2] + fiz;
1107     }
1108
1109     return 0;
1110 }