src/mdlib/vsite.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <stdio.h>
  43 #include "typedefs.h"
  44 #include "vsite.h"
  45 #include "macros.h"
  46 #include "smalloc.h"
  47 #include "nrnb.h"
  48 #include "vec.h"
  49 #include "mvdata.h"
  50 #include "network.h"
  51 #include "mshift.h"
  52 #include "pbc.h"
  53 #include "domdec.h"
  54 #include "partdec.h"
  55 #include "mtop_util.h"
  56 #include "gmx_omp_nthreads.h"
  57 #include "gmx_omp.h"
  58
  59 /* Routines to send/recieve coordinates and force
  60  * of constructing atoms.
  61  */
  62
  63 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
  64 {
  65         rvec *sendbuf;
  66         rvec *recvbuf;
  67         int i,ia;
  68
  69         sendbuf = vsitecomm->send_buf;
  70         recvbuf = vsitecomm->recv_buf;
  71
  72
  73                 /* Prepare pulse left by copying to send buffer */
  74                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
  75                 {
  76                         ia = vsitecomm->left_export_construct[i];
  77                         copy_rvec(x[ia],sendbuf[i]);
  78                 }
  79
  80                 /* Pulse coordinates left */
  81                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
  82
  83                 /* Copy from receive buffer to coordinate array */
  84                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
  85                 {
  86                         ia = vsitecomm->right_import_construct[i];
  87                         copy_rvec(recvbuf[i],x[ia]);
  88                 }
  89
  90                 /* Prepare pulse right by copying to send buffer */
  91                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
  92                 {
  93                         ia = vsitecomm->right_export_construct[i];
  94                         copy_rvec(x[ia],sendbuf[i]);
  95                 }
  96
  97                 /* Pulse coordinates right */
  98                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
  99
 100                 /* Copy from receive buffer to coordinate array */
 101                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 102                 {
 103                         ia = vsitecomm->left_import_construct[i];
 104                         copy_rvec(recvbuf[i],x[ia]);
 105                 }
 106 }
 107
 108
 109 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
 110 {
 111         rvec *sendbuf;
 112         rvec *recvbuf;
 113         int i,ia;
 114
 115         sendbuf = vsitecomm->send_buf;
 116         recvbuf = vsitecomm->recv_buf;
 117
 118                 /* Prepare pulse right by copying to send buffer */
 119                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 120                 {
 121                         ia = vsitecomm->right_import_construct[i];
 122                         copy_rvec(f[ia],sendbuf[i]);
 123                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 124                 }
 125
 126                 /* Pulse forces right */
 127                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
 128
 129                 /* Copy from receive buffer to coordinate array */
 130                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
 131                 {
 132                         ia = vsitecomm->left_export_construct[i];
 133                         rvec_inc(f[ia],recvbuf[i]);
 134                 }
 135
 136                 /* Prepare pulse left by copying to send buffer */
 137                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 138                 {
 139                         ia = vsitecomm->left_import_construct[i];
 140                         copy_rvec(f[ia],sendbuf[i]);
 141                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 142                 }
 143
 144                 /* Pulse coordinates left */
 145                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
 146
 147                 /* Copy from receive buffer to coordinate array */
 148                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
 149                 {
 150                         ia = vsitecomm->right_export_construct[i];
 151                         rvec_inc(f[ia],recvbuf[i]);
 152                 }
 153
 154         /* All forces are now on the home processors */
 155 }
 156
 157
 158 static void
 159 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
 160 {
 161         int i,ia;
 162
 163         for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 164         {
 165                 ia = vsitecomm->left_import_construct[i];
 166                 clear_rvec(f[ia]);
 167         }
 168         for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 169         {
 170                 ia = vsitecomm->right_import_construct[i];
 171                 clear_rvec(f[ia]);
 172         }
 173 }
 174
 175
 176
 177 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 178 {
 179   if (pbc) {
 180     return pbc_dx_aiuc(pbc,xi,xj,dx);
 181   }
 182   else {
 183     rvec_sub(xi,xj,dx);
 184     return CENTRAL;
 185   }
 186 }
 187
 188 /* Vsite construction routines */
 189
 190 static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
 191 {
 192   real b;
 193   rvec dx;
 194
 195   b=1.0-a;
 196   /* 1 flop */
 197
 198   if (pbc) {
 199     pbc_dx_aiuc(pbc,xj,xi,dx);
 200     x[XX] = xi[XX] + a*dx[XX];
 201     x[YY] = xi[YY] + a*dx[YY];
 202     x[ZZ] = xi[ZZ] + a*dx[ZZ];
 203   } else {
 204     x[XX] = b*xi[XX] + a*xj[XX];
 205     x[YY] = b*xi[YY] + a*xj[YY];
 206     x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
 207     /* 9 Flops */
 208   }
 209
 210   /* TOTAL: 10 flops */
 211 }
 212
 213 static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 214                           t_pbc *pbc)
 215 {
 216   real c;
 217   rvec dxj,dxk;
 218
 219   c=1.0-a-b;
 220   /* 2 flops */
 221
 222   if (pbc) {
 223     pbc_dx_aiuc(pbc,xj,xi,dxj);
 224     pbc_dx_aiuc(pbc,xk,xi,dxk);
 225     x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
 226     x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
 227     x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
 228   } else {
 229     x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
 230     x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
 231     x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
 232   /* 15 Flops */
 233   }
 234
 235   /* TOTAL: 17 flops */
 236 }
 237
 238 static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 239                             t_pbc *pbc)
 240 {
 241   rvec xij,xjk,temp;
 242   real c;
 243
 244   pbc_rvec_sub(pbc,xj,xi,xij);
 245   pbc_rvec_sub(pbc,xk,xj,xjk);
 246   /* 6 flops */
 247
 248   /* temp goes from i to a point on the line jk */
 249   temp[XX] = xij[XX] + a*xjk[XX];
 250   temp[YY] = xij[YY] + a*xjk[YY];
 251   temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
 252   /* 6 flops */
 253
 254   c=b*gmx_invsqrt(iprod(temp,temp));
 255   /* 6 + 10 flops */
 256
 257   x[XX] = xi[XX] + c*temp[XX];
 258   x[YY] = xi[YY] + c*temp[YY];
 259   x[ZZ] = xi[ZZ] + c*temp[ZZ];
 260   /* 6 Flops */
 261
 262   /* TOTAL: 34 flops */
 263 }
 264
 265 static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
 266 {
 267   rvec xij,xjk,xp;
 268   real a1,b1,c1,invdij;
 269
 270   pbc_rvec_sub(pbc,xj,xi,xij);
 271   pbc_rvec_sub(pbc,xk,xj,xjk);
 272   /* 6 flops */
 273
 274   invdij = gmx_invsqrt(iprod(xij,xij));
 275   c1 = invdij * invdij * iprod(xij,xjk);
 276   xp[XX] = xjk[XX] - c1*xij[XX];
 277   xp[YY] = xjk[YY] - c1*xij[YY];
 278   xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 279   a1 = a*invdij;
 280   b1 = b*gmx_invsqrt(iprod(xp,xp));
 281   /* 45 */
 282
 283   x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
 284   x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
 285   x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
 286   /* 12 Flops */
 287
 288   /* TOTAL: 63 flops */
 289 }
 290
 291 static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
 292                              real a,real b,real c,t_pbc *pbc)
 293 {
 294   rvec xij,xik,temp;
 295
 296   pbc_rvec_sub(pbc,xj,xi,xij);
 297   pbc_rvec_sub(pbc,xk,xi,xik);
 298   cprod(xij,xik,temp);
 299   /* 15 Flops */
 300
 301   x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
 302   x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
 303   x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
 304   /* 18 Flops */
 305
 306   /* TOTAL: 33 flops */
 307 }
 308
 309 static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 310                             real a,real b,real c,t_pbc *pbc)
 311 {
 312   rvec xij,xjk,xjl,temp;
 313   real d;
 314
 315   pbc_rvec_sub(pbc,xj,xi,xij);
 316   pbc_rvec_sub(pbc,xk,xj,xjk);
 317   pbc_rvec_sub(pbc,xl,xj,xjl);
 318   /* 9 flops */
 319
 320   /* temp goes from i to a point on the plane jkl */
 321   temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
 322   temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
 323   temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
 324   /* 12 flops */
 325
 326   d=c*gmx_invsqrt(iprod(temp,temp));
 327   /* 6 + 10 flops */
 328
 329   x[XX] = xi[XX] + d*temp[XX];
 330   x[YY] = xi[YY] + d*temp[YY];
 331   x[ZZ] = xi[ZZ] + d*temp[ZZ];
 332   /* 6 Flops */
 333
 334   /* TOTAL: 43 flops */
 335 }
 336
 337
 338 static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 339                              real a,real b,real c,t_pbc *pbc)
 340 {
 341     rvec xij,xik,xil,ra,rb,rja,rjb,rm;
 342     real d;
 343
 344     pbc_rvec_sub(pbc,xj,xi,xij);
 345     pbc_rvec_sub(pbc,xk,xi,xik);
 346     pbc_rvec_sub(pbc,xl,xi,xil);
 347     /* 9 flops */
 348
 349     ra[XX] = a*xik[XX];
 350     ra[YY] = a*xik[YY];
 351     ra[ZZ] = a*xik[ZZ];
 352
 353     rb[XX] = b*xil[XX];
 354     rb[YY] = b*xil[YY];
 355     rb[ZZ] = b*xil[ZZ];
 356
 357     /* 6 flops */
 358
 359     rvec_sub(ra,xij,rja);
 360     rvec_sub(rb,xij,rjb);
 361     /* 6 flops */
 362
 363     cprod(rja,rjb,rm);
 364     /* 9 flops */
 365
 366     d=c*gmx_invsqrt(norm2(rm));
 367     /* 5+5+1 flops */
 368
 369     x[XX] = xi[XX] + d*rm[XX];
 370     x[YY] = xi[YY] + d*rm[YY];
 371     x[ZZ] = xi[ZZ] + d*rm[ZZ];
 372     /* 6 Flops */
 373
 374     /* TOTAL: 47 flops */
 375 }
 376
 377
 378 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
 379                          rvec *x, t_pbc *pbc)
 380 {
 381   rvec xs,x1,dx;
 382   dvec dsum;
 383   int  n3,av,ai,i;
 384   real a;
 385
 386   n3 = 3*ip[ia[0]].vsiten.n;
 387   av = ia[1];
 388   ai = ia[2];
 389   copy_rvec(x[ai],x1);
 390   clear_dvec(dsum);
 391   for(i=3; i<n3; i+=3) {
 392     ai = ia[i+2];
 393     a = ip[ia[i]].vsiten.a;
 394     if (pbc) {
 395       pbc_dx_aiuc(pbc,x[ai],x1,dx);
 396     } else {
 397       rvec_sub(x[ai],x1,dx);
 398     }
 399     dsum[XX] += a*dx[XX];
 400     dsum[YY] += a*dx[YY];
 401     dsum[ZZ] += a*dx[ZZ];
 402     /* 9 Flops */
 403   }
 404
 405   x[av][XX] = x1[XX] + dsum[XX];
 406   x[av][YY] = x1[YY] + dsum[YY];
 407   x[av][ZZ] = x1[ZZ] + dsum[ZZ];
 408
 409   return n3;
 410 }
 411
 412
 413 void construct_vsites_thread(gmx_vsite_t *vsite,
 414                              rvec x[],t_nrnb *nrnb,
 415                              real dt,rvec *v,
 416                              t_iparams ip[],t_ilist ilist[],
 417                              t_pbc *pbc_null)
 418 {
 419     gmx_bool  bPBCAll;
 420     rvec      xpbc,xv,vv,dx;
 421     real      a1,b1,c1,inv_dt;
 422     int       i,inc,ii,nra,nr,tp,ftype;
 423     t_iatom   avsite,ai,aj,ak,al,pbc_atom;
 424     t_iatom   *ia;
 425     t_pbc     *pbc_null2;
 426     int       *vsite_pbc,ishift;
 427     rvec      reftmp,vtmp,rtmp;
 428
 429     if (v != NULL)
 430     {
 431         inv_dt = 1.0/dt;
 432     }
 433     else
 434     {
 435         inv_dt = 1.0;
 436     }
 437
 438     bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
 439
 440     pbc_null2 = NULL;
 441     vsite_pbc = NULL;
 442     for(ftype=0; (ftype<F_NRE); ftype++)
 443     {
 444         if ((interaction_function[ftype].flags & IF_VSITE) &&
 445             ilist[ftype].nr > 0)
 446         {
 447             nra    = interaction_function[ftype].nratoms;
 448             inc    = 1 + nra;
 449             nr     = ilist[ftype].nr;
 450             ia     = ilist[ftype].iatoms;
 451
 452             if (bPBCAll)
 453             {
 454                 pbc_null2 = pbc_null;
 455             }
 456             else if (pbc_null != NULL)
 457             {
 458                 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
 459             }
 460
 461             for(i=0; i<nr; )
 462             {
 463                 tp   = ia[0];
 464
 465                 /* The vsite and constructing atoms */
 466                 avsite = ia[1];
 467                 ai   = ia[2];
 468                 aj   = ia[3];
 469
 470                 /* Constants for constructing vsites */
 471                 a1   = ip[tp].vsite.a;
 472                 /* Check what kind of pbc we need to use */
 473                 if (bPBCAll)
 474                 {
 475                     /* No charge groups, vsite follows its own pbc */
 476                     pbc_atom = avsite;
 477                     copy_rvec(x[avsite],xpbc);
 478                 }
 479                 else if (vsite_pbc != NULL)
 480                 {
 481                     pbc_atom = vsite_pbc[i/(1+nra)];
 482                     if (pbc_atom > -2)
 483                     {
 484                         if (pbc_atom >= 0)
 485                         {
 486                             /* We need to copy the coordinates here,
 487                              * single for single atom cg's pbc_atom
 488                              * is the vsite itself.
 489                              */
 490                             copy_rvec(x[pbc_atom],xpbc);
 491                         }
 492                         pbc_null2 = pbc_null;
 493                     }
 494                     else
 495                     {
 496                         pbc_null2 = NULL;
 497                     }
 498                 }
 499                 else
 500                 {
 501                     pbc_atom = -2;
 502                 }
 503                 /* Copy the old position */
 504                 copy_rvec(x[avsite],xv);
 505
 506                 /* Construct the vsite depending on type */
 507                 switch (ftype)
 508                 {
 509                 case F_VSITE2:
 510                     constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
 511                     break;
 512                 case F_VSITE3:
 513                     ak = ia[4];
 514                     b1 = ip[tp].vsite.b;
 515                     constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 516                     break;
 517                 case F_VSITE3FD:
 518                     ak = ia[4];
 519                     b1 = ip[tp].vsite.b;
 520                     constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 521                     break;
 522                 case F_VSITE3FAD:
 523                     ak = ia[4];
 524                     b1 = ip[tp].vsite.b;
 525                     constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 526                     break;
 527                 case F_VSITE3OUT:
 528                     ak = ia[4];
 529                     b1 = ip[tp].vsite.b;
 530                     c1 = ip[tp].vsite.c;
 531                     constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
 532                     break;
 533                 case F_VSITE4FD:
 534                     ak = ia[4];
 535                     al = ia[5];
 536                     b1 = ip[tp].vsite.b;
 537                     c1 = ip[tp].vsite.c;
 538                     constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 539                                     pbc_null2);
 540                     break;
 541                 case F_VSITE4FDN:
 542                     ak = ia[4];
 543                     al = ia[5];
 544                     b1 = ip[tp].vsite.b;
 545                     c1 = ip[tp].vsite.c;
 546                     constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 547                                      pbc_null2);
 548                     break;
 549                 case F_VSITEN:
 550                     inc = constr_vsiten(ia,ip,x,pbc_null2);
 551                     break;
 552                 default:
 553                     gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
 554                               ftype,__FILE__,__LINE__);
 555                 }
 556
 557                 if (pbc_atom >= 0)
 558                 {
 559                     /* Match the pbc of this vsite to the rest of its charge group */
 560                     ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
 561                     if (ishift != CENTRAL)
 562                     {
 563                         rvec_add(xpbc,dx,x[avsite]);
 564                     }
 565                 }
 566                 if (v != NULL)
 567                 {
 568                     /* Calculate velocity of vsite... */
 569                     rvec_sub(x[avsite],xv,vv);
 570                     svmul(inv_dt,vv,v[avsite]);
 571                 }
 572
 573                 /* Increment loop variables */
 574                 i  += inc;
 575                 ia += inc;
 576             }
 577         }
 578     }
 579 }
 580
 581 void construct_vsites(FILE *log,gmx_vsite_t *vsite,
 582                       rvec x[],t_nrnb *nrnb,
 583                       real dt,rvec *v,
 584                       t_iparams ip[],t_ilist ilist[],
 585                       int ePBC,gmx_bool bMolPBC,t_graph *graph,
 586                       t_commrec *cr,matrix box)
 587 {
 588     t_pbc     pbc,*pbc_null;
 589     gmx_bool  bDomDec;
 590     int       nthreads;
 591
 592     bDomDec = cr && DOMAINDECOMP(cr);
 593
 594     /* We only need to do pbc when we have inter-cg vsites */
 595     if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite)
 596     {
 597         /* This is wasting some CPU time as we now do this multiple times
 598          * per MD step. But how often do we have vsites with full pbc?
 599          */
 600         pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
 601     }
 602     else
 603     {
 604         pbc_null = NULL;
 605     }
 606
 607     if (cr)
 608     {
 609         if (bDomDec)
 610         {
 611             dd_move_x_vsites(cr->dd,box,x);
 612         }
 613         else if (vsite->bPDvsitecomm)
 614         {
 615             /* I'm not sure whether the periodicity and shift are guaranteed
 616              * to be consistent between different nodes when running e.g. polymers
 617              * in parallel. In this special case we thus unshift/shift,
 618              * but only when necessary. This is to make sure the coordinates
 619              * we move don't end up a box away...
 620              */
 621             if (graph != NULL)
 622             {
 623                 unshift_self(graph,box,x);
 624             }
 625
 626             move_construct_x(vsite->vsitecomm,x,cr);
 627
 628             if (graph != NULL)
 629             {
 630                 shift_self(graph,box,x);
 631             }
 632         }
 633     }
 634
 635     if (vsite->nthreads == 1)
 636     {
 637         construct_vsites_thread(vsite,
 638                                 x,nrnb,dt,v,
 639                                 ip,ilist,
 640                                 pbc_null);
 641     }
 642     else
 643     {
 644 #pragma omp parallel num_threads(vsite->nthreads)
 645         {
 646             construct_vsites_thread(vsite,
 647                                     x,nrnb,dt,v,
 648                                     ip,vsite->tdata[gmx_omp_get_thread_num()].ilist,
 649                                     pbc_null);
 650         }
 651         /* Now we can construct the vsites that might depend on other vsites */
 652         construct_vsites_thread(vsite,
 653                                 x,nrnb,dt,v,
 654                                 ip,vsite->tdata[vsite->nthreads].ilist,
 655                                 pbc_null);
 656     }
 657 }
 658
 659 static void spread_vsite2(t_iatom ia[],real a,
 660                             rvec x[],rvec f[],rvec fshift[],
 661                             t_pbc *pbc,t_graph *g)
 662 {
 663   rvec    fi,fj,dx;
 664   t_iatom av,ai,aj;
 665   ivec    di;
 666   real    b;
 667   int     siv,sij;
 668
 669   av = ia[1];
 670   ai = ia[2];
 671   aj = ia[3];
 672
 673   svmul(1-a,f[av],fi);
 674   svmul(  a,f[av],fj);
 675   /* 7 flop */
 676
 677   rvec_inc(f[ai],fi);
 678   rvec_inc(f[aj],fj);
 679   /* 6 Flops */
 680
 681   if (g) {
 682     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
 683     siv = IVEC2IS(di);
 684     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 685     sij = IVEC2IS(di);
 686   } else if (pbc) {
 687     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 688     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 689   } else {
 690     siv = CENTRAL;
 691     sij = CENTRAL;
 692   }
 693
 694   if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
 695     rvec_inc(fshift[siv],f[av]);
 696     rvec_dec(fshift[CENTRAL],fi);
 697     rvec_dec(fshift[sij],fj);
 698   }
 699
 700   /* TOTAL: 13 flops */
 701 }
 702
 703 void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
 704                            gmx_mtop_t *mtop,rvec x[])
 705 {
 706   int as,mb,mol;
 707   gmx_molblock_t *molb;
 708   gmx_moltype_t  *molt;
 709
 710   as = 0;
 711   for(mb=0; mb<mtop->nmolblock; mb++) {
 712     molb = &mtop->molblock[mb];
 713     molt = &mtop->moltype[molb->type];
 714     for(mol=0; mol<molb->nmol; mol++) {
 715       construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
 716                        mtop->ffparams.iparams,molt->ilist,
 717                        epbcNONE,TRUE,NULL,NULL,NULL);
 718       as += molt->atoms.nr;
 719     }
 720   }
 721 }
 722
 723 static void spread_vsite3(t_iatom ia[],real a,real b,
 724                             rvec x[],rvec f[],rvec fshift[],
 725                             t_pbc *pbc,t_graph *g)
 726 {
 727   rvec    fi,fj,fk,dx;
 728   atom_id av,ai,aj,ak;
 729   ivec    di;
 730   int     siv,sij,sik;
 731
 732   av = ia[1];
 733   ai = ia[2];
 734   aj = ia[3];
 735   ak = ia[4];
 736
 737   svmul(1-a-b,f[av],fi);
 738   svmul(    a,f[av],fj);
 739   svmul(    b,f[av],fk);
 740   /* 11 flops */
 741
 742   rvec_inc(f[ai],fi);
 743   rvec_inc(f[aj],fj);
 744   rvec_inc(f[ak],fk);
 745   /* 9 Flops */
 746
 747   if (g) {
 748     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
 749     siv = IVEC2IS(di);
 750     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 751     sij = IVEC2IS(di);
 752     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
 753     sik = IVEC2IS(di);
 754   } else if (pbc) {
 755     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 756     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 757     sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
 758   } else {
 759     siv = CENTRAL;
 760     sij = CENTRAL;
 761     sik = CENTRAL;
 762   }
 763
 764   if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
 765     rvec_inc(fshift[siv],f[av]);
 766     rvec_dec(fshift[CENTRAL],fi);
 767     rvec_dec(fshift[sij],fj);
 768     rvec_dec(fshift[sik],fk);
 769   }
 770
 771   /* TOTAL: 20 flops */
 772 }
 773
 774 static void spread_vsite3FD(t_iatom ia[],real a,real b,
 775                             rvec x[],rvec f[],rvec fshift[],
 776                             gmx_bool VirCorr,matrix dxdf,
 777                             t_pbc *pbc,t_graph *g)
 778 {
 779   real fx,fy,fz,c,invl,fproj,a1;
 780   rvec xvi,xij,xjk,xix,fv,temp;
 781   t_iatom av,ai,aj,ak;
 782   int     svi,sji,skj,d;
 783   ivec    di;
 784
 785   av = ia[1];
 786   ai = ia[2];
 787   aj = ia[3];
 788   ak = ia[4];
 789   copy_rvec(f[av],fv);
 790
 791   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 792   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 793   /* 6 flops */
 794
 795   /* xix goes from i to point x on the line jk */
 796   xix[XX]=xij[XX]+a*xjk[XX];
 797   xix[YY]=xij[YY]+a*xjk[YY];
 798   xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
 799   /* 6 flops */
 800
 801   invl=gmx_invsqrt(iprod(xix,xix));
 802   c=b*invl;
 803   /* 4 + ?10? flops */
 804
 805   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 806
 807   temp[XX]=c*(fv[XX]-fproj*xix[XX]);
 808   temp[YY]=c*(fv[YY]-fproj*xix[YY]);
 809   temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
 810   /* 16 */
 811
 812   /* c is already calculated in constr_vsite3FD
 813      storing c somewhere will save 26 flops!     */
 814
 815   a1=1-a;
 816   f[ai][XX] += fv[XX] - temp[XX];
 817   f[ai][YY] += fv[YY] - temp[YY];
 818   f[ai][ZZ] += fv[ZZ] - temp[ZZ];
 819   f[aj][XX] += a1*temp[XX];
 820   f[aj][YY] += a1*temp[YY];
 821   f[aj][ZZ] += a1*temp[ZZ];
 822   f[ak][XX] += a*temp[XX];
 823   f[ak][YY] += a*temp[YY];
 824   f[ak][ZZ] += a*temp[ZZ];
 825   /* 19 Flops */
 826
 827   if (g) {
 828     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 829     svi = IVEC2IS(di);
 830     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 831     sji = IVEC2IS(di);
 832     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 833     skj = IVEC2IS(di);
 834   } else if (pbc) {
 835     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 836   } else {
 837     svi = CENTRAL;
 838   }
 839
 840   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 841     rvec_dec(fshift[svi],fv);
 842     fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
 843     fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
 844     fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
 845     fshift[    sji][XX] += temp[XX];
 846     fshift[    sji][YY] += temp[YY];
 847     fshift[    sji][ZZ] += temp[ZZ];
 848     fshift[    skj][XX] += a*temp[XX];
 849     fshift[    skj][YY] += a*temp[YY];
 850     fshift[    skj][ZZ] += a*temp[ZZ];
 851   }
 852
 853     if (VirCorr)
 854     {
 855         /* When VirCorr=TRUE, the virial for the current forces is not
 856          * calculated from the redistributed forces. This means that
 857          * the effect of non-linear virtual site constructions on the virial
 858          * needs to be added separately. This contribution can be calculated
 859          * in many ways, but the simplest and cheapest way is to use
 860          * the first constructing atom ai as a reference position in space:
 861          * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
 862          */
 863         rvec xiv;
 864         int  i,j;
 865
 866         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 867
 868         for(i=0; i<DIM; i++)
 869         {
 870             for(j=0; j<DIM; j++)
 871             {
 872                 /* As xix is a linear combination of j and k, use that here */
 873                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
 874             }
 875         }
 876     }
 877
 878   /* TOTAL: 61 flops */
 879 }
 880
 881 static void spread_vsite3FAD(t_iatom ia[],real a,real b,
 882                              rvec x[],rvec f[],rvec fshift[],
 883                              gmx_bool VirCorr,matrix dxdf,
 884                              t_pbc *pbc,t_graph *g)
 885 {
 886   rvec    xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
 887   real    a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
 888   t_iatom av,ai,aj,ak;
 889   int     svi,sji,skj,d;
 890   ivec    di;
 891
 892   av = ia[1];
 893   ai = ia[2];
 894   aj = ia[3];
 895   ak = ia[4];
 896   copy_rvec(f[ia[1]],fv);
 897
 898   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 899   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 900   /* 6 flops */
 901
 902   invdij = gmx_invsqrt(iprod(xij,xij));
 903   invdij2 = invdij * invdij;
 904   c1 = iprod(xij,xjk) * invdij2;
 905   xperp[XX] = xjk[XX] - c1*xij[XX];
 906   xperp[YY] = xjk[YY] - c1*xij[YY];
 907   xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 908   /* xperp in plane ijk, perp. to ij */
 909   invdp = gmx_invsqrt(iprod(xperp,xperp));
 910   a1 = a*invdij;
 911   b1 = b*invdp;
 912   /* 45 flops */
 913
 914   /* a1, b1 and c1 are already calculated in constr_vsite3FAD
 915      storing them somewhere will save 45 flops!     */
 916
 917   fproj=iprod(xij  ,fv)*invdij2;
 918   svmul(fproj,                      xij,  Fpij); /* proj. f on xij */
 919   svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
 920   svmul(b1*fproj,                   xperp,f3);
 921   /* 23 flops */
 922
 923   rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
 924   rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
 925   for (d=0; (d<DIM); d++) {
 926     f1[d]*=a1;
 927     f2[d]*=b1;
 928   }
 929   /* 12 flops */
 930
 931   c2=1+c1;
 932   f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
 933   f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
 934   f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
 935   f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
 936   f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
 937   f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
 938   f[ak][XX] +=                      f2[XX];
 939   f[ak][YY] +=                      f2[YY];
 940   f[ak][ZZ] +=                      f2[ZZ];
 941   /* 30 Flops */
 942
 943   if (g) {
 944     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 945     svi = IVEC2IS(di);
 946     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 947     sji = IVEC2IS(di);
 948     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 949     skj = IVEC2IS(di);
 950   } else if (pbc) {
 951     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 952   } else {
 953     svi = CENTRAL;
 954   }
 955
 956   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 957     rvec_dec(fshift[svi],fv);
 958     fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
 959     fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
 960     fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
 961     fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
 962     fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
 963     fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
 964     fshift[    skj][XX] +=                          f2[XX];
 965     fshift[    skj][YY] +=                          f2[YY];
 966     fshift[    skj][ZZ] +=                          f2[ZZ];
 967   }
 968
 969     if (VirCorr)
 970     {
 971         rvec xiv;
 972         int  i,j;
 973
 974         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 975
 976         for(i=0; i<DIM; i++)
 977         {
 978             for(j=0; j<DIM; j++)
 979             {
 980                 /* Note that xik=xij+xjk, so we have to add xij*f2 */
 981                 dxdf[i][j] +=
 982                     - xiv[i]*fv[j]
 983                     + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
 984                     + xjk[i]*f2[j];
 985             }
 986         }
 987     }
 988
 989   /* TOTAL: 113 flops */
 990 }
 991
 992 static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
 993                              rvec x[],rvec f[],rvec fshift[],
 994                              gmx_bool VirCorr,matrix dxdf,
 995                              t_pbc *pbc,t_graph *g)
 996 {
 997   rvec    xvi,xij,xik,fv,fj,fk;
 998   real    cfx,cfy,cfz;
 999   atom_id av,ai,aj,ak;
1000   ivec    di;
1001   int     svi,sji,ski;
1002
1003   av = ia[1];
1004   ai = ia[2];
1005   aj = ia[3];
1006   ak = ia[4];
1007
1008   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1009   ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1010   /* 6 Flops */
1011
1012   copy_rvec(f[av],fv);
1013
1014   cfx = c*fv[XX];
1015   cfy = c*fv[YY];
1016   cfz = c*fv[ZZ];
1017   /* 3 Flops */
1018
1019   fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
1020   fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
1021   fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
1022
1023   fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
1024   fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
1025   fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
1026   /* 30 Flops */
1027
1028   f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
1029   f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
1030   f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1031   rvec_inc(f[aj],fj);
1032   rvec_inc(f[ak],fk);
1033   /* 15 Flops */
1034
1035   if (g) {
1036     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1037     svi = IVEC2IS(di);
1038     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1039     sji = IVEC2IS(di);
1040     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1041     ski = IVEC2IS(di);
1042   } else if (pbc) {
1043     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1044   } else {
1045     svi = CENTRAL;
1046   }
1047
1048   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
1049     rvec_dec(fshift[svi],fv);
1050     fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
1051     fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
1052     fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1053     rvec_inc(fshift[sji],fj);
1054     rvec_inc(fshift[ski],fk);
1055   }
1056
1057     if (VirCorr)
1058     {
1059         rvec xiv;
1060         int  i,j;
1061
1062         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1063
1064         for(i=0; i<DIM; i++)
1065         {
1066             for(j=0; j<DIM; j++)
1067             {
1068                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
1069             }
1070         }
1071     }
1072
1073   /* TOTAL: 54 flops */
1074 }
1075
1076 static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
1077                             rvec x[],rvec f[],rvec fshift[],
1078                             gmx_bool VirCorr,matrix dxdf,
1079                             t_pbc *pbc,t_graph *g)
1080 {
1081   real    d,invl,fproj,a1;
1082   rvec    xvi,xij,xjk,xjl,xix,fv,temp;
1083   atom_id av,ai,aj,ak,al;
1084   ivec    di;
1085   int     svi,sji,skj,slj,m;
1086
1087   av = ia[1];
1088   ai = ia[2];
1089   aj = ia[3];
1090   ak = ia[4];
1091   al = ia[5];
1092
1093   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1094   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
1095   slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
1096   /* 9 flops */
1097
1098   /* xix goes from i to point x on the plane jkl */
1099   for(m=0; m<DIM; m++)
1100     xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1101   /* 12 flops */
1102
1103   invl=gmx_invsqrt(iprod(xix,xix));
1104   d=c*invl;
1105   /* 4 + ?10? flops */
1106
1107   copy_rvec(f[av],fv);
1108
1109   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
1110
1111   for(m=0; m<DIM; m++)
1112     temp[m] = d*(fv[m] - fproj*xix[m]);
1113   /* 16 */
1114
1115   /* c is already calculated in constr_vsite3FD
1116      storing c somewhere will save 35 flops!     */
1117
1118   a1 = 1 - a - b;
1119   for(m=0; m<DIM; m++) {
1120     f[ai][m] += fv[m] - temp[m];
1121     f[aj][m] += a1*temp[m];
1122     f[ak][m] += a*temp[m];
1123     f[al][m] += b*temp[m];
1124   }
1125   /* 26 Flops */
1126
1127   if (g) {
1128     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1129     svi = IVEC2IS(di);
1130     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1131     sji = IVEC2IS(di);
1132     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
1133     skj = IVEC2IS(di);
1134     ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
1135     slj = IVEC2IS(di);
1136   } else if (pbc) {
1137     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1138   } else {
1139     svi = CENTRAL;
1140   }
1141
1142   if (fshift &&
1143       (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
1144     rvec_dec(fshift[svi],fv);
1145     for(m=0; m<DIM; m++) {
1146       fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1147       fshift[    sji][m] += temp[m];
1148       fshift[    skj][m] += a*temp[m];
1149       fshift[    slj][m] += b*temp[m];
1150     }
1151   }
1152
1153     if (VirCorr)
1154     {
1155         rvec xiv;
1156         int  i,j;
1157
1158         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1159
1160         for(i=0; i<DIM; i++)
1161         {
1162             for(j=0; j<DIM; j++)
1163             {
1164                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1165             }
1166         }
1167     }
1168
1169   /* TOTAL: 77 flops */
1170 }
1171
1172
1173 static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
1174                              rvec x[],rvec f[],rvec fshift[],
1175                              gmx_bool VirCorr,matrix dxdf,
1176                              t_pbc *pbc,t_graph *g)
1177 {
1178     rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
1179     rvec fv,fj,fk,fl;
1180     real invrm,denom;
1181     real cfx,cfy,cfz;
1182     ivec di;
1183     int  av,ai,aj,ak,al;
1184     int  svi,sij,sik,sil;
1185
1186     /* DEBUG: check atom indices */
1187     av = ia[1];
1188     ai = ia[2];
1189     aj = ia[3];
1190     ak = ia[4];
1191     al = ia[5];
1192
1193     copy_rvec(f[av],fv);
1194
1195     sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1196     sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1197     sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
1198     /* 9 flops */
1199
1200     ra[XX] = a*xik[XX];
1201     ra[YY] = a*xik[YY];
1202     ra[ZZ] = a*xik[ZZ];
1203
1204     rb[XX] = b*xil[XX];
1205     rb[YY] = b*xil[YY];
1206     rb[ZZ] = b*xil[ZZ];
1207
1208     /* 6 flops */
1209
1210     rvec_sub(ra,xij,rja);
1211     rvec_sub(rb,xij,rjb);
1212     rvec_sub(rb,ra,rab);
1213     /* 9 flops */
1214
1215     cprod(rja,rjb,rm);
1216     /* 9 flops */
1217
1218     invrm=gmx_invsqrt(norm2(rm));
1219     denom=invrm*invrm;
1220     /* 5+5+2 flops */
1221
1222     cfx = c*invrm*fv[XX];
1223     cfy = c*invrm*fv[YY];
1224     cfz = c*invrm*fv[ZZ];
1225     /* 6 Flops */
1226
1227     cprod(rm,rab,rt);
1228     /* 9 flops */
1229
1230     rt[XX] *= denom;
1231     rt[YY] *= denom;
1232     rt[ZZ] *= denom;
1233     /* 3flops */
1234
1235     fj[XX] = (        -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1236     fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + (        -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1237     fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + (        -rm[ZZ]*rt[ZZ]) * cfz;
1238     /* 30 flops */
1239
1240     cprod(rjb,rm,rt);
1241     /* 9 flops */
1242
1243     rt[XX] *= denom*a;
1244     rt[YY] *= denom*a;
1245     rt[ZZ] *= denom*a;
1246     /* 3flops */
1247
1248     fk[XX] = (          -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1249     fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1250     fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1251     /* 36 flops */
1252
1253     cprod(rm,rja,rt);
1254     /* 9 flops */
1255
1256     rt[XX] *= denom*b;
1257     rt[YY] *= denom*b;
1258     rt[ZZ] *= denom*b;
1259     /* 3flops */
1260
1261     fl[XX] = (          -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1262     fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1263     fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1264     /* 36 flops */
1265
1266     f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1267     f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1268     f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1269     rvec_inc(f[aj],fj);
1270     rvec_inc(f[ak],fk);
1271     rvec_inc(f[al],fl);
1272     /* 21 flops */
1273
1274     if (g) {
1275         ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
1276         svi = IVEC2IS(di);
1277         ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1278         sij = IVEC2IS(di);
1279         ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1280         sik = IVEC2IS(di);
1281         ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
1282         sil = IVEC2IS(di);
1283     } else if (pbc) {
1284         svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1285     } else {
1286         svi = CENTRAL;
1287     }
1288
1289     if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
1290         rvec_dec(fshift[svi],fv);
1291         fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1292         fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1293         fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1294         rvec_inc(fshift[sij],fj);
1295         rvec_inc(fshift[sik],fk);
1296         rvec_inc(fshift[sil],fl);
1297     }
1298
1299     if (VirCorr)
1300     {
1301         rvec xiv;
1302         int  i,j;
1303
1304         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1305
1306         for(i=0; i<DIM; i++)
1307         {
1308             for(j=0; j<DIM; j++)
1309             {
1310                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1311             }
1312         }
1313     }
1314
1315     /* Total: 207 flops (Yuck!) */
1316 }
1317
1318
1319 static int spread_vsiten(t_iatom ia[],t_iparams ip[],
1320                          rvec x[],rvec f[],rvec fshift[],
1321                          t_pbc *pbc,t_graph *g)
1322 {
1323   rvec xv,dx,fi;
1324   int  n3,av,i,ai;
1325   real a;
1326   ivec di;
1327   int  siv;
1328
1329   n3 = 3*ip[ia[0]].vsiten.n;
1330   av = ia[1];
1331   copy_rvec(x[av],xv);
1332
1333   for(i=0; i<n3; i+=3) {
1334     ai = ia[i+2];
1335     if (g) {
1336       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
1337       siv = IVEC2IS(di);
1338     } else if (pbc) {
1339       siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
1340     } else {
1341       siv = CENTRAL;
1342     }
1343     a = ip[ia[i]].vsiten.a;
1344     svmul(a,f[av],fi);
1345     rvec_inc(f[ai],fi);
1346     if (fshift && siv != CENTRAL) {
1347       rvec_inc(fshift[siv],fi);
1348       rvec_dec(fshift[CENTRAL],fi);
1349     }
1350     /* 6 Flops */
1351   }
1352
1353   return n3;
1354 }
1355
1356
1357 static int vsite_count(const t_ilist *ilist,int ftype)
1358 {
1359     if (ftype == F_VSITEN)
1360     {
1361         return ilist[ftype].nr/3;
1362     }
1363     else
1364     {
1365         return ilist[ftype].nr/(1 + interaction_function[ftype].nratoms);
1366     }
1367 }
1368
1369 static void spread_vsite_f_thread(gmx_vsite_t *vsite,
1370                                   rvec x[],rvec f[],rvec *fshift,
1371                                   gmx_bool VirCorr,matrix dxdf,
1372                                   t_iparams ip[],t_ilist ilist[],
1373                                   t_graph *g,t_pbc *pbc_null)
1374 {
1375     gmx_bool  bPBCAll;
1376     real      a1,b1,c1;
1377     int       i,inc,m,nra,nr,tp,ftype;
1378     t_iatom   *ia;
1379     t_pbc     *pbc_null2;
1380     int       *vsite_pbc;
1381
1382     if (VirCorr)
1383     {
1384         clear_mat(dxdf);
1385     }
1386
1387     bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
1388
1389     /* this loop goes backwards to be able to build *
1390      * higher type vsites from lower types         */
1391     pbc_null2 = NULL;
1392     vsite_pbc = NULL;
1393     for(ftype=F_NRE-1; (ftype>=0); ftype--)
1394     {
1395         if ((interaction_function[ftype].flags & IF_VSITE) &&
1396             ilist[ftype].nr > 0)
1397         {
1398             nra    = interaction_function[ftype].nratoms;
1399             inc    = 1 + nra;
1400             nr     = ilist[ftype].nr;
1401             ia     = ilist[ftype].iatoms;
1402
1403             if (bPBCAll)
1404             {
1405                 pbc_null2 = pbc_null;
1406             }
1407             else if (pbc_null != NULL)
1408             {
1409                 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1410             }
1411
1412             for(i=0; i<nr; )
1413             {
1414                 if (vsite_pbc != NULL)
1415                 {
1416                     if (vsite_pbc[i/(1+nra)] > -2)
1417                     {
1418                         pbc_null2 = pbc_null;
1419                     }
1420                     else
1421                     {
1422                         pbc_null2 = NULL;
1423                     }
1424                 }
1425
1426                 tp   = ia[0];
1427
1428                 /* Constants for constructing */
1429                 a1   = ip[tp].vsite.a;
1430                 /* Construct the vsite depending on type */
1431                 switch (ftype)
1432                 {
1433                 case F_VSITE2:
1434                     spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
1435                     break;
1436                 case F_VSITE3:
1437                     b1 = ip[tp].vsite.b;
1438                     spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
1439                     break;
1440                 case F_VSITE3FD:
1441                     b1 = ip[tp].vsite.b;
1442                     spread_vsite3FD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1443                     break;
1444                 case F_VSITE3FAD:
1445                     b1 = ip[tp].vsite.b;
1446                     spread_vsite3FAD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1447                     break;
1448                 case F_VSITE3OUT:
1449                     b1 = ip[tp].vsite.b;
1450                     c1 = ip[tp].vsite.c;
1451                     spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1452                     break;
1453                 case F_VSITE4FD:
1454                     b1 = ip[tp].vsite.b;
1455                     c1 = ip[tp].vsite.c;
1456                     spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1457                     break;
1458                 case F_VSITE4FDN:
1459                     b1 = ip[tp].vsite.b;
1460                     c1 = ip[tp].vsite.c;
1461                     spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1462                     break;
1463                 case F_VSITEN:
1464                     inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
1465                     break;
1466                 default:
1467                     gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
1468                               ftype,__FILE__,__LINE__);
1469                 }
1470                 clear_rvec(f[ia[1]]);
1471
1472                 /* Increment loop variables */
1473                 i  += inc;
1474                 ia += inc;
1475             }
1476         }
1477     }
1478 }
1479
1480 void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
1481                     rvec x[],rvec f[],rvec *fshift,
1482                     gmx_bool VirCorr,matrix vir,
1483                     t_nrnb *nrnb,t_idef *idef,
1484                     int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
1485                     t_commrec *cr)
1486 {
1487     t_pbc pbc,*pbc_null;
1488     int   th;
1489
1490     /* We only need to do pbc when we have inter-cg vsites */
1491     if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite)
1492     {
1493         /* This is wasting some CPU time as we now do this multiple times
1494          * per MD step. But how often do we have vsites with full pbc?
1495          */
1496         pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
1497     }
1498     else
1499     {
1500         pbc_null = NULL;
1501     }
1502
1503     if (DOMAINDECOMP(cr))
1504     {
1505         dd_clear_f_vsites(cr->dd,f);
1506     }
1507     else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1508     {
1509         pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
1510     }
1511
1512     if (vsite->nthreads == 1)
1513     {
1514         spread_vsite_f_thread(vsite,
1515                               x,f,fshift,
1516                               VirCorr,vsite->tdata[0].dxdf,
1517                               idef->iparams,idef->il,
1518                               g,pbc_null);
1519     }
1520     else
1521     {
1522         /* First spread the vsites that might depend on other vsites */
1523         spread_vsite_f_thread(vsite,
1524                               x,f,fshift,
1525                               VirCorr,vsite->tdata[vsite->nthreads].dxdf,
1526                               idef->iparams,
1527                               vsite->tdata[vsite->nthreads].ilist,
1528                               g,pbc_null);
1529
1530 #pragma omp parallel num_threads(vsite->nthreads)
1531         {
1532             int  thread;
1533             rvec *fshift_t;
1534
1535             thread = gmx_omp_get_thread_num();
1536
1537             if (thread == 0 || fshift == NULL)
1538             {
1539                 fshift_t = fshift;
1540             }
1541             else
1542             {
1543                 fshift_t = vsite->tdata[thread].fshift;
1544             }
1545
1546             spread_vsite_f_thread(vsite,
1547                                   x,f,fshift_t,
1548                                   VirCorr,vsite->tdata[thread].dxdf,
1549                                   idef->iparams,
1550                                   vsite->tdata[thread].ilist,
1551                                   g,pbc_null);
1552         }
1553
1554         if (fshift != NULL)
1555         {
1556             int i;
1557
1558             for(th=1; th<vsite->nthreads; th++)
1559             {
1560                 for(i=0; i<SHIFTS; i++)
1561                 {
1562                     rvec_inc(fshift[i],vsite->tdata[th].fshift[i]);
1563                 }
1564             }
1565         }
1566     }
1567
1568     if (VirCorr)
1569     {
1570         int i,j;
1571
1572         for(th=0; th<(vsite->nthreads==1 ? 1 : vsite->nthreads+1); th++)
1573         {
1574             for(i=0; i<DIM; i++)
1575             {
1576                 for(j=0; j<DIM; j++)
1577                 {
1578                     vir[i][j] += -0.5*vsite->tdata[th].dxdf[i][j];
1579                 }
1580             }
1581         }
1582     }
1583
1584     if (DOMAINDECOMP(cr))
1585     {
1586         dd_move_f_vsites(cr->dd,f,fshift);
1587     }
1588     else if (vsite->bPDvsitecomm)
1589     {
1590         /* We only move forces here, and they are independent of shifts */
1591         move_construct_f(vsite->vsitecomm,f,cr);
1592     }
1593
1594     inc_nrnb(nrnb,eNR_VSITE2,   vsite_count(idef->il,F_VSITE2));
1595     inc_nrnb(nrnb,eNR_VSITE3,   vsite_count(idef->il,F_VSITE3));
1596     inc_nrnb(nrnb,eNR_VSITE3FD, vsite_count(idef->il,F_VSITE3FD));
1597     inc_nrnb(nrnb,eNR_VSITE3FAD,vsite_count(idef->il,F_VSITE3FAD));
1598     inc_nrnb(nrnb,eNR_VSITE3OUT,vsite_count(idef->il,F_VSITE3OUT));
1599     inc_nrnb(nrnb,eNR_VSITE4FD, vsite_count(idef->il,F_VSITE4FD));
1600     inc_nrnb(nrnb,eNR_VSITE4FDN,vsite_count(idef->il,F_VSITE4FDN));
1601     inc_nrnb(nrnb,eNR_VSITEN,   vsite_count(idef->il,F_VSITEN));
1602 }
1603
1604 static int *atom2cg(t_block *cgs)
1605 {
1606   int *a2cg,cg,i;
1607
1608   snew(a2cg,cgs->index[cgs->nr]);
1609   for(cg=0; cg<cgs->nr; cg++) {
1610     for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
1611       a2cg[i] = cg;
1612   }
1613
1614   return a2cg;
1615 }
1616
1617 static int count_intercg_vsite(gmx_mtop_t *mtop,
1618                                gmx_bool *bHaveChargeGroups)
1619 {
1620     int  mb,mt,ftype,nral,i,cg,a;
1621     gmx_molblock_t *molb;
1622     gmx_moltype_t *molt;
1623     int  *a2cg;
1624     t_ilist *il;
1625     t_iatom *ia;
1626     int  n_intercg_vsite;
1627
1628     *bHaveChargeGroups = FALSE;
1629
1630     n_intercg_vsite = 0;
1631     for(mb=0; mb<mtop->nmolblock; mb++)
1632     {
1633         molb = &mtop->molblock[mb];
1634         molt = &mtop->moltype[molb->type];
1635
1636         if (molt->cgs.nr < molt->atoms.nr)
1637         {
1638             *bHaveChargeGroups = TRUE;
1639         }
1640
1641         a2cg = atom2cg(&molt->cgs);
1642         for(ftype=0; ftype<F_NRE; ftype++)
1643         {
1644             if (interaction_function[ftype].flags & IF_VSITE)
1645             {
1646                 nral = NRAL(ftype);
1647                 il = &molt->ilist[ftype];
1648                 ia  = il->iatoms;
1649                 for(i=0; i<il->nr; i+=1+nral)
1650                 {
1651                     cg = a2cg[ia[1+i]];
1652                     for(a=1; a<nral; a++)
1653                     {
1654                         if (a2cg[ia[1+a]] != cg) {
1655                             n_intercg_vsite += molb->nmol;
1656                             break;
1657                         }
1658                     }
1659                 }
1660             }
1661         }
1662         sfree(a2cg);
1663     }
1664
1665     return n_intercg_vsite;
1666 }
1667
1668 static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
1669                            t_atom *atom,t_mdatoms *md,
1670                            t_block *cgs,int *a2cg)
1671 {
1672   int  ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
1673   t_ilist *il;
1674   t_iatom *ia;
1675   int  **vsite_pbc,*vsite_pbc_f;
1676   char *pbc_set;
1677   gmx_bool bViteOnlyCG_and_FirstAtom;
1678
1679   /* Make an array that tells if the pbc of an atom is set */
1680   snew(pbc_set,cgs->index[cgs->nr]);
1681   /* PBC is set for all non vsites */
1682   for(a=0; a<cgs->index[cgs->nr]; a++) {
1683     if ((atom && atom[a].ptype != eptVSite) ||
1684         (md   && md->ptype[a]  != eptVSite)) {
1685       pbc_set[a] = 1;
1686     }
1687   }
1688
1689   snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
1690
1691   for(ftype=0; ftype<F_NRE; ftype++) {
1692     if (interaction_function[ftype].flags & IF_VSITE) {
1693       nral = NRAL(ftype);
1694       il = &ilist[ftype];
1695       ia  = il->iatoms;
1696
1697       snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
1698       vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1699
1700       i = 0;
1701       while (i < il->nr) {
1702         vsi = i/(1+nral);
1703         vsite = ia[i+1];
1704         cg_v = a2cg[vsite];
1705         /* A value of -2 signals that this vsite and its contructing
1706          * atoms are all within the same cg, so no pbc is required.
1707          */
1708         vsite_pbc_f[vsi] = -2;
1709         /* Check if constructing atoms are outside the vsite's cg */
1710         nc3 = 0;
1711         if (ftype == F_VSITEN) {
1712           nc3 = 3*iparams[ia[i]].vsiten.n;
1713           for(j=0; j<nc3; j+=3) {
1714             if (a2cg[ia[i+j+2]] != cg_v)
1715               vsite_pbc_f[vsi] = -1;
1716           }
1717         } else {
1718           for(a=1; a<nral; a++) {
1719             if (a2cg[ia[i+1+a]] != cg_v)
1720               vsite_pbc_f[vsi] = -1;
1721           }
1722         }
1723         if (vsite_pbc_f[vsi] == -1) {
1724           /* Check if this is the first processed atom of a vsite only cg */
1725           bViteOnlyCG_and_FirstAtom = TRUE;
1726           for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1727             /* Non-vsites already have pbc set, so simply check for pbc_set */
1728             if (pbc_set[a]) {
1729               bViteOnlyCG_and_FirstAtom = FALSE;
1730               break;
1731             }
1732           }
1733           if (bViteOnlyCG_and_FirstAtom) {
1734             /* First processed atom of a vsite only charge group.
1735              * The pbc of the input coordinates to construct_vsites
1736              * should be preserved.
1737              */
1738             vsite_pbc_f[vsi] = vsite;
1739           } else if (cg_v != a2cg[ia[1+i+1]]) {
1740             /* This vsite has a different charge group index
1741              * than it's first constructing atom
1742              * and the charge group has more than one atom,
1743              * search for the first normal particle
1744              * or vsite that already had its pbc defined.
1745              * If nothing is found, use full pbc for this vsite.
1746              */
1747             for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1748               if (a != vsite && pbc_set[a]) {
1749                 vsite_pbc_f[vsi] = a;
1750                 if (gmx_debug_at)
1751                   fprintf(debug,"vsite %d match pbc with atom %d\n",
1752                           vsite+1,a+1);
1753                 break;
1754               }
1755             }
1756             if (gmx_debug_at)
1757               fprintf(debug,"vsite atom %d  cg %d - %d pbc atom %d\n",
1758                       vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
1759                       vsite_pbc_f[vsi]+1);
1760           }
1761         }
1762         if (ftype == F_VSITEN) {
1763           /* The other entries in vsite_pbc_f are not used for center vsites */
1764           i += nc3;
1765         } else {
1766           i += 1+nral;
1767         }
1768
1769         /* This vsite now has its pbc defined */
1770         pbc_set[vsite] = 1;
1771       }
1772     }
1773   }
1774
1775   sfree(pbc_set);
1776
1777   return vsite_pbc;
1778 }
1779
1780
1781 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr,
1782                         gmx_bool bSerial_NoPBC)
1783 {
1784     int nvsite,i;
1785     int *a2cg,cg;
1786     gmx_vsite_t *vsite;
1787     int mt;
1788     gmx_moltype_t *molt;
1789     int nthreads;
1790
1791     /* check if there are vsites */
1792     nvsite = 0;
1793     for(i=0; i<F_NRE; i++)
1794     {
1795         if (interaction_function[i].flags & IF_VSITE)
1796         {
1797             nvsite += gmx_mtop_ftype_count(mtop,i);
1798         }
1799     }
1800
1801     if (nvsite == 0)
1802     {
1803         return NULL;
1804     }
1805
1806     snew(vsite,1);
1807
1808     vsite->n_intercg_vsite = count_intercg_vsite(mtop,
1809                                                  &vsite->bHaveChargeGroups);
1810
1811     /* If we don't have charge groups, the vsite follows its own pbc */
1812     if (!bSerial_NoPBC &&
1813         vsite->bHaveChargeGroups &&
1814         vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr))
1815     {
1816         vsite->nvsite_pbc_molt = mtop->nmoltype;
1817         snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
1818         for(mt=0; mt<mtop->nmoltype; mt++)
1819         {
1820             molt = &mtop->moltype[mt];
1821             /* Make an atom to charge group index */
1822             a2cg = atom2cg(&molt->cgs);
1823             vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1824                                                       molt->ilist,
1825                                                       molt->atoms.atom,NULL,
1826                                                       &molt->cgs,a2cg);
1827             sfree(a2cg);
1828         }
1829
1830         snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
1831         snew(vsite->vsite_pbc_loc       ,F_VSITEN-F_VSITE2+1);
1832     }
1833
1834     if (bSerial_NoPBC)
1835     {
1836         vsite->nthreads = 1;
1837     }
1838     else
1839     {
1840         vsite->nthreads = gmx_omp_nthreads_get(emntVSITE);
1841     }
1842     if (!bSerial_NoPBC)
1843     {
1844         /* We need one extra thread data structure for the overlap vsites */
1845         snew(vsite->tdata,vsite->nthreads+1);
1846     }
1847
1848     vsite->th_ind        = NULL;
1849     vsite->th_ind_nalloc = 0;
1850
1851     return vsite;
1852 }
1853
1854 static void prepare_vsite_thread(const t_ilist *ilist,
1855                                  gmx_vsite_thread_t *vsite_th)
1856 {
1857     int ftype;
1858
1859     for(ftype=0; ftype<F_NRE; ftype++)
1860     {
1861         if (interaction_function[ftype].flags & IF_VSITE)
1862         {
1863             if (ilist[ftype].nr > vsite_th->ilist[ftype].nalloc)
1864             {
1865                 vsite_th->ilist[ftype].nalloc = over_alloc_large(ilist[ftype].nr);
1866                 srenew(vsite_th->ilist[ftype].iatoms,vsite_th->ilist[ftype].nalloc);
1867             }
1868
1869             vsite_th->ilist[ftype].nr = 0;
1870         }
1871     }
1872 }
1873
1874 void split_vsites_over_threads(const t_ilist *ilist,
1875                                const t_mdatoms *mdatoms,
1876                                gmx_bool bLimitRange,
1877                                gmx_vsite_t *vsite)
1878 {
1879     int th;
1880     int vsite_atom_range,natperthread;
1881     int *th_ind;
1882     int ftype;
1883     t_iatom *iat;
1884     t_ilist *il_th;
1885     int nral1,inc,i,j;
1886
1887     if (vsite->nthreads == 1)
1888     {
1889         /* Nothing to do */
1890         return;
1891     }
1892
1893 #pragma omp parallel for num_threads(vsite->nthreads) schedule(static)
1894     for(th=0; th<vsite->nthreads; th++)
1895     {
1896         prepare_vsite_thread(ilist,&vsite->tdata[th]);
1897     }
1898     /* Master threads does the (potential) overlap vsites */
1899     prepare_vsite_thread(ilist,&vsite->tdata[vsite->nthreads]);
1900
1901     /* The current way of distributing the vsites over threads in primitive.
1902      * We divide the atom range 0 - natoms_in_vsite uniformly over threads,
1903      * without taking into account how the vsites are distributed.
1904      * Without domain decomposition we bLimitRange=TRUE and we at least
1905      * tighten the upper bound of the range (useful for common systems
1906      * such as a vsite-protein in 3-site water).
1907      */
1908     if (bLimitRange)
1909     {
1910         vsite_atom_range = -1;
1911         for(ftype=0; ftype<F_NRE; ftype++)
1912         {
1913             if ((interaction_function[ftype].flags & IF_VSITE) &&
1914                 ftype != F_VSITEN)
1915             {
1916                 nral1 = 1 + NRAL(ftype);
1917                 iat   = ilist[ftype].iatoms;
1918                 for(i=0; i<ilist[ftype].nr; i+=nral1)
1919                 {
1920                     for(j=i+1; j<i+nral1; j++)
1921                     {
1922                         vsite_atom_range = max(vsite_atom_range,iat[j]);
1923                     }
1924                 }
1925             }
1926         }
1927         vsite_atom_range++;
1928     }
1929     else
1930     {
1931         vsite_atom_range = mdatoms->homenr;
1932     }
1933     natperthread = (vsite_atom_range + vsite->nthreads - 1)/vsite->nthreads;
1934
1935     if (debug)
1936     {
1937         fprintf(debug,"virtual site thread dist: natoms %d, range %d, natperthread %d\n",mdatoms->nr,vsite_atom_range,natperthread);
1938     }
1939
1940     /* To simplify the vsite assignment, we make an index which tells us
1941      * to which thread particles, both non-vsites and vsites, are assigned.
1942      */
1943     if (mdatoms->nr > vsite->th_ind_nalloc)
1944     {
1945         vsite->th_ind_nalloc = over_alloc_large(mdatoms->nr);
1946         srenew(vsite->th_ind,vsite->th_ind_nalloc);
1947     }
1948     th_ind = vsite->th_ind;
1949     th = 0;
1950     for(i=0; i<mdatoms->nr; i++)
1951     {
1952         if (mdatoms->ptype[i] == eptVSite)
1953         {
1954             /* vsites are not assigned to a thread yet */
1955             th_ind[i] = -1;
1956         }
1957         else
1958         {
1959             /* assign non-vsite particles to thread th */
1960             th_ind[i] = th;
1961         }
1962         if (i == (th + 1)*natperthread && th < vsite->nthreads)
1963         {
1964             th++;
1965         }
1966     }
1967
1968     for(ftype=0; ftype<F_NRE; ftype++)
1969     {
1970         if ((interaction_function[ftype].flags & IF_VSITE) &&
1971             ftype != F_VSITEN)
1972         {
1973             nral1 = 1 + NRAL(ftype);
1974             inc   = nral1;
1975             iat   = ilist[ftype].iatoms;
1976             for(i=0; i<ilist[ftype].nr; )
1977             {
1978                 th = iat[1+i]/natperthread;
1979                 /* We would like to assign this vsite the thread th,
1980                  * but it might depend on atoms outside the atom range of th
1981                  * or on another vsite not assigned to thread th.
1982                  */
1983                 if (ftype != F_VSITEN)
1984                 {
1985                     for(j=i+2; j<i+nral1; j++)
1986                     {
1987                         if (th_ind[iat[j]] != th)
1988                         {
1989                             /* Some constructing atoms are not assigned to
1990                              * thread th, move this vsite to a separate batch.
1991                              */
1992                             th = vsite->nthreads;
1993                         }
1994                     }
1995                 }
1996                 else
1997                 {
1998                     inc = iat[i];
1999                     for(j=i+2; j<i+inc; j+=3)
2000                     {
2001                         if (th_ind[iat[j]] != th)
2002                         {
2003                             th = vsite->nthreads;
2004                         }
2005                     }
2006                 }
2007                 /* Copy this vsite to the thread data struct of thread th */
2008                 il_th = &vsite->tdata[th].ilist[ftype];
2009                 for(j=i; j<i+inc; j++)
2010                 {
2011                     il_th->iatoms[il_th->nr++] = iat[j];
2012                 }
2013                 /* Update this vsite's thread index entry */
2014                 th_ind[iat[1+i]] = th;
2015
2016                 i += inc;
2017             }
2018         }
2019     }
2020
2021     if (debug)
2022     {
2023         for(ftype=0; ftype<F_NRE; ftype++)
2024         {
2025             if ((interaction_function[ftype].flags & IF_VSITE) &&
2026             ilist[ftype].nr > 0)
2027             {
2028                 fprintf(debug,"%-20s thread dist:",
2029                         interaction_function[ftype].longname);
2030                 for(th=0; th<vsite->nthreads+1; th++)
2031                 {
2032                     fprintf(debug," %4d",vsite->tdata[th].ilist[ftype].nr);
2033                 }
2034                 fprintf(debug,"\n");
2035             }
2036         }
2037     }
2038 }
2039
2040 void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
2041                    t_commrec *cr)
2042 {
2043     int *a2cg;
2044
2045     if (vsite->n_intercg_vsite > 0)
2046     {
2047         if (vsite->bHaveChargeGroups)
2048         {
2049             /* Make an atom to charge group index */
2050             a2cg = atom2cg(&top->cgs);
2051             vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
2052                                                  top->idef.il,NULL,md,
2053                                                  &top->cgs,a2cg);
2054             sfree(a2cg);
2055         }
2056
2057         if (PARTDECOMP(cr))
2058         {
2059             snew(vsite->vsitecomm,1);
2060             vsite->bPDvsitecomm =
2061                 setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);
2062         }
2063     }
2064
2065     if (vsite->nthreads > 1)
2066     {
2067         if (vsite->bHaveChargeGroups || PARTDECOMP(cr))
2068         {
2069             gmx_incons("Can not use threads virtual sites combined with charge groups or particle decomposition");
2070         }
2071
2072         split_vsites_over_threads(top->idef.il,md,!DOMAINDECOMP(cr),vsite);
2073     }
2074 }