src/gromacs/mdlib/vsite.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <stdio.h>
  41 #include "typedefs.h"
  42 #include "vsite.h"
  43 #include "macros.h"
  44 #include "smalloc.h"
  45 #include "nrnb.h"
  46 #include "vec.h"
  47 #include "mvdata.h"
  48 #include "network.h"
  49 #include "mshift.h"
  50 #include "pbc.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54 #include "gmx_omp_nthreads.h"
  55 #include "gmx_omp.h"
  56
  57 /* Routines to send/recieve coordinates and force
  58  * of constructing atoms.
  59  */
  60
  61 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
  62 {
  63         rvec *sendbuf;
  64         rvec *recvbuf;
  65         int i,ia;
  66
  67         sendbuf = vsitecomm->send_buf;
  68         recvbuf = vsitecomm->recv_buf;
  69
  70
  71                 /* Prepare pulse left by copying to send buffer */
  72                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
  73                 {
  74                         ia = vsitecomm->left_export_construct[i];
  75                         copy_rvec(x[ia],sendbuf[i]);
  76                 }
  77
  78                 /* Pulse coordinates left */
  79                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
  80
  81                 /* Copy from receive buffer to coordinate array */
  82                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
  83                 {
  84                         ia = vsitecomm->right_import_construct[i];
  85                         copy_rvec(recvbuf[i],x[ia]);
  86                 }
  87
  88                 /* Prepare pulse right by copying to send buffer */
  89                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
  90                 {
  91                         ia = vsitecomm->right_export_construct[i];
  92                         copy_rvec(x[ia],sendbuf[i]);
  93                 }
  94
  95                 /* Pulse coordinates right */
  96                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
  97
  98                 /* Copy from receive buffer to coordinate array */
  99                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 100                 {
 101                         ia = vsitecomm->left_import_construct[i];
 102                         copy_rvec(recvbuf[i],x[ia]);
 103                 }
 104 }
 105
 106
 107 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
 108 {
 109         rvec *sendbuf;
 110         rvec *recvbuf;
 111         int i,ia;
 112
 113         sendbuf = vsitecomm->send_buf;
 114         recvbuf = vsitecomm->recv_buf;
 115
 116                 /* Prepare pulse right by copying to send buffer */
 117                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 118                 {
 119                         ia = vsitecomm->right_import_construct[i];
 120                         copy_rvec(f[ia],sendbuf[i]);
 121                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 122                 }
 123
 124                 /* Pulse forces right */
 125                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
 126
 127                 /* Copy from receive buffer to coordinate array */
 128                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
 129                 {
 130                         ia = vsitecomm->left_export_construct[i];
 131                         rvec_inc(f[ia],recvbuf[i]);
 132                 }
 133
 134                 /* Prepare pulse left by copying to send buffer */
 135                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 136                 {
 137                         ia = vsitecomm->left_import_construct[i];
 138                         copy_rvec(f[ia],sendbuf[i]);
 139                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 140                 }
 141
 142                 /* Pulse coordinates left */
 143                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
 144
 145                 /* Copy from receive buffer to coordinate array */
 146                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
 147                 {
 148                         ia = vsitecomm->right_export_construct[i];
 149                         rvec_inc(f[ia],recvbuf[i]);
 150                 }
 151
 152         /* All forces are now on the home processors */
 153 }
 154
 155
 156 static void
 157 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
 158 {
 159         int i,ia;
 160
 161         for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 162         {
 163                 ia = vsitecomm->left_import_construct[i];
 164                 clear_rvec(f[ia]);
 165         }
 166         for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 167         {
 168                 ia = vsitecomm->right_import_construct[i];
 169                 clear_rvec(f[ia]);
 170         }
 171 }
 172
 173
 174
 175 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 176 {
 177   if (pbc) {
 178     return pbc_dx_aiuc(pbc,xi,xj,dx);
 179   }
 180   else {
 181     rvec_sub(xi,xj,dx);
 182     return CENTRAL;
 183   }
 184 }
 185
 186 /* Vsite construction routines */
 187
 188 static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
 189 {
 190   real b;
 191   rvec dx;
 192
 193   b=1.0-a;
 194   /* 1 flop */
 195
 196   if (pbc) {
 197     pbc_dx_aiuc(pbc,xj,xi,dx);
 198     x[XX] = xi[XX] + a*dx[XX];
 199     x[YY] = xi[YY] + a*dx[YY];
 200     x[ZZ] = xi[ZZ] + a*dx[ZZ];
 201   } else {
 202     x[XX] = b*xi[XX] + a*xj[XX];
 203     x[YY] = b*xi[YY] + a*xj[YY];
 204     x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
 205     /* 9 Flops */
 206   }
 207
 208   /* TOTAL: 10 flops */
 209 }
 210
 211 static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 212                           t_pbc *pbc)
 213 {
 214   real c;
 215   rvec dxj,dxk;
 216
 217   c=1.0-a-b;
 218   /* 2 flops */
 219
 220   if (pbc) {
 221     pbc_dx_aiuc(pbc,xj,xi,dxj);
 222     pbc_dx_aiuc(pbc,xk,xi,dxk);
 223     x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
 224     x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
 225     x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
 226   } else {
 227     x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
 228     x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
 229     x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
 230   /* 15 Flops */
 231   }
 232
 233   /* TOTAL: 17 flops */
 234 }
 235
 236 static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 237                             t_pbc *pbc)
 238 {
 239   rvec xij,xjk,temp;
 240   real c;
 241
 242   pbc_rvec_sub(pbc,xj,xi,xij);
 243   pbc_rvec_sub(pbc,xk,xj,xjk);
 244   /* 6 flops */
 245
 246   /* temp goes from i to a point on the line jk */
 247   temp[XX] = xij[XX] + a*xjk[XX];
 248   temp[YY] = xij[YY] + a*xjk[YY];
 249   temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
 250   /* 6 flops */
 251
 252   c=b*gmx_invsqrt(iprod(temp,temp));
 253   /* 6 + 10 flops */
 254
 255   x[XX] = xi[XX] + c*temp[XX];
 256   x[YY] = xi[YY] + c*temp[YY];
 257   x[ZZ] = xi[ZZ] + c*temp[ZZ];
 258   /* 6 Flops */
 259
 260   /* TOTAL: 34 flops */
 261 }
 262
 263 static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
 264 {
 265   rvec xij,xjk,xp;
 266   real a1,b1,c1,invdij;
 267
 268   pbc_rvec_sub(pbc,xj,xi,xij);
 269   pbc_rvec_sub(pbc,xk,xj,xjk);
 270   /* 6 flops */
 271
 272   invdij = gmx_invsqrt(iprod(xij,xij));
 273   c1 = invdij * invdij * iprod(xij,xjk);
 274   xp[XX] = xjk[XX] - c1*xij[XX];
 275   xp[YY] = xjk[YY] - c1*xij[YY];
 276   xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 277   a1 = a*invdij;
 278   b1 = b*gmx_invsqrt(iprod(xp,xp));
 279   /* 45 */
 280
 281   x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
 282   x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
 283   x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
 284   /* 12 Flops */
 285
 286   /* TOTAL: 63 flops */
 287 }
 288
 289 static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
 290                              real a,real b,real c,t_pbc *pbc)
 291 {
 292   rvec xij,xik,temp;
 293
 294   pbc_rvec_sub(pbc,xj,xi,xij);
 295   pbc_rvec_sub(pbc,xk,xi,xik);
 296   cprod(xij,xik,temp);
 297   /* 15 Flops */
 298
 299   x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
 300   x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
 301   x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
 302   /* 18 Flops */
 303
 304   /* TOTAL: 33 flops */
 305 }
 306
 307 static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 308                             real a,real b,real c,t_pbc *pbc)
 309 {
 310   rvec xij,xjk,xjl,temp;
 311   real d;
 312
 313   pbc_rvec_sub(pbc,xj,xi,xij);
 314   pbc_rvec_sub(pbc,xk,xj,xjk);
 315   pbc_rvec_sub(pbc,xl,xj,xjl);
 316   /* 9 flops */
 317
 318   /* temp goes from i to a point on the plane jkl */
 319   temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
 320   temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
 321   temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
 322   /* 12 flops */
 323
 324   d=c*gmx_invsqrt(iprod(temp,temp));
 325   /* 6 + 10 flops */
 326
 327   x[XX] = xi[XX] + d*temp[XX];
 328   x[YY] = xi[YY] + d*temp[YY];
 329   x[ZZ] = xi[ZZ] + d*temp[ZZ];
 330   /* 6 Flops */
 331
 332   /* TOTAL: 43 flops */
 333 }
 334
 335
 336 static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 337                              real a,real b,real c,t_pbc *pbc)
 338 {
 339     rvec xij,xik,xil,ra,rb,rja,rjb,rm;
 340     real d;
 341
 342     pbc_rvec_sub(pbc,xj,xi,xij);
 343     pbc_rvec_sub(pbc,xk,xi,xik);
 344     pbc_rvec_sub(pbc,xl,xi,xil);
 345     /* 9 flops */
 346
 347     ra[XX] = a*xik[XX];
 348     ra[YY] = a*xik[YY];
 349     ra[ZZ] = a*xik[ZZ];
 350
 351     rb[XX] = b*xil[XX];
 352     rb[YY] = b*xil[YY];
 353     rb[ZZ] = b*xil[ZZ];
 354
 355     /* 6 flops */
 356
 357     rvec_sub(ra,xij,rja);
 358     rvec_sub(rb,xij,rjb);
 359     /* 6 flops */
 360
 361     cprod(rja,rjb,rm);
 362     /* 9 flops */
 363
 364     d=c*gmx_invsqrt(norm2(rm));
 365     /* 5+5+1 flops */
 366
 367     x[XX] = xi[XX] + d*rm[XX];
 368     x[YY] = xi[YY] + d*rm[YY];
 369     x[ZZ] = xi[ZZ] + d*rm[ZZ];
 370     /* 6 Flops */
 371
 372     /* TOTAL: 47 flops */
 373 }
 374
 375
 376 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
 377                          rvec *x, t_pbc *pbc)
 378 {
 379   rvec xs,x1,dx;
 380   dvec dsum;
 381   int  n3,av,ai,i;
 382   real a;
 383
 384   n3 = 3*ip[ia[0]].vsiten.n;
 385   av = ia[1];
 386   ai = ia[2];
 387   copy_rvec(x[ai],x1);
 388   clear_dvec(dsum);
 389   for(i=3; i<n3; i+=3) {
 390     ai = ia[i+2];
 391     a = ip[ia[i]].vsiten.a;
 392     if (pbc) {
 393       pbc_dx_aiuc(pbc,x[ai],x1,dx);
 394     } else {
 395       rvec_sub(x[ai],x1,dx);
 396     }
 397     dsum[XX] += a*dx[XX];
 398     dsum[YY] += a*dx[YY];
 399     dsum[ZZ] += a*dx[ZZ];
 400     /* 9 Flops */
 401   }
 402
 403   x[av][XX] = x1[XX] + dsum[XX];
 404   x[av][YY] = x1[YY] + dsum[YY];
 405   x[av][ZZ] = x1[ZZ] + dsum[ZZ];
 406
 407   return n3;
 408 }
 409
 410
 411 void construct_vsites_thread(gmx_vsite_t *vsite,
 412                              rvec x[],t_nrnb *nrnb,
 413                              real dt,rvec *v,
 414                              t_iparams ip[],t_ilist ilist[],
 415                              t_pbc *pbc_null)
 416 {
 417     gmx_bool  bPBCAll;
 418     rvec      xpbc,xv,vv,dx;
 419     real      a1,b1,c1,inv_dt;
 420     int       i,inc,ii,nra,nr,tp,ftype;
 421     t_iatom   avsite,ai,aj,ak,al,pbc_atom;
 422     t_iatom   *ia;
 423     t_pbc     *pbc_null2;
 424     int       *vsite_pbc,ishift;
 425     rvec      reftmp,vtmp,rtmp;
 426
 427     if (v != NULL)
 428     {
 429         inv_dt = 1.0/dt;
 430     }
 431     else
 432     {
 433         inv_dt = 1.0;
 434     }
 435
 436     bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
 437
 438     pbc_null2 = NULL;
 439     vsite_pbc = NULL;
 440     for(ftype=0; (ftype<F_NRE); ftype++)
 441     {
 442         if ((interaction_function[ftype].flags & IF_VSITE) &&
 443             ilist[ftype].nr > 0)
 444         {
 445             nra    = interaction_function[ftype].nratoms;
 446             inc    = 1 + nra;
 447             nr     = ilist[ftype].nr;
 448             ia     = ilist[ftype].iatoms;
 449
 450             if (bPBCAll)
 451             {
 452                 pbc_null2 = pbc_null;
 453             }
 454             else if (pbc_null != NULL)
 455             {
 456                 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
 457             }
 458
 459             for(i=0; i<nr; )
 460             {
 461                 tp   = ia[0];
 462
 463                 /* The vsite and constructing atoms */
 464                 avsite = ia[1];
 465                 ai   = ia[2];
 466                 aj   = ia[3];
 467
 468                 /* Constants for constructing vsites */
 469                 a1   = ip[tp].vsite.a;
 470                 /* Check what kind of pbc we need to use */
 471                 if (bPBCAll)
 472                 {
 473                     /* No charge groups, vsite follows its own pbc */
 474                     pbc_atom = avsite;
 475                     copy_rvec(x[avsite],xpbc);
 476                 }
 477                 else if (vsite_pbc != NULL)
 478                 {
 479                     pbc_atom = vsite_pbc[i/(1+nra)];
 480                     if (pbc_atom > -2)
 481                     {
 482                         if (pbc_atom >= 0)
 483                         {
 484                             /* We need to copy the coordinates here,
 485                              * single for single atom cg's pbc_atom
 486                              * is the vsite itself.
 487                              */
 488                             copy_rvec(x[pbc_atom],xpbc);
 489                         }
 490                         pbc_null2 = pbc_null;
 491                     }
 492                     else
 493                     {
 494                         pbc_null2 = NULL;
 495                     }
 496                 }
 497                 else
 498                 {
 499                     pbc_atom = -2;
 500                 }
 501                 /* Copy the old position */
 502                 copy_rvec(x[avsite],xv);
 503
 504                 /* Construct the vsite depending on type */
 505                 switch (ftype)
 506                 {
 507                 case F_VSITE2:
 508                     constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
 509                     break;
 510                 case F_VSITE3:
 511                     ak = ia[4];
 512                     b1 = ip[tp].vsite.b;
 513                     constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 514                     break;
 515                 case F_VSITE3FD:
 516                     ak = ia[4];
 517                     b1 = ip[tp].vsite.b;
 518                     constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 519                     break;
 520                 case F_VSITE3FAD:
 521                     ak = ia[4];
 522                     b1 = ip[tp].vsite.b;
 523                     constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 524                     break;
 525                 case F_VSITE3OUT:
 526                     ak = ia[4];
 527                     b1 = ip[tp].vsite.b;
 528                     c1 = ip[tp].vsite.c;
 529                     constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
 530                     break;
 531                 case F_VSITE4FD:
 532                     ak = ia[4];
 533                     al = ia[5];
 534                     b1 = ip[tp].vsite.b;
 535                     c1 = ip[tp].vsite.c;
 536                     constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 537                                     pbc_null2);
 538                     break;
 539                 case F_VSITE4FDN:
 540                     ak = ia[4];
 541                     al = ia[5];
 542                     b1 = ip[tp].vsite.b;
 543                     c1 = ip[tp].vsite.c;
 544                     constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 545                                      pbc_null2);
 546                     break;
 547                 case F_VSITEN:
 548                     inc = constr_vsiten(ia,ip,x,pbc_null2);
 549                     break;
 550                 default:
 551                     gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
 552                               ftype,__FILE__,__LINE__);
 553                 }
 554
 555                 if (pbc_atom >= 0)
 556                 {
 557                     /* Match the pbc of this vsite to the rest of its charge group */
 558                     ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
 559                     if (ishift != CENTRAL)
 560                     {
 561                         rvec_add(xpbc,dx,x[avsite]);
 562                     }
 563                 }
 564                 if (v != NULL)
 565                 {
 566                     /* Calculate velocity of vsite... */
 567                     rvec_sub(x[avsite],xv,vv);
 568                     svmul(inv_dt,vv,v[avsite]);
 569                 }
 570
 571                 /* Increment loop variables */
 572                 i  += inc;
 573                 ia += inc;
 574             }
 575         }
 576     }
 577 }
 578
 579 void construct_vsites(FILE *log,gmx_vsite_t *vsite,
 580                       rvec x[],t_nrnb *nrnb,
 581                       real dt,rvec *v,
 582                       t_iparams ip[],t_ilist ilist[],
 583                       int ePBC,gmx_bool bMolPBC,t_graph *graph,
 584                       t_commrec *cr,matrix box)
 585 {
 586     t_pbc     pbc,*pbc_null;
 587     gmx_bool  bDomDec;
 588     int       nthreads;
 589
 590     bDomDec = cr && DOMAINDECOMP(cr);
 591
 592     /* We only need to do pbc when we have inter-cg vsites */
 593     if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite)
 594     {
 595         /* This is wasting some CPU time as we now do this multiple times
 596          * per MD step. But how often do we have vsites with full pbc?
 597          */
 598         pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
 599     }
 600     else
 601     {
 602         pbc_null = NULL;
 603     }
 604
 605     if (cr)
 606     {
 607         if (bDomDec)
 608         {
 609             dd_move_x_vsites(cr->dd,box,x);
 610         }
 611         else if (vsite->bPDvsitecomm)
 612         {
 613             /* I'm not sure whether the periodicity and shift are guaranteed
 614              * to be consistent between different nodes when running e.g. polymers
 615              * in parallel. In this special case we thus unshift/shift,
 616              * but only when necessary. This is to make sure the coordinates
 617              * we move don't end up a box away...
 618              */
 619             if (graph != NULL)
 620             {
 621                 unshift_self(graph,box,x);
 622             }
 623
 624             move_construct_x(vsite->vsitecomm,x,cr);
 625
 626             if (graph != NULL)
 627             {
 628                 shift_self(graph,box,x);
 629             }
 630         }
 631     }
 632
 633     if (vsite->nthreads == 1)
 634     {
 635         construct_vsites_thread(vsite,
 636                                 x,nrnb,dt,v,
 637                                 ip,ilist,
 638                                 pbc_null);
 639     }
 640     else
 641     {
 642 #pragma omp parallel num_threads(vsite->nthreads)
 643         {
 644             construct_vsites_thread(vsite,
 645                                     x,nrnb,dt,v,
 646                                     ip,vsite->tdata[gmx_omp_get_thread_num()].ilist,
 647                                     pbc_null);
 648         }
 649         /* Now we can construct the vsites that might depend on other vsites */
 650         construct_vsites_thread(vsite,
 651                                 x,nrnb,dt,v,
 652                                 ip,vsite->tdata[vsite->nthreads].ilist,
 653                                 pbc_null);
 654     }
 655 }
 656
 657 static void spread_vsite2(t_iatom ia[],real a,
 658                             rvec x[],rvec f[],rvec fshift[],
 659                             t_pbc *pbc,t_graph *g)
 660 {
 661   rvec    fi,fj,dx;
 662   t_iatom av,ai,aj;
 663   ivec    di;
 664   real    b;
 665   int     siv,sij;
 666
 667   av = ia[1];
 668   ai = ia[2];
 669   aj = ia[3];
 670
 671   svmul(1-a,f[av],fi);
 672   svmul(  a,f[av],fj);
 673   /* 7 flop */
 674
 675   rvec_inc(f[ai],fi);
 676   rvec_inc(f[aj],fj);
 677   /* 6 Flops */
 678
 679   if (g) {
 680     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
 681     siv = IVEC2IS(di);
 682     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 683     sij = IVEC2IS(di);
 684   } else if (pbc) {
 685     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 686     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 687   } else {
 688     siv = CENTRAL;
 689     sij = CENTRAL;
 690   }
 691
 692   if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
 693     rvec_inc(fshift[siv],f[av]);
 694     rvec_dec(fshift[CENTRAL],fi);
 695     rvec_dec(fshift[sij],fj);
 696   }
 697
 698   /* TOTAL: 13 flops */
 699 }
 700
 701 void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
 702                            gmx_mtop_t *mtop,rvec x[])
 703 {
 704   int as,mb,mol;
 705   gmx_molblock_t *molb;
 706   gmx_moltype_t  *molt;
 707
 708   as = 0;
 709   for(mb=0; mb<mtop->nmolblock; mb++) {
 710     molb = &mtop->molblock[mb];
 711     molt = &mtop->moltype[molb->type];
 712     for(mol=0; mol<molb->nmol; mol++) {
 713       construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
 714                        mtop->ffparams.iparams,molt->ilist,
 715                        epbcNONE,TRUE,NULL,NULL,NULL);
 716       as += molt->atoms.nr;
 717     }
 718   }
 719 }
 720
 721 static void spread_vsite3(t_iatom ia[],real a,real b,
 722                             rvec x[],rvec f[],rvec fshift[],
 723                             t_pbc *pbc,t_graph *g)
 724 {
 725   rvec    fi,fj,fk,dx;
 726   atom_id av,ai,aj,ak;
 727   ivec    di;
 728   int     siv,sij,sik;
 729
 730   av = ia[1];
 731   ai = ia[2];
 732   aj = ia[3];
 733   ak = ia[4];
 734
 735   svmul(1-a-b,f[av],fi);
 736   svmul(    a,f[av],fj);
 737   svmul(    b,f[av],fk);
 738   /* 11 flops */
 739
 740   rvec_inc(f[ai],fi);
 741   rvec_inc(f[aj],fj);
 742   rvec_inc(f[ak],fk);
 743   /* 9 Flops */
 744
 745   if (g) {
 746     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
 747     siv = IVEC2IS(di);
 748     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 749     sij = IVEC2IS(di);
 750     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
 751     sik = IVEC2IS(di);
 752   } else if (pbc) {
 753     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 754     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 755     sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
 756   } else {
 757     siv = CENTRAL;
 758     sij = CENTRAL;
 759     sik = CENTRAL;
 760   }
 761
 762   if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
 763     rvec_inc(fshift[siv],f[av]);
 764     rvec_dec(fshift[CENTRAL],fi);
 765     rvec_dec(fshift[sij],fj);
 766     rvec_dec(fshift[sik],fk);
 767   }
 768
 769   /* TOTAL: 20 flops */
 770 }
 771
 772 static void spread_vsite3FD(t_iatom ia[],real a,real b,
 773                             rvec x[],rvec f[],rvec fshift[],
 774                             gmx_bool VirCorr,matrix dxdf,
 775                             t_pbc *pbc,t_graph *g)
 776 {
 777   real fx,fy,fz,c,invl,fproj,a1;
 778   rvec xvi,xij,xjk,xix,fv,temp;
 779   t_iatom av,ai,aj,ak;
 780   int     svi,sji,skj,d;
 781   ivec    di;
 782
 783   av = ia[1];
 784   ai = ia[2];
 785   aj = ia[3];
 786   ak = ia[4];
 787   copy_rvec(f[av],fv);
 788
 789   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 790   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 791   /* 6 flops */
 792
 793   /* xix goes from i to point x on the line jk */
 794   xix[XX]=xij[XX]+a*xjk[XX];
 795   xix[YY]=xij[YY]+a*xjk[YY];
 796   xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
 797   /* 6 flops */
 798
 799   invl=gmx_invsqrt(iprod(xix,xix));
 800   c=b*invl;
 801   /* 4 + ?10? flops */
 802
 803   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 804
 805   temp[XX]=c*(fv[XX]-fproj*xix[XX]);
 806   temp[YY]=c*(fv[YY]-fproj*xix[YY]);
 807   temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
 808   /* 16 */
 809
 810   /* c is already calculated in constr_vsite3FD
 811      storing c somewhere will save 26 flops!     */
 812
 813   a1=1-a;
 814   f[ai][XX] += fv[XX] - temp[XX];
 815   f[ai][YY] += fv[YY] - temp[YY];
 816   f[ai][ZZ] += fv[ZZ] - temp[ZZ];
 817   f[aj][XX] += a1*temp[XX];
 818   f[aj][YY] += a1*temp[YY];
 819   f[aj][ZZ] += a1*temp[ZZ];
 820   f[ak][XX] += a*temp[XX];
 821   f[ak][YY] += a*temp[YY];
 822   f[ak][ZZ] += a*temp[ZZ];
 823   /* 19 Flops */
 824
 825   if (g) {
 826     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 827     svi = IVEC2IS(di);
 828     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 829     sji = IVEC2IS(di);
 830     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 831     skj = IVEC2IS(di);
 832   } else if (pbc) {
 833     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 834   } else {
 835     svi = CENTRAL;
 836   }
 837
 838   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 839     rvec_dec(fshift[svi],fv);
 840     fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
 841     fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
 842     fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
 843     fshift[    sji][XX] += temp[XX];
 844     fshift[    sji][YY] += temp[YY];
 845     fshift[    sji][ZZ] += temp[ZZ];
 846     fshift[    skj][XX] += a*temp[XX];
 847     fshift[    skj][YY] += a*temp[YY];
 848     fshift[    skj][ZZ] += a*temp[ZZ];
 849   }
 850
 851     if (VirCorr)
 852     {
 853         /* When VirCorr=TRUE, the virial for the current forces is not
 854          * calculated from the redistributed forces. This means that
 855          * the effect of non-linear virtual site constructions on the virial
 856          * needs to be added separately. This contribution can be calculated
 857          * in many ways, but the simplest and cheapest way is to use
 858          * the first constructing atom ai as a reference position in space:
 859          * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
 860          */
 861         rvec xiv;
 862         int  i,j;
 863
 864         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 865
 866         for(i=0; i<DIM; i++)
 867         {
 868             for(j=0; j<DIM; j++)
 869             {
 870                 /* As xix is a linear combination of j and k, use that here */
 871                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
 872             }
 873         }
 874     }
 875
 876   /* TOTAL: 61 flops */
 877 }
 878
 879 static void spread_vsite3FAD(t_iatom ia[],real a,real b,
 880                              rvec x[],rvec f[],rvec fshift[],
 881                              gmx_bool VirCorr,matrix dxdf,
 882                              t_pbc *pbc,t_graph *g)
 883 {
 884   rvec    xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
 885   real    a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
 886   t_iatom av,ai,aj,ak;
 887   int     svi,sji,skj,d;
 888   ivec    di;
 889
 890   av = ia[1];
 891   ai = ia[2];
 892   aj = ia[3];
 893   ak = ia[4];
 894   copy_rvec(f[ia[1]],fv);
 895
 896   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 897   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 898   /* 6 flops */
 899
 900   invdij = gmx_invsqrt(iprod(xij,xij));
 901   invdij2 = invdij * invdij;
 902   c1 = iprod(xij,xjk) * invdij2;
 903   xperp[XX] = xjk[XX] - c1*xij[XX];
 904   xperp[YY] = xjk[YY] - c1*xij[YY];
 905   xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 906   /* xperp in plane ijk, perp. to ij */
 907   invdp = gmx_invsqrt(iprod(xperp,xperp));
 908   a1 = a*invdij;
 909   b1 = b*invdp;
 910   /* 45 flops */
 911
 912   /* a1, b1 and c1 are already calculated in constr_vsite3FAD
 913      storing them somewhere will save 45 flops!     */
 914
 915   fproj=iprod(xij  ,fv)*invdij2;
 916   svmul(fproj,                      xij,  Fpij); /* proj. f on xij */
 917   svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
 918   svmul(b1*fproj,                   xperp,f3);
 919   /* 23 flops */
 920
 921   rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
 922   rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
 923   for (d=0; (d<DIM); d++) {
 924     f1[d]*=a1;
 925     f2[d]*=b1;
 926   }
 927   /* 12 flops */
 928
 929   c2=1+c1;
 930   f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
 931   f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
 932   f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
 933   f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
 934   f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
 935   f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
 936   f[ak][XX] +=                      f2[XX];
 937   f[ak][YY] +=                      f2[YY];
 938   f[ak][ZZ] +=                      f2[ZZ];
 939   /* 30 Flops */
 940
 941   if (g) {
 942     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 943     svi = IVEC2IS(di);
 944     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 945     sji = IVEC2IS(di);
 946     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 947     skj = IVEC2IS(di);
 948   } else if (pbc) {
 949     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 950   } else {
 951     svi = CENTRAL;
 952   }
 953
 954   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 955     rvec_dec(fshift[svi],fv);
 956     fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
 957     fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
 958     fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
 959     fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
 960     fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
 961     fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
 962     fshift[    skj][XX] +=                          f2[XX];
 963     fshift[    skj][YY] +=                          f2[YY];
 964     fshift[    skj][ZZ] +=                          f2[ZZ];
 965   }
 966
 967     if (VirCorr)
 968     {
 969         rvec xiv;
 970         int  i,j;
 971
 972         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 973
 974         for(i=0; i<DIM; i++)
 975         {
 976             for(j=0; j<DIM; j++)
 977             {
 978                 /* Note that xik=xij+xjk, so we have to add xij*f2 */
 979                 dxdf[i][j] +=
 980                     - xiv[i]*fv[j]
 981                     + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
 982                     + xjk[i]*f2[j];
 983             }
 984         }
 985     }
 986
 987   /* TOTAL: 113 flops */
 988 }
 989
 990 static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
 991                              rvec x[],rvec f[],rvec fshift[],
 992                              gmx_bool VirCorr,matrix dxdf,
 993                              t_pbc *pbc,t_graph *g)
 994 {
 995   rvec    xvi,xij,xik,fv,fj,fk;
 996   real    cfx,cfy,cfz;
 997   atom_id av,ai,aj,ak;
 998   ivec    di;
 999   int     svi,sji,ski;
1000
1001   av = ia[1];
1002   ai = ia[2];
1003   aj = ia[3];
1004   ak = ia[4];
1005
1006   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1007   ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1008   /* 6 Flops */
1009
1010   copy_rvec(f[av],fv);
1011
1012   cfx = c*fv[XX];
1013   cfy = c*fv[YY];
1014   cfz = c*fv[ZZ];
1015   /* 3 Flops */
1016
1017   fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
1018   fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
1019   fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
1020
1021   fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
1022   fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
1023   fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
1024   /* 30 Flops */
1025
1026   f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
1027   f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
1028   f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1029   rvec_inc(f[aj],fj);
1030   rvec_inc(f[ak],fk);
1031   /* 15 Flops */
1032
1033   if (g) {
1034     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1035     svi = IVEC2IS(di);
1036     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1037     sji = IVEC2IS(di);
1038     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1039     ski = IVEC2IS(di);
1040   } else if (pbc) {
1041     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1042   } else {
1043     svi = CENTRAL;
1044   }
1045
1046   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
1047     rvec_dec(fshift[svi],fv);
1048     fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
1049     fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
1050     fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
1051     rvec_inc(fshift[sji],fj);
1052     rvec_inc(fshift[ski],fk);
1053   }
1054
1055     if (VirCorr)
1056     {
1057         rvec xiv;
1058         int  i,j;
1059
1060         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1061
1062         for(i=0; i<DIM; i++)
1063         {
1064             for(j=0; j<DIM; j++)
1065             {
1066                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
1067             }
1068         }
1069     }
1070
1071   /* TOTAL: 54 flops */
1072 }
1073
1074 static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
1075                             rvec x[],rvec f[],rvec fshift[],
1076                             gmx_bool VirCorr,matrix dxdf,
1077                             t_pbc *pbc,t_graph *g)
1078 {
1079   real    d,invl,fproj,a1;
1080   rvec    xvi,xij,xjk,xjl,xix,fv,temp;
1081   atom_id av,ai,aj,ak,al;
1082   ivec    di;
1083   int     svi,sji,skj,slj,m;
1084
1085   av = ia[1];
1086   ai = ia[2];
1087   aj = ia[3];
1088   ak = ia[4];
1089   al = ia[5];
1090
1091   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1092   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
1093   slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
1094   /* 9 flops */
1095
1096   /* xix goes from i to point x on the plane jkl */
1097   for(m=0; m<DIM; m++)
1098     xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1099   /* 12 flops */
1100
1101   invl=gmx_invsqrt(iprod(xix,xix));
1102   d=c*invl;
1103   /* 4 + ?10? flops */
1104
1105   copy_rvec(f[av],fv);
1106
1107   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
1108
1109   for(m=0; m<DIM; m++)
1110     temp[m] = d*(fv[m] - fproj*xix[m]);
1111   /* 16 */
1112
1113   /* c is already calculated in constr_vsite3FD
1114      storing c somewhere will save 35 flops!     */
1115
1116   a1 = 1 - a - b;
1117   for(m=0; m<DIM; m++) {
1118     f[ai][m] += fv[m] - temp[m];
1119     f[aj][m] += a1*temp[m];
1120     f[ak][m] += a*temp[m];
1121     f[al][m] += b*temp[m];
1122   }
1123   /* 26 Flops */
1124
1125   if (g) {
1126     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1127     svi = IVEC2IS(di);
1128     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1129     sji = IVEC2IS(di);
1130     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
1131     skj = IVEC2IS(di);
1132     ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
1133     slj = IVEC2IS(di);
1134   } else if (pbc) {
1135     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1136   } else {
1137     svi = CENTRAL;
1138   }
1139
1140   if (fshift &&
1141       (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
1142     rvec_dec(fshift[svi],fv);
1143     for(m=0; m<DIM; m++) {
1144       fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1145       fshift[    sji][m] += temp[m];
1146       fshift[    skj][m] += a*temp[m];
1147       fshift[    slj][m] += b*temp[m];
1148     }
1149   }
1150
1151     if (VirCorr)
1152     {
1153         rvec xiv;
1154         int  i,j;
1155
1156         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1157
1158         for(i=0; i<DIM; i++)
1159         {
1160             for(j=0; j<DIM; j++)
1161             {
1162                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1163             }
1164         }
1165     }
1166
1167   /* TOTAL: 77 flops */
1168 }
1169
1170
1171 static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
1172                              rvec x[],rvec f[],rvec fshift[],
1173                              gmx_bool VirCorr,matrix dxdf,
1174                              t_pbc *pbc,t_graph *g)
1175 {
1176     rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
1177     rvec fv,fj,fk,fl;
1178     real invrm,denom;
1179     real cfx,cfy,cfz;
1180     ivec di;
1181     int  av,ai,aj,ak,al;
1182     int  svi,sij,sik,sil;
1183
1184     /* DEBUG: check atom indices */
1185     av = ia[1];
1186     ai = ia[2];
1187     aj = ia[3];
1188     ak = ia[4];
1189     al = ia[5];
1190
1191     copy_rvec(f[av],fv);
1192
1193     sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1194     sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1195     sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
1196     /* 9 flops */
1197
1198     ra[XX] = a*xik[XX];
1199     ra[YY] = a*xik[YY];
1200     ra[ZZ] = a*xik[ZZ];
1201
1202     rb[XX] = b*xil[XX];
1203     rb[YY] = b*xil[YY];
1204     rb[ZZ] = b*xil[ZZ];
1205
1206     /* 6 flops */
1207
1208     rvec_sub(ra,xij,rja);
1209     rvec_sub(rb,xij,rjb);
1210     rvec_sub(rb,ra,rab);
1211     /* 9 flops */
1212
1213     cprod(rja,rjb,rm);
1214     /* 9 flops */
1215
1216     invrm=gmx_invsqrt(norm2(rm));
1217     denom=invrm*invrm;
1218     /* 5+5+2 flops */
1219
1220     cfx = c*invrm*fv[XX];
1221     cfy = c*invrm*fv[YY];
1222     cfz = c*invrm*fv[ZZ];
1223     /* 6 Flops */
1224
1225     cprod(rm,rab,rt);
1226     /* 9 flops */
1227
1228     rt[XX] *= denom;
1229     rt[YY] *= denom;
1230     rt[ZZ] *= denom;
1231     /* 3flops */
1232
1233     fj[XX] = (        -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1234     fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + (        -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1235     fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + (        -rm[ZZ]*rt[ZZ]) * cfz;
1236     /* 30 flops */
1237
1238     cprod(rjb,rm,rt);
1239     /* 9 flops */
1240
1241     rt[XX] *= denom*a;
1242     rt[YY] *= denom*a;
1243     rt[ZZ] *= denom*a;
1244     /* 3flops */
1245
1246     fk[XX] = (          -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1247     fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1248     fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1249     /* 36 flops */
1250
1251     cprod(rm,rja,rt);
1252     /* 9 flops */
1253
1254     rt[XX] *= denom*b;
1255     rt[YY] *= denom*b;
1256     rt[ZZ] *= denom*b;
1257     /* 3flops */
1258
1259     fl[XX] = (          -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1260     fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1261     fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1262     /* 36 flops */
1263
1264     f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1265     f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1266     f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1267     rvec_inc(f[aj],fj);
1268     rvec_inc(f[ak],fk);
1269     rvec_inc(f[al],fl);
1270     /* 21 flops */
1271
1272     if (g) {
1273         ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
1274         svi = IVEC2IS(di);
1275         ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1276         sij = IVEC2IS(di);
1277         ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1278         sik = IVEC2IS(di);
1279         ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
1280         sil = IVEC2IS(di);
1281     } else if (pbc) {
1282         svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1283     } else {
1284         svi = CENTRAL;
1285     }
1286
1287     if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
1288         rvec_dec(fshift[svi],fv);
1289         fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1290         fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1291         fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1292         rvec_inc(fshift[sij],fj);
1293         rvec_inc(fshift[sik],fk);
1294         rvec_inc(fshift[sil],fl);
1295     }
1296
1297     if (VirCorr)
1298     {
1299         rvec xiv;
1300         int  i,j;
1301
1302         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1303
1304         for(i=0; i<DIM; i++)
1305         {
1306             for(j=0; j<DIM; j++)
1307             {
1308                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1309             }
1310         }
1311     }
1312
1313     /* Total: 207 flops (Yuck!) */
1314 }
1315
1316
1317 static int spread_vsiten(t_iatom ia[],t_iparams ip[],
1318                          rvec x[],rvec f[],rvec fshift[],
1319                          t_pbc *pbc,t_graph *g)
1320 {
1321   rvec xv,dx,fi;
1322   int  n3,av,i,ai;
1323   real a;
1324   ivec di;
1325   int  siv;
1326
1327   n3 = 3*ip[ia[0]].vsiten.n;
1328   av = ia[1];
1329   copy_rvec(x[av],xv);
1330
1331   for(i=0; i<n3; i+=3) {
1332     ai = ia[i+2];
1333     if (g) {
1334       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
1335       siv = IVEC2IS(di);
1336     } else if (pbc) {
1337       siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
1338     } else {
1339       siv = CENTRAL;
1340     }
1341     a = ip[ia[i]].vsiten.a;
1342     svmul(a,f[av],fi);
1343     rvec_inc(f[ai],fi);
1344     if (fshift && siv != CENTRAL) {
1345       rvec_inc(fshift[siv],fi);
1346       rvec_dec(fshift[CENTRAL],fi);
1347     }
1348     /* 6 Flops */
1349   }
1350
1351   return n3;
1352 }
1353
1354
1355 static int vsite_count(const t_ilist *ilist,int ftype)
1356 {
1357     if (ftype == F_VSITEN)
1358     {
1359         return ilist[ftype].nr/3;
1360     }
1361     else
1362     {
1363         return ilist[ftype].nr/(1 + interaction_function[ftype].nratoms);
1364     }
1365 }
1366
1367 static void spread_vsite_f_thread(gmx_vsite_t *vsite,
1368                                   rvec x[],rvec f[],rvec *fshift,
1369                                   gmx_bool VirCorr,matrix dxdf,
1370                                   t_iparams ip[],t_ilist ilist[],
1371                                   t_graph *g,t_pbc *pbc_null)
1372 {
1373     gmx_bool  bPBCAll;
1374     real      a1,b1,c1;
1375     int       i,inc,m,nra,nr,tp,ftype;
1376     t_iatom   *ia;
1377     t_pbc     *pbc_null2;
1378     int       *vsite_pbc;
1379
1380     if (VirCorr)
1381     {
1382         clear_mat(dxdf);
1383     }
1384
1385     bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
1386
1387     /* this loop goes backwards to be able to build *
1388      * higher type vsites from lower types         */
1389     pbc_null2 = NULL;
1390     vsite_pbc = NULL;
1391     for(ftype=F_NRE-1; (ftype>=0); ftype--)
1392     {
1393         if ((interaction_function[ftype].flags & IF_VSITE) &&
1394             ilist[ftype].nr > 0)
1395         {
1396             nra    = interaction_function[ftype].nratoms;
1397             inc    = 1 + nra;
1398             nr     = ilist[ftype].nr;
1399             ia     = ilist[ftype].iatoms;
1400
1401             if (bPBCAll)
1402             {
1403                 pbc_null2 = pbc_null;
1404             }
1405             else if (pbc_null != NULL)
1406             {
1407                 vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1408             }
1409
1410             for(i=0; i<nr; )
1411             {
1412                 if (vsite_pbc != NULL)
1413                 {
1414                     if (vsite_pbc[i/(1+nra)] > -2)
1415                     {
1416                         pbc_null2 = pbc_null;
1417                     }
1418                     else
1419                     {
1420                         pbc_null2 = NULL;
1421                     }
1422                 }
1423
1424                 tp   = ia[0];
1425
1426                 /* Constants for constructing */
1427                 a1   = ip[tp].vsite.a;
1428                 /* Construct the vsite depending on type */
1429                 switch (ftype)
1430                 {
1431                 case F_VSITE2:
1432                     spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
1433                     break;
1434                 case F_VSITE3:
1435                     b1 = ip[tp].vsite.b;
1436                     spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
1437                     break;
1438                 case F_VSITE3FD:
1439                     b1 = ip[tp].vsite.b;
1440                     spread_vsite3FD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1441                     break;
1442                 case F_VSITE3FAD:
1443                     b1 = ip[tp].vsite.b;
1444                     spread_vsite3FAD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1445                     break;
1446                 case F_VSITE3OUT:
1447                     b1 = ip[tp].vsite.b;
1448                     c1 = ip[tp].vsite.c;
1449                     spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1450                     break;
1451                 case F_VSITE4FD:
1452                     b1 = ip[tp].vsite.b;
1453                     c1 = ip[tp].vsite.c;
1454                     spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1455                     break;
1456                 case F_VSITE4FDN:
1457                     b1 = ip[tp].vsite.b;
1458                     c1 = ip[tp].vsite.c;
1459                     spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1460                     break;
1461                 case F_VSITEN:
1462                     inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
1463                     break;
1464                 default:
1465                     gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
1466                               ftype,__FILE__,__LINE__);
1467                 }
1468                 clear_rvec(f[ia[1]]);
1469
1470                 /* Increment loop variables */
1471                 i  += inc;
1472                 ia += inc;
1473             }
1474         }
1475     }
1476 }
1477
1478 void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
1479                     rvec x[],rvec f[],rvec *fshift,
1480                     gmx_bool VirCorr,matrix vir,
1481                     t_nrnb *nrnb,t_idef *idef,
1482                     int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
1483                     t_commrec *cr)
1484 {
1485     t_pbc pbc,*pbc_null;
1486     int   th;
1487
1488     /* We only need to do pbc when we have inter-cg vsites */
1489     if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite)
1490     {
1491         /* This is wasting some CPU time as we now do this multiple times
1492          * per MD step. But how often do we have vsites with full pbc?
1493          */
1494         pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
1495     }
1496     else
1497     {
1498         pbc_null = NULL;
1499     }
1500
1501     if (DOMAINDECOMP(cr))
1502     {
1503         dd_clear_f_vsites(cr->dd,f);
1504     }
1505     else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1506     {
1507         pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
1508     }
1509
1510     if (vsite->nthreads == 1)
1511     {
1512         spread_vsite_f_thread(vsite,
1513                               x,f,fshift,
1514                               VirCorr,vsite->tdata[0].dxdf,
1515                               idef->iparams,idef->il,
1516                               g,pbc_null);
1517     }
1518     else
1519     {
1520         /* First spread the vsites that might depend on other vsites */
1521         spread_vsite_f_thread(vsite,
1522                               x,f,fshift,
1523                               VirCorr,vsite->tdata[vsite->nthreads].dxdf,
1524                               idef->iparams,
1525                               vsite->tdata[vsite->nthreads].ilist,
1526                               g,pbc_null);
1527
1528 #pragma omp parallel num_threads(vsite->nthreads)
1529         {
1530             int  thread;
1531             rvec *fshift_t;
1532
1533             thread = gmx_omp_get_thread_num();
1534
1535             if (thread == 0 || fshift == NULL)
1536             {
1537                 fshift_t = fshift;
1538             }
1539             else
1540             {
1541                 fshift_t = vsite->tdata[thread].fshift;
1542             }
1543
1544             spread_vsite_f_thread(vsite,
1545                                   x,f,fshift_t,
1546                                   VirCorr,vsite->tdata[thread].dxdf,
1547                                   idef->iparams,
1548                                   vsite->tdata[thread].ilist,
1549                                   g,pbc_null);
1550         }
1551
1552         if (fshift != NULL)
1553         {
1554             int i;
1555
1556             for(th=1; th<vsite->nthreads; th++)
1557             {
1558                 for(i=0; i<SHIFTS; i++)
1559                 {
1560                     rvec_inc(fshift[i],vsite->tdata[th].fshift[i]);
1561                 }
1562             }
1563         }
1564     }
1565
1566     if (VirCorr)
1567     {
1568         int i,j;
1569
1570         for(th=0; th<(vsite->nthreads==1 ? 1 : vsite->nthreads+1); th++)
1571         {
1572             for(i=0; i<DIM; i++)
1573             {
1574                 for(j=0; j<DIM; j++)
1575                 {
1576                     vir[i][j] += -0.5*vsite->tdata[th].dxdf[i][j];
1577                 }
1578             }
1579         }
1580     }
1581
1582     if (DOMAINDECOMP(cr))
1583     {
1584         dd_move_f_vsites(cr->dd,f,fshift);
1585     }
1586     else if (vsite->bPDvsitecomm)
1587     {
1588         /* We only move forces here, and they are independent of shifts */
1589         move_construct_f(vsite->vsitecomm,f,cr);
1590     }
1591
1592     inc_nrnb(nrnb,eNR_VSITE2,   vsite_count(idef->il,F_VSITE2));
1593     inc_nrnb(nrnb,eNR_VSITE3,   vsite_count(idef->il,F_VSITE3));
1594     inc_nrnb(nrnb,eNR_VSITE3FD, vsite_count(idef->il,F_VSITE3FD));
1595     inc_nrnb(nrnb,eNR_VSITE3FAD,vsite_count(idef->il,F_VSITE3FAD));
1596     inc_nrnb(nrnb,eNR_VSITE3OUT,vsite_count(idef->il,F_VSITE3OUT));
1597     inc_nrnb(nrnb,eNR_VSITE4FD, vsite_count(idef->il,F_VSITE4FD));
1598     inc_nrnb(nrnb,eNR_VSITE4FDN,vsite_count(idef->il,F_VSITE4FDN));
1599     inc_nrnb(nrnb,eNR_VSITEN,   vsite_count(idef->il,F_VSITEN));
1600 }
1601
1602 static int *atom2cg(t_block *cgs)
1603 {
1604   int *a2cg,cg,i;
1605
1606   snew(a2cg,cgs->index[cgs->nr]);
1607   for(cg=0; cg<cgs->nr; cg++) {
1608     for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
1609       a2cg[i] = cg;
1610   }
1611
1612   return a2cg;
1613 }
1614
1615 static int count_intercg_vsite(gmx_mtop_t *mtop,
1616                                gmx_bool *bHaveChargeGroups)
1617 {
1618     int  mb,mt,ftype,nral,i,cg,a;
1619     gmx_molblock_t *molb;
1620     gmx_moltype_t *molt;
1621     int  *a2cg;
1622     t_ilist *il;
1623     t_iatom *ia;
1624     int  n_intercg_vsite;
1625
1626     *bHaveChargeGroups = FALSE;
1627
1628     n_intercg_vsite = 0;
1629     for(mb=0; mb<mtop->nmolblock; mb++)
1630     {
1631         molb = &mtop->molblock[mb];
1632         molt = &mtop->moltype[molb->type];
1633
1634         if (molt->cgs.nr < molt->atoms.nr)
1635         {
1636             *bHaveChargeGroups = TRUE;
1637         }
1638
1639         a2cg = atom2cg(&molt->cgs);
1640         for(ftype=0; ftype<F_NRE; ftype++)
1641         {
1642             if (interaction_function[ftype].flags & IF_VSITE)
1643             {
1644                 nral = NRAL(ftype);
1645                 il = &molt->ilist[ftype];
1646                 ia  = il->iatoms;
1647                 for(i=0; i<il->nr; i+=1+nral)
1648                 {
1649                     cg = a2cg[ia[1+i]];
1650                     for(a=1; a<nral; a++)
1651                     {
1652                         if (a2cg[ia[1+a]] != cg) {
1653                             n_intercg_vsite += molb->nmol;
1654                             break;
1655                         }
1656                     }
1657                 }
1658             }
1659         }
1660         sfree(a2cg);
1661     }
1662
1663     return n_intercg_vsite;
1664 }
1665
1666 static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
1667                            t_atom *atom,t_mdatoms *md,
1668                            t_block *cgs,int *a2cg)
1669 {
1670   int  ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
1671   t_ilist *il;
1672   t_iatom *ia;
1673   int  **vsite_pbc,*vsite_pbc_f;
1674   char *pbc_set;
1675   gmx_bool bViteOnlyCG_and_FirstAtom;
1676
1677   /* Make an array that tells if the pbc of an atom is set */
1678   snew(pbc_set,cgs->index[cgs->nr]);
1679   /* PBC is set for all non vsites */
1680   for(a=0; a<cgs->index[cgs->nr]; a++) {
1681     if ((atom && atom[a].ptype != eptVSite) ||
1682         (md   && md->ptype[a]  != eptVSite)) {
1683       pbc_set[a] = 1;
1684     }
1685   }
1686
1687   snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
1688
1689   for(ftype=0; ftype<F_NRE; ftype++) {
1690     if (interaction_function[ftype].flags & IF_VSITE) {
1691       nral = NRAL(ftype);
1692       il = &ilist[ftype];
1693       ia  = il->iatoms;
1694
1695       snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
1696       vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1697
1698       i = 0;
1699       while (i < il->nr) {
1700         vsi = i/(1+nral);
1701         vsite = ia[i+1];
1702         cg_v = a2cg[vsite];
1703         /* A value of -2 signals that this vsite and its contructing
1704          * atoms are all within the same cg, so no pbc is required.
1705          */
1706         vsite_pbc_f[vsi] = -2;
1707         /* Check if constructing atoms are outside the vsite's cg */
1708         nc3 = 0;
1709         if (ftype == F_VSITEN) {
1710           nc3 = 3*iparams[ia[i]].vsiten.n;
1711           for(j=0; j<nc3; j+=3) {
1712             if (a2cg[ia[i+j+2]] != cg_v)
1713               vsite_pbc_f[vsi] = -1;
1714           }
1715         } else {
1716           for(a=1; a<nral; a++) {
1717             if (a2cg[ia[i+1+a]] != cg_v)
1718               vsite_pbc_f[vsi] = -1;
1719           }
1720         }
1721         if (vsite_pbc_f[vsi] == -1) {
1722           /* Check if this is the first processed atom of a vsite only cg */
1723           bViteOnlyCG_and_FirstAtom = TRUE;
1724           for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1725             /* Non-vsites already have pbc set, so simply check for pbc_set */
1726             if (pbc_set[a]) {
1727               bViteOnlyCG_and_FirstAtom = FALSE;
1728               break;
1729             }
1730           }
1731           if (bViteOnlyCG_and_FirstAtom) {
1732             /* First processed atom of a vsite only charge group.
1733              * The pbc of the input coordinates to construct_vsites
1734              * should be preserved.
1735              */
1736             vsite_pbc_f[vsi] = vsite;
1737           } else if (cg_v != a2cg[ia[1+i+1]]) {
1738             /* This vsite has a different charge group index
1739              * than it's first constructing atom
1740              * and the charge group has more than one atom,
1741              * search for the first normal particle
1742              * or vsite that already had its pbc defined.
1743              * If nothing is found, use full pbc for this vsite.
1744              */
1745             for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1746               if (a != vsite && pbc_set[a]) {
1747                 vsite_pbc_f[vsi] = a;
1748                 if (gmx_debug_at)
1749                   fprintf(debug,"vsite %d match pbc with atom %d\n",
1750                           vsite+1,a+1);
1751                 break;
1752               }
1753             }
1754             if (gmx_debug_at)
1755               fprintf(debug,"vsite atom %d  cg %d - %d pbc atom %d\n",
1756                       vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
1757                       vsite_pbc_f[vsi]+1);
1758           }
1759         }
1760         if (ftype == F_VSITEN) {
1761           /* The other entries in vsite_pbc_f are not used for center vsites */
1762           i += nc3;
1763         } else {
1764           i += 1+nral;
1765         }
1766
1767         /* This vsite now has its pbc defined */
1768         pbc_set[vsite] = 1;
1769       }
1770     }
1771   }
1772
1773   sfree(pbc_set);
1774
1775   return vsite_pbc;
1776 }
1777
1778
1779 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr,
1780                         gmx_bool bSerial_NoPBC)
1781 {
1782     int nvsite,i;
1783     int *a2cg,cg;
1784     gmx_vsite_t *vsite;
1785     int mt;
1786     gmx_moltype_t *molt;
1787     int nthreads;
1788
1789     /* check if there are vsites */
1790     nvsite = 0;
1791     for(i=0; i<F_NRE; i++)
1792     {
1793         if (interaction_function[i].flags & IF_VSITE)
1794         {
1795             nvsite += gmx_mtop_ftype_count(mtop,i);
1796         }
1797     }
1798
1799     if (nvsite == 0)
1800     {
1801         return NULL;
1802     }
1803
1804     snew(vsite,1);
1805
1806     vsite->n_intercg_vsite = count_intercg_vsite(mtop,
1807                                                  &vsite->bHaveChargeGroups);
1808
1809     /* If we don't have charge groups, the vsite follows its own pbc */
1810     if (!bSerial_NoPBC &&
1811         vsite->bHaveChargeGroups &&
1812         vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr))
1813     {
1814         vsite->nvsite_pbc_molt = mtop->nmoltype;
1815         snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
1816         for(mt=0; mt<mtop->nmoltype; mt++)
1817         {
1818             molt = &mtop->moltype[mt];
1819             /* Make an atom to charge group index */
1820             a2cg = atom2cg(&molt->cgs);
1821             vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1822                                                       molt->ilist,
1823                                                       molt->atoms.atom,NULL,
1824                                                       &molt->cgs,a2cg);
1825             sfree(a2cg);
1826         }
1827
1828         snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
1829         snew(vsite->vsite_pbc_loc       ,F_VSITEN-F_VSITE2+1);
1830     }
1831
1832     if (bSerial_NoPBC)
1833     {
1834         vsite->nthreads = 1;
1835     }
1836     else
1837     {
1838         vsite->nthreads = gmx_omp_nthreads_get(emntVSITE);
1839     }
1840     if (!bSerial_NoPBC)
1841     {
1842         /* We need one extra thread data structure for the overlap vsites */
1843         snew(vsite->tdata,vsite->nthreads+1);
1844     }
1845
1846     vsite->th_ind        = NULL;
1847     vsite->th_ind_nalloc = 0;
1848
1849     return vsite;
1850 }
1851
1852 static void prepare_vsite_thread(const t_ilist *ilist,
1853                                  gmx_vsite_thread_t *vsite_th)
1854 {
1855     int ftype;
1856
1857     for(ftype=0; ftype<F_NRE; ftype++)
1858     {
1859         if (interaction_function[ftype].flags & IF_VSITE)
1860         {
1861             if (ilist[ftype].nr > vsite_th->ilist[ftype].nalloc)
1862             {
1863                 vsite_th->ilist[ftype].nalloc = over_alloc_large(ilist[ftype].nr);
1864                 srenew(vsite_th->ilist[ftype].iatoms,vsite_th->ilist[ftype].nalloc);
1865             }
1866
1867             vsite_th->ilist[ftype].nr = 0;
1868         }
1869     }
1870 }
1871
1872 void split_vsites_over_threads(const t_ilist *ilist,
1873                                const t_mdatoms *mdatoms,
1874                                gmx_bool bLimitRange,
1875                                gmx_vsite_t *vsite)
1876 {
1877     int th;
1878     int vsite_atom_range,natperthread;
1879     int *th_ind;
1880     int ftype;
1881     t_iatom *iat;
1882     t_ilist *il_th;
1883     int nral1,inc,i,j;
1884
1885     if (vsite->nthreads == 1)
1886     {
1887         /* Nothing to do */
1888         return;
1889     }
1890
1891 #pragma omp parallel for num_threads(vsite->nthreads) schedule(static)
1892     for(th=0; th<vsite->nthreads; th++)
1893     {
1894         prepare_vsite_thread(ilist,&vsite->tdata[th]);
1895     }
1896     /* Master threads does the (potential) overlap vsites */
1897     prepare_vsite_thread(ilist,&vsite->tdata[vsite->nthreads]);
1898
1899     /* The current way of distributing the vsites over threads in primitive.
1900      * We divide the atom range 0 - natoms_in_vsite uniformly over threads,
1901      * without taking into account how the vsites are distributed.
1902      * Without domain decomposition we bLimitRange=TRUE and we at least
1903      * tighten the upper bound of the range (useful for common systems
1904      * such as a vsite-protein in 3-site water).
1905      */
1906     if (bLimitRange)
1907     {
1908         vsite_atom_range = -1;
1909         for(ftype=0; ftype<F_NRE; ftype++)
1910         {
1911             if ((interaction_function[ftype].flags & IF_VSITE) &&
1912                 ftype != F_VSITEN)
1913             {
1914                 nral1 = 1 + NRAL(ftype);
1915                 iat   = ilist[ftype].iatoms;
1916                 for(i=0; i<ilist[ftype].nr; i+=nral1)
1917                 {
1918                     for(j=i+1; j<i+nral1; j++)
1919                     {
1920                         vsite_atom_range = max(vsite_atom_range,iat[j]);
1921                     }
1922                 }
1923             }
1924         }
1925         vsite_atom_range++;
1926     }
1927     else
1928     {
1929         vsite_atom_range = mdatoms->homenr;
1930     }
1931     natperthread = (vsite_atom_range + vsite->nthreads - 1)/vsite->nthreads;
1932
1933     if (debug)
1934     {
1935         fprintf(debug,"virtual site thread dist: natoms %d, range %d, natperthread %d\n",mdatoms->nr,vsite_atom_range,natperthread);
1936     }
1937
1938     /* To simplify the vsite assignment, we make an index which tells us
1939      * to which thread particles, both non-vsites and vsites, are assigned.
1940      */
1941     if (mdatoms->nr > vsite->th_ind_nalloc)
1942     {
1943         vsite->th_ind_nalloc = over_alloc_large(mdatoms->nr);
1944         srenew(vsite->th_ind,vsite->th_ind_nalloc);
1945     }
1946     th_ind = vsite->th_ind;
1947     th = 0;
1948     for(i=0; i<mdatoms->nr; i++)
1949     {
1950         if (mdatoms->ptype[i] == eptVSite)
1951         {
1952             /* vsites are not assigned to a thread yet */
1953             th_ind[i] = -1;
1954         }
1955         else
1956         {
1957             /* assign non-vsite particles to thread th */
1958             th_ind[i] = th;
1959         }
1960         if (i == (th + 1)*natperthread && th < vsite->nthreads)
1961         {
1962             th++;
1963         }
1964     }
1965
1966     for(ftype=0; ftype<F_NRE; ftype++)
1967     {
1968         if ((interaction_function[ftype].flags & IF_VSITE) &&
1969             ftype != F_VSITEN)
1970         {
1971             nral1 = 1 + NRAL(ftype);
1972             inc   = nral1;
1973             iat   = ilist[ftype].iatoms;
1974             for(i=0; i<ilist[ftype].nr; )
1975             {
1976                 th = iat[1+i]/natperthread;
1977                 /* We would like to assign this vsite the thread th,
1978                  * but it might depend on atoms outside the atom range of th
1979                  * or on another vsite not assigned to thread th.
1980                  */
1981                 if (ftype != F_VSITEN)
1982                 {
1983                     for(j=i+2; j<i+nral1; j++)
1984                     {
1985                         if (th_ind[iat[j]] != th)
1986                         {
1987                             /* Some constructing atoms are not assigned to
1988                              * thread th, move this vsite to a separate batch.
1989                              */
1990                             th = vsite->nthreads;
1991                         }
1992                     }
1993                 }
1994                 else
1995                 {
1996                     inc = iat[i];
1997                     for(j=i+2; j<i+inc; j+=3)
1998                     {
1999                         if (th_ind[iat[j]] != th)
2000                         {
2001                             th = vsite->nthreads;
2002                         }
2003                     }
2004                 }
2005                 /* Copy this vsite to the thread data struct of thread th */
2006                 il_th = &vsite->tdata[th].ilist[ftype];
2007                 for(j=i; j<i+inc; j++)
2008                 {
2009                     il_th->iatoms[il_th->nr++] = iat[j];
2010                 }
2011                 /* Update this vsite's thread index entry */
2012                 th_ind[iat[1+i]] = th;
2013
2014                 i += inc;
2015             }
2016         }
2017     }
2018
2019     if (debug)
2020     {
2021         for(ftype=0; ftype<F_NRE; ftype++)
2022         {
2023             if ((interaction_function[ftype].flags & IF_VSITE) &&
2024             ilist[ftype].nr > 0)
2025             {
2026                 fprintf(debug,"%-20s thread dist:",
2027                         interaction_function[ftype].longname);
2028                 for(th=0; th<vsite->nthreads+1; th++)
2029                 {
2030                     fprintf(debug," %4d",vsite->tdata[th].ilist[ftype].nr);
2031                 }
2032                 fprintf(debug,"\n");
2033             }
2034         }
2035     }
2036 }
2037
2038 void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
2039                    t_commrec *cr)
2040 {
2041     int *a2cg;
2042
2043     if (vsite->n_intercg_vsite > 0)
2044     {
2045         if (vsite->bHaveChargeGroups)
2046         {
2047             /* Make an atom to charge group index */
2048             a2cg = atom2cg(&top->cgs);
2049             vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
2050                                                  top->idef.il,NULL,md,
2051                                                  &top->cgs,a2cg);
2052             sfree(a2cg);
2053         }
2054
2055         if (PARTDECOMP(cr))
2056         {
2057             snew(vsite->vsitecomm,1);
2058             vsite->bPDvsitecomm =
2059                 setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);
2060         }
2061     }
2062
2063     if (vsite->nthreads > 1)
2064     {
2065         if (vsite->bHaveChargeGroups || PARTDECOMP(cr))
2066         {
2067             gmx_incons("Can not use threads virtual sites combined with charge groups or particle decomposition");
2068         }
2069
2070         split_vsites_over_threads(top->idef.il,md,!DOMAINDECOMP(cr),vsite);
2071     }
2072 }