src/mdlib/vsite.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <stdio.h>
  41 #include "typedefs.h"
  42 #include "vsite.h"
  43 #include "macros.h"
  44 #include "smalloc.h"
  45 #include "nrnb.h"
  46 #include "vec.h"
  47 #include "mvdata.h"
  48 #include "network.h"
  49 #include "mshift.h"
  50 #include "pbc.h"
  51 #include "domdec.h"
  52 #include "partdec.h"
  53 #include "mtop_util.h"
  54
  55 /* Routines to send/recieve coordinates and force
  56  * of constructing atoms.
  57  */
  58
  59 static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
  60 {
  61         rvec *sendbuf;
  62         rvec *recvbuf;
  63         int i,ia;
  64
  65         sendbuf = vsitecomm->send_buf;
  66         recvbuf = vsitecomm->recv_buf;
  67
  68
  69                 /* Prepare pulse left by copying to send buffer */
  70                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
  71                 {
  72                         ia = vsitecomm->left_export_construct[i];
  73                         copy_rvec(x[ia],sendbuf[i]);
  74                 }
  75
  76                 /* Pulse coordinates left */
  77                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
  78
  79                 /* Copy from receive buffer to coordinate array */
  80                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
  81                 {
  82                         ia = vsitecomm->right_import_construct[i];
  83                         copy_rvec(recvbuf[i],x[ia]);
  84                 }
  85
  86                 /* Prepare pulse right by copying to send buffer */
  87                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
  88                 {
  89                         ia = vsitecomm->right_export_construct[i];
  90                         copy_rvec(x[ia],sendbuf[i]);
  91                 }
  92
  93                 /* Pulse coordinates right */
  94                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
  95
  96                 /* Copy from receive buffer to coordinate array */
  97                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
  98                 {
  99                         ia = vsitecomm->left_import_construct[i];
 100                         copy_rvec(recvbuf[i],x[ia]);
 101                 }
 102 }
 103
 104
 105 static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
 106 {
 107         rvec *sendbuf;
 108         rvec *recvbuf;
 109         int i,ia;
 110
 111         sendbuf = vsitecomm->send_buf;
 112         recvbuf = vsitecomm->recv_buf;
 113
 114                 /* Prepare pulse right by copying to send buffer */
 115                 for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 116                 {
 117                         ia = vsitecomm->right_import_construct[i];
 118                         copy_rvec(f[ia],sendbuf[i]);
 119                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 120                 }
 121
 122                 /* Pulse forces right */
 123                 gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
 124
 125                 /* Copy from receive buffer to coordinate array */
 126                 for(i=0;i<vsitecomm->left_export_nconstruct;i++)
 127                 {
 128                         ia = vsitecomm->left_export_construct[i];
 129                         rvec_inc(f[ia],recvbuf[i]);
 130                 }
 131
 132                 /* Prepare pulse left by copying to send buffer */
 133                 for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 134                 {
 135                         ia = vsitecomm->left_import_construct[i];
 136                         copy_rvec(f[ia],sendbuf[i]);
 137                         clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
 138                 }
 139
 140                 /* Pulse coordinates left */
 141                 gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
 142
 143                 /* Copy from receive buffer to coordinate array */
 144                 for(i=0;i<vsitecomm->right_export_nconstruct;i++)
 145                 {
 146                         ia = vsitecomm->right_export_construct[i];
 147                         rvec_inc(f[ia],recvbuf[i]);
 148                 }
 149
 150         /* All forces are now on the home processors */
 151 }
 152
 153
 154 static void
 155 pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
 156 {
 157         int i,ia;
 158
 159         for(i=0;i<vsitecomm->left_import_nconstruct;i++)
 160         {
 161                 ia = vsitecomm->left_import_construct[i];
 162                 clear_rvec(f[ia]);
 163         }
 164         for(i=0;i<vsitecomm->right_import_nconstruct;i++)
 165         {
 166                 ia = vsitecomm->right_import_construct[i];
 167                 clear_rvec(f[ia]);
 168         }
 169 }
 170
 171
 172
 173 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 174 {
 175   if (pbc) {
 176     return pbc_dx_aiuc(pbc,xi,xj,dx);
 177   }
 178   else {
 179     rvec_sub(xi,xj,dx);
 180     return CENTRAL;
 181   }
 182 }
 183
 184 /* Vsite construction routines */
 185
 186 static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
 187 {
 188   real b;
 189   rvec dx;
 190
 191   b=1.0-a;
 192   /* 1 flop */
 193
 194   if (pbc) {
 195     pbc_dx_aiuc(pbc,xj,xi,dx);
 196     x[XX] = xi[XX] + a*dx[XX];
 197     x[YY] = xi[YY] + a*dx[YY];
 198     x[ZZ] = xi[ZZ] + a*dx[ZZ];
 199   } else {
 200     x[XX] = b*xi[XX] + a*xj[XX];
 201     x[YY] = b*xi[YY] + a*xj[YY];
 202     x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
 203     /* 9 Flops */
 204   }
 205
 206   /* TOTAL: 10 flops */
 207 }
 208
 209 static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 210                           t_pbc *pbc)
 211 {
 212   real c;
 213   rvec dxj,dxk;
 214
 215   c=1.0-a-b;
 216   /* 2 flops */
 217
 218   if (pbc) {
 219     pbc_dx_aiuc(pbc,xj,xi,dxj);
 220     pbc_dx_aiuc(pbc,xk,xi,dxk);
 221     x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
 222     x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
 223     x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
 224   } else {
 225     x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
 226     x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
 227     x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
 228   /* 15 Flops */
 229   }
 230
 231   /* TOTAL: 17 flops */
 232 }
 233
 234 static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
 235                             t_pbc *pbc)
 236 {
 237   rvec xij,xjk,temp;
 238   real c;
 239
 240   pbc_rvec_sub(pbc,xj,xi,xij);
 241   pbc_rvec_sub(pbc,xk,xj,xjk);
 242   /* 6 flops */
 243
 244   /* temp goes from i to a point on the line jk */
 245   temp[XX] = xij[XX] + a*xjk[XX];
 246   temp[YY] = xij[YY] + a*xjk[YY];
 247   temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
 248   /* 6 flops */
 249
 250   c=b*gmx_invsqrt(iprod(temp,temp));
 251   /* 6 + 10 flops */
 252
 253   x[XX] = xi[XX] + c*temp[XX];
 254   x[YY] = xi[YY] + c*temp[YY];
 255   x[ZZ] = xi[ZZ] + c*temp[ZZ];
 256   /* 6 Flops */
 257
 258   /* TOTAL: 34 flops */
 259 }
 260
 261 static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
 262 {
 263   rvec xij,xjk,xp;
 264   real a1,b1,c1,invdij;
 265
 266   pbc_rvec_sub(pbc,xj,xi,xij);
 267   pbc_rvec_sub(pbc,xk,xj,xjk);
 268   /* 6 flops */
 269
 270   invdij = gmx_invsqrt(iprod(xij,xij));
 271   c1 = invdij * invdij * iprod(xij,xjk);
 272   xp[XX] = xjk[XX] - c1*xij[XX];
 273   xp[YY] = xjk[YY] - c1*xij[YY];
 274   xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 275   a1 = a*invdij;
 276   b1 = b*gmx_invsqrt(iprod(xp,xp));
 277   /* 45 */
 278
 279   x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
 280   x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
 281   x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
 282   /* 12 Flops */
 283
 284   /* TOTAL: 63 flops */
 285 }
 286
 287 static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
 288                              real a,real b,real c,t_pbc *pbc)
 289 {
 290   rvec xij,xik,temp;
 291
 292   pbc_rvec_sub(pbc,xj,xi,xij);
 293   pbc_rvec_sub(pbc,xk,xi,xik);
 294   cprod(xij,xik,temp);
 295   /* 15 Flops */
 296
 297   x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
 298   x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
 299   x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
 300   /* 18 Flops */
 301
 302   /* TOTAL: 33 flops */
 303 }
 304
 305 static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 306                             real a,real b,real c,t_pbc *pbc)
 307 {
 308   rvec xij,xjk,xjl,temp;
 309   real d;
 310
 311   pbc_rvec_sub(pbc,xj,xi,xij);
 312   pbc_rvec_sub(pbc,xk,xj,xjk);
 313   pbc_rvec_sub(pbc,xl,xj,xjl);
 314   /* 9 flops */
 315
 316   /* temp goes from i to a point on the plane jkl */
 317   temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
 318   temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
 319   temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
 320   /* 12 flops */
 321
 322   d=c*gmx_invsqrt(iprod(temp,temp));
 323   /* 6 + 10 flops */
 324
 325   x[XX] = xi[XX] + d*temp[XX];
 326   x[YY] = xi[YY] + d*temp[YY];
 327   x[ZZ] = xi[ZZ] + d*temp[ZZ];
 328   /* 6 Flops */
 329
 330   /* TOTAL: 43 flops */
 331 }
 332
 333
 334 static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
 335                              real a,real b,real c,t_pbc *pbc)
 336 {
 337     rvec xij,xik,xil,ra,rb,rja,rjb,rm;
 338     real d;
 339
 340     pbc_rvec_sub(pbc,xj,xi,xij);
 341     pbc_rvec_sub(pbc,xk,xi,xik);
 342     pbc_rvec_sub(pbc,xl,xi,xil);
 343     /* 9 flops */
 344
 345     ra[XX] = a*xik[XX];
 346     ra[YY] = a*xik[YY];
 347     ra[ZZ] = a*xik[ZZ];
 348
 349     rb[XX] = b*xil[XX];
 350     rb[YY] = b*xil[YY];
 351     rb[ZZ] = b*xil[ZZ];
 352
 353     /* 6 flops */
 354
 355     rvec_sub(ra,xij,rja);
 356     rvec_sub(rb,xij,rjb);
 357     /* 6 flops */
 358
 359     cprod(rja,rjb,rm);
 360     /* 9 flops */
 361
 362     d=c*gmx_invsqrt(norm2(rm));
 363     /* 5+5+1 flops */
 364
 365     x[XX] = xi[XX] + d*rm[XX];
 366     x[YY] = xi[YY] + d*rm[YY];
 367     x[ZZ] = xi[ZZ] + d*rm[ZZ];
 368     /* 6 Flops */
 369
 370     /* TOTAL: 47 flops */
 371 }
 372
 373
 374 static int constr_vsiten(t_iatom *ia, t_iparams ip[],
 375                          rvec *x, t_pbc *pbc)
 376 {
 377   rvec xs,x1,dx;
 378   dvec dsum;
 379   int  n3,av,ai,i;
 380   real a;
 381
 382   n3 = 3*ip[ia[0]].vsiten.n;
 383   av = ia[1];
 384   ai = ia[2];
 385   copy_rvec(x[ai],x1);
 386   clear_dvec(dsum);
 387   for(i=3; i<n3; i+=3) {
 388     ai = ia[i+2];
 389     a = ip[ia[i]].vsiten.a;
 390     if (pbc) {
 391       pbc_dx_aiuc(pbc,x[ai],x1,dx);
 392     } else {
 393       rvec_sub(x[ai],x1,dx);
 394     }
 395     dsum[XX] += a*dx[XX];
 396     dsum[YY] += a*dx[YY];
 397     dsum[ZZ] += a*dx[ZZ];
 398     /* 9 Flops */
 399   }
 400
 401   x[av][XX] = x1[XX] + dsum[XX];
 402   x[av][YY] = x1[YY] + dsum[YY];
 403   x[av][ZZ] = x1[ZZ] + dsum[ZZ];
 404
 405   return n3;
 406 }
 407
 408
 409 void construct_vsites(FILE *log,gmx_vsite_t *vsite,
 410                       rvec x[],t_nrnb *nrnb,
 411                       real dt,rvec *v,
 412                       t_iparams ip[],t_ilist ilist[],
 413                       int ePBC,gmx_bool bMolPBC,t_graph *graph,
 414                       t_commrec *cr,matrix box)
 415 {
 416   rvec      xpbc,xv,vv,dx;
 417   real      a1,b1,c1,inv_dt;
 418   int       i,inc,ii,nra,nr,tp,ftype;
 419   t_iatom   avsite,ai,aj,ak,al,pbc_atom;
 420   t_iatom   *ia;
 421   t_pbc     pbc,*pbc_null,*pbc_null2;
 422   gmx_bool      bDomDec;
 423   int       *vsite_pbc,ishift;
 424   rvec      reftmp,vtmp,rtmp;
 425
 426   bDomDec = cr && DOMAINDECOMP(cr);
 427
 428   /* We only need to do pbc when we have inter-cg vsites */
 429   if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite) {
 430     /* This is wasting some CPU time as we now do this multiple times
 431      * per MD step. But how often do we have vsites with full pbc?
 432      */
 433     pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
 434   } else {
 435     pbc_null = NULL;
 436   }
 437
 438   if (cr) {
 439     if (bDomDec) {
 440       dd_move_x_vsites(cr->dd,box,x);
 441     } else if (vsite->bPDvsitecomm) {
 442       /* I'm not sure whether the periodicity and shift are guaranteed
 443        * to be consistent between different nodes when running e.g. polymers
 444        * in parallel. In this special case we thus unshift/shift,
 445        * but only when necessary. This is to make sure the coordinates
 446        * we move don't end up a box away...
 447        */
 448                 if (graph)
 449                         unshift_self(graph,box,x);
 450
 451                 move_construct_x(vsite->vsitecomm,x,cr);
 452
 453                 if (graph)
 454                         shift_self(graph,box,x);
 455     }
 456   }
 457
 458   if (v) {
 459     inv_dt = 1.0/dt;
 460   } else {
 461     inv_dt = 1.0;
 462   }
 463
 464   pbc_null2 = NULL;
 465   for(ftype=0; (ftype<F_NRE); ftype++) {
 466     if (interaction_function[ftype].flags & IF_VSITE) {
 467       nra    = interaction_function[ftype].nratoms;
 468       nr     = ilist[ftype].nr;
 469       ia     = ilist[ftype].iatoms;
 470
 471       if (pbc_null) {
 472         vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
 473       } else {
 474         vsite_pbc = NULL;
 475       }
 476
 477       for(i=0; (i<nr); ) {
 478         tp   = ia[0];
 479         /*
 480         if (ftype != idef->functype[tp])
 481           gmx_incons("Function types for vsites wrong");
 482         */
 483
 484         /* The vsite and constructing atoms */
 485         avsite = ia[1];
 486         ai   = ia[2];
 487
 488         /* Constants for constructing vsites */
 489         a1   = ip[tp].vsite.a;
 490         /* Check what kind of pbc we need to use */
 491         if (vsite_pbc) {
 492           pbc_atom = vsite_pbc[i/(1+nra)];
 493           if (pbc_atom > -2) {
 494             if (pbc_atom >= 0) {
 495               /* We need to copy the coordinates here,
 496                * single for single atom cg's pbc_atom is the vsite itself.
 497                */
 498               copy_rvec(x[pbc_atom],xpbc);
 499             }
 500             pbc_null2 = pbc_null;
 501           } else {
 502             pbc_null2 = NULL;
 503           }
 504         } else {
 505           pbc_atom = -2;
 506         }
 507         /* Copy the old position */
 508         copy_rvec(x[avsite],xv);
 509
 510         /* Construct the vsite depending on type */
 511         inc = nra+1;
 512         switch (ftype) {
 513         case F_VSITE2:
 514           aj = ia[3];
 515           constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
 516           break;
 517         case F_VSITE3:
 518           aj = ia[3];
 519           ak = ia[4];
 520           b1 = ip[tp].vsite.b;
 521           constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 522           break;
 523         case F_VSITE3FD:
 524           aj = ia[3];
 525           ak = ia[4];
 526           b1 = ip[tp].vsite.b;
 527           constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 528           break;
 529         case F_VSITE3FAD:
 530           aj = ia[3];
 531           ak = ia[4];
 532           b1 = ip[tp].vsite.b;
 533           constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
 534           break;
 535         case F_VSITE3OUT:
 536           aj = ia[3];
 537           ak = ia[4];
 538           b1 = ip[tp].vsite.b;
 539           c1 = ip[tp].vsite.c;
 540           constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
 541           break;
 542         case F_VSITE4FD:
 543           aj = ia[3];
 544           ak = ia[4];
 545           al = ia[5];
 546           b1 = ip[tp].vsite.b;
 547           c1 = ip[tp].vsite.c;
 548           constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 549                           pbc_null2);
 550           break;
 551         case F_VSITE4FDN:
 552           aj = ia[3];
 553           ak = ia[4];
 554           al = ia[5];
 555           b1 = ip[tp].vsite.b;
 556           c1 = ip[tp].vsite.c;
 557           constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
 558                         pbc_null2);
 559           break;
 560         case F_VSITEN:
 561           inc = constr_vsiten(ia,ip,x,pbc_null2);
 562           break;
 563         default:
 564           gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
 565                       ftype,__FILE__,__LINE__);
 566         }
 567
 568         if (pbc_atom >= 0) {
 569           /* Match the pbc of this vsite to the rest of its charge group */
 570           ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
 571           if (ishift != CENTRAL)
 572             rvec_add(xpbc,dx,x[avsite]);
 573         }
 574         if (v) {
 575           /* Calculate velocity of vsite... */
 576           rvec_sub(x[avsite],xv,vv);
 577           svmul(inv_dt,vv,v[avsite]);
 578         }
 579
 580         /* Increment loop variables */
 581         i  += inc;
 582         ia += inc;
 583       }
 584     }
 585   }
 586 }
 587
 588 static void spread_vsite2(t_iatom ia[],real a,
 589                             rvec x[],rvec f[],rvec fshift[],
 590                             t_pbc *pbc,t_graph *g)
 591 {
 592   rvec    fi,fj,dx;
 593   t_iatom av,ai,aj;
 594   ivec    di;
 595   real    b;
 596   int     siv,sij;
 597
 598   av = ia[1];
 599   ai = ia[2];
 600   aj = ia[3];
 601
 602   svmul(1-a,f[av],fi);
 603   svmul(  a,f[av],fj);
 604   /* 7 flop */
 605
 606   rvec_inc(f[ai],fi);
 607   rvec_inc(f[aj],fj);
 608   /* 6 Flops */
 609
 610   if (g) {
 611     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
 612     siv = IVEC2IS(di);
 613     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 614     sij = IVEC2IS(di);
 615   } else if (pbc) {
 616     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 617     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 618   } else {
 619     siv = CENTRAL;
 620     sij = CENTRAL;
 621   }
 622
 623   if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
 624     rvec_inc(fshift[siv],f[av]);
 625     rvec_dec(fshift[CENTRAL],fi);
 626     rvec_dec(fshift[sij],fj);
 627   }
 628
 629   /* TOTAL: 13 flops */
 630 }
 631
 632 void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
 633                            gmx_mtop_t *mtop,rvec x[])
 634 {
 635   int as,mb,mol;
 636   gmx_molblock_t *molb;
 637   gmx_moltype_t  *molt;
 638
 639   as = 0;
 640   for(mb=0; mb<mtop->nmolblock; mb++) {
 641     molb = &mtop->molblock[mb];
 642     molt = &mtop->moltype[molb->type];
 643     for(mol=0; mol<molb->nmol; mol++) {
 644       construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
 645                        mtop->ffparams.iparams,molt->ilist,
 646                        epbcNONE,TRUE,NULL,NULL,NULL);
 647       as += molt->atoms.nr;
 648     }
 649   }
 650 }
 651
 652 static void spread_vsite3(t_iatom ia[],real a,real b,
 653                             rvec x[],rvec f[],rvec fshift[],
 654                             t_pbc *pbc,t_graph *g)
 655 {
 656   rvec    fi,fj,fk,dx;
 657   atom_id av,ai,aj,ak;
 658   ivec    di;
 659   int     siv,sij,sik;
 660
 661   av = ia[1];
 662   ai = ia[2];
 663   aj = ia[3];
 664   ak = ia[4];
 665
 666   svmul(1-a-b,f[av],fi);
 667   svmul(    a,f[av],fj);
 668   svmul(    b,f[av],fk);
 669   /* 11 flops */
 670
 671   rvec_inc(f[ai],fi);
 672   rvec_inc(f[aj],fj);
 673   rvec_inc(f[ak],fk);
 674   /* 9 Flops */
 675
 676   if (g) {
 677     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
 678     siv = IVEC2IS(di);
 679     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
 680     sij = IVEC2IS(di);
 681     ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
 682     sik = IVEC2IS(di);
 683   } else if (pbc) {
 684     siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
 685     sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
 686     sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
 687   } else {
 688     siv = CENTRAL;
 689     sij = CENTRAL;
 690     sik = CENTRAL;
 691   }
 692
 693   if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
 694     rvec_inc(fshift[siv],f[av]);
 695     rvec_dec(fshift[CENTRAL],fi);
 696     rvec_dec(fshift[sij],fj);
 697     rvec_dec(fshift[sik],fk);
 698   }
 699
 700   /* TOTAL: 20 flops */
 701 }
 702
 703 static void spread_vsite3FD(t_iatom ia[],real a,real b,
 704                             rvec x[],rvec f[],rvec fshift[],
 705                             gmx_bool VirCorr,matrix dxdf,
 706                             t_pbc *pbc,t_graph *g)
 707 {
 708   real fx,fy,fz,c,invl,fproj,a1;
 709   rvec xvi,xij,xjk,xix,fv,temp;
 710   t_iatom av,ai,aj,ak;
 711   int     svi,sji,skj,d;
 712   ivec    di;
 713
 714   av = ia[1];
 715   ai = ia[2];
 716   aj = ia[3];
 717   ak = ia[4];
 718   copy_rvec(f[av],fv);
 719
 720   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 721   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 722   /* 6 flops */
 723
 724   /* xix goes from i to point x on the line jk */
 725   xix[XX]=xij[XX]+a*xjk[XX];
 726   xix[YY]=xij[YY]+a*xjk[YY];
 727   xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
 728   /* 6 flops */
 729
 730   invl=gmx_invsqrt(iprod(xix,xix));
 731   c=b*invl;
 732   /* 4 + ?10? flops */
 733
 734   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
 735
 736   temp[XX]=c*(fv[XX]-fproj*xix[XX]);
 737   temp[YY]=c*(fv[YY]-fproj*xix[YY]);
 738   temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
 739   /* 16 */
 740
 741   /* c is already calculated in constr_vsite3FD
 742      storing c somewhere will save 26 flops!     */
 743
 744   a1=1-a;
 745   f[ai][XX] += fv[XX] - temp[XX];
 746   f[ai][YY] += fv[YY] - temp[YY];
 747   f[ai][ZZ] += fv[ZZ] - temp[ZZ];
 748   f[aj][XX] += a1*temp[XX];
 749   f[aj][YY] += a1*temp[YY];
 750   f[aj][ZZ] += a1*temp[ZZ];
 751   f[ak][XX] += a*temp[XX];
 752   f[ak][YY] += a*temp[YY];
 753   f[ak][ZZ] += a*temp[ZZ];
 754   /* 19 Flops */
 755
 756   if (g) {
 757     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 758     svi = IVEC2IS(di);
 759     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 760     sji = IVEC2IS(di);
 761     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 762     skj = IVEC2IS(di);
 763   } else if (pbc) {
 764     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 765   } else {
 766     svi = CENTRAL;
 767   }
 768
 769   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 770     rvec_dec(fshift[svi],fv);
 771     fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
 772     fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
 773     fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
 774     fshift[    sji][XX] += temp[XX];
 775     fshift[    sji][YY] += temp[YY];
 776     fshift[    sji][ZZ] += temp[ZZ];
 777     fshift[    skj][XX] += a*temp[XX];
 778     fshift[    skj][YY] += a*temp[YY];
 779     fshift[    skj][ZZ] += a*temp[ZZ];
 780   }
 781
 782     if (VirCorr)
 783     {
 784         /* When VirCorr=TRUE, the virial for the current forces is not
 785          * calculated from the redistributed forces. This means that
 786          * the effect of non-linear virtual site constructions on the virial
 787          * needs to be added separately. This contribution can be calculated
 788          * in many ways, but the simplest and cheapest way is to use
 789          * the first constructing atom ai as a reference position in space:
 790          * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
 791          */
 792         rvec xiv;
 793         int  i,j;
 794
 795         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 796
 797         for(i=0; i<DIM; i++)
 798         {
 799             for(j=0; j<DIM; j++)
 800             {
 801                 /* As xix is a linear combination of j and k, use that here */
 802                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
 803             }
 804         }
 805     }
 806
 807   /* TOTAL: 61 flops */
 808 }
 809
 810 static void spread_vsite3FAD(t_iatom ia[],real a,real b,
 811                              rvec x[],rvec f[],rvec fshift[],
 812                              gmx_bool VirCorr,matrix dxdf,
 813                              t_pbc *pbc,t_graph *g)
 814 {
 815   rvec    xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
 816   real    a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
 817   t_iatom av,ai,aj,ak;
 818   int     svi,sji,skj,d;
 819   ivec    di;
 820
 821   av = ia[1];
 822   ai = ia[2];
 823   aj = ia[3];
 824   ak = ia[4];
 825   copy_rvec(f[ia[1]],fv);
 826
 827   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 828   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
 829   /* 6 flops */
 830
 831   invdij = gmx_invsqrt(iprod(xij,xij));
 832   invdij2 = invdij * invdij;
 833   c1 = iprod(xij,xjk) * invdij2;
 834   xperp[XX] = xjk[XX] - c1*xij[XX];
 835   xperp[YY] = xjk[YY] - c1*xij[YY];
 836   xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
 837   /* xperp in plane ijk, perp. to ij */
 838   invdp = gmx_invsqrt(iprod(xperp,xperp));
 839   a1 = a*invdij;
 840   b1 = b*invdp;
 841   /* 45 flops */
 842
 843   /* a1, b1 and c1 are already calculated in constr_vsite3FAD
 844      storing them somewhere will save 45 flops!     */
 845
 846   fproj=iprod(xij  ,fv)*invdij2;
 847   svmul(fproj,                      xij,  Fpij); /* proj. f on xij */
 848   svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
 849   svmul(b1*fproj,                   xperp,f3);
 850   /* 23 flops */
 851
 852   rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
 853   rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
 854   for (d=0; (d<DIM); d++) {
 855     f1[d]*=a1;
 856     f2[d]*=b1;
 857   }
 858   /* 12 flops */
 859
 860   c2=1+c1;
 861   f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
 862   f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
 863   f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
 864   f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
 865   f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
 866   f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
 867   f[ak][XX] +=                      f2[XX];
 868   f[ak][YY] +=                      f2[YY];
 869   f[ak][ZZ] +=                      f2[ZZ];
 870   /* 30 Flops */
 871
 872   if (g) {
 873     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 874     svi = IVEC2IS(di);
 875     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 876     sji = IVEC2IS(di);
 877     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
 878     skj = IVEC2IS(di);
 879   } else if (pbc) {
 880     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 881   } else {
 882     svi = CENTRAL;
 883   }
 884
 885   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
 886     rvec_dec(fshift[svi],fv);
 887     fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
 888     fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
 889     fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
 890     fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
 891     fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
 892     fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
 893     fshift[    skj][XX] +=                          f2[XX];
 894     fshift[    skj][YY] +=                          f2[YY];
 895     fshift[    skj][ZZ] +=                          f2[ZZ];
 896   }
 897
 898     if (VirCorr)
 899     {
 900         rvec xiv;
 901         int  i,j;
 902
 903         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 904
 905         for(i=0; i<DIM; i++)
 906         {
 907             for(j=0; j<DIM; j++)
 908             {
 909                 /* Note that xik=xij+xjk, so we have to add xij*f2 */
 910                 dxdf[i][j] +=
 911                     - xiv[i]*fv[j]
 912                     + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
 913                     + xjk[i]*f2[j];
 914             }
 915         }
 916     }
 917
 918   /* TOTAL: 113 flops */
 919 }
 920
 921 static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
 922                              rvec x[],rvec f[],rvec fshift[],
 923                              gmx_bool VirCorr,matrix dxdf,
 924                              t_pbc *pbc,t_graph *g)
 925 {
 926   rvec    xvi,xij,xik,fv,fj,fk;
 927   real    cfx,cfy,cfz;
 928   atom_id av,ai,aj,ak;
 929   ivec    di;
 930   int     svi,sji,ski;
 931
 932   av = ia[1];
 933   ai = ia[2];
 934   aj = ia[3];
 935   ak = ia[4];
 936
 937   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
 938   ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
 939   /* 6 Flops */
 940
 941   copy_rvec(f[av],fv);
 942
 943   cfx = c*fv[XX];
 944   cfy = c*fv[YY];
 945   cfz = c*fv[ZZ];
 946   /* 3 Flops */
 947
 948   fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
 949   fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
 950   fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
 951
 952   fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
 953   fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
 954   fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
 955   /* 30 Flops */
 956
 957   f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
 958   f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
 959   f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 960   rvec_inc(f[aj],fj);
 961   rvec_inc(f[ak],fk);
 962   /* 15 Flops */
 963
 964   if (g) {
 965     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
 966     svi = IVEC2IS(di);
 967     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
 968     sji = IVEC2IS(di);
 969     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
 970     ski = IVEC2IS(di);
 971   } else if (pbc) {
 972     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
 973   } else {
 974     svi = CENTRAL;
 975   }
 976
 977   if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
 978     rvec_dec(fshift[svi],fv);
 979     fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
 980     fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
 981     fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
 982     rvec_inc(fshift[sji],fj);
 983     rvec_inc(fshift[ski],fk);
 984   }
 985
 986     if (VirCorr)
 987     {
 988         rvec xiv;
 989         int  i,j;
 990
 991         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
 992
 993         for(i=0; i<DIM; i++)
 994         {
 995             for(j=0; j<DIM; j++)
 996             {
 997                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
 998             }
 999         }
1000     }
1001
1002   /* TOTAL: 54 flops */
1003 }
1004
1005 static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
1006                             rvec x[],rvec f[],rvec fshift[],
1007                             gmx_bool VirCorr,matrix dxdf,
1008                             t_pbc *pbc,t_graph *g)
1009 {
1010   real    d,invl,fproj,a1;
1011   rvec    xvi,xij,xjk,xjl,xix,fv,temp;
1012   atom_id av,ai,aj,ak,al;
1013   ivec    di;
1014   int     svi,sji,skj,slj,m;
1015
1016   av = ia[1];
1017   ai = ia[2];
1018   aj = ia[3];
1019   ak = ia[4];
1020   al = ia[5];
1021
1022   sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1023   skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
1024   slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
1025   /* 9 flops */
1026
1027   /* xix goes from i to point x on the plane jkl */
1028   for(m=0; m<DIM; m++)
1029     xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
1030   /* 12 flops */
1031
1032   invl=gmx_invsqrt(iprod(xix,xix));
1033   d=c*invl;
1034   /* 4 + ?10? flops */
1035
1036   copy_rvec(f[av],fv);
1037
1038   fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
1039
1040   for(m=0; m<DIM; m++)
1041     temp[m] = d*(fv[m] - fproj*xix[m]);
1042   /* 16 */
1043
1044   /* c is already calculated in constr_vsite3FD
1045      storing c somewhere will save 35 flops!     */
1046
1047   a1 = 1 - a - b;
1048   for(m=0; m<DIM; m++) {
1049     f[ai][m] += fv[m] - temp[m];
1050     f[aj][m] += a1*temp[m];
1051     f[ak][m] += a*temp[m];
1052     f[al][m] += b*temp[m];
1053   }
1054   /* 26 Flops */
1055
1056   if (g) {
1057     ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
1058     svi = IVEC2IS(di);
1059     ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1060     sji = IVEC2IS(di);
1061     ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
1062     skj = IVEC2IS(di);
1063     ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
1064     slj = IVEC2IS(di);
1065   } else if (pbc) {
1066     svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1067   } else {
1068     svi = CENTRAL;
1069   }
1070
1071   if (fshift &&
1072       (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
1073     rvec_dec(fshift[svi],fv);
1074     for(m=0; m<DIM; m++) {
1075       fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
1076       fshift[    sji][m] += temp[m];
1077       fshift[    skj][m] += a*temp[m];
1078       fshift[    slj][m] += b*temp[m];
1079     }
1080   }
1081
1082     if (VirCorr)
1083     {
1084         rvec xiv;
1085         int  i,j;
1086
1087         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1088
1089         for(i=0; i<DIM; i++)
1090         {
1091             for(j=0; j<DIM; j++)
1092             {
1093                 dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
1094             }
1095         }
1096     }
1097
1098   /* TOTAL: 77 flops */
1099 }
1100
1101
1102 static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
1103                              rvec x[],rvec f[],rvec fshift[],
1104                              gmx_bool VirCorr,matrix dxdf,
1105                              t_pbc *pbc,t_graph *g)
1106 {
1107     rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
1108     rvec fv,fj,fk,fl;
1109     real invrm,denom;
1110     real cfx,cfy,cfz;
1111     ivec di;
1112     int  av,ai,aj,ak,al;
1113     int  svi,sij,sik,sil;
1114
1115     /* DEBUG: check atom indices */
1116     av = ia[1];
1117     ai = ia[2];
1118     aj = ia[3];
1119     ak = ia[4];
1120     al = ia[5];
1121
1122     copy_rvec(f[av],fv);
1123
1124     sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
1125     sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
1126     sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
1127     /* 9 flops */
1128
1129     ra[XX] = a*xik[XX];
1130     ra[YY] = a*xik[YY];
1131     ra[ZZ] = a*xik[ZZ];
1132
1133     rb[XX] = b*xil[XX];
1134     rb[YY] = b*xil[YY];
1135     rb[ZZ] = b*xil[ZZ];
1136
1137     /* 6 flops */
1138
1139     rvec_sub(ra,xij,rja);
1140     rvec_sub(rb,xij,rjb);
1141     rvec_sub(rb,ra,rab);
1142     /* 9 flops */
1143
1144     cprod(rja,rjb,rm);
1145     /* 9 flops */
1146
1147     invrm=gmx_invsqrt(norm2(rm));
1148     denom=invrm*invrm;
1149     /* 5+5+2 flops */
1150
1151     cfx = c*invrm*fv[XX];
1152     cfy = c*invrm*fv[YY];
1153     cfz = c*invrm*fv[ZZ];
1154     /* 6 Flops */
1155
1156     cprod(rm,rab,rt);
1157     /* 9 flops */
1158
1159     rt[XX] *= denom;
1160     rt[YY] *= denom;
1161     rt[ZZ] *= denom;
1162     /* 3flops */
1163
1164     fj[XX] = (        -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
1165     fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + (        -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
1166     fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + (        -rm[ZZ]*rt[ZZ]) * cfz;
1167     /* 30 flops */
1168
1169     cprod(rjb,rm,rt);
1170     /* 9 flops */
1171
1172     rt[XX] *= denom*a;
1173     rt[YY] *= denom*a;
1174     rt[ZZ] *= denom*a;
1175     /* 3flops */
1176
1177     fk[XX] = (          -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
1178     fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
1179     fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1180     /* 36 flops */
1181
1182     cprod(rm,rja,rt);
1183     /* 9 flops */
1184
1185     rt[XX] *= denom*b;
1186     rt[YY] *= denom*b;
1187     rt[ZZ] *= denom*b;
1188     /* 3flops */
1189
1190     fl[XX] = (          -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
1191     fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
1192     fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
1193     /* 36 flops */
1194
1195     f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1196     f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1197     f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1198     rvec_inc(f[aj],fj);
1199     rvec_inc(f[ak],fk);
1200     rvec_inc(f[al],fl);
1201     /* 21 flops */
1202
1203     if (g) {
1204         ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
1205         svi = IVEC2IS(di);
1206         ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
1207         sij = IVEC2IS(di);
1208         ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
1209         sik = IVEC2IS(di);
1210         ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
1211         sil = IVEC2IS(di);
1212     } else if (pbc) {
1213         svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
1214     } else {
1215         svi = CENTRAL;
1216     }
1217
1218     if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
1219         rvec_dec(fshift[svi],fv);
1220         fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
1221         fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
1222         fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
1223         rvec_inc(fshift[sij],fj);
1224         rvec_inc(fshift[sik],fk);
1225         rvec_inc(fshift[sil],fl);
1226     }
1227
1228     if (VirCorr)
1229     {
1230         rvec xiv;
1231         int  i,j;
1232
1233         pbc_rvec_sub(pbc,x[av],x[ai],xiv);
1234
1235         for(i=0; i<DIM; i++)
1236         {
1237             for(j=0; j<DIM; j++)
1238             {
1239                 dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
1240             }
1241         }
1242     }
1243
1244     /* Total: 207 flops (Yuck!) */
1245 }
1246
1247
1248 static int spread_vsiten(t_iatom ia[],t_iparams ip[],
1249                          rvec x[],rvec f[],rvec fshift[],
1250                          t_pbc *pbc,t_graph *g)
1251 {
1252   rvec xv,dx,fi;
1253   int  n3,av,i,ai;
1254   real a;
1255   ivec di;
1256   int  siv;
1257
1258   n3 = 3*ip[ia[0]].vsiten.n;
1259   av = ia[1];
1260   copy_rvec(x[av],xv);
1261
1262   for(i=0; i<n3; i+=3) {
1263     ai = ia[i+2];
1264     if (g) {
1265       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
1266       siv = IVEC2IS(di);
1267     } else if (pbc) {
1268       siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
1269     } else {
1270       siv = CENTRAL;
1271     }
1272     a = ip[ia[i]].vsiten.a;
1273     svmul(a,f[av],fi);
1274     rvec_inc(f[ai],fi);
1275     if (fshift && siv != CENTRAL) {
1276       rvec_inc(fshift[siv],fi);
1277       rvec_dec(fshift[CENTRAL],fi);
1278     }
1279     /* 6 Flops */
1280   }
1281
1282   return n3;
1283 }
1284
1285
1286 void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
1287                     rvec x[],rvec f[],rvec *fshift,
1288                     gmx_bool VirCorr,matrix vir,
1289                     t_nrnb *nrnb,t_idef *idef,
1290                     int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
1291                     t_commrec *cr)
1292 {
1293   real      a1,b1,c1;
1294   int       i,inc,m,nra,nr,tp,ftype;
1295   int       nd2,nd3,nd3FD,nd3FAD,nd3OUT,nd4FD,nd4FDN,ndN;
1296   t_iatom   *ia;
1297   t_iparams *ip;
1298   t_pbc     pbc,*pbc_null,*pbc_null2;
1299   int       *vsite_pbc;
1300   matrix    dxdf;
1301
1302   /* We only need to do pbc when we have inter-cg vsites */
1303   if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite) {
1304     /* This is wasting some CPU time as we now do this multiple times
1305      * per MD step. But how often do we have vsites with full pbc?
1306      */
1307     pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
1308   } else {
1309     pbc_null = NULL;
1310   }
1311
1312   if (DOMAINDECOMP(cr))
1313   {
1314     dd_clear_f_vsites(cr->dd,f);
1315   }
1316   else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
1317   {
1318     pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
1319   }
1320
1321     if (vir != NULL)
1322     {
1323         clear_mat(dxdf);
1324     }
1325
1326   ip     = idef->iparams;
1327
1328   nd2        = 0;
1329   nd3        = 0;
1330   nd3FD      = 0;
1331   nd3FAD     = 0;
1332   nd3OUT     = 0;
1333   nd4FD      = 0;
1334   nd4FDN     = 0;
1335   ndN        = 0;
1336
1337   /* this loop goes backwards to be able to build *
1338    * higher type vsites from lower types         */
1339   pbc_null2 = NULL;
1340   for(ftype=F_NRE-1; (ftype>=0); ftype--) {
1341     if (interaction_function[ftype].flags & IF_VSITE) {
1342       nra    = interaction_function[ftype].nratoms;
1343       nr     = idef->il[ftype].nr;
1344       ia     = idef->il[ftype].iatoms;
1345
1346       if (pbc_null) {
1347         vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
1348       } else {
1349         vsite_pbc = NULL;
1350       }
1351
1352       for(i=0; (i<nr); ) {
1353         /* Check if we need to apply pbc for this vsite */
1354         if (vsite_pbc) {
1355           if (vsite_pbc[i/(1+nra)] > -2)
1356             pbc_null2 = pbc_null;
1357           else
1358             pbc_null2 = NULL;
1359         }
1360
1361         tp   = ia[0];
1362         if (ftype != idef->functype[tp])
1363           gmx_incons("Functiontypes for vsites wrong");
1364
1365         /* Constants for constructing */
1366         a1   = ip[tp].vsite.a;
1367         /* Construct the vsite depending on type */
1368         inc = nra+1;
1369         switch (ftype) {
1370         case F_VSITE2:
1371           spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
1372           nd2++;
1373           break;
1374         case F_VSITE3:
1375           b1 = ip[tp].vsite.b;
1376           spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
1377           nd3++;
1378           break;
1379         case F_VSITE3FD:
1380           b1 = ip[tp].vsite.b;
1381           spread_vsite3FD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1382           nd3FD++;
1383           break;
1384         case F_VSITE3FAD:
1385           b1 = ip[tp].vsite.b;
1386           spread_vsite3FAD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1387           nd3FAD++;
1388           break;
1389         case F_VSITE3OUT:
1390           b1 = ip[tp].vsite.b;
1391           c1 = ip[tp].vsite.c;
1392           spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1393           nd3OUT++;
1394           break;
1395         case F_VSITE4FD:
1396           b1 = ip[tp].vsite.b;
1397           c1 = ip[tp].vsite.c;
1398           spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1399           nd4FD++;
1400           break;
1401         case F_VSITE4FDN:
1402           b1 = ip[tp].vsite.b;
1403           c1 = ip[tp].vsite.c;
1404           spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
1405           nd4FDN++;
1406           break;
1407         case F_VSITEN:
1408           inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
1409           ndN += inc;
1410           break;
1411         default:
1412           gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
1413                       ftype,__FILE__,__LINE__);
1414         }
1415         clear_rvec(f[ia[1]]);
1416
1417         /* Increment loop variables */
1418         i  += inc;
1419         ia += inc;
1420       }
1421     }
1422   }
1423
1424     if (VirCorr)
1425     {
1426         int i,j;
1427
1428         for(i=0; i<DIM; i++)
1429         {
1430             for(j=0; j<DIM; j++)
1431             {
1432                 vir[i][j] += -0.5*dxdf[i][j];
1433             }
1434         }
1435     }
1436
1437   inc_nrnb(nrnb,eNR_VSITE2,   nd2     );
1438   inc_nrnb(nrnb,eNR_VSITE3,   nd3     );
1439   inc_nrnb(nrnb,eNR_VSITE3FD, nd3FD   );
1440   inc_nrnb(nrnb,eNR_VSITE3FAD,nd3FAD  );
1441   inc_nrnb(nrnb,eNR_VSITE3OUT,nd3OUT  );
1442   inc_nrnb(nrnb,eNR_VSITE4FD, nd4FD   );
1443   inc_nrnb(nrnb,eNR_VSITE4FDN,nd4FDN  );
1444   inc_nrnb(nrnb,eNR_VSITEN,   ndN     );
1445
1446   if (DOMAINDECOMP(cr)) {
1447     dd_move_f_vsites(cr->dd,f,fshift);
1448   } else if (vsite->bPDvsitecomm) {
1449     /* We only move forces here, and they are independent of shifts */
1450     move_construct_f(vsite->vsitecomm,f,cr);
1451   }
1452 }
1453
1454 static int *atom2cg(t_block *cgs)
1455 {
1456   int *a2cg,cg,i;
1457
1458   snew(a2cg,cgs->index[cgs->nr]);
1459   for(cg=0; cg<cgs->nr; cg++) {
1460     for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
1461       a2cg[i] = cg;
1462   }
1463
1464   return a2cg;
1465 }
1466
1467 static int count_intercg_vsite(gmx_mtop_t *mtop)
1468 {
1469   int  mb,mt,ftype,nral,i,cg,a;
1470   gmx_molblock_t *molb;
1471   gmx_moltype_t *molt;
1472   int  *a2cg;
1473   t_ilist *il;
1474   t_iatom *ia;
1475   int  n_intercg_vsite;
1476
1477   n_intercg_vsite = 0;
1478   for(mb=0; mb<mtop->nmolblock; mb++) {
1479     molb = &mtop->molblock[mb];
1480     molt = &mtop->moltype[molb->type];
1481     a2cg = atom2cg(&molt->cgs);
1482     for(ftype=0; ftype<F_NRE; ftype++) {
1483       if (interaction_function[ftype].flags & IF_VSITE) {
1484         nral = NRAL(ftype);
1485         il = &molt->ilist[ftype];
1486         ia  = il->iatoms;
1487         for(i=0; i<il->nr; i+=1+nral) {
1488           cg = a2cg[ia[1+i]];
1489           for(a=1; a<nral; a++) {
1490             if (a2cg[ia[1+a]] != cg) {
1491               n_intercg_vsite += molb->nmol;
1492               break;
1493             }
1494           }
1495         }
1496       }
1497     }
1498     sfree(a2cg);
1499   }
1500
1501   return n_intercg_vsite;
1502 }
1503
1504 static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
1505                            t_atom *atom,t_mdatoms *md,
1506                            t_block *cgs,int *a2cg)
1507 {
1508   int  ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
1509   t_ilist *il;
1510   t_iatom *ia;
1511   int  **vsite_pbc,*vsite_pbc_f;
1512   char *pbc_set;
1513   gmx_bool bViteOnlyCG_and_FirstAtom;
1514
1515   /* Make an array that tells if the pbc of an atom is set */
1516   snew(pbc_set,cgs->index[cgs->nr]);
1517   /* PBC is set for all non vsites */
1518   for(a=0; a<cgs->index[cgs->nr]; a++) {
1519     if ((atom && atom[a].ptype != eptVSite) ||
1520         (md   && md->ptype[a]  != eptVSite)) {
1521       pbc_set[a] = 1;
1522     }
1523   }
1524
1525   snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
1526
1527   for(ftype=0; ftype<F_NRE; ftype++) {
1528     if (interaction_function[ftype].flags & IF_VSITE) {
1529       nral = NRAL(ftype);
1530       il = &ilist[ftype];
1531       ia  = il->iatoms;
1532
1533       snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
1534       vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
1535
1536       i = 0;
1537       while (i < il->nr) {
1538         vsi = i/(1+nral);
1539         vsite = ia[i+1];
1540         cg_v = a2cg[vsite];
1541         /* A value of -2 signals that this vsite and its contructing
1542          * atoms are all within the same cg, so no pbc is required.
1543          */
1544         vsite_pbc_f[vsi] = -2;
1545         /* Check if constructing atoms are outside the vsite's cg */
1546         nc3 = 0;
1547         if (ftype == F_VSITEN) {
1548           nc3 = 3*iparams[ia[i]].vsiten.n;
1549           for(j=0; j<nc3; j+=3) {
1550             if (a2cg[ia[i+j+2]] != cg_v)
1551               vsite_pbc_f[vsi] = -1;
1552           }
1553         } else {
1554           for(a=1; a<nral; a++) {
1555             if (a2cg[ia[i+1+a]] != cg_v)
1556               vsite_pbc_f[vsi] = -1;
1557           }
1558         }
1559         if (vsite_pbc_f[vsi] == -1) {
1560           /* Check if this is the first processed atom of a vsite only cg */
1561           bViteOnlyCG_and_FirstAtom = TRUE;
1562           for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1563             /* Non-vsites already have pbc set, so simply check for pbc_set */
1564             if (pbc_set[a]) {
1565               bViteOnlyCG_and_FirstAtom = FALSE;
1566               break;
1567             }
1568           }
1569           if (bViteOnlyCG_and_FirstAtom) {
1570             /* First processed atom of a vsite only charge group.
1571              * The pbc of the input coordinates to construct_vsites
1572              * should be preserved.
1573              */
1574             vsite_pbc_f[vsi] = vsite;
1575           } else if (cg_v != a2cg[ia[1+i+1]]) {
1576             /* This vsite has a different charge group index
1577              * than it's first constructing atom
1578              * and the charge group has more than one atom,
1579              * search for the first normal particle
1580              * or vsite that already had its pbc defined.
1581              * If nothing is found, use full pbc for this vsite.
1582              */
1583             for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
1584               if (a != vsite && pbc_set[a]) {
1585                 vsite_pbc_f[vsi] = a;
1586                 if (gmx_debug_at)
1587                   fprintf(debug,"vsite %d match pbc with atom %d\n",
1588                           vsite+1,a+1);
1589                 break;
1590               }
1591             }
1592             if (gmx_debug_at)
1593               fprintf(debug,"vsite atom %d  cg %d - %d pbc atom %d\n",
1594                       vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
1595                       vsite_pbc_f[vsi]+1);
1596           }
1597         }
1598         if (ftype == F_VSITEN) {
1599           /* The other entries in vsite_pbc_f are not used for center vsites */
1600           i += nc3;
1601         } else {
1602           i += 1+nral;
1603         }
1604
1605         /* This vsite now has its pbc defined */
1606         pbc_set[vsite] = 1;
1607       }
1608     }
1609   }
1610
1611   sfree(pbc_set);
1612
1613   return vsite_pbc;
1614 }
1615
1616 gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr)
1617 {
1618   int nvsite,i;
1619   int *a2cg,cg;
1620   gmx_vsite_t *vsite;
1621   int mt;
1622   gmx_moltype_t *molt;
1623
1624   /* check if there are vsites */
1625   nvsite = 0;
1626   for(i=0; i<F_NRE; i++) {
1627     if (interaction_function[i].flags & IF_VSITE) {
1628       nvsite += gmx_mtop_ftype_count(mtop,i);
1629     }
1630   }
1631
1632   if (nvsite == 0) {
1633     return NULL;
1634   }
1635
1636   snew(vsite,1);
1637
1638   vsite->n_intercg_vsite = count_intercg_vsite(mtop);
1639
1640   if (vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr)) {
1641     vsite->nvsite_pbc_molt = mtop->nmoltype;
1642     snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
1643     for(mt=0; mt<mtop->nmoltype; mt++) {
1644       molt = &mtop->moltype[mt];
1645       /* Make an atom to charge group index */
1646       a2cg = atom2cg(&molt->cgs);
1647       vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
1648                                                 molt->ilist,
1649                                                 molt->atoms.atom,NULL,
1650                                                 &molt->cgs,a2cg);
1651       sfree(a2cg);
1652     }
1653
1654     snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
1655     snew(vsite->vsite_pbc_loc       ,F_VSITEN-F_VSITE2+1);
1656   }
1657
1658
1659   return vsite;
1660 }
1661
1662 void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
1663                    t_commrec *cr)
1664 {
1665   int *a2cg;
1666
1667   /* Make an atom to charge group index */
1668   a2cg = atom2cg(&top->cgs);
1669
1670   if (vsite->n_intercg_vsite > 0) {
1671     vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
1672                                          top->idef.il,NULL,md,
1673                                          &top->cgs,a2cg);
1674
1675     if (PARTDECOMP(cr)) {
1676       snew(vsite->vsitecomm,1);
1677                 vsite->bPDvsitecomm =
1678                 setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);
1679     }
1680   }
1681
1682   sfree(a2cg);
1683 }