Merge release-4-5-patches into release-4-6

author Mark Abraham <mark.j.abraham@gmail.com>

Tue, 19 Feb 2013 13:01:57 +0000 (14:01 +0100)

committer Mark Abraham <mark.j.abraham@gmail.com>

Wed, 20 Feb 2013 09:35:31 +0000 (10:35 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Tue, 19 Feb 2013 13:01:57 +0000 (14:01 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Wed, 20 Feb 2013 09:35:31 +0000 (10:35 +0100)
diff --combined src/gmxlib/copyrite.c

index fe3a2df9fe5c829a9aa8825d9d2e21febaa52c1b,f4d65ab56bfd03c976b8e1496349b6276af65dea..ae32be48e89de743c9856b2f2b65f32fe07474bf
--- 1/src/gmxlib/copyrite.c
--- 2/src/gmxlib/copyrite.c
+++ b/src/gmxlib/copyrite.c
@@@ -1,55 -1,45 +1,55 @@@
   /*
- - * 
- - *                This source code is part of
- - * 
- - *                 G   R   O   M   A   C   S
- - * 
- - *          GROningen MAchine for Chemical Simulations
- - * 
- - *                        VERSION 3.2.0
- - * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * This file is part of the GROMACS molecular simulation package.
+ + *
    * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
    * Copyright (c) 2001-2004, The GROMACS development team,
    * check out http://www.gromacs.org for more information.
- -
- - * This program is free software; you can redistribute it and/or
- - * modify it under the terms of the GNU General Public License
- - * as published by the Free Software Foundation; either version 2
+ + * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ + * David van der Spoel, Berk Hess, Erik Lindahl, and including many
+ + * others, as listed in the AUTHORS file in the top-level source
+ + * directory and at http://www.gromacs.org.
+ + *
+ + * GROMACS is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU Lesser General Public License
+ + * as published by the Free Software Foundation; either version 2.1
    * of the License, or (at your option) any later version.
- - * 
- - * If you want to redistribute modifications, please consider that
- - * scientific software is very special. Version control is crucial -
- - * bugs must be traceable. We will be happy to consider code for
- - * inclusion in the official distribution, but derived work must not
- - * be called official GROMACS. Details are found in the README & COPYING
- - * files - if they are missing, get the official version at www.gromacs.org.
- - * 
+ + *
+ + * GROMACS is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ + * Lesser General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU Lesser General Public
+ + * License along with GROMACS; if not, see
+ + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ + *
+ + * If you want to redistribute modifications to GROMACS, please
+ + * consider that scientific software is very special. Version
+ + * control is crucial - bugs must be traceable. We will be happy to
+ + * consider code for inclusion in the official distribution, but
+ + * derived work must not be called official GROMACS. Details are found
+ + * in the README & COPYING files - if they are missing, get the
+ + * official version at http://www.gromacs.org.
+ + *
    * To help us fund GROMACS development, we humbly ask that you cite
- - * the papers on the package - you can find them in the top README file.
- - * 
- - * For more info, check our website at http://www.gromacs.org
- - * 
- - * And Hey:
- - * GROningen Mixture of Alchemy and Childrens' Stories
+ + * the research papers on the package. Check out http://www.gromacs.org.
    */
   #ifdef HAVE_CONFIG_H
   #include <config.h>
   #endif
   
- -#ifdef GMX_THREADS
+ +#ifdef GMX_THREAD_MPI
   #include <thread_mpi.h>
   #endif
   
+ +#ifdef HAVE_LIBMKL
+ +#include <mkl.h>
+ +#endif
+ +#ifdef GMX_FFT_FFTW3
+ +#include <fftw3.h>
+ +#endif
+ +
   /* This file is completely threadsafe - keep it that way! */
   
   #include <string.h>
@@@ -58,610 -48,513 +58,598 @@@
   #include "smalloc.h"
   #include "string2.h"
   #include "macros.h"
- -#include "time.h"
+ +#include <time.h>
   #include "random.h"
   #include "statutil.h"
   #include "copyrite.h"
   #include "strdb.h"
   #include "futil.h"
+ +#include "vec.h"
+ +#include "buildinfo.h"
+ +#include "gmx_cpuid.h"
   
- -static void pr_two(FILE *out,int c,int i)
+ +static void pr_two(FILE *out, int c, int i)
   {
- -  if (i < 10)
- -    fprintf(out,"%c0%1d",c,i);
- -  else
- -    fprintf(out,"%c%2d",c,i);
+ +    if (i < 10)
+ +    {
+ +        fprintf(out, "%c0%1d", c, i);
+ +    }
+ +    else
+ +    {
+ +        fprintf(out, "%c%2d", c, i);
+ +    }
   }
   
- -void pr_difftime(FILE *out,double dt)
+ +void pr_difftime(FILE *out, double dt)
   {
- -  int    ndays,nhours,nmins,nsecs;
- -  gmx_bool   bPrint,bPrinted;
- -
- -  ndays = dt/(24*3600);
- -  dt    = dt-24*3600*ndays;
- -  nhours= dt/3600;
- -  dt    = dt-3600*nhours;
- -  nmins = dt/60;
- -  dt    = dt-nmins*60;
- -  nsecs = dt;
- -  bPrint= (ndays > 0);
- -  bPrinted=bPrint;
- -  if (bPrint) 
- -    fprintf(out,"%d",ndays);
- -  bPrint=bPrint || (nhours > 0);
- -  if (bPrint) {
- -    if (bPrinted)
- -      pr_two(out,'d',nhours);
- -    else 
- -      fprintf(out,"%d",nhours);
- -  }
- -  bPrinted=bPrinted || bPrint;
- -  bPrint=bPrint || (nmins > 0);
- -  if (bPrint) {
+ +    int        ndays, nhours, nmins, nsecs;
+ +    gmx_bool   bPrint, bPrinted;
+ +
+ +    ndays    = dt/(24*3600);
+ +    dt       = dt-24*3600*ndays;
+ +    nhours   = dt/3600;
+ +    dt       = dt-3600*nhours;
+ +    nmins    = dt/60;
+ +    dt       = dt-nmins*60;
+ +    nsecs    = dt;
+ +    bPrint   = (ndays > 0);
+ +    bPrinted = bPrint;
+ +    if (bPrint)
+ +    {
+ +        fprintf(out, "%d", ndays);
+ +    }
+ +    bPrint = bPrint || (nhours > 0);
+ +    if (bPrint)
+ +    {
+ +        if (bPrinted)
+ +        {
+ +            pr_two(out, 'd', nhours);
+ +        }
+ +        else
+ +        {
+ +            fprintf(out, "%d", nhours);
+ +        }
+ +    }
+ +    bPrinted = bPrinted || bPrint;
+ +    bPrint   = bPrint || (nmins > 0);
+ +    if (bPrint)
+ +    {
+ +        if (bPrinted)
+ +        {
+ +            pr_two(out, 'h', nmins);
+ +        }
+ +        else
+ +        {
+ +            fprintf(out, "%d", nmins);
+ +        }
+ +    }
+ +    bPrinted = bPrinted || bPrint;
       if (bPrinted)
- -      pr_two(out,'h',nmins);
- -    else 
- -      fprintf(out,"%d",nmins);
- -  }
- -  bPrinted=bPrinted || bPrint;
- -  if (bPrinted)
- -    pr_two(out,':',nsecs);
- -  else
- -    fprintf(out,"%ds",nsecs);
- -  fprintf(out,"\n");
+ +    {
+ +        pr_two(out, ':', nsecs);
+ +    }
+ +    else
+ +    {
+ +        fprintf(out, "%ds", nsecs);
+ +    }
+ +    fprintf(out, "\n");
   }
   
   
   gmx_bool be_cool(void)
   {
- -  /* Yes, it is bad to check the environment variable every call,
- -   * but we dont call this routine often, and it avoids using 
- -   * a mutex for locking the variable...
- -   */
- -#ifdef GMX_FAHCORE
- -  /*be uncool*/
- -  return FALSE;
+ +    /* Yes, it is bad to check the environment variable every call,
+ +     * but we dont call this routine often, and it avoids using
+ +     * a mutex for locking the variable...
+ +     */
+ +#ifdef GMX_COOL_QUOTES
+ +    return (getenv("GMX_NO_QUOTES") == NULL);
   #else
- -  return (getenv("GMX_NO_QUOTES") == NULL);
+ +    /*be uncool*/
+ +    return FALSE;
   #endif
   }
   
   void space(FILE *out, int n)
   {
- -  fprintf(out,"%*s",n,"");
+ +    fprintf(out, "%*s", n, "");
   }
   
- void f(char *a)
- {
-     int i;
-     int len = strlen(a);
- 
-     for (i = 0; i < len; i++)
-     {
-         a[i] = ~a[i];
-     }
- }
- 
- -static void sp_print(FILE *out,const char *s)
+ +static void sp_print(FILE *out, const char *s)
   {
- -  int slen;
- -  
- -  slen=strlen(s);
- -  space(out,(80-slen)/2);
- -  fprintf(out,"%s\n",s);
+ +    int slen;
+ +
+ +    slen = strlen(s);
+ +    space(out, (80-slen)/2);
+ +    fprintf(out, "%s\n", s);
   }
   
- -static void ster_print(FILE *out,const char *s)
+ +static void ster_print(FILE *out, const char *s)
   {
- -  int  slen;
- -  char buf[128];
- -  
- -  snprintf(buf,128,":-)  %s  (-:",s);
- -  slen=strlen(buf);
- -  space(out,(80-slen)/2);
- -  fprintf(out,"%s\n",buf);
+ +    int  slen;
+ +    char buf[128];
+ +
+ +    snprintf(buf, 128, ":-)  %s  (-:", s);
+ +    slen = strlen(buf);
+ +    space(out, (80-slen)/2);
+ +    fprintf(out, "%s\n", buf);
   }
   
   
- -static void pukeit(const char *db,const char *defstring, char *retstring, 
- -                 int retsize, int *cqnum)
+ +static void pukeit(const char *db, const char *defstring, char *retstring,
+ +                   int retsize, int *cqnum)
   {
- -  FILE *fp;
- -  char **help;
- -  int  i,nhlp;
- -  int  seed;
- - 
- -  if (be_cool() && ((fp = low_libopen(db,FALSE)) != NULL)) {
- -    nhlp=fget_lines(fp,&help);
- -    /* for libraries we can use the low-level close routines */
- -    ffclose(fp);
- -    seed=time(NULL);
- -    *cqnum=nhlp*rando(&seed);
- -    if (strlen(help[*cqnum]) >= STRLEN)
- -      help[*cqnum][STRLEN-1] = '\0';
- -    strncpy(retstring,help[*cqnum],retsize);
- -    for(i=0; (i<nhlp); i++)
- -      sfree(help[i]);
- -    sfree(help);
- -  }
- -  else 
- -    strncpy(retstring,defstring,retsize);
+ +    FILE  *fp;
+ +    char **help;
+ +    int    i, nhlp;
+ +    int    seed;
+ +
+ +    if (be_cool() && ((fp = low_libopen(db, FALSE)) != NULL))
+ +    {
+ +        nhlp = fget_lines(fp, &help);
+ +        /* for libraries we can use the low-level close routines */
+ +        ffclose(fp);
+ +        seed   = time(NULL);
+ +        *cqnum = nhlp*rando(&seed);
+ +        if (strlen(help[*cqnum]) >= STRLEN)
+ +        {
+ +            help[*cqnum][STRLEN-1] = '\0';
+ +        }
+ +        strncpy(retstring, help[*cqnum], retsize);
-         f(retstring);
+ +        for (i = 0; (i < nhlp); i++)
+ +        {
+ +            sfree(help[i]);
+ +        }
+ +        sfree(help);
+ +    }
+ +    else
+ +    {
+ +        strncpy(retstring, defstring, retsize);
+ +    }
   }
   
   void bromacs(char *retstring, int retsize)
   {
- -  int dum;
+ +    int dum;
   
- -  pukeit("bromacs.dat",
- -       "Groningen Machine for Chemical Simulation",
- -       retstring,retsize,&dum);
+ +    pukeit("bromacs.dat",
+ +           "Groningen Machine for Chemical Simulation",
+ +           retstring, retsize, &dum);
   }
   
   void cool_quote(char *retstring, int retsize, int *cqnum)
   {
- -  char *tmpstr;
- -  char *s,*ptr;
- -  int tmpcq,*p;
- -  
- -  if (cqnum!=NULL)
- -    p = cqnum;
- -  else
- -    p = &tmpcq;
- -  
- -  /* protect audience from explicit lyrics */
- -  snew(tmpstr,retsize+1);
- -  pukeit("gurgle.dat","Thanx for Using GROMACS - Have a Nice Day",
- -       tmpstr,retsize-2,p);
- -
- -  if ((ptr = strchr(tmpstr,'_')) != NULL) {
- -    *ptr='\0';
- -    ptr++;
- -    sprintf(retstring,"\"%s\" %s",tmpstr,ptr);
- -  }
- -  else {
- -    strcpy(retstring,tmpstr);
- -  }
- -  sfree(tmpstr);
+ +    char *tmpstr;
+ +    char *s, *ptr;
+ +    int   tmpcq, *p;
+ +
+ +    if (cqnum != NULL)
+ +    {
+ +        p = cqnum;
+ +    }
+ +    else
+ +    {
+ +        p = &tmpcq;
+ +    }
+ +
+ +    /* protect audience from explicit lyrics */
+ +    snew(tmpstr, retsize+1);
+ +    pukeit("gurgle.dat", "Thanx for Using GROMACS - Have a Nice Day",
+ +           tmpstr, retsize-2, p);
+ +
+ +    if ((ptr = strchr(tmpstr, '_')) != NULL)
+ +    {
+ +        *ptr = '\0';
+ +        ptr++;
+ +        sprintf(retstring, "\"%s\" %s", tmpstr, ptr);
+ +    }
+ +    else
+ +    {
+ +        strcpy(retstring, tmpstr);
+ +    }
+ +    sfree(tmpstr);
   }
   
- -void CopyRight(FILE *out,const char *szProgram)
+ +void CopyRight(FILE *out, const char *szProgram)
   {
- -  /* Dont change szProgram arbitrarily - it must be argv[0], i.e. the 
- -   * name of a file. Otherwise, we won't be able to find the library dir.
- -   */
+ +    /* Dont change szProgram arbitrarily - it must be argv[0], i.e. the
+ +     * name of a file. Otherwise, we won't be able to find the library dir.
+ +     */
   #define NCR (int)asize(CopyrightText)
+ +/* TODO: Is this exception still needed? */
   #ifdef GMX_FAHCORE
- -#define NGPL 0 /*FAH has an exception permission from GPL to allow digital signatures in Gromacs*/
+ +#define NLICENSE 0 /*FAH has an exception permission from GPL to allow digital signatures in Gromacs*/
   #else
- -#define NGPL (int)asize(GPLText)
+ +#define NLICENSE (int)asize(LicenseText)
   #endif
   
- -  char buf[256],tmpstr[1024];
- -  int i;
+ +    char buf[256], tmpstr[1024];
+ +    int  i;
   
   #ifdef GMX_FAHCORE
- -  set_program_name("Gromacs");
+ +    set_program_name("Gromacs");
   #else
- -  set_program_name(szProgram);
+ +    set_program_name(szProgram);
   #endif
   
- -  ster_print(out,"G  R  O  M  A  C  S");
- -  fprintf(out,"\n");
- -  
- -  bromacs(tmpstr,1023);
- -  sp_print(out,tmpstr); 
- -  fprintf(out,"\n");
+ +    ster_print(out, "G  R  O  M  A  C  S");
+ +    fprintf(out, "\n");
   
- -  ster_print(out,GromacsVersion());
- -  fprintf(out,"\n");
+ +    bromacs(tmpstr, 1023);
+ +    sp_print(out, tmpstr);
+ +    fprintf(out, "\n");
   
- -  /* fprintf(out,"\n");*/
+ +    ster_print(out, GromacsVersion());
+ +    fprintf(out, "\n");
   
- -  /* sp_print(out,"PLEASE NOTE: THIS IS A BETA VERSION\n");
- -  
- -  fprintf(out,"\n"); */
+ +    /* fprintf(out,"\n");*/
   
- -  for(i=0; (i<NCR); i++) 
- -    sp_print(out,CopyrightText[i]);
- -  for(i=0; (i<NGPL); i++)
- -    sp_print(out,GPLText[i]);
+ +    /* sp_print(out,"PLEASE NOTE: THIS IS A BETA VERSION\n");
   
- -  fprintf(out,"\n");
+ +       fprintf(out,"\n"); */
   
- -  snprintf(buf,256,"%s",Program());
+ +    for (i = 0; (i < NCR); i++)
+ +    {
+ +        sp_print(out, CopyrightText[i]);
+ +    }
+ +    for (i = 0; (i < NLICENSE); i++)
+ +    {
+ +        sp_print(out, LicenseText[i]);
+ +    }
+ +
+ +    fprintf(out, "\n");
+ +
+ +    snprintf(buf, 256, "%s", Program());
   #ifdef GMX_DOUBLE
- -  strcat(buf," (double precision)");
+ +    strcat(buf, " (double precision)");
   #endif
- -  ster_print(out,buf);
- -  fprintf(out,"\n");
+ +    ster_print(out, buf);
+ +    fprintf(out, "\n");
   }
   
   
   void thanx(FILE *fp)
   {
- -  char cq[1024];
- -  int  cqnum;
- -
- -  /* protect the audience from suggestive discussions */
- -  cool_quote(cq,1023,&cqnum);
- -  
- -  if (be_cool()) 
- -    fprintf(fp,"\ngcq#%d: %s\n\n",cqnum,cq);
- -  else
- -    fprintf(fp,"\n%s\n\n",cq);
+ +    char cq[1024];
+ +    int  cqnum;
+ +
+ +    /* protect the audience from suggestive discussions */
+ +    cool_quote(cq, 1023, &cqnum);
+ +
+ +    if (be_cool())
+ +    {
+ +        fprintf(fp, "\ngcq#%d: %s\n\n", cqnum, cq);
+ +    }
+ +    else
+ +    {
+ +        fprintf(fp, "\n%s\n\n", cq);
+ +    }
   }
   
   typedef struct {
- -  const char *key;
- -  const char *author;
- -  const char *title;
- -  const char *journal;
- -  int volume,year;
- -  const char *pages;
+ +    const char *key;
+ +    const char *author;
+ +    const char *title;
+ +    const char *journal;
+ +    int         volume, year;
+ +    const char *pages;
   } t_citerec;
   
- -void please_cite(FILE *fp,const char *key)
+ +void please_cite(FILE *fp, const char *key)
   {
- -  static const t_citerec citedb[] = {
- -    { "Allen1987a",
- -      "M. P. Allen and D. J. Tildesley",
- -      "Computer simulation of liquids",
- -      "Oxford Science Publications",
- -      1, 1987, "1" },
- -    { "Berendsen95a",
- -      "H. J. C. Berendsen, D. van der Spoel and R. van Drunen",
- -      "GROMACS: A message-passing parallel molecular dynamics implementation",
- -      "Comp. Phys. Comm.",
- -      91, 1995, "43-56" },
- -    { "Berendsen84a",
- -      "H. J. C. Berendsen, J. P. M. Postma, A. DiNola and J. R. Haak",
- -      "Molecular dynamics with coupling to an external bath",
- -      "J. Chem. Phys.",
- -      81, 1984, "3684-3690" },
- -    { "Ryckaert77a",
- -      "J. P. Ryckaert and G. Ciccotti and H. J. C. Berendsen",
- -      "Numerical Integration of the Cartesian Equations of Motion of a System with Constraints; Molecular Dynamics of n-Alkanes",
- -      "J. Comp. Phys.",
- -      23, 1977, "327-341" },
- -    { "Miyamoto92a",
- -      "S. Miyamoto and P. A. Kollman",
- -      "SETTLE: An Analytical Version of the SHAKE and RATTLE Algorithms for Rigid Water Models",
- -      "J. Comp. Chem.",
- -      13, 1992, "952-962" },
- -    { "Cromer1968a",
- -      "D. T. Cromer & J. B. Mann",
- -      "X-ray scattering factors computed from numerical Hartree-Fock wave functions",
- -      "Acta Cryst. A",
- -      24, 1968, "321" },
- -    { "Barth95a",
- -      "E. Barth and K. Kuczera and B. Leimkuhler and R. D. Skeel",
- -      "Algorithms for Constrained Molecular Dynamics",
- -      "J. Comp. Chem.",
- -      16, 1995, "1192-1209" },
- -    { "Essmann95a",
- -      "U. Essmann, L. Perera, M. L. Berkowitz, T. Darden, H. Lee and L. G. Pedersen ",
- -      "A smooth particle mesh Ewald method",
- -      "J. Chem. Phys.",
- -      103, 1995, "8577-8592" },
- -    { "Torda89a",
- -      "A. E. Torda and R. M. Scheek and W. F. van Gunsteren",
- -      "Time-dependent distance restraints in molecular dynamics simulations",
- -      "Chem. Phys. Lett.",
- -      157, 1989, "289-294" },
- -    { "Tironi95a",
- -      "I. G. Tironi and R. Sperb and P. E. Smith and W. F. van Gunsteren",
- -      "Generalized reaction field method for molecular dynamics simulations",
- -      "J. Chem. Phys",
- -      102, 1995, "5451-5459" },
- -    { "Hess97a",
- -      "B. Hess and H. Bekker and H. J. C. Berendsen and J. G. E. M. Fraaije",
- -      "LINCS: A Linear Constraint Solver for molecular simulations",
- -      "J. Comp. Chem.",
- -      18, 1997, "1463-1472" },
- -    { "Hess2008a",
- -      "B. Hess",
- -      "P-LINCS: A Parallel Linear Constraint Solver for molecular simulation",
- -      "J. Chem. Theory Comput.",
- -      4, 2008, "116-122" },
- -    { "Hess2008b",
- -      "B. Hess and C. Kutzner and D. van der Spoel and E. Lindahl",
- -      "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation",
- -      "J. Chem. Theory Comput.",
- -      4, 2008, "435-447" },
- -    { "Hub2010",
- -      "J. S. Hub, B. L. de Groot and D. van der Spoel",
- -      "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates",
- -      "J. Chem. Theory Comput.",
- -      6, 2010, "3713-3720"}, 
- -    { "In-Chul99a",
- -      "Y. In-Chul and M. L. Berkowitz",
- -      "Ewald summation for systems with slab geometry",
- -      "J. Chem. Phys.",
- -      111, 1999, "3155-3162" },
- -    { "DeGroot97a",
- -      "B. L. de Groot and D. M. F. van Aalten and R. M. Scheek and A. Amadei and G. Vriend and H. J. C. Berendsen",
- -      "Prediction of Protein Conformational Freedom From Distance Constrains",
- -      "Proteins",
- -      29, 1997, "240-251" },
- -    { "Spoel98a",
- -      "D. van der Spoel and P. J. van Maaren and H. J. C. Berendsen",
- -      "A systematic study of water models for molecular simulation. Derivation of models optimized for use with a reaction-field.",
- -      "J. Chem. Phys.",
- -      108, 1998, "10220-10230" },
- -    { "Wishart98a",
- -      "D. S. Wishart and A. M. Nip",
- -      "Protein Chemical Shift Analysis: A Practical Guide",
- -      "Biochem. Cell Biol.",
- -      76, 1998, "153-163" },
- -    { "Maiorov95",
- -      "V. N. Maiorov and G. M. Crippen",
- -      "Size-Independent Comparison of Protein Three-Dimensional Structures",
- -      "PROTEINS: Struct. Funct. Gen.",
- -      22, 1995, "273-283" },
- -    { "Feenstra99",
- -      "K. A. Feenstra and B. Hess and H. J. C. Berendsen",
- -      "Improving Efficiency of Large Time-scale Molecular Dynamics Simulations of Hydrogen-rich Systems",
- -      "J. Comput. Chem.",
- -      20, 1999, "786-798" },
- -    { "Timneanu2004a",
- -      "N. Timneanu and C. Caleman and J. Hajdu and D. van der Spoel",
- -      "Auger Electron Cascades in Water and Ice",
- -      "Chem. Phys.",
- -      299, 2004, "277-283" },
- -    { "Pascal2011a",
- -      "T. A. Pascal and S. T. Lin and W. A. Goddard III",
- -      "Thermodynamics of liquids: standard molar entropies and heat capacities of common solvents from 2PT molecular dynamics",
- -      "Phys. Chem. Chem. Phys.",
- -      13, 2011, "169-181" },
- -    { "Caleman2011b",
- -      "C. Caleman and M. Hong and J. S. Hub and L. T. da Costa and P. J. van Maaren and D. van der Spoel",
- -      "Force Field Benchmark 1: Density, Heat of Vaporization, Heat Capacity, Surface Tension and Dielectric Constant of 152 Organic Liquids",
- -      "Submitted",
- -      0, 2011, "" },
- -    { "Lindahl2001a",
- -      "E. Lindahl and B. Hess and D. van der Spoel",
- -      "GROMACS 3.0: A package for molecular simulation and trajectory analysis",
- -      "J. Mol. Mod.",
- -      7, 2001, "306-317" },
- -    { "Wang2001a",
- -      "J. Wang and W. Wang and S. Huo and M. Lee and P. A. Kollman",
- -      "Solvation model based on weighted solvent accessible surface area",
- -      "J. Phys. Chem. B",
- -      105, 2001, "5055-5067" },
- -    { "Eisenberg86a",
- -      "D. Eisenberg and A. D. McLachlan",
- -      "Solvation energy in protein folding and binding",
- -      "Nature",
- -      319, 1986, "199-203" },
- -    { "Eisenhaber95",
- -      "Frank Eisenhaber and Philip Lijnzaad and Patrick Argos and Chris Sander and Michael Scharf",
- -      "The Double Cube Lattice Method: Efficient Approaches to Numerical Integration of Surface Area and Volume and to Dot Surface Contouring of Molecular Assemblies",
- -      "J. Comp. Chem.",
- -      16, 1995, "273-284" },
- -    { "Hess2002",
- -      "B. Hess, H. Saint-Martin and H.J.C. Berendsen",
- -      "Flexible constraints: an adiabatic treatment of quantum degrees of freedom, with application to the flexible and polarizable MCDHO model for water",
- -      "J. Chem. Phys.",
- -      116, 2002, "9602-9610" },
- -    { "Hetenyi2002b",
- -      "Csaba Hetenyi and David van der Spoel",
- -      "Efficient docking of peptides to proteins without prior knowledge of the binding site.",
- -      "Prot. Sci.",
- -      11, 2002, "1729-1737" },
- -    { "Hess2003",
- -      "B. Hess and R.M. Scheek",
- -      "Orientation restraints in molecular dynamics simulations using time and ensemble averaging",
- -      "J. Magn. Res.",
- -      164, 2003, "19-27" },
- -    { "Rappe1991a",
- -      "A. K. Rappe and W. A. Goddard III",
- -      "Charge Equillibration for Molecular Dynamics Simulations",
- -      "J. Phys. Chem.",
- -      95, 1991, "3358-3363" },
- -    { "Mu2005a",
- -      "Y. Mu, P. H. Nguyen and G. Stock",
- -      "Energy landscape of a small peptide revelaed by dihedral angle principal component analysis",
- -      "Prot. Struct. Funct. Bioinf.",
- -      58, 2005, "45-52" },
- -    { "Okabe2001a",
- -      "T. Okabe and M. Kawata and Y. Okamoto and M. Mikami",
- -      "Replica-exchange {M}onte {C}arlo method for the isobaric-isothermal ensemble",
- -      "Chem. Phys. Lett.",
- -      335, 2001, "435-439" },
- -    { "Hukushima96a",
- -      "K. Hukushima and K. Nemoto",
- -      "Exchange Monte Carlo Method and Application to Spin Glass Simulations",
- -      "J. Phys. Soc. Jpn.",
- -      65, 1996, "1604-1608" },
- -    { "Tropp80a",
- -      "J. Tropp",
- -      "Dipolar Relaxation and Nuclear Overhauser effects in nonrigid molecules: The effect of fluctuating internuclear distances",
- -      "J. Chem. Phys.",
- -      72, 1980, "6035-6043" },
- -    { "Bultinck2002a",
- -       "P. Bultinck and W. Langenaeker and P. Lahorte and F. De Proft and P. Geerlings and M. Waroquier and J. P. Tollenaere",
- -      "The electronegativity equalization method I: Parametrization and validation for atomic charge calculations",
- -      "J. Phys. Chem. A",
- -      106, 2002, "7887-7894" },
- -    { "Yang2006b",
- -      "Q. Y. Yang and K. A. Sharp",
- -      "Atomic charge parameters for the finite difference Poisson-Boltzmann method using electronegativity neutralization",
- -      "J. Chem. Theory Comput.",
- -      2, 2006, "1152-1167" },
- -    { "Spoel2005a",
- -      "D. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A. E. Mark and H. J. C. Berendsen",
- -      "GROMACS: Fast, Flexible and Free",
- -      "J. Comp. Chem.",
- -      26, 2005, "1701-1719" },
- -    { "Spoel2006b",
- -      "D. van der Spoel, P. J. van Maaren, P. Larsson and N. Timneanu",
- -      "Thermodynamics of hydrogen bonding in hydrophilic and hydrophobic media",
- -      "J. Phys. Chem. B",
- -      110, 2006, "4393-4398" },
- -    { "Spoel2006d",
- -      "D. van der Spoel and M. M. Seibert",
- -      "Protein folding kinetics and thermodynamics from atomistic simulations",
- -      "Phys. Rev. Letters",
- -      96, 2006, "238102" },
- -    { "Palmer94a",
- -      "B. J. Palmer",
- -      "Transverse-current autocorrelation-function calculations of the shear viscosity for molecular liquids",
- -      "Phys. Rev. E",
- -      49, 1994, "359-366" },
- -    { "Bussi2007a",
- -      "G. Bussi, D. Donadio and M. Parrinello",
- -      "Canonical sampling through velocity rescaling",
- -      "J. Chem. Phys.",
- -      126, 2007, "014101" },
- -    { "Hub2006",
- -      "J. S. Hub and B. L. de Groot",
- -      "Does CO2 permeate through Aquaporin-1?",
- -      "Biophys. J.",
- -      91, 2006, "842-848" },
- -    { "Hub2008",
- -      "J. S. Hub and B. L. de Groot",
- -      "Mechanism of selectivity in aquaporins and aquaglyceroporins",
- -      "PNAS",
- -      105, 2008, "1198-1203" },
- -    { "Friedrich2009",
- -      "M. S. Friedrichs, P. Eastman, V. Vaidyanathan, M. Houston, S. LeGrand, A. L. Beberg, D. L. Ensign, C. M. Bruns, and V. S. Pande",
- -      "Accelerating Molecular Dynamic Simulation on Graphics Processing Units",
- -      "J. Comp. Chem.",
- -      30, 2009, "864-872" },
- -    { "Engin2010",
- -      "O. Engin, A. Villa, M. Sayar and B. Hess",
- -      "Driving Forces for Adsorption of Amphiphilic Peptides to Air-Water Interface",
- -      "J. Phys. Chem. B",
- -      0, 2010, "???" },
- -    { "Wang2010",
- -      "H. Wang, F. Dommert, C.Holm",
- -      "Optimizing working parameters of the smooth particle mesh Ewald algorithm in terms of accuracy and efficiency",
- -      "J. Chem. Phys. B",
- -      133, 2010, "034117"
- -    },
- -    { "Sugita1999a",
- -      "Y. Sugita, Y. Okamoto",
- -      "Replica-exchange molecular dynamics method for protein folding",
- -      "Chem. Phys. Lett.",
- -      314, 1999, "141-151" },
- -  };
+ +    static const t_citerec citedb[] = {
+ +        { "Allen1987a",
+ +          "M. P. Allen and D. J. Tildesley",
+ +          "Computer simulation of liquids",
+ +          "Oxford Science Publications",
+ +          1, 1987, "1" },
+ +        { "Berendsen95a",
+ +          "H. J. C. Berendsen, D. van der Spoel and R. van Drunen",
+ +          "GROMACS: A message-passing parallel molecular dynamics implementation",
+ +          "Comp. Phys. Comm.",
+ +          91, 1995, "43-56" },
+ +        { "Berendsen84a",
+ +          "H. J. C. Berendsen, J. P. M. Postma, A. DiNola and J. R. Haak",
+ +          "Molecular dynamics with coupling to an external bath",
+ +          "J. Chem. Phys.",
+ +          81, 1984, "3684-3690" },
+ +        { "Ryckaert77a",
+ +          "J. P. Ryckaert and G. Ciccotti and H. J. C. Berendsen",
+ +          "Numerical Integration of the Cartesian Equations of Motion of a System with Constraints; Molecular Dynamics of n-Alkanes",
+ +          "J. Comp. Phys.",
+ +          23, 1977, "327-341" },
+ +        { "Miyamoto92a",
+ +          "S. Miyamoto and P. A. Kollman",
+ +          "SETTLE: An Analytical Version of the SHAKE and RATTLE Algorithms for Rigid Water Models",
+ +          "J. Comp. Chem.",
+ +          13, 1992, "952-962" },
+ +        { "Cromer1968a",
+ +          "D. T. Cromer & J. B. Mann",
+ +          "X-ray scattering factors computed from numerical Hartree-Fock wave functions",
+ +          "Acta Cryst. A",
+ +          24, 1968, "321" },
+ +        { "Barth95a",
+ +          "E. Barth and K. Kuczera and B. Leimkuhler and R. D. Skeel",
+ +          "Algorithms for Constrained Molecular Dynamics",
+ +          "J. Comp. Chem.",
+ +          16, 1995, "1192-1209" },
+ +        { "Essmann95a",
+ +          "U. Essmann, L. Perera, M. L. Berkowitz, T. Darden, H. Lee and L. G. Pedersen ",
+ +          "A smooth particle mesh Ewald method",
+ +          "J. Chem. Phys.",
+ +          103, 1995, "8577-8592" },
+ +        { "Torda89a",
+ +          "A. E. Torda and R. M. Scheek and W. F. van Gunsteren",
+ +          "Time-dependent distance restraints in molecular dynamics simulations",
+ +          "Chem. Phys. Lett.",
+ +          157, 1989, "289-294" },
+ +        { "Tironi95a",
+ +          "I. G. Tironi and R. Sperb and P. E. Smith and W. F. van Gunsteren",
+ +          "Generalized reaction field method for molecular dynamics simulations",
+ +          "J. Chem. Phys",
+ +          102, 1995, "5451-5459" },
+ +        { "Hess97a",
+ +          "B. Hess and H. Bekker and H. J. C. Berendsen and J. G. E. M. Fraaije",
+ +          "LINCS: A Linear Constraint Solver for molecular simulations",
+ +          "J. Comp. Chem.",
+ +          18, 1997, "1463-1472" },
+ +        { "Hess2008a",
+ +          "B. Hess",
+ +          "P-LINCS: A Parallel Linear Constraint Solver for molecular simulation",
+ +          "J. Chem. Theory Comput.",
+ +          4, 2008, "116-122" },
+ +        { "Hess2008b",
+ +          "B. Hess and C. Kutzner and D. van der Spoel and E. Lindahl",
+ +          "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation",
+ +          "J. Chem. Theory Comput.",
+ +          4, 2008, "435-447" },
+ +        { "Hub2010",
+ +          "J. S. Hub, B. L. de Groot and D. van der Spoel",
+ +          "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates",
+ +          "J. Chem. Theory Comput.",
+ +          6, 2010, "3713-3720"},
+ +        { "In-Chul99a",
+ +          "Y. In-Chul and M. L. Berkowitz",
+ +          "Ewald summation for systems with slab geometry",
+ +          "J. Chem. Phys.",
+ +          111, 1999, "3155-3162" },
+ +        { "DeGroot97a",
+ +          "B. L. de Groot and D. M. F. van Aalten and R. M. Scheek and A. Amadei and G. Vriend and H. J. C. Berendsen",
+ +          "Prediction of Protein Conformational Freedom From Distance Constrains",
+ +          "Proteins",
+ +          29, 1997, "240-251" },
+ +        { "Spoel98a",
+ +          "D. van der Spoel and P. J. van Maaren and H. J. C. Berendsen",
+ +          "A systematic study of water models for molecular simulation. Derivation of models optimized for use with a reaction-field.",
+ +          "J. Chem. Phys.",
+ +          108, 1998, "10220-10230" },
+ +        { "Wishart98a",
+ +          "D. S. Wishart and A. M. Nip",
+ +          "Protein Chemical Shift Analysis: A Practical Guide",
+ +          "Biochem. Cell Biol.",
+ +          76, 1998, "153-163" },
+ +        { "Maiorov95",
+ +          "V. N. Maiorov and G. M. Crippen",
+ +          "Size-Independent Comparison of Protein Three-Dimensional Structures",
+ +          "PROTEINS: Struct. Funct. Gen.",
+ +          22, 1995, "273-283" },
+ +        { "Feenstra99",
+ +          "K. A. Feenstra and B. Hess and H. J. C. Berendsen",
+ +          "Improving Efficiency of Large Time-scale Molecular Dynamics Simulations of Hydrogen-rich Systems",
+ +          "J. Comput. Chem.",
+ +          20, 1999, "786-798" },
+ +        { "Timneanu2004a",
+ +          "N. Timneanu and C. Caleman and J. Hajdu and D. van der Spoel",
+ +          "Auger Electron Cascades in Water and Ice",
+ +          "Chem. Phys.",
+ +          299, 2004, "277-283" },
+ +        { "Pascal2011a",
+ +          "T. A. Pascal and S. T. Lin and W. A. Goddard III",
+ +          "Thermodynamics of liquids: standard molar entropies and heat capacities of common solvents from 2PT molecular dynamics",
+ +          "Phys. Chem. Chem. Phys.",
+ +          13, 2011, "169-181" },
+ +        { "Caleman2011b",
+ +          "C. Caleman and P. J. van Maaren and M. Hong and J. S. Hub and L. T. da Costa and D. van der Spoel",
+ +          "Force Field Benchmark of Organic Liquids: Density, Enthalpy of Vaporization, Heat Capacities, Surface Tension, Isothermal Compressibility, Volumetric Expansion Coefficient, and Dielectric Constant",
+ +          "J. Chem. Theo. Comp.",
+ +          8, 2012, "61" },
+ +        { "Lindahl2001a",
+ +          "E. Lindahl and B. Hess and D. van der Spoel",
+ +          "GROMACS 3.0: A package for molecular simulation and trajectory analysis",
+ +          "J. Mol. Mod.",
+ +          7, 2001, "306-317" },
+ +        { "Wang2001a",
+ +          "J. Wang and W. Wang and S. Huo and M. Lee and P. A. Kollman",
+ +          "Solvation model based on weighted solvent accessible surface area",
+ +          "J. Phys. Chem. B",
+ +          105, 2001, "5055-5067" },
+ +        { "Eisenberg86a",
+ +          "D. Eisenberg and A. D. McLachlan",
+ +          "Solvation energy in protein folding and binding",
+ +          "Nature",
+ +          319, 1986, "199-203" },
+ +        { "Eisenhaber95",
+ +          "Frank Eisenhaber and Philip Lijnzaad and Patrick Argos and Chris Sander and Michael Scharf",
+ +          "The Double Cube Lattice Method: Efficient Approaches to Numerical Integration of Surface Area and Volume and to Dot Surface Contouring of Molecular Assemblies",
+ +          "J. Comp. Chem.",
+ +          16, 1995, "273-284" },
+ +        { "Hess2002",
+ +          "B. Hess, H. Saint-Martin and H.J.C. Berendsen",
+ +          "Flexible constraints: an adiabatic treatment of quantum degrees of freedom, with application to the flexible and polarizable MCDHO model for water",
+ +          "J. Chem. Phys.",
+ +          116, 2002, "9602-9610" },
+ +        { "Hetenyi2002b",
+ +          "Csaba Hetenyi and David van der Spoel",
+ +          "Efficient docking of peptides to proteins without prior knowledge of the binding site.",
+ +          "Prot. Sci.",
+ +          11, 2002, "1729-1737" },
+ +        { "Hess2003",
+ +          "B. Hess and R.M. Scheek",
+ +          "Orientation restraints in molecular dynamics simulations using time and ensemble averaging",
+ +          "J. Magn. Res.",
+ +          164, 2003, "19-27" },
+ +        { "Rappe1991a",
+ +          "A. K. Rappe and W. A. Goddard III",
+ +          "Charge Equillibration for Molecular Dynamics Simulations",
+ +          "J. Phys. Chem.",
+ +          95, 1991, "3358-3363" },
+ +        { "Mu2005a",
+ +          "Y. Mu, P. H. Nguyen and G. Stock",
+ +          "Energy landscape of a small peptide revelaed by dihedral angle principal component analysis",
+ +          "Prot. Struct. Funct. Bioinf.",
+ +          58, 2005, "45-52" },
+ +        { "Okabe2001a",
+ +          "T. Okabe and M. Kawata and Y. Okamoto and M. Mikami",
+ +          "Replica-exchange {M}onte {C}arlo method for the isobaric-isothermal ensemble",
+ +          "Chem. Phys. Lett.",
+ +          335, 2001, "435-439" },
+ +        { "Hukushima96a",
+ +          "K. Hukushima and K. Nemoto",
+ +          "Exchange Monte Carlo Method and Application to Spin Glass Simulations",
+ +          "J. Phys. Soc. Jpn.",
+ +          65, 1996, "1604-1608" },
+ +        { "Tropp80a",
+ +          "J. Tropp",
+ +          "Dipolar Relaxation and Nuclear Overhauser effects in nonrigid molecules: The effect of fluctuating internuclear distances",
+ +          "J. Chem. Phys.",
+ +          72, 1980, "6035-6043" },
+ +        { "Bultinck2002a",
+ +          "P. Bultinck and W. Langenaeker and P. Lahorte and F. De Proft and P. Geerlings and M. Waroquier and J. P. Tollenaere",
+ +          "The electronegativity equalization method I: Parametrization and validation for atomic charge calculations",
+ +          "J. Phys. Chem. A",
+ +          106, 2002, "7887-7894" },
+ +        { "Yang2006b",
+ +          "Q. Y. Yang and K. A. Sharp",
+ +          "Atomic charge parameters for the finite difference Poisson-Boltzmann method using electronegativity neutralization",
+ +          "J. Chem. Theory Comput.",
+ +          2, 2006, "1152-1167" },
+ +        { "Spoel2005a",
+ +          "D. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A. E. Mark and H. J. C. Berendsen",
+ +          "GROMACS: Fast, Flexible and Free",
+ +          "J. Comp. Chem.",
+ +          26, 2005, "1701-1719" },
+ +        { "Spoel2006b",
+ +          "D. van der Spoel, P. J. van Maaren, P. Larsson and N. Timneanu",
+ +          "Thermodynamics of hydrogen bonding in hydrophilic and hydrophobic media",
+ +          "J. Phys. Chem. B",
+ +          110, 2006, "4393-4398" },
+ +        { "Spoel2006d",
+ +          "D. van der Spoel and M. M. Seibert",
+ +          "Protein folding kinetics and thermodynamics from atomistic simulations",
+ +          "Phys. Rev. Letters",
+ +          96, 2006, "238102" },
+ +        { "Palmer94a",
+ +          "B. J. Palmer",
+ +          "Transverse-current autocorrelation-function calculations of the shear viscosity for molecular liquids",
+ +          "Phys. Rev. E",
+ +          49, 1994, "359-366" },
+ +        { "Bussi2007a",
+ +          "G. Bussi, D. Donadio and M. Parrinello",
+ +          "Canonical sampling through velocity rescaling",
+ +          "J. Chem. Phys.",
+ +          126, 2007, "014101" },
+ +        { "Hub2006",
+ +          "J. S. Hub and B. L. de Groot",
+ +          "Does CO2 permeate through Aquaporin-1?",
+ +          "Biophys. J.",
+ +          91, 2006, "842-848" },
+ +        { "Hub2008",
+ +          "J. S. Hub and B. L. de Groot",
+ +          "Mechanism of selectivity in aquaporins and aquaglyceroporins",
+ +          "PNAS",
+ +          105, 2008, "1198-1203" },
+ +        { "Friedrich2009",
+ +          "M. S. Friedrichs, P. Eastman, V. Vaidyanathan, M. Houston, S. LeGrand, A. L. Beberg, D. L. Ensign, C. M. Bruns, and V. S. Pande",
+ +          "Accelerating Molecular Dynamic Simulation on Graphics Processing Units",
+ +          "J. Comp. Chem.",
+ +          30, 2009, "864-872" },
+ +        { "Engin2010",
+ +          "O. Engin, A. Villa, M. Sayar and B. Hess",
+ +          "Driving Forces for Adsorption of Amphiphilic Peptides to Air-Water Interface",
+ +          "J. Phys. Chem. B",
+ +          114, 2010, "11093" },
+ +        { "Fritsch12",
+ +          "S. Fritsch, C. Junghans and K. Kremer",
+ +          "Adaptive molecular simulation study on structure formation of toluene around C60 using Gromacs",
+ +          "J. Chem. Theo. Comp.",
+ +          8, 2012, "398" },
+ +        { "Junghans10",
+ +          "C. Junghans and S. Poblete",
+ +          "A reference implementation of the adaptive resolution scheme in ESPResSo",
+ +          "Comp. Phys. Comm.",
+ +          181, 2010, "1449" },
+ +        { "Wang2010",
+ +          "H. Wang, F. Dommert, C.Holm",
+ +          "Optimizing working parameters of the smooth particle mesh Ewald algorithm in terms of accuracy and efficiency",
+ +          "J. Chem. Phys. B",
+ +          133, 2010, "034117" },
+ +        { "Sugita1999a",
+ +          "Y. Sugita, Y. Okamoto",
+ +          "Replica-exchange molecular dynamics method for protein folding",
+ +          "Chem. Phys. Lett.",
+ +          314, 1999, "141-151" },
+ +        { "Kutzner2011",
+ +          "C. Kutzner and J. Czub and H. Grubmuller",
+ +          "Keep it Flexible: Driving Macromolecular Rotary Motions in Atomistic Simulations with GROMACS",
+ +          "J. Chem. Theory Comput.",
+ +          7, 2011, "1381-1393" },
+ +        { "Hoefling2011",
+ +          "M. Hoefling, N. Lima, D. Haenni, C.A.M. Seidel, B. Schuler, H. Grubmuller",
+ +          "Structural Heterogeneity and Quantitative FRET Efficiency Distributions of Polyprolines through a Hybrid Atomistic Simulation and Monte Carlo Approach",
+ +          "PLoS ONE",
+ +          6, 2011, "e19791" },
+ +        { "Hockney1988",
+ +          "R. W. Hockney and J. W. Eastwood",
+ +          "Computer simulation using particles",
+ +          "IOP, Bristol",
+ +          1, 1988, "1" },
+ +        { "Ballenegger2012",
+ +          "V. Ballenegger, J.J. Cerda, and C. Holm",
+ +          "How to Convert SPME to P3M: Influence Functions and Error Estimates",
+ +          "J. Chem. Theory Comput.",
+ +          8, 2012, "936-947" },
+ +        { "Garmay2012",
+ +          "Garmay Yu, Shvetsov A, Karelov D, Lebedev D, Radulescu A, Petukhov M, Isaev-Ivanov V",
+ +          "Correlated motion of protein subdomains and large-scale conformational flexibility of RecA protein filament",
+ +          "Journal of Physics: Conference Series",
+ +          340, 2012, "012094" }
+ +    };
   #define NSTR (int)asize(citedb)
- -  
- -  int  j,index;
- -  char *author;
- -  char *title;
+ +
+ +    int   j, index;
+ +    char *author;
+ +    char *title;
   #define LINE_WIDTH 79
- -  
- -  if (fp == NULL)
- -    return;
- -
- -  for(index=0; (index<NSTR) && (strcmp(citedb[index].key,key) != 0); index++)
- -    ;
- -  
- -  fprintf(fp,"\n++++ PLEASE READ AND CITE THE FOLLOWING REFERENCE ++++\n");
- -  if (index < NSTR) {
- -    /* Insert newlines */
- -    author = wrap_lines(citedb[index].author,LINE_WIDTH,0,FALSE);
- -    title  = wrap_lines(citedb[index].title,LINE_WIDTH,0,FALSE);
- -    fprintf(fp,"%s\n%s\n%s %d (%d) pp. %s\n",
- -          author,title,citedb[index].journal,
- -          citedb[index].volume,citedb[index].year,
- -          citedb[index].pages);
- -    sfree(author);
- -    sfree(title);
- -  }
- -  else {
- -    fprintf(fp,"Entry %s not found in citation database\n",key);
- -  }
- -  fprintf(fp,"-------- -------- --- Thank You --- -------- --------\n\n");
- -  fflush(fp);
+ +
+ +    if (fp == NULL)
+ +    {
+ +        return;
+ +    }
+ +
+ +    for (index = 0; (index < NSTR) && (strcmp(citedb[index].key, key) != 0); index++)
+ +    {
+ +        ;
+ +    }
+ +
+ +    fprintf(fp, "\n++++ PLEASE READ AND CITE THE FOLLOWING REFERENCE ++++\n");
+ +    if (index < NSTR)
+ +    {
+ +        /* Insert newlines */
+ +        author = wrap_lines(citedb[index].author, LINE_WIDTH, 0, FALSE);
+ +        title  = wrap_lines(citedb[index].title, LINE_WIDTH, 0, FALSE);
+ +        fprintf(fp, "%s\n%s\n%s %d (%d) pp. %s\n",
+ +                author, title, citedb[index].journal,
+ +                citedb[index].volume, citedb[index].year,
+ +                citedb[index].pages);
+ +        sfree(author);
+ +        sfree(title);
+ +    }
+ +    else
+ +    {
+ +        fprintf(fp, "Entry %s not found in citation database\n", key);
+ +    }
+ +    fprintf(fp, "-------- -------- --- Thank You --- -------- --------\n\n");
+ +    fflush(fp);
   }
   
   #ifdef USE_VERSION_H
@@@ -669,111 -562,58 +657,111 @@@
   #include "version.h"
   #else
   /* Fall back to statically defined version. */
- -static const char _gmx_ver_string[]="VERSION " VERSION;
+ +static const char _gmx_ver_string[] = "VERSION " VERSION;
   #endif
   
- -/* This routine only returns a static (constant) string, so we use a 
- - * mutex to initialize it. Since the string is only written to the
- - * first time, there is no risk with multiple calls overwriting the
- - * output for each other.
- - */
   const char *GromacsVersion()
   {
- -  return _gmx_ver_string;
+ +    return _gmx_ver_string;
   }
   
+ +void gmx_print_version_info_gpu(FILE *fp);
   
   void gmx_print_version_info(FILE *fp)
   {
- -    fprintf(fp, "Version:          %s\n", _gmx_ver_string);
+ +    fprintf(fp, "Gromacs version:    %s\n", _gmx_ver_string);
   #ifdef USE_VERSION_H
- -    fprintf(fp, "GIT SHA1 hash:    %s\n", _gmx_full_git_hash);
+ +    fprintf(fp, "GIT SHA1 hash:      %s\n", _gmx_full_git_hash);
       /* Only print out the branch information if present.
        * The generating script checks whether the branch point actually
        * coincides with the hash reported above, and produces an empty string
        * in such cases. */
       if (_gmx_central_base_hash[0] != 0)
       {
- -        fprintf(fp, "Branched from:    %s\n", _gmx_central_base_hash);
+ +        fprintf(fp, "Branched from:      %s\n", _gmx_central_base_hash);
       }
   #endif
   
   #ifdef GMX_DOUBLE
- -    fprintf(fp, "Precision:        double\n");
+ +    fprintf(fp, "Precision:          double\n");
   #else
- -    fprintf(fp, "Precision:        single\n");
+ +    fprintf(fp, "Precision:          single\n");
   #endif
+ +    fprintf(fp, "Memory model:       %lu bit\n", 8*sizeof(void *));
   
- -#ifdef GMX_THREADS
- -    fprintf(fp, "Parallellization: thread_mpi\n");
+ +#ifdef GMX_THREAD_MPI
+ +    fprintf(fp, "MPI library:        thread_mpi\n");
   #elif defined(GMX_MPI)
- -    fprintf(fp, "Parallellization: MPI\n");
+ +    fprintf(fp, "MPI library:        MPI\n");
   #else
- -    fprintf(fp, "Parallellization: none\n");
+ +    fprintf(fp, "MPI library:        none\n");
   #endif
- -
+ +#ifdef GMX_OPENMP
+ +    fprintf(fp, "OpenMP support:     enabled\n");
+ +#else
+ +    fprintf(fp, "OpenMP support:     disabled\n");
+ +#endif
+ +#ifdef GMX_GPU
+ +    fprintf(fp, "GPU support:        enabled\n");
+ +#else
+ +    fprintf(fp, "GPU support:        disabled\n");
+ +#endif
+ +    /* A preprocessor trick to avoid duplicating logic from vec.h */
+ +#define gmx_stringify2(x) #x
+ +#define gmx_stringify(x) gmx_stringify2(x)
+ +    fprintf(fp, "invsqrt routine:    %s\n", gmx_stringify(gmx_invsqrt(x)));
+ +    fprintf(fp, "CPU acceleration:   %s\n", GMX_CPU_ACCELERATION_STRING);
+ +
+ +    /* TODO: Would be nicer to wrap this in a gmx_fft_version() call, but
+ +     * since that is currently in mdlib, can wait for master. */
   #ifdef GMX_FFT_FFTPACK
- -    fprintf(fp, "FFT Library:      fftpack\n");
- -#elif defined(GMX_FFT_FFTW2)
- -    fprintf(fp, "FFT Library:      fftw2\n");
+ +    fprintf(fp, "FFT library:        fftpack (built-in)\n");
+ +#elif defined(GMX_FFT_FFTW3) && defined(GMX_NATIVE_WINDOWS)
+ +    fprintf(fp, "FFT library:        %s\n", "fftw3");
+ +#elif defined(GMX_FFT_FFTW3) && defined(GMX_DOUBLE)
+ +    fprintf(fp, "FFT library:        %s\n", fftw_version);
   #elif defined(GMX_FFT_FFTW3)
- -    fprintf(fp, "FFT Library:      fftw3\n");
+ +    fprintf(fp, "FFT library:        %s\n", fftwf_version);
   #elif defined(GMX_FFT_MKL)
- -    fprintf(fp, "FFT Library:      MKL\n");
+ +    fprintf(fp, "FFT library:        MKL\n");
   #else
- -    fprintf(fp, "FFT Library:      unknown\n");
+ +    fprintf(fp, "FFT library:        unknown\n");
+ +#endif
+ +#ifdef GMX_LARGEFILES
+ +    fprintf(fp, "Large file support: enabled\n");
+ +#else
+ +    fprintf(fp, "Large file support: disabled\n");
+ +#endif
+ +#ifdef HAVE_RDTSCP
+ +    fprintf(fp, "RDTSCP usage:       enabled\n");
+ +#else
+ +    fprintf(fp, "RDTSCP usage:       disabled\n");
+ +#endif
+ +
+ +    fprintf(fp, "Built on:           %s\n", BUILD_TIME);
+ +    fprintf(fp, "Built by:           %s\n", BUILD_USER);
+ +    fprintf(fp, "Build OS/arch:      %s\n", BUILD_HOST);
+ +    fprintf(fp, "Build CPU vendor:   %s\n", BUILD_CPU_VENDOR);
+ +    fprintf(fp, "Build CPU brand:    %s\n", BUILD_CPU_BRAND);
+ +    fprintf(fp, "Build CPU family:   %d   Model: %d   Stepping: %d\n",
+ +            BUILD_CPU_FAMILY, BUILD_CPU_MODEL, BUILD_CPU_STEPPING);
+ +    /* TODO: The below strings can be quite long, so it would be nice to wrap
+ +     * them. Can wait for later, as the master branch has ready code to do all
+ +     * that. */
+ +    fprintf(fp, "Build CPU features: %s\n", BUILD_CPU_FEATURES);
+ +    fprintf(fp, "C compiler:         %s\n", BUILD_C_COMPILER);
+ +    fprintf(fp, "C compiler flags:   %s\n", BUILD_CFLAGS);
+ +    if (BUILD_CXX_COMPILER[0] != '\0')
+ +    {
+ +        fprintf(fp, "C++ compiler:       %s\n", BUILD_CXX_COMPILER);
+ +        fprintf(fp, "C++ compiler flags: %s\n", BUILD_CXXFLAGS);
+ +    }
+ +#ifdef HAVE_LIBMKL
+ +    /* MKL might be used for LAPACK/BLAS even if FFTs use FFTW, so keep it separate */
+ +    fprintf(fp, "Linked with Intel MKL version %s.%s.%s.\n",
+ +            __INTEL_MKL__, __INTEL_MKL_MINOR__, __INTEL_MKL_UPDATE__);
+ +#endif
+ +#ifdef GMX_GPU
+ +    gmx_print_version_info_gpu(fp);
   #endif
   }
diff --combined src/mdlib/vsite.c

index ab250de74f8784f9f2c0e1668aba64b3d575a28a,2eadddecb1febfdadb17c219822a8863395912c3..a2d541ea251a685bd471587ae747a4972db5c444
--- 1/src/mdlib/vsite.c
--- 2/src/mdlib/vsite.c
+++ b/src/mdlib/vsite.c
@@@ -1,39 -1,37 +1,39 @@@
- -/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ +/*
+ + * This file is part of the GROMACS molecular simulation package.
    *
- - * 
- - *                This source code is part of
- - * 
- - *                 G   R   O   M   A   C   S
- - * 
- - *          GROningen MAchine for Chemical Simulations
- - * 
- - *                        VERSION 3.2.0
- - * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
    * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
    * Copyright (c) 2001-2004, The GROMACS development team,
    * check out http://www.gromacs.org for more information.
- -
- - * This program is free software; you can redistribute it and/or
- - * modify it under the terms of the GNU General Public License
- - * as published by the Free Software Foundation; either version 2
+ + * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ + * David van der Spoel, Berk Hess, Erik Lindahl, and including many
+ + * others, as listed in the AUTHORS file in the top-level source
+ + * directory and at http://www.gromacs.org.
+ + *
+ + * GROMACS is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU Lesser General Public License
+ + * as published by the Free Software Foundation; either version 2.1
    * of the License, or (at your option) any later version.
- - * 
- - * If you want to redistribute modifications, please consider that
- - * scientific software is very special. Version control is crucial -
- - * bugs must be traceable. We will be happy to consider code for
- - * inclusion in the official distribution, but derived work must not
- - * be called official GROMACS. Details are found in the README & COPYING
- - * files - if they are missing, get the official version at www.gromacs.org.
- - * 
+ + *
+ + * GROMACS is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ + * Lesser General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU Lesser General Public
+ + * License along with GROMACS; if not, see
+ + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ + *
+ + * If you want to redistribute modifications to GROMACS, please
+ + * consider that scientific software is very special. Version
+ + * control is crucial - bugs must be traceable. We will be happy to
+ + * consider code for inclusion in the official distribution, but
+ + * derived work must not be called official GROMACS. Details are found
+ + * in the README & COPYING files - if they are missing, get the
+ + * official version at http://www.gromacs.org.
+ + *
    * To help us fund GROMACS development, we humbly ask that you cite
- - * the papers on the package - you can find them in the top README file.
- - * 
- - * For more info, check our website at http://www.gromacs.org
- - * 
- - * And Hey:
- - * GROwing Monsters And Cloning Shrimps
+ + * the research papers on the package. Check out http://www.gromacs.org.
    */
   #ifdef HAVE_CONFIG_H
   #include <config.h>
@@@ -53,1268 -51,1142 +53,1274 @@@
   #include "domdec.h"
   #include "partdec.h"
   #include "mtop_util.h"
+ +#include "gmx_omp_nthreads.h"
+ +#include "gmx_omp.h"
   
   /* Routines to send/recieve coordinates and force
- - * of constructing atoms. 
- - */ 
+ + * of constructing atoms.
+ + */
   
   static void move_construct_x(t_comm_vsites *vsitecomm, rvec x[], t_commrec *cr)
   {
- -      rvec *sendbuf;
- -      rvec *recvbuf;
- -      int i,ia;
- -      
- -      sendbuf = vsitecomm->send_buf;
- -      recvbuf = vsitecomm->recv_buf;
- -      
- -
- -              /* Prepare pulse left by copying to send buffer */
- -              for(i=0;i<vsitecomm->left_export_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->left_export_construct[i];
- -                      copy_rvec(x[ia],sendbuf[i]);
- -              }
- -      
- -              /* Pulse coordinates left */
- -              gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_export_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_import_nconstruct);
- -              
- -              /* Copy from receive buffer to coordinate array */
- -              for(i=0;i<vsitecomm->right_import_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->right_import_construct[i];
- -                      copy_rvec(recvbuf[i],x[ia]);
- -              }
- -
- -              /* Prepare pulse right by copying to send buffer */
- -              for(i=0;i<vsitecomm->right_export_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->right_export_construct[i];
- -                      copy_rvec(x[ia],sendbuf[i]);
- -              }
- -              
- -              /* Pulse coordinates right */
- -              gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_export_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_import_nconstruct);
- -              
- -              /* Copy from receive buffer to coordinate array */
- -              for(i=0;i<vsitecomm->left_import_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->left_import_construct[i];
- -                      copy_rvec(recvbuf[i],x[ia]);
- -              }
+ +    rvec *sendbuf;
+ +    rvec *recvbuf;
+ +    int   i, ia;
+ +
+ +    sendbuf = vsitecomm->send_buf;
+ +    recvbuf = vsitecomm->recv_buf;
+ +
+ +
+ +    /* Prepare pulse left by copying to send buffer */
+ +    for (i = 0; i < vsitecomm->left_export_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->left_export_construct[i];
+ +        copy_rvec(x[ia], sendbuf[i]);
+ +    }
+ +
+ +    /* Pulse coordinates left */
+ +    gmx_tx_rx_real(cr, GMX_LEFT, (real *)sendbuf, 3*vsitecomm->left_export_nconstruct, GMX_RIGHT, (real *)recvbuf, 3*vsitecomm->right_import_nconstruct);
+ +
+ +    /* Copy from receive buffer to coordinate array */
+ +    for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->right_import_construct[i];
+ +        copy_rvec(recvbuf[i], x[ia]);
+ +    }
+ +
+ +    /* Prepare pulse right by copying to send buffer */
+ +    for (i = 0; i < vsitecomm->right_export_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->right_export_construct[i];
+ +        copy_rvec(x[ia], sendbuf[i]);
+ +    }
+ +
+ +    /* Pulse coordinates right */
+ +    gmx_tx_rx_real(cr, GMX_RIGHT, (real *)sendbuf, 3*vsitecomm->right_export_nconstruct, GMX_LEFT, (real *)recvbuf, 3*vsitecomm->left_import_nconstruct);
+ +
+ +    /* Copy from receive buffer to coordinate array */
+ +    for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->left_import_construct[i];
+ +        copy_rvec(recvbuf[i], x[ia]);
+ +    }
   }
   
   
   static void move_construct_f(t_comm_vsites *vsitecomm, rvec f[], t_commrec *cr)
   {
- -      rvec *sendbuf;
- -      rvec *recvbuf;
- -      int i,ia;
- -
- -      sendbuf = vsitecomm->send_buf;
- -      recvbuf = vsitecomm->recv_buf;  
- -
- -              /* Prepare pulse right by copying to send buffer */
- -              for(i=0;i<vsitecomm->right_import_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->right_import_construct[i];
- -                      copy_rvec(f[ia],sendbuf[i]);
- -                      clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
- -              }
- -              
- -              /* Pulse forces right */
- -              gmx_tx_rx_real(cr,GMX_RIGHT,(real *)sendbuf,3*vsitecomm->right_import_nconstruct,GMX_LEFT,(real *)recvbuf,3*vsitecomm->left_export_nconstruct);
- -              
- -              /* Copy from receive buffer to coordinate array */
- -              for(i=0;i<vsitecomm->left_export_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->left_export_construct[i];
- -                      rvec_inc(f[ia],recvbuf[i]);
- -              }
- -
- -              /* Prepare pulse left by copying to send buffer */
- -              for(i=0;i<vsitecomm->left_import_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->left_import_construct[i];
- -                      copy_rvec(f[ia],sendbuf[i]);
- -                      clear_rvec(f[ia]); /* Zero it here after moving, just to simplify debug book-keeping... */
- -              }
- -              
- -              /* Pulse coordinates left */
- -              gmx_tx_rx_real(cr,GMX_LEFT,(real *)sendbuf,3*vsitecomm->left_import_nconstruct,GMX_RIGHT,(real *)recvbuf,3*vsitecomm->right_export_nconstruct);
- -              
- -              /* Copy from receive buffer to coordinate array */
- -              for(i=0;i<vsitecomm->right_export_nconstruct;i++)
- -              {
- -                      ia = vsitecomm->right_export_construct[i];
- -                      rvec_inc(f[ia],recvbuf[i]);
- -              }
- -              
- -      /* All forces are now on the home processors */
+ +    rvec *sendbuf;
+ +    rvec *recvbuf;
+ +    int   i, ia;
+ +
+ +    sendbuf = vsitecomm->send_buf;
+ +    recvbuf = vsitecomm->recv_buf;
+ +
+ +    /* Prepare pulse right by copying to send buffer */
+ +    for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->right_import_construct[i];
+ +        copy_rvec(f[ia], sendbuf[i]);
+ +        clear_rvec(f[ia]);     /* Zero it here after moving, just to simplify debug book-keeping... */
+ +    }
+ +
+ +    /* Pulse forces right */
+ +    gmx_tx_rx_real(cr, GMX_RIGHT, (real *)sendbuf, 3*vsitecomm->right_import_nconstruct, GMX_LEFT, (real *)recvbuf, 3*vsitecomm->left_export_nconstruct);
+ +
+ +    /* Copy from receive buffer to coordinate array */
+ +    for (i = 0; i < vsitecomm->left_export_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->left_export_construct[i];
+ +        rvec_inc(f[ia], recvbuf[i]);
+ +    }
+ +
+ +    /* Prepare pulse left by copying to send buffer */
+ +    for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->left_import_construct[i];
+ +        copy_rvec(f[ia], sendbuf[i]);
+ +        clear_rvec(f[ia]);     /* Zero it here after moving, just to simplify debug book-keeping... */
+ +    }
+ +
+ +    /* Pulse coordinates left */
+ +    gmx_tx_rx_real(cr, GMX_LEFT, (real *)sendbuf, 3*vsitecomm->left_import_nconstruct, GMX_RIGHT, (real *)recvbuf, 3*vsitecomm->right_export_nconstruct);
+ +
+ +    /* Copy from receive buffer to coordinate array */
+ +    for (i = 0; i < vsitecomm->right_export_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->right_export_construct[i];
+ +        rvec_inc(f[ia], recvbuf[i]);
+ +    }
+ +
+ +    /* All forces are now on the home processors */
   }
   
- -      
+ +
   static void
   pd_clear_nonlocal_constructs(t_comm_vsites *vsitecomm, rvec f[])
   {
- -      int i,ia;
- -      
- -      for(i=0;i<vsitecomm->left_import_nconstruct;i++)
- -      {
- -              ia = vsitecomm->left_import_construct[i];
- -              clear_rvec(f[ia]); 
- -      }
- -      for(i=0;i<vsitecomm->right_import_nconstruct;i++)
- -      {
- -              ia = vsitecomm->right_import_construct[i];
- -              clear_rvec(f[ia]); 
- -      }
+ +    int i, ia;
+ +
+ +    for (i = 0; i < vsitecomm->left_import_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->left_import_construct[i];
+ +        clear_rvec(f[ia]);
+ +    }
+ +    for (i = 0; i < vsitecomm->right_import_nconstruct; i++)
+ +    {
+ +        ia = vsitecomm->right_import_construct[i];
+ +        clear_rvec(f[ia]);
+ +    }
   }
   
   
   
- -static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
+ +static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
   {
- -  if (pbc) {
- -    return pbc_dx_aiuc(pbc,xi,xj,dx);
- -  }
- -  else {
- -    rvec_sub(xi,xj,dx);
- -    return CENTRAL;
- -  }
+ +    if (pbc)
+ +    {
+ +        return pbc_dx_aiuc(pbc, xi, xj, dx);
+ +    }
+ +    else
+ +    {
+ +        rvec_sub(xi, xj, dx);
+ +        return CENTRAL;
+ +    }
   }
   
   /* Vsite construction routines */
   
- -static void constr_vsite2(rvec xi,rvec xj,rvec x,real a,t_pbc *pbc)
+ +static void constr_vsite2(rvec xi, rvec xj, rvec x, real a, t_pbc *pbc)
   {
- -  real b;
- -  rvec dx;
- -
- -  b=1.0-a;
- -  /* 1 flop */
- -  
- -  if (pbc) {
- -    pbc_dx_aiuc(pbc,xj,xi,dx);
- -    x[XX] = xi[XX] + a*dx[XX];
- -    x[YY] = xi[YY] + a*dx[YY];
- -    x[ZZ] = xi[ZZ] + a*dx[ZZ];
- -  } else {
- -    x[XX] = b*xi[XX] + a*xj[XX];
- -    x[YY] = b*xi[YY] + a*xj[YY];
- -    x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
- -    /* 9 Flops */
- -  }
- -  
- -  /* TOTAL: 10 flops */
+ +    real b;
+ +    rvec dx;
+ +
+ +    b = 1.0-a;
+ +    /* 1 flop */
+ +
+ +    if (pbc)
+ +    {
+ +        pbc_dx_aiuc(pbc, xj, xi, dx);
+ +        x[XX] = xi[XX] + a*dx[XX];
+ +        x[YY] = xi[YY] + a*dx[YY];
+ +        x[ZZ] = xi[ZZ] + a*dx[ZZ];
+ +    }
+ +    else
+ +    {
+ +        x[XX] = b*xi[XX] + a*xj[XX];
+ +        x[YY] = b*xi[YY] + a*xj[YY];
+ +        x[ZZ] = b*xi[ZZ] + a*xj[ZZ];
+ +        /* 9 Flops */
+ +    }
+ +
+ +    /* TOTAL: 10 flops */
   }
   
- -static void constr_vsite3(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
- -                        t_pbc *pbc)
+ +static void constr_vsite3(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
+ +                          t_pbc *pbc)
   {
- -  real c;
- -  rvec dxj,dxk;
- -
- -  c=1.0-a-b;
- -  /* 2 flops */
- -  
- -  if (pbc) {
- -    pbc_dx_aiuc(pbc,xj,xi,dxj);
- -    pbc_dx_aiuc(pbc,xk,xi,dxk);
- -    x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
- -    x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
- -    x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
- -  } else {
- -    x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
- -    x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
- -    x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
- -  /* 15 Flops */
- -  }
- -  
- -  /* TOTAL: 17 flops */
+ +    real c;
+ +    rvec dxj, dxk;
+ +
+ +    c = 1.0-a-b;
+ +    /* 2 flops */
+ +
+ +    if (pbc)
+ +    {
+ +        pbc_dx_aiuc(pbc, xj, xi, dxj);
+ +        pbc_dx_aiuc(pbc, xk, xi, dxk);
+ +        x[XX] = xi[XX] + a*dxj[XX] + b*dxk[XX];
+ +        x[YY] = xi[YY] + a*dxj[YY] + b*dxk[YY];
+ +        x[ZZ] = xi[ZZ] + a*dxj[ZZ] + b*dxk[ZZ];
+ +    }
+ +    else
+ +    {
+ +        x[XX] = c*xi[XX] + a*xj[XX] + b*xk[XX];
+ +        x[YY] = c*xi[YY] + a*xj[YY] + b*xk[YY];
+ +        x[ZZ] = c*xi[ZZ] + a*xj[ZZ] + b*xk[ZZ];
+ +        /* 15 Flops */
+ +    }
+ +
+ +    /* TOTAL: 17 flops */
   }
   
- -static void constr_vsite3FD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b,
- -                          t_pbc *pbc)
+ +static void constr_vsite3FD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b,
+ +                            t_pbc *pbc)
   {
- -  rvec xij,xjk,temp;
- -  real c;
- -  
- -  pbc_rvec_sub(pbc,xj,xi,xij);
- -  pbc_rvec_sub(pbc,xk,xj,xjk);
- -  /* 6 flops */
- -
- -  /* temp goes from i to a point on the line jk */  
- -  temp[XX] = xij[XX] + a*xjk[XX];
- -  temp[YY] = xij[YY] + a*xjk[YY];
- -  temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
- -  /* 6 flops */
- -  
- -  c=b*gmx_invsqrt(iprod(temp,temp));
- -  /* 6 + 10 flops */
- -  
- -  x[XX] = xi[XX] + c*temp[XX];
- -  x[YY] = xi[YY] + c*temp[YY];
- -  x[ZZ] = xi[ZZ] + c*temp[ZZ];
- -  /* 6 Flops */
- -  
- -  /* TOTAL: 34 flops */
+ +    rvec xij, xjk, temp;
+ +    real c;
+ +
+ +    pbc_rvec_sub(pbc, xj, xi, xij);
+ +    pbc_rvec_sub(pbc, xk, xj, xjk);
+ +    /* 6 flops */
+ +
+ +    /* temp goes from i to a point on the line jk */
+ +    temp[XX] = xij[XX] + a*xjk[XX];
+ +    temp[YY] = xij[YY] + a*xjk[YY];
+ +    temp[ZZ] = xij[ZZ] + a*xjk[ZZ];
+ +    /* 6 flops */
+ +
+ +    c = b*gmx_invsqrt(iprod(temp, temp));
+ +    /* 6 + 10 flops */
+ +
+ +    x[XX] = xi[XX] + c*temp[XX];
+ +    x[YY] = xi[YY] + c*temp[YY];
+ +    x[ZZ] = xi[ZZ] + c*temp[ZZ];
+ +    /* 6 Flops */
+ +
+ +    /* TOTAL: 34 flops */
   }
   
- -static void constr_vsite3FAD(rvec xi,rvec xj,rvec xk,rvec x,real a,real b, t_pbc *pbc)
+ +static void constr_vsite3FAD(rvec xi, rvec xj, rvec xk, rvec x, real a, real b, t_pbc *pbc)
   {
- -  rvec xij,xjk,xp;
- -  real a1,b1,c1,invdij;
- -  
- -  pbc_rvec_sub(pbc,xj,xi,xij);
- -  pbc_rvec_sub(pbc,xk,xj,xjk);
- -  /* 6 flops */
- -
- -  invdij = gmx_invsqrt(iprod(xij,xij));
- -  c1 = invdij * invdij * iprod(xij,xjk);
- -  xp[XX] = xjk[XX] - c1*xij[XX];
- -  xp[YY] = xjk[YY] - c1*xij[YY];
- -  xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
- -  a1 = a*invdij;
- -  b1 = b*gmx_invsqrt(iprod(xp,xp));
- -  /* 45 */
- -  
- -  x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
- -  x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
- -  x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
- -  /* 12 Flops */
- -  
- -  /* TOTAL: 63 flops */
+ +    rvec xij, xjk, xp;
+ +    real a1, b1, c1, invdij;
+ +
+ +    pbc_rvec_sub(pbc, xj, xi, xij);
+ +    pbc_rvec_sub(pbc, xk, xj, xjk);
+ +    /* 6 flops */
+ +
+ +    invdij = gmx_invsqrt(iprod(xij, xij));
+ +    c1     = invdij * invdij * iprod(xij, xjk);
+ +    xp[XX] = xjk[XX] - c1*xij[XX];
+ +    xp[YY] = xjk[YY] - c1*xij[YY];
+ +    xp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
+ +    a1     = a*invdij;
+ +    b1     = b*gmx_invsqrt(iprod(xp, xp));
+ +    /* 45 */
+ +
+ +    x[XX] = xi[XX] + a1*xij[XX] + b1*xp[XX];
+ +    x[YY] = xi[YY] + a1*xij[YY] + b1*xp[YY];
+ +    x[ZZ] = xi[ZZ] + a1*xij[ZZ] + b1*xp[ZZ];
+ +    /* 12 Flops */
+ +
+ +    /* TOTAL: 63 flops */
   }
   
- -static void constr_vsite3OUT(rvec xi,rvec xj,rvec xk,rvec x,
- -                           real a,real b,real c,t_pbc *pbc)
+ +static void constr_vsite3OUT(rvec xi, rvec xj, rvec xk, rvec x,
+ +                             real a, real b, real c, t_pbc *pbc)
   {
- -  rvec xij,xik,temp;
- -  
- -  pbc_rvec_sub(pbc,xj,xi,xij);
- -  pbc_rvec_sub(pbc,xk,xi,xik);
- -  cprod(xij,xik,temp);
- -  /* 15 Flops */
- -  
- -  x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
- -  x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
- -  x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
- -  /* 18 Flops */
- -  
- -  /* TOTAL: 33 flops */
+ +    rvec xij, xik, temp;
+ +
+ +    pbc_rvec_sub(pbc, xj, xi, xij);
+ +    pbc_rvec_sub(pbc, xk, xi, xik);
+ +    cprod(xij, xik, temp);
+ +    /* 15 Flops */
+ +
+ +    x[XX] = xi[XX] + a*xij[XX] + b*xik[XX] + c*temp[XX];
+ +    x[YY] = xi[YY] + a*xij[YY] + b*xik[YY] + c*temp[YY];
+ +    x[ZZ] = xi[ZZ] + a*xij[ZZ] + b*xik[ZZ] + c*temp[ZZ];
+ +    /* 18 Flops */
+ +
+ +    /* TOTAL: 33 flops */
   }
   
- -static void constr_vsite4FD(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
- -                            real a,real b,real c,t_pbc *pbc)
+ +static void constr_vsite4FD(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
+ +                            real a, real b, real c, t_pbc *pbc)
   {
- -  rvec xij,xjk,xjl,temp;
- -  real d;
- -  
- -  pbc_rvec_sub(pbc,xj,xi,xij);
- -  pbc_rvec_sub(pbc,xk,xj,xjk);
- -  pbc_rvec_sub(pbc,xl,xj,xjl);
- -  /* 9 flops */
- -
- -  /* temp goes from i to a point on the plane jkl */  
- -  temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
- -  temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
- -  temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
- -  /* 12 flops */
- -  
- -  d=c*gmx_invsqrt(iprod(temp,temp));
- -  /* 6 + 10 flops */
- -  
- -  x[XX] = xi[XX] + d*temp[XX];
- -  x[YY] = xi[YY] + d*temp[YY];
- -  x[ZZ] = xi[ZZ] + d*temp[ZZ];
- -  /* 6 Flops */
- -  
- -  /* TOTAL: 43 flops */
+ +    rvec xij, xjk, xjl, temp;
+ +    real d;
+ +
+ +    pbc_rvec_sub(pbc, xj, xi, xij);
+ +    pbc_rvec_sub(pbc, xk, xj, xjk);
+ +    pbc_rvec_sub(pbc, xl, xj, xjl);
+ +    /* 9 flops */
+ +
+ +    /* temp goes from i to a point on the plane jkl */
+ +    temp[XX] = xij[XX] + a*xjk[XX] + b*xjl[XX];
+ +    temp[YY] = xij[YY] + a*xjk[YY] + b*xjl[YY];
+ +    temp[ZZ] = xij[ZZ] + a*xjk[ZZ] + b*xjl[ZZ];
+ +    /* 12 flops */
+ +
+ +    d = c*gmx_invsqrt(iprod(temp, temp));
+ +    /* 6 + 10 flops */
+ +
+ +    x[XX] = xi[XX] + d*temp[XX];
+ +    x[YY] = xi[YY] + d*temp[YY];
+ +    x[ZZ] = xi[ZZ] + d*temp[ZZ];
+ +    /* 6 Flops */
+ +
+ +    /* TOTAL: 43 flops */
   }
   
   
- -static void constr_vsite4FDN(rvec xi,rvec xj,rvec xk,rvec xl,rvec x,
- -                             real a,real b,real c,t_pbc *pbc)
+ +static void constr_vsite4FDN(rvec xi, rvec xj, rvec xk, rvec xl, rvec x,
+ +                             real a, real b, real c, t_pbc *pbc)
   {
- -    rvec xij,xik,xil,ra,rb,rja,rjb,rm;
+ +    rvec xij, xik, xil, ra, rb, rja, rjb, rm;
       real d;
- -    
- -    pbc_rvec_sub(pbc,xj,xi,xij);
- -    pbc_rvec_sub(pbc,xk,xi,xik);
- -    pbc_rvec_sub(pbc,xl,xi,xil);
+ +
+ +    pbc_rvec_sub(pbc, xj, xi, xij);
+ +    pbc_rvec_sub(pbc, xk, xi, xik);
+ +    pbc_rvec_sub(pbc, xl, xi, xil);
       /* 9 flops */
   
       ra[XX] = a*xik[XX];
       ra[YY] = a*xik[YY];
       ra[ZZ] = a*xik[ZZ];
- -    
+ +
       rb[XX] = b*xil[XX];
       rb[YY] = b*xil[YY];
       rb[ZZ] = b*xil[ZZ];
   
       /* 6 flops */
   
- -    rvec_sub(ra,xij,rja);
- -    rvec_sub(rb,xij,rjb);
+ +    rvec_sub(ra, xij, rja);
+ +    rvec_sub(rb, xij, rjb);
       /* 6 flops */
- -    
- -    cprod(rja,rjb,rm);
+ +
+ +    cprod(rja, rjb, rm);
       /* 9 flops */
- -    
- -    d=c*gmx_invsqrt(norm2(rm));
+ +
+ +    d = c*gmx_invsqrt(norm2(rm));
       /* 5+5+1 flops */
- -    
+ +
       x[XX] = xi[XX] + d*rm[XX];
       x[YY] = xi[YY] + d*rm[YY];
       x[ZZ] = xi[ZZ] + d*rm[ZZ];
       /* 6 Flops */
- -    
+ +
       /* TOTAL: 47 flops */
   }
   
   
   static int constr_vsiten(t_iatom *ia, t_iparams ip[],
- -                       rvec *x, t_pbc *pbc)
+ +                         rvec *x, t_pbc *pbc)
   {
- -  rvec xs,x1,dx;
- -  dvec dsum;
- -  int  n3,av,ai,i;
- -  real a;
- -
- -  n3 = 3*ip[ia[0]].vsiten.n;
- -  av = ia[1];
- -  ai = ia[2];
- -  copy_rvec(x[ai],x1);
- -  clear_dvec(dsum);
- -  for(i=3; i<n3; i+=3) {
- -    ai = ia[i+2];
- -    a = ip[ia[i]].vsiten.a;
- -    if (pbc) {
- -      pbc_dx_aiuc(pbc,x[ai],x1,dx);
- -    } else {
- -      rvec_sub(x[ai],x1,dx);
- -    }
- -    dsum[XX] += a*dx[XX];
- -    dsum[YY] += a*dx[YY];
- -    dsum[ZZ] += a*dx[ZZ];
- -    /* 9 Flops */
- -  }
+ +    rvec xs, x1, dx;
+ +    dvec dsum;
+ +    int  n3, av, ai, i;
+ +    real a;
+ +
+ +    n3 = 3*ip[ia[0]].vsiten.n;
+ +    av = ia[1];
+ +    ai = ia[2];
+ +    copy_rvec(x[ai], x1);
+ +    clear_dvec(dsum);
+ +    for (i = 3; i < n3; i += 3)
+ +    {
+ +        ai = ia[i+2];
+ +        a  = ip[ia[i]].vsiten.a;
+ +        if (pbc)
+ +        {
+ +            pbc_dx_aiuc(pbc, x[ai], x1, dx);
+ +        }
+ +        else
+ +        {
+ +            rvec_sub(x[ai], x1, dx);
+ +        }
+ +        dsum[XX] += a*dx[XX];
+ +        dsum[YY] += a*dx[YY];
+ +        dsum[ZZ] += a*dx[ZZ];
+ +        /* 9 Flops */
+ +    }
   
- -  x[av][XX] = x1[XX] + dsum[XX];
- -  x[av][YY] = x1[YY] + dsum[YY];
- -  x[av][ZZ] = x1[ZZ] + dsum[ZZ];
+ +    x[av][XX] = x1[XX] + dsum[XX];
+ +    x[av][YY] = x1[YY] + dsum[YY];
+ +    x[av][ZZ] = x1[ZZ] + dsum[ZZ];
   
- -  return n3;
+ +    return n3;
   }
   
   
- -void construct_vsites(FILE *log,gmx_vsite_t *vsite,
- -                    rvec x[],t_nrnb *nrnb,
- -                    real dt,rvec *v,
- -                    t_iparams ip[],t_ilist ilist[],
- -                    int ePBC,gmx_bool bMolPBC,t_graph *graph,
- -                    t_commrec *cr,matrix box)
+ +void construct_vsites_thread(gmx_vsite_t *vsite,
+ +                             rvec x[], t_nrnb *nrnb,
+ +                             real dt, rvec *v,
+ +                             t_iparams ip[], t_ilist ilist[],
+ +                             t_pbc *pbc_null)
   {
- -  rvec      xpbc,xv,vv,dx;
- -  real      a1,b1,c1,inv_dt;
- -  int       i,inc,ii,nra,nr,tp,ftype;
- -  t_iatom   avsite,ai,aj,ak,al,pbc_atom;
- -  t_iatom   *ia;
- -  t_pbc     pbc,*pbc_null,*pbc_null2;
- -  gmx_bool      bDomDec;
- -  int       *vsite_pbc,ishift;
- -  rvec      reftmp,vtmp,rtmp;
- -      
- -  bDomDec = cr && DOMAINDECOMP(cr);
- -              
- -  /* We only need to do pbc when we have inter-cg vsites */
- -  if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite) {
- -    /* This is wasting some CPU time as we now do this multiple times
- -     * per MD step. But how often do we have vsites with full pbc?
- -     */
- -    pbc_null = set_pbc_dd(&pbc,ePBC,cr!=NULL ? cr->dd : NULL,FALSE,box);
- -  } else {
- -    pbc_null = NULL;
- -  }
- -              
- -  if (cr) {
- -    if (bDomDec) {
- -      dd_move_x_vsites(cr->dd,box,x);
- -    } else if (vsite->bPDvsitecomm) {
- -      /* I'm not sure whether the periodicity and shift are guaranteed
- -       * to be consistent between different nodes when running e.g. polymers
- -       * in parallel. In this special case we thus unshift/shift,
- -       * but only when necessary. This is to make sure the coordinates
- -       * we move don't end up a box away...
- -       */
- -              if (graph)
- -                      unshift_self(graph,box,x);
- -              
- -              move_construct_x(vsite->vsitecomm,x,cr);
- -              
- -              if (graph)
- -                      shift_self(graph,box,x);
- -    }
- -  }
- -
- -  if (v) {
- -    inv_dt = 1.0/dt;
- -  } else {
- -    inv_dt = 1.0;
- -  }
- -
- -  pbc_null2 = NULL;
- -  for(ftype=0; (ftype<F_NRE); ftype++) {
- -    if (interaction_function[ftype].flags & IF_VSITE) {
- -      nra    = interaction_function[ftype].nratoms;
- -      nr     = ilist[ftype].nr;
- -      ia     = ilist[ftype].iatoms;
- -
- -      if (pbc_null) {
- -      vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
- -      } else {
- -      vsite_pbc = NULL;
- -      }
- -              
- -      for(i=0; (i<nr); ) {
- -      tp   = ia[0];
- -      /*
- -      if (ftype != idef->functype[tp]) 
- -        gmx_incons("Function types for vsites wrong");
- -      */
- -      
- -      /* The vsite and constructing atoms */
- -      avsite = ia[1];
- -      ai   = ia[2];
- -
- -      /* Constants for constructing vsites */
- -      a1   = ip[tp].vsite.a;
- -      /* Check what kind of pbc we need to use */
- -      if (vsite_pbc) {
- -        pbc_atom = vsite_pbc[i/(1+nra)];
- -        if (pbc_atom > -2) {
- -          if (pbc_atom >= 0) {
- -            /* We need to copy the coordinates here,
- -             * single for single atom cg's pbc_atom is the vsite itself.
- -             */
- -            copy_rvec(x[pbc_atom],xpbc);
- -          }
- -          pbc_null2 = pbc_null;
- -        } else {
- -          pbc_null2 = NULL;
- -        }
- -      } else {
- -        pbc_atom = -2;
- -      }
- -      /* Copy the old position */
- -      copy_rvec(x[avsite],xv);
- -
- -      /* Construct the vsite depending on type */
- -      inc = nra+1;
- -      switch (ftype) {
- -      case F_VSITE2:
- -        aj = ia[3];
- -        constr_vsite2(x[ai],x[aj],x[avsite],a1,pbc_null2);
- -        break;
- -      case F_VSITE3:
- -        aj = ia[3];
- -        ak = ia[4];
- -        b1 = ip[tp].vsite.b;
- -        constr_vsite3(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
- -        break;
- -      case F_VSITE3FD:
- -        aj = ia[3];
- -        ak = ia[4];
- -        b1 = ip[tp].vsite.b;
- -        constr_vsite3FD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
- -        break;
- -      case F_VSITE3FAD:
- -        aj = ia[3];
- -        ak = ia[4];
- -        b1 = ip[tp].vsite.b;
- -        constr_vsite3FAD(x[ai],x[aj],x[ak],x[avsite],a1,b1,pbc_null2);
- -        break;
- -      case F_VSITE3OUT:
- -        aj = ia[3];
- -        ak = ia[4];
- -        b1 = ip[tp].vsite.b;
- -        c1 = ip[tp].vsite.c;
- -        constr_vsite3OUT(x[ai],x[aj],x[ak],x[avsite],a1,b1,c1,pbc_null2);
- -        break;
- -      case F_VSITE4FD:
- -        aj = ia[3];
- -        ak = ia[4];
- -        al = ia[5];
- -        b1 = ip[tp].vsite.b;
- -        c1 = ip[tp].vsite.c;
- -        constr_vsite4FD(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
- -                        pbc_null2);
- -        break;
- -      case F_VSITE4FDN:
- -        aj = ia[3];
- -        ak = ia[4];
- -        al = ia[5];
- -        b1 = ip[tp].vsite.b;
- -        c1 = ip[tp].vsite.c;
- -        constr_vsite4FDN(x[ai],x[aj],x[ak],x[al],x[avsite],a1,b1,c1,
- -                        pbc_null2);
- -        break;
- -      case F_VSITEN:
- -        inc = constr_vsiten(ia,ip,x,pbc_null2);
- -        break;
- -      default:
- -        gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
- -                    ftype,__FILE__,__LINE__);
- -      }
- -
- -      if (pbc_atom >= 0) {
- -        /* Match the pbc of this vsite to the rest of its charge group */
- -        ishift = pbc_dx_aiuc(pbc_null,x[avsite],xpbc,dx);
- -        if (ishift != CENTRAL)
- -          rvec_add(xpbc,dx,x[avsite]);
- -      }
- -      if (v) {
- -        /* Calculate velocity of vsite... */
- -        rvec_sub(x[avsite],xv,vv);
- -        svmul(inv_dt,vv,v[avsite]);
- -      }
- -
- -      /* Increment loop variables */
- -      i  += inc;
- -      ia += inc;
- -      }
- -    }
- -  }
- -}
+ +    gmx_bool   bPBCAll;
+ +    rvec       xpbc, xv, vv, dx;
+ +    real       a1, b1, c1, inv_dt;
+ +    int        i, inc, ii, nra, nr, tp, ftype;
+ +    t_iatom    avsite, ai, aj, ak, al, pbc_atom;
+ +    t_iatom   *ia;
+ +    t_pbc     *pbc_null2;
+ +    int       *vsite_pbc, ishift;
+ +    rvec       reftmp, vtmp, rtmp;
+ +
+ +    if (v != NULL)
+ +    {
+ +        inv_dt = 1.0/dt;
+ +    }
+ +    else
+ +    {
+ +        inv_dt = 1.0;
+ +    }
   
- -static void spread_vsite2(t_iatom ia[],real a,
- -                          rvec x[],rvec f[],rvec fshift[],
- -                          t_pbc *pbc,t_graph *g)
- -{
- -  rvec    fi,fj,dx;
- -  t_iatom av,ai,aj;
- -  ivec    di;
- -  real    b;
- -  int     siv,sij;
- -  
- -  av = ia[1];
- -  ai = ia[2];
- -  aj = ia[3];
- -  
- -  svmul(1-a,f[av],fi);
- -  svmul(  a,f[av],fj);
- -  /* 7 flop */
- -  
- -  rvec_inc(f[ai],fi);
- -  rvec_inc(f[aj],fj);
- -  /* 6 Flops */
- -
- -  if (g) {
- -    ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
- -    siv = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
- -    sij = IVEC2IS(di);
- -  } else if (pbc) {
- -    siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
- -    sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
- -  } else {
- -    siv = CENTRAL;
- -    sij = CENTRAL;
- -  }
- -
- -  if (fshift && (siv != CENTRAL || sij != CENTRAL)) {
- -    rvec_inc(fshift[siv],f[av]);
- -    rvec_dec(fshift[CENTRAL],fi);
- -    rvec_dec(fshift[sij],fj);
- -  }
- -
- -  /* TOTAL: 13 flops */
- -}
+ +    bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
   
- -void construct_vsites_mtop(FILE *log,gmx_vsite_t *vsite,
- -                         gmx_mtop_t *mtop,rvec x[])
- -{
- -  int as,mb,mol;
- -  gmx_molblock_t *molb;
- -  gmx_moltype_t  *molt;
- -
- -  as = 0;
- -  for(mb=0; mb<mtop->nmolblock; mb++) {
- -    molb = &mtop->molblock[mb];
- -    molt = &mtop->moltype[molb->type];          
- -    for(mol=0; mol<molb->nmol; mol++) {
- -      construct_vsites(log,vsite,x+as,NULL,0.0,NULL,
- -                     mtop->ffparams.iparams,molt->ilist,
- -                     epbcNONE,TRUE,NULL,NULL,NULL);
- -      as += molt->atoms.nr;
- -    }
- -  }
- -}
+ +    pbc_null2 = NULL;
+ +    vsite_pbc = NULL;
+ +    for (ftype = 0; (ftype < F_NRE); ftype++)
+ +    {
+ +        if ((interaction_function[ftype].flags & IF_VSITE) &&
+ +            ilist[ftype].nr > 0)
+ +        {
+ +            nra    = interaction_function[ftype].nratoms;
+ +            inc    = 1 + nra;
+ +            nr     = ilist[ftype].nr;
+ +            ia     = ilist[ftype].iatoms;
   
- -static void spread_vsite3(t_iatom ia[],real a,real b,
- -                          rvec x[],rvec f[],rvec fshift[],
- -                          t_pbc *pbc,t_graph *g)
- -{
- -  rvec    fi,fj,fk,dx;
- -  atom_id av,ai,aj,ak;
- -  ivec    di;
- -  int     siv,sij,sik;
- -
- -  av = ia[1];
- -  ai = ia[2];
- -  aj = ia[3];
- -  ak = ia[4];
- -  
- -  svmul(1-a-b,f[av],fi);
- -  svmul(    a,f[av],fj);
- -  svmul(    b,f[av],fk);
- -  /* 11 flops */
- -
- -  rvec_inc(f[ai],fi);
- -  rvec_inc(f[aj],fj);
- -  rvec_inc(f[ak],fk);
- -  /* 9 Flops */
- -  
- -  if (g) {
- -    ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ia[1]),di);
- -    siv = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),di);
- -    sij = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),di);
- -    sik = IVEC2IS(di);
- -  } else if (pbc) {
- -    siv = pbc_dx_aiuc(pbc,x[ai],x[av],dx);
- -    sij = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
- -    sik = pbc_dx_aiuc(pbc,x[ai],x[ak],dx);
- -  } else {
- -    siv = CENTRAL;
- -    sij = CENTRAL;
- -    sik = CENTRAL;
- -  }
- -
- -  if (fshift && (siv!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL)) {
- -    rvec_inc(fshift[siv],f[av]);
- -    rvec_dec(fshift[CENTRAL],fi);
- -    rvec_dec(fshift[sij],fj);
- -    rvec_dec(fshift[sik],fk);
- -  }
- -
- -  /* TOTAL: 20 flops */
+ +            if (bPBCAll)
+ +            {
+ +                pbc_null2 = pbc_null;
+ +            }
+ +            else if (pbc_null != NULL)
+ +            {
+ +                vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
+ +            }
+ +
+ +            for (i = 0; i < nr; )
+ +            {
+ +                tp   = ia[0];
+ +
+ +                /* The vsite and constructing atoms */
+ +                avsite = ia[1];
+ +                ai     = ia[2];
-                 aj     = ia[3];
+ +
+ +                /* Constants for constructing vsites */
+ +                a1   = ip[tp].vsite.a;
+ +                /* Check what kind of pbc we need to use */
+ +                if (bPBCAll)
+ +                {
+ +                    /* No charge groups, vsite follows its own pbc */
+ +                    pbc_atom = avsite;
+ +                    copy_rvec(x[avsite], xpbc);
+ +                }
+ +                else if (vsite_pbc != NULL)
+ +                {
+ +                    pbc_atom = vsite_pbc[i/(1+nra)];
+ +                    if (pbc_atom > -2)
+ +                    {
+ +                        if (pbc_atom >= 0)
+ +                        {
+ +                            /* We need to copy the coordinates here,
+ +                             * single for single atom cg's pbc_atom
+ +                             * is the vsite itself.
+ +                             */
+ +                            copy_rvec(x[pbc_atom], xpbc);
+ +                        }
+ +                        pbc_null2 = pbc_null;
+ +                    }
+ +                    else
+ +                    {
+ +                        pbc_null2 = NULL;
+ +                    }
+ +                }
+ +                else
+ +                {
+ +                    pbc_atom = -2;
+ +                }
+ +                /* Copy the old position */
+ +                copy_rvec(x[avsite], xv);
+ +
+ +                /* Construct the vsite depending on type */
+ +                switch (ftype)
+ +                {
+ +                    case F_VSITE2:
++                        aj = ia[3];
+ +                        constr_vsite2(x[ai], x[aj], x[avsite], a1, pbc_null2);
+ +                        break;
+ +                    case F_VSITE3:
++                        aj = ia[3];
+ +                        ak = ia[4];
+ +                        b1 = ip[tp].vsite.b;
+ +                        constr_vsite3(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
+ +                        break;
+ +                    case F_VSITE3FD:
++                        aj = ia[3];
+ +                        ak = ia[4];
+ +                        b1 = ip[tp].vsite.b;
+ +                        constr_vsite3FD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
+ +                        break;
+ +                    case F_VSITE3FAD:
++                        aj = ia[3];
+ +                        ak = ia[4];
+ +                        b1 = ip[tp].vsite.b;
+ +                        constr_vsite3FAD(x[ai], x[aj], x[ak], x[avsite], a1, b1, pbc_null2);
+ +                        break;
+ +                    case F_VSITE3OUT:
++                        aj = ia[3];
+ +                        ak = ia[4];
+ +                        b1 = ip[tp].vsite.b;
+ +                        c1 = ip[tp].vsite.c;
+ +                        constr_vsite3OUT(x[ai], x[aj], x[ak], x[avsite], a1, b1, c1, pbc_null2);
+ +                        break;
+ +                    case F_VSITE4FD:
++                        aj = ia[3];
+ +                        ak = ia[4];
+ +                        al = ia[5];
+ +                        b1 = ip[tp].vsite.b;
+ +                        c1 = ip[tp].vsite.c;
+ +                        constr_vsite4FD(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
+ +                                        pbc_null2);
+ +                        break;
+ +                    case F_VSITE4FDN:
++                        aj = ia[3];
+ +                        ak = ia[4];
+ +                        al = ia[5];
+ +                        b1 = ip[tp].vsite.b;
+ +                        c1 = ip[tp].vsite.c;
+ +                        constr_vsite4FDN(x[ai], x[aj], x[ak], x[al], x[avsite], a1, b1, c1,
+ +                                         pbc_null2);
+ +                        break;
+ +                    case F_VSITEN:
+ +                        inc = constr_vsiten(ia, ip, x, pbc_null2);
+ +                        break;
+ +                    default:
+ +                        gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
+ +                                  ftype, __FILE__, __LINE__);
+ +                }
+ +
+ +                if (pbc_atom >= 0)
+ +                {
+ +                    /* Match the pbc of this vsite to the rest of its charge group */
+ +                    ishift = pbc_dx_aiuc(pbc_null, x[avsite], xpbc, dx);
+ +                    if (ishift != CENTRAL)
+ +                    {
+ +                        rvec_add(xpbc, dx, x[avsite]);
+ +                    }
+ +                }
+ +                if (v != NULL)
+ +                {
+ +                    /* Calculate velocity of vsite... */
+ +                    rvec_sub(x[avsite], xv, vv);
+ +                    svmul(inv_dt, vv, v[avsite]);
+ +                }
+ +
+ +                /* Increment loop variables */
+ +                i  += inc;
+ +                ia += inc;
+ +            }
+ +        }
+ +    }
   }
   
- -static void spread_vsite3FD(t_iatom ia[],real a,real b,
- -                            rvec x[],rvec f[],rvec fshift[],
- -                            gmx_bool VirCorr,matrix dxdf,
- -                            t_pbc *pbc,t_graph *g)
+ +void construct_vsites(FILE *log, gmx_vsite_t *vsite,
+ +                      rvec x[], t_nrnb *nrnb,
+ +                      real dt, rvec *v,
+ +                      t_iparams ip[], t_ilist ilist[],
+ +                      int ePBC, gmx_bool bMolPBC, t_graph *graph,
+ +                      t_commrec *cr, matrix box)
   {
- -  real fx,fy,fz,c,invl,fproj,a1;
- -  rvec xvi,xij,xjk,xix,fv,temp;
- -  t_iatom av,ai,aj,ak;
- -  int     svi,sji,skj,d;
- -  ivec    di;
- -
- -  av = ia[1];
- -  ai = ia[2];
- -  aj = ia[3];
- -  ak = ia[4];
- -  copy_rvec(f[av],fv);
- -  
- -  sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
- -  skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
- -  /* 6 flops */
- -
- -  /* xix goes from i to point x on the line jk */  
- -  xix[XX]=xij[XX]+a*xjk[XX];
- -  xix[YY]=xij[YY]+a*xjk[YY];
- -  xix[ZZ]=xij[ZZ]+a*xjk[ZZ];
- -  /* 6 flops */
- -  
- -  invl=gmx_invsqrt(iprod(xix,xix));
- -  c=b*invl;
- -  /* 4 + ?10? flops */
- -  
- -  fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
- -  
- -  temp[XX]=c*(fv[XX]-fproj*xix[XX]);
- -  temp[YY]=c*(fv[YY]-fproj*xix[YY]);
- -  temp[ZZ]=c*(fv[ZZ]-fproj*xix[ZZ]);
- -  /* 16 */
- -  
- -  /* c is already calculated in constr_vsite3FD
- -     storing c somewhere will save 26 flops!     */
- -  
- -  a1=1-a;
- -  f[ai][XX] += fv[XX] - temp[XX];
- -  f[ai][YY] += fv[YY] - temp[YY];
- -  f[ai][ZZ] += fv[ZZ] - temp[ZZ];
- -  f[aj][XX] += a1*temp[XX];
- -  f[aj][YY] += a1*temp[YY];
- -  f[aj][ZZ] += a1*temp[ZZ];
- -  f[ak][XX] += a*temp[XX];
- -  f[ak][YY] += a*temp[YY];
- -  f[ak][ZZ] += a*temp[ZZ];
- -  /* 19 Flops */
- -
- -  if (g) {
- -    ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
- -    svi = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
- -    sji = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
- -    skj = IVEC2IS(di);
- -  } else if (pbc) {
- -    svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
- -  } else {
- -    svi = CENTRAL;
- -  }
- -
- -  if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
- -    rvec_dec(fshift[svi],fv);
- -    fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
- -    fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
- -    fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
- -    fshift[    sji][XX] += temp[XX];
- -    fshift[    sji][YY] += temp[YY];
- -    fshift[    sji][ZZ] += temp[ZZ];
- -    fshift[    skj][XX] += a*temp[XX];
- -    fshift[    skj][YY] += a*temp[YY];
- -    fshift[    skj][ZZ] += a*temp[ZZ];
- -  }
+ +    t_pbc     pbc, *pbc_null;
+ +    gmx_bool  bDomDec;
+ +    int       nthreads;
   
- -    if (VirCorr)
+ +    bDomDec = cr && DOMAINDECOMP(cr);
+ +
+ +    /* We only need to do pbc when we have inter-cg vsites */
+ +    if (ePBC != epbcNONE && (bDomDec || bMolPBC) && vsite->n_intercg_vsite)
       {
- -        /* When VirCorr=TRUE, the virial for the current forces is not
- -         * calculated from the redistributed forces. This means that
- -         * the effect of non-linear virtual site constructions on the virial
- -         * needs to be added separately. This contribution can be calculated
- -         * in many ways, but the simplest and cheapest way is to use
- -         * the first constructing atom ai as a reference position in space:
- -         * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
+ +        /* This is wasting some CPU time as we now do this multiple times
+ +         * per MD step. But how often do we have vsites with full pbc?
            */
- -        rvec xiv;
- -        int  i,j;
- -
- -        pbc_rvec_sub(pbc,x[av],x[ai],xiv);
+ +        pbc_null = set_pbc_dd(&pbc, ePBC, cr != NULL ? cr->dd : NULL, FALSE, box);
+ +    }
+ +    else
+ +    {
+ +        pbc_null = NULL;
+ +    }
   
- -        for(i=0; i<DIM; i++)
+ +    if (cr)
+ +    {
+ +        if (bDomDec)
+ +        {
+ +            dd_move_x_vsites(cr->dd, box, x);
+ +        }
+ +        else if (vsite->bPDvsitecomm)
           {
- -            for(j=0; j<DIM; j++)
+ +            /* I'm not sure whether the periodicity and shift are guaranteed
+ +             * to be consistent between different nodes when running e.g. polymers
+ +             * in parallel. In this special case we thus unshift/shift,
+ +             * but only when necessary. This is to make sure the coordinates
+ +             * we move don't end up a box away...
+ +             */
+ +            if (graph != NULL)
               {
- -                /* As xix is a linear combination of j and k, use that here */
- -                dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
+ +                unshift_self(graph, box, x);
+ +            }
+ +
+ +            move_construct_x(vsite->vsitecomm, x, cr);
+ +
+ +            if (graph != NULL)
+ +            {
+ +                shift_self(graph, box, x);
               }
           }
       }
   
- -  /* TOTAL: 61 flops */
+ +    if (vsite->nthreads == 1)
+ +    {
+ +        construct_vsites_thread(vsite,
+ +                                x, nrnb, dt, v,
+ +                                ip, ilist,
+ +                                pbc_null);
+ +    }
+ +    else
+ +    {
+ +#pragma omp parallel num_threads(vsite->nthreads)
+ +        {
+ +            construct_vsites_thread(vsite,
+ +                                    x, nrnb, dt, v,
+ +                                    ip, vsite->tdata[gmx_omp_get_thread_num()].ilist,
+ +                                    pbc_null);
+ +        }
+ +        /* Now we can construct the vsites that might depend on other vsites */
+ +        construct_vsites_thread(vsite,
+ +                                x, nrnb, dt, v,
+ +                                ip, vsite->tdata[vsite->nthreads].ilist,
+ +                                pbc_null);
+ +    }
   }
   
- -static void spread_vsite3FAD(t_iatom ia[],real a,real b,
- -                             rvec x[],rvec f[],rvec fshift[],
- -                             gmx_bool VirCorr,matrix dxdf,
- -                             t_pbc *pbc,t_graph *g)
+ +static void spread_vsite2(t_iatom ia[], real a,
+ +                          rvec x[], rvec f[], rvec fshift[],
+ +                          t_pbc *pbc, t_graph *g)
   {
- -  rvec    xvi,xij,xjk,xperp,Fpij,Fppp,fv,f1,f2,f3;
- -  real    a1,b1,c1,c2,invdij,invdij2,invdp,fproj;
- -  t_iatom av,ai,aj,ak;
- -  int     svi,sji,skj,d;
- -  ivec    di;
- -  
- -  av = ia[1];
- -  ai = ia[2];
- -  aj = ia[3];
- -  ak = ia[4];
- -  copy_rvec(f[ia[1]],fv);
- -
- -  sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
- -  skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
- -  /* 6 flops */
- -  
- -  invdij = gmx_invsqrt(iprod(xij,xij));
- -  invdij2 = invdij * invdij;
- -  c1 = iprod(xij,xjk) * invdij2;
- -  xperp[XX] = xjk[XX] - c1*xij[XX];
- -  xperp[YY] = xjk[YY] - c1*xij[YY];
- -  xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
- -  /* xperp in plane ijk, perp. to ij */
- -  invdp = gmx_invsqrt(iprod(xperp,xperp));
- -  a1 = a*invdij;
- -  b1 = b*invdp;
- -  /* 45 flops */
- -  
- -  /* a1, b1 and c1 are already calculated in constr_vsite3FAD
- -     storing them somewhere will save 45 flops!     */
- -  
- -  fproj=iprod(xij  ,fv)*invdij2;
- -  svmul(fproj,                      xij,  Fpij); /* proj. f on xij */
- -  svmul(iprod(xperp,fv)*invdp*invdp,xperp,Fppp); /* proj. f on xperp */
- -  svmul(b1*fproj,                   xperp,f3);
- -  /* 23 flops */
- -  
- -  rvec_sub(fv,Fpij,f1); /* f1 = f - Fpij */
- -  rvec_sub(f1,Fppp,f2); /* f2 = f - Fpij - Fppp */
- -  for (d=0; (d<DIM); d++) {
- -    f1[d]*=a1;
- -    f2[d]*=b1;
- -  }
- -  /* 12 flops */
- -  
- -  c2=1+c1;
- -  f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
- -  f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
- -  f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
- -  f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
- -  f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
- -  f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
- -  f[ak][XX] +=                      f2[XX];
- -  f[ak][YY] +=                      f2[YY];
- -  f[ak][ZZ] +=                      f2[ZZ];
- -  /* 30 Flops */
- -
- -  if (g) {
- -    ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
- -    svi = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
- -    sji = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
- -    skj = IVEC2IS(di);
- -  } else if (pbc) {
- -    svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
- -  } else {
- -    svi = CENTRAL;
- -  }
- -
- -  if (fshift && (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL)) {
- -    rvec_dec(fshift[svi],fv);
- -    fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
- -    fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
- -    fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
- -    fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
- -    fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
- -    fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
- -    fshift[    skj][XX] +=                          f2[XX];
- -    fshift[    skj][YY] +=                          f2[YY];
- -    fshift[    skj][ZZ] +=                          f2[ZZ];
- -  }
+ +    rvec    fi, fj, dx;
+ +    t_iatom av, ai, aj;
+ +    ivec    di;
+ +    real    b;
+ +    int     siv, sij;
   
- -    if (VirCorr)
+ +    av = ia[1];
+ +    ai = ia[2];
+ +    aj = ia[3];
+ +
+ +    svmul(1-a, f[av], fi);
+ +    svmul(  a, f[av], fj);
+ +    /* 7 flop */
+ +
+ +    rvec_inc(f[ai], fi);
+ +    rvec_inc(f[aj], fj);
+ +    /* 6 Flops */
+ +
+ +    if (g)
       {
- -        rvec xiv;
- -        int  i,j;
+ +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
+ +        siv = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
+ +        sij = IVEC2IS(di);
+ +    }
+ +    else if (pbc)
+ +    {
+ +        siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
+ +        sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
+ +    }
+ +    else
+ +    {
+ +        siv = CENTRAL;
+ +        sij = CENTRAL;
+ +    }
   
- -        pbc_rvec_sub(pbc,x[av],x[ai],xiv);
+ +    if (fshift && (siv != CENTRAL || sij != CENTRAL))
+ +    {
+ +        rvec_inc(fshift[siv], f[av]);
+ +        rvec_dec(fshift[CENTRAL], fi);
+ +        rvec_dec(fshift[sij], fj);
+ +    }
+ +
+ +    /* TOTAL: 13 flops */
+ +}
   
- -        for(i=0; i<DIM; i++)
+ +void construct_vsites_mtop(FILE *log, gmx_vsite_t *vsite,
+ +                           gmx_mtop_t *mtop, rvec x[])
+ +{
+ +    int             as, mb, mol;
+ +    gmx_molblock_t *molb;
+ +    gmx_moltype_t  *molt;
+ +
+ +    as = 0;
+ +    for (mb = 0; mb < mtop->nmolblock; mb++)
+ +    {
+ +        molb = &mtop->molblock[mb];
+ +        molt = &mtop->moltype[molb->type];
+ +        for (mol = 0; mol < molb->nmol; mol++)
           {
- -            for(j=0; j<DIM; j++)
- -            {
- -                /* Note that xik=xij+xjk, so we have to add xij*f2 */
- -                dxdf[i][j] +=
- -                    - xiv[i]*fv[j]
- -                    + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
- -                    + xjk[i]*f2[j];
- -            }
+ +            construct_vsites(log, vsite, x+as, NULL, 0.0, NULL,
+ +                             mtop->ffparams.iparams, molt->ilist,
+ +                             epbcNONE, TRUE, NULL, NULL, NULL);
+ +            as += molt->atoms.nr;
           }
       }
- -  
- -  /* TOTAL: 113 flops */
   }
   
- -static void spread_vsite3OUT(t_iatom ia[],real a,real b,real c,
- -                             rvec x[],rvec f[],rvec fshift[],
- -                             gmx_bool VirCorr,matrix dxdf,
- -                             t_pbc *pbc,t_graph *g)
+ +static void spread_vsite3(t_iatom ia[], real a, real b,
+ +                          rvec x[], rvec f[], rvec fshift[],
+ +                          t_pbc *pbc, t_graph *g)
   {
- -  rvec    xvi,xij,xik,fv,fj,fk;
- -  real    cfx,cfy,cfz;
- -  atom_id av,ai,aj,ak;
- -  ivec    di;
- -  int     svi,sji,ski;
- -  
- -  av = ia[1];
- -  ai = ia[2];
- -  aj = ia[3];
- -  ak = ia[4];
- -
- -  sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
- -  ski = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
- -  /* 6 Flops */
- -  
- -  copy_rvec(f[av],fv);
- -
- -  cfx = c*fv[XX];
- -  cfy = c*fv[YY];
- -  cfz = c*fv[ZZ];
- -  /* 3 Flops */
- -  
- -  fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
- -  fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
- -  fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
- -  
- -  fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
- -  fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
- -  fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
- -  /* 30 Flops */
- -    
- -  f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
- -  f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
- -  f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
- -  rvec_inc(f[aj],fj);
- -  rvec_inc(f[ak],fk);
- -  /* 15 Flops */
- -
- -  if (g) {
- -    ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
- -    svi = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
- -    sji = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
- -    ski = IVEC2IS(di);
- -  } else if (pbc) {
- -    svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
- -  } else {
- -    svi = CENTRAL;
- -  }
- -
- -  if (fshift && (svi!=CENTRAL || sji!=CENTRAL || ski!=CENTRAL)) {
- -    rvec_dec(fshift[svi],fv);
- -    fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
- -    fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
- -    fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
- -    rvec_inc(fshift[sji],fj);
- -    rvec_inc(fshift[ski],fk);
- -  }
+ +    rvec    fi, fj, fk, dx;
+ +    atom_id av, ai, aj, ak;
+ +    ivec    di;
+ +    int     siv, sij, sik;
   
- -    if (VirCorr)
- -    {
- -        rvec xiv;
- -        int  i,j;
+ +    av = ia[1];
+ +    ai = ia[2];
+ +    aj = ia[3];
+ +    ak = ia[4];
   
- -        pbc_rvec_sub(pbc,x[av],x[ai],xiv);
+ +    svmul(1-a-b, f[av], fi);
+ +    svmul(    a, f[av], fj);
+ +    svmul(    b, f[av], fk);
+ +    /* 11 flops */
   
- -        for(i=0; i<DIM; i++)
- -        {
- -            for(j=0; j<DIM; j++)
- -            {
- -                dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
- -            }
- -        }
+ +    rvec_inc(f[ai], fi);
+ +    rvec_inc(f[aj], fj);
+ +    rvec_inc(f[ak], fk);
+ +    /* 9 Flops */
+ +
+ +    if (g)
+ +    {
+ +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ia[1]), di);
+ +        siv = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), di);
+ +        sij = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), di);
+ +        sik = IVEC2IS(di);
+ +    }
+ +    else if (pbc)
+ +    {
+ +        siv = pbc_dx_aiuc(pbc, x[ai], x[av], dx);
+ +        sij = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
+ +        sik = pbc_dx_aiuc(pbc, x[ai], x[ak], dx);
       }
- -  
- -  /* TOTAL: 54 flops */
+ +    else
+ +    {
+ +        siv = CENTRAL;
+ +        sij = CENTRAL;
+ +        sik = CENTRAL;
+ +    }
+ +
+ +    if (fshift && (siv != CENTRAL || sij != CENTRAL || sik != CENTRAL))
+ +    {
+ +        rvec_inc(fshift[siv], f[av]);
+ +        rvec_dec(fshift[CENTRAL], fi);
+ +        rvec_dec(fshift[sij], fj);
+ +        rvec_dec(fshift[sik], fk);
+ +    }
+ +
+ +    /* TOTAL: 20 flops */
   }
   
- -static void spread_vsite4FD(t_iatom ia[],real a,real b,real c,
- -                            rvec x[],rvec f[],rvec fshift[],
- -                            gmx_bool VirCorr,matrix dxdf,
- -                            t_pbc *pbc,t_graph *g)
+ +static void spread_vsite3FD(t_iatom ia[], real a, real b,
+ +                            rvec x[], rvec f[], rvec fshift[],
+ +                            gmx_bool VirCorr, matrix dxdf,
+ +                            t_pbc *pbc, t_graph *g)
   {
- -  real    d,invl,fproj,a1;
- -  rvec    xvi,xij,xjk,xjl,xix,fv,temp;
- -  atom_id av,ai,aj,ak,al;
- -  ivec    di;
- -  int     svi,sji,skj,slj,m;
- -
- -  av = ia[1];
- -  ai = ia[2];
- -  aj = ia[3];
- -  ak = ia[4];
- -  al = ia[5];
- - 
- -  sji = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
- -  skj = pbc_rvec_sub(pbc,x[ak],x[aj],xjk);
- -  slj = pbc_rvec_sub(pbc,x[al],x[aj],xjl);
- -  /* 9 flops */
- -  
- -  /* xix goes from i to point x on the plane jkl */  
- -  for(m=0; m<DIM; m++)
- -    xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
- -  /* 12 flops */
- -  
- -  invl=gmx_invsqrt(iprod(xix,xix));
- -  d=c*invl;
- -  /* 4 + ?10? flops */
- -
- -  copy_rvec(f[av],fv);
- -
- -  fproj=iprod(xix,fv)*invl*invl; /* = (xix . f)/(xix . xix) */
- -
- -  for(m=0; m<DIM; m++)
- -    temp[m] = d*(fv[m] - fproj*xix[m]);
- -  /* 16 */
- -  
- -  /* c is already calculated in constr_vsite3FD
- -     storing c somewhere will save 35 flops!     */
- -  
- -  a1 = 1 - a - b;
- -  for(m=0; m<DIM; m++) {
- -    f[ai][m] += fv[m] - temp[m];
- -    f[aj][m] += a1*temp[m];
- -    f[ak][m] += a*temp[m];
- -    f[al][m] += b*temp[m];
- -  }
- -  /* 26 Flops */
- -  
- -  if (g) {
- -    ivec_sub(SHIFT_IVEC(g,ia[1]),SHIFT_IVEC(g,ai),di);
- -    svi = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
- -    sji = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,aj),di);
- -    skj = IVEC2IS(di);
- -    ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,aj),di);
- -    slj = IVEC2IS(di);
- -  } else if (pbc) {
- -    svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
- -  } else {
- -    svi = CENTRAL;
- -  }
- -
- -  if (fshift &&
- -      (svi!=CENTRAL || sji!=CENTRAL || skj!=CENTRAL || slj!=CENTRAL)) {
- -    rvec_dec(fshift[svi],fv);
- -    for(m=0; m<DIM; m++) {
- -      fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
- -      fshift[    sji][m] += temp[m];
- -      fshift[    skj][m] += a*temp[m];
- -      fshift[    slj][m] += b*temp[m];
- -    }
- -  }
+ +    real    fx, fy, fz, c, invl, fproj, a1;
+ +    rvec    xvi, xij, xjk, xix, fv, temp;
+ +    t_iatom av, ai, aj, ak;
+ +    int     svi, sji, skj, d;
+ +    ivec    di;
+ +
+ +    av = ia[1];
+ +    ai = ia[2];
+ +    aj = ia[3];
+ +    ak = ia[4];
+ +    copy_rvec(f[av], fv);
+ +
+ +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
+ +    skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
+ +    /* 6 flops */
+ +
+ +    /* xix goes from i to point x on the line jk */
+ +    xix[XX] = xij[XX]+a*xjk[XX];
+ +    xix[YY] = xij[YY]+a*xjk[YY];
+ +    xix[ZZ] = xij[ZZ]+a*xjk[ZZ];
+ +    /* 6 flops */
+ +
+ +    invl = gmx_invsqrt(iprod(xix, xix));
+ +    c    = b*invl;
+ +    /* 4 + ?10? flops */
+ +
+ +    fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
+ +
+ +    temp[XX] = c*(fv[XX]-fproj*xix[XX]);
+ +    temp[YY] = c*(fv[YY]-fproj*xix[YY]);
+ +    temp[ZZ] = c*(fv[ZZ]-fproj*xix[ZZ]);
+ +    /* 16 */
+ +
+ +    /* c is already calculated in constr_vsite3FD
+ +       storing c somewhere will save 26 flops!     */
+ +
+ +    a1         = 1-a;
+ +    f[ai][XX] += fv[XX] - temp[XX];
+ +    f[ai][YY] += fv[YY] - temp[YY];
+ +    f[ai][ZZ] += fv[ZZ] - temp[ZZ];
+ +    f[aj][XX] += a1*temp[XX];
+ +    f[aj][YY] += a1*temp[YY];
+ +    f[aj][ZZ] += a1*temp[ZZ];
+ +    f[ak][XX] += a*temp[XX];
+ +    f[ak][YY] += a*temp[YY];
+ +    f[ak][ZZ] += a*temp[ZZ];
+ +    /* 19 Flops */
+ +
+ +    if (g)
+ +    {
+ +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
+ +        svi = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
+ +        sji = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
+ +        skj = IVEC2IS(di);
+ +    }
+ +    else if (pbc)
+ +    {
+ +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
+ +    }
+ +    else
+ +    {
+ +        svi = CENTRAL;
+ +    }
+ +
+ +    if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
+ +    {
+ +        rvec_dec(fshift[svi], fv);
+ +        fshift[CENTRAL][XX] += fv[XX] - (1 + a)*temp[XX];
+ +        fshift[CENTRAL][YY] += fv[YY] - (1 + a)*temp[YY];
+ +        fshift[CENTRAL][ZZ] += fv[ZZ] - (1 + a)*temp[ZZ];
+ +        fshift[    sji][XX] += temp[XX];
+ +        fshift[    sji][YY] += temp[YY];
+ +        fshift[    sji][ZZ] += temp[ZZ];
+ +        fshift[    skj][XX] += a*temp[XX];
+ +        fshift[    skj][YY] += a*temp[YY];
+ +        fshift[    skj][ZZ] += a*temp[ZZ];
+ +    }
   
       if (VirCorr)
       {
+ +        /* When VirCorr=TRUE, the virial for the current forces is not
+ +         * calculated from the redistributed forces. This means that
+ +         * the effect of non-linear virtual site constructions on the virial
+ +         * needs to be added separately. This contribution can be calculated
+ +         * in many ways, but the simplest and cheapest way is to use
+ +         * the first constructing atom ai as a reference position in space:
+ +         * subtract (xv-xi)*fv and add (xj-xi)*fj + (xk-xi)*fk.
+ +         */
           rvec xiv;
- -        int  i,j;
+ +        int  i, j;
   
- -        pbc_rvec_sub(pbc,x[av],x[ai],xiv);
+ +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
   
- -        for(i=0; i<DIM; i++)
+ +        for (i = 0; i < DIM; i++)
           {
- -            for(j=0; j<DIM; j++)
+ +            for (j = 0; j < DIM; j++)
               {
+ +                /* As xix is a linear combination of j and k, use that here */
                   dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
               }
           }
       }
   
- -  /* TOTAL: 77 flops */
+ +    /* TOTAL: 61 flops */
   }
   
- -
- -static void spread_vsite4FDN(t_iatom ia[],real a,real b,real c,
- -                             rvec x[],rvec f[],rvec fshift[],
- -                             gmx_bool VirCorr,matrix dxdf,
- -                             t_pbc *pbc,t_graph *g)
+ +static void spread_vsite3FAD(t_iatom ia[], real a, real b,
+ +                             rvec x[], rvec f[], rvec fshift[],
+ +                             gmx_bool VirCorr, matrix dxdf,
+ +                             t_pbc *pbc, t_graph *g)
   {
- -    rvec xvi,xij,xik,xil,ra,rb,rja,rjb,rab,rm,rt;
- -    rvec fv,fj,fk,fl;
- -    real invrm,denom;
- -    real cfx,cfy,cfz;
- -    ivec di;
- -    int  av,ai,aj,ak,al;
- -    int  svi,sij,sik,sil;
+ +    rvec    xvi, xij, xjk, xperp, Fpij, Fppp, fv, f1, f2, f3;
+ +    real    a1, b1, c1, c2, invdij, invdij2, invdp, fproj;
+ +    t_iatom av, ai, aj, ak;
+ +    int     svi, sji, skj, d;
+ +    ivec    di;
   
- -    /* DEBUG: check atom indices */
       av = ia[1];
       ai = ia[2];
       aj = ia[3];
       ak = ia[4];
- -    al = ia[5];
+ +    copy_rvec(f[ia[1]], fv);
   
- -    copy_rvec(f[av],fv);
- -    
- -    sij = pbc_rvec_sub(pbc,x[aj],x[ai],xij);
- -    sik = pbc_rvec_sub(pbc,x[ak],x[ai],xik);
- -    sil = pbc_rvec_sub(pbc,x[al],x[ai],xil);
- -    /* 9 flops */
- -    
- -    ra[XX] = a*xik[XX];
- -    ra[YY] = a*xik[YY];
- -    ra[ZZ] = a*xik[ZZ];
- -    
- -    rb[XX] = b*xil[XX];
- -    rb[YY] = b*xil[YY];
- -    rb[ZZ] = b*xil[ZZ];
- -    
+ +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
+ +    skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
       /* 6 flops */
- -    
- -    rvec_sub(ra,xij,rja);
- -    rvec_sub(rb,xij,rjb);
- -    rvec_sub(rb,ra,rab);
- -    /* 9 flops */
- -    
- -    cprod(rja,rjb,rm);
- -    /* 9 flops */
- -
- -    invrm=gmx_invsqrt(norm2(rm));
- -    denom=invrm*invrm;
- -    /* 5+5+2 flops */
- -    
- -    cfx = c*invrm*fv[XX];
- -    cfy = c*invrm*fv[YY];
- -    cfz = c*invrm*fv[ZZ];
- -    /* 6 Flops */
- -    
- -    cprod(rm,rab,rt);
- -    /* 9 flops */
+ +
+ +    invdij    = gmx_invsqrt(iprod(xij, xij));
+ +    invdij2   = invdij * invdij;
+ +    c1        = iprod(xij, xjk) * invdij2;
+ +    xperp[XX] = xjk[XX] - c1*xij[XX];
+ +    xperp[YY] = xjk[YY] - c1*xij[YY];
+ +    xperp[ZZ] = xjk[ZZ] - c1*xij[ZZ];
+ +    /* xperp in plane ijk, perp. to ij */
+ +    invdp = gmx_invsqrt(iprod(xperp, xperp));
+ +    a1    = a*invdij;
+ +    b1    = b*invdp;
+ +    /* 45 flops */
+ +
+ +    /* a1, b1 and c1 are already calculated in constr_vsite3FAD
+ +       storing them somewhere will save 45 flops!     */
+ +
+ +    fproj = iprod(xij, fv)*invdij2;
+ +    svmul(fproj,                      xij,  Fpij);    /* proj. f on xij */
+ +    svmul(iprod(xperp, fv)*invdp*invdp, xperp, Fppp); /* proj. f on xperp */
+ +    svmul(b1*fproj,                   xperp, f3);
+ +    /* 23 flops */
+ +
+ +    rvec_sub(fv, Fpij, f1); /* f1 = f - Fpij */
+ +    rvec_sub(f1, Fppp, f2); /* f2 = f - Fpij - Fppp */
+ +    for (d = 0; (d < DIM); d++)
+ +    {
+ +        f1[d] *= a1;
+ +        f2[d] *= b1;
+ +    }
+ +    /* 12 flops */
+ +
+ +    c2         = 1+c1;
+ +    f[ai][XX] += fv[XX] - f1[XX] + c1*f2[XX] + f3[XX];
+ +    f[ai][YY] += fv[YY] - f1[YY] + c1*f2[YY] + f3[YY];
+ +    f[ai][ZZ] += fv[ZZ] - f1[ZZ] + c1*f2[ZZ] + f3[ZZ];
+ +    f[aj][XX] +=          f1[XX] - c2*f2[XX] - f3[XX];
+ +    f[aj][YY] +=          f1[YY] - c2*f2[YY] - f3[YY];
+ +    f[aj][ZZ] +=          f1[ZZ] - c2*f2[ZZ] - f3[ZZ];
+ +    f[ak][XX] +=                      f2[XX];
+ +    f[ak][YY] +=                      f2[YY];
+ +    f[ak][ZZ] +=                      f2[ZZ];
+ +    /* 30 Flops */
+ +
+ +    if (g)
+ +    {
+ +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
+ +        svi = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
+ +        sji = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
+ +        skj = IVEC2IS(di);
+ +    }
+ +    else if (pbc)
+ +    {
+ +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
+ +    }
+ +    else
+ +    {
+ +        svi = CENTRAL;
+ +    }
+ +
+ +    if (fshift && (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL))
+ +    {
+ +        rvec_dec(fshift[svi], fv);
+ +        fshift[CENTRAL][XX] += fv[XX] - f1[XX] - (1-c1)*f2[XX] + f3[XX];
+ +        fshift[CENTRAL][YY] += fv[YY] - f1[YY] - (1-c1)*f2[YY] + f3[YY];
+ +        fshift[CENTRAL][ZZ] += fv[ZZ] - f1[ZZ] - (1-c1)*f2[ZZ] + f3[ZZ];
+ +        fshift[    sji][XX] +=          f1[XX] -    c1 *f2[XX] - f3[XX];
+ +        fshift[    sji][YY] +=          f1[YY] -    c1 *f2[YY] - f3[YY];
+ +        fshift[    sji][ZZ] +=          f1[ZZ] -    c1 *f2[ZZ] - f3[ZZ];
+ +        fshift[    skj][XX] +=                          f2[XX];
+ +        fshift[    skj][YY] +=                          f2[YY];
+ +        fshift[    skj][ZZ] +=                          f2[ZZ];
+ +    }
+ +
+ +    if (VirCorr)
+ +    {
+ +        rvec xiv;
+ +        int  i, j;
+ +
+ +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
+ +
+ +        for (i = 0; i < DIM; i++)
+ +        {
+ +            for (j = 0; j < DIM; j++)
+ +            {
+ +                /* Note that xik=xij+xjk, so we have to add xij*f2 */
+ +                dxdf[i][j] +=
+ +                    -xiv[i]*fv[j]
+ +                    + xij[i]*(f1[j] + (1 - c2)*f2[j] - f3[j])
+ +                    + xjk[i]*f2[j];
+ +            }
+ +        }
+ +    }
+ +
+ +    /* TOTAL: 113 flops */
+ +}
+ +
+ +static void spread_vsite3OUT(t_iatom ia[], real a, real b, real c,
+ +                             rvec x[], rvec f[], rvec fshift[],
+ +                             gmx_bool VirCorr, matrix dxdf,
+ +                             t_pbc *pbc, t_graph *g)
+ +{
+ +    rvec    xvi, xij, xik, fv, fj, fk;
+ +    real    cfx, cfy, cfz;
+ +    atom_id av, ai, aj, ak;
+ +    ivec    di;
+ +    int     svi, sji, ski;
+ +
+ +    av = ia[1];
+ +    ai = ia[2];
+ +    aj = ia[3];
+ +    ak = ia[4];
+ +
+ +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
+ +    ski = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
+ +    /* 6 Flops */
+ +
+ +    copy_rvec(f[av], fv);
+ +
+ +    cfx = c*fv[XX];
+ +    cfy = c*fv[YY];
+ +    cfz = c*fv[ZZ];
+ +    /* 3 Flops */
+ +
+ +    fj[XX] = a*fv[XX]     -  xik[ZZ]*cfy +  xik[YY]*cfz;
+ +    fj[YY] =  xik[ZZ]*cfx + a*fv[YY]     -  xik[XX]*cfz;
+ +    fj[ZZ] = -xik[YY]*cfx +  xik[XX]*cfy + a*fv[ZZ];
+ +
+ +    fk[XX] = b*fv[XX]     +  xij[ZZ]*cfy -  xij[YY]*cfz;
+ +    fk[YY] = -xij[ZZ]*cfx + b*fv[YY]     +  xij[XX]*cfz;
+ +    fk[ZZ] =  xij[YY]*cfx -  xij[XX]*cfy + b*fv[ZZ];
+ +    /* 30 Flops */
+ +
+ +    f[ai][XX] += fv[XX] - fj[XX] - fk[XX];
+ +    f[ai][YY] += fv[YY] - fj[YY] - fk[YY];
+ +    f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
+ +    rvec_inc(f[aj], fj);
+ +    rvec_inc(f[ak], fk);
+ +    /* 15 Flops */
+ +
+ +    if (g)
+ +    {
+ +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
+ +        svi = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
+ +        sji = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
+ +        ski = IVEC2IS(di);
+ +    }
+ +    else if (pbc)
+ +    {
+ +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
+ +    }
+ +    else
+ +    {
+ +        svi = CENTRAL;
+ +    }
+ +
+ +    if (fshift && (svi != CENTRAL || sji != CENTRAL || ski != CENTRAL))
+ +    {
+ +        rvec_dec(fshift[svi], fv);
+ +        fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX];
+ +        fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY];
+ +        fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ];
+ +        rvec_inc(fshift[sji], fj);
+ +        rvec_inc(fshift[ski], fk);
+ +    }
+ +
+ +    if (VirCorr)
+ +    {
+ +        rvec xiv;
+ +        int  i, j;
+ +
+ +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
+ +
+ +        for (i = 0; i < DIM; i++)
+ +        {
+ +            for (j = 0; j < DIM; j++)
+ +            {
+ +                dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j];
+ +            }
+ +        }
+ +    }
+ +
+ +    /* TOTAL: 54 flops */
+ +}
+ +
+ +static void spread_vsite4FD(t_iatom ia[], real a, real b, real c,
+ +                            rvec x[], rvec f[], rvec fshift[],
+ +                            gmx_bool VirCorr, matrix dxdf,
+ +                            t_pbc *pbc, t_graph *g)
+ +{
+ +    real    d, invl, fproj, a1;
+ +    rvec    xvi, xij, xjk, xjl, xix, fv, temp;
+ +    atom_id av, ai, aj, ak, al;
+ +    ivec    di;
+ +    int     svi, sji, skj, slj, m;
+ +
+ +    av = ia[1];
+ +    ai = ia[2];
+ +    aj = ia[3];
+ +    ak = ia[4];
+ +    al = ia[5];
+ +
+ +    sji = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
+ +    skj = pbc_rvec_sub(pbc, x[ak], x[aj], xjk);
+ +    slj = pbc_rvec_sub(pbc, x[al], x[aj], xjl);
+ +    /* 9 flops */
+ +
+ +    /* xix goes from i to point x on the plane jkl */
+ +    for (m = 0; m < DIM; m++)
+ +    {
+ +        xix[m] = xij[m] + a*xjk[m] + b*xjl[m];
+ +    }
+ +    /* 12 flops */
+ +
+ +    invl = gmx_invsqrt(iprod(xix, xix));
+ +    d    = c*invl;
+ +    /* 4 + ?10? flops */
+ +
+ +    copy_rvec(f[av], fv);
+ +
+ +    fproj = iprod(xix, fv)*invl*invl; /* = (xix . f)/(xix . xix) */
+ +
+ +    for (m = 0; m < DIM; m++)
+ +    {
+ +        temp[m] = d*(fv[m] - fproj*xix[m]);
+ +    }
+ +    /* 16 */
+ +
+ +    /* c is already calculated in constr_vsite3FD
+ +       storing c somewhere will save 35 flops!     */
+ +
+ +    a1 = 1 - a - b;
+ +    for (m = 0; m < DIM; m++)
+ +    {
+ +        f[ai][m] += fv[m] - temp[m];
+ +        f[aj][m] += a1*temp[m];
+ +        f[ak][m] += a*temp[m];
+ +        f[al][m] += b*temp[m];
+ +    }
+ +    /* 26 Flops */
+ +
+ +    if (g)
+ +    {
+ +        ivec_sub(SHIFT_IVEC(g, ia[1]), SHIFT_IVEC(g, ai), di);
+ +        svi = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
+ +        sji = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, aj), di);
+ +        skj = IVEC2IS(di);
+ +        ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, aj), di);
+ +        slj = IVEC2IS(di);
+ +    }
+ +    else if (pbc)
+ +    {
+ +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
+ +    }
+ +    else
+ +    {
+ +        svi = CENTRAL;
+ +    }
+ +
+ +    if (fshift &&
+ +        (svi != CENTRAL || sji != CENTRAL || skj != CENTRAL || slj != CENTRAL))
+ +    {
+ +        rvec_dec(fshift[svi], fv);
+ +        for (m = 0; m < DIM; m++)
+ +        {
+ +            fshift[CENTRAL][m] += fv[m] - (1 + a + b)*temp[m];
+ +            fshift[    sji][m] += temp[m];
+ +            fshift[    skj][m] += a*temp[m];
+ +            fshift[    slj][m] += b*temp[m];
+ +        }
+ +    }
+ +
+ +    if (VirCorr)
+ +    {
+ +        rvec xiv;
+ +        int  i, j;
+ +
+ +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
+ +
+ +        for (i = 0; i < DIM; i++)
+ +        {
+ +            for (j = 0; j < DIM; j++)
+ +            {
+ +                dxdf[i][j] += -xiv[i]*fv[j] + xix[i]*temp[j];
+ +            }
+ +        }
+ +    }
+ +
+ +    /* TOTAL: 77 flops */
+ +}
+ +
+ +
+ +static void spread_vsite4FDN(t_iatom ia[], real a, real b, real c,
+ +                             rvec x[], rvec f[], rvec fshift[],
+ +                             gmx_bool VirCorr, matrix dxdf,
+ +                             t_pbc *pbc, t_graph *g)
+ +{
+ +    rvec xvi, xij, xik, xil, ra, rb, rja, rjb, rab, rm, rt;
+ +    rvec fv, fj, fk, fl;
+ +    real invrm, denom;
+ +    real cfx, cfy, cfz;
+ +    ivec di;
+ +    int  av, ai, aj, ak, al;
+ +    int  svi, sij, sik, sil;
+ +
+ +    /* DEBUG: check atom indices */
+ +    av = ia[1];
+ +    ai = ia[2];
+ +    aj = ia[3];
+ +    ak = ia[4];
+ +    al = ia[5];
+ +
+ +    copy_rvec(f[av], fv);
+ +
+ +    sij = pbc_rvec_sub(pbc, x[aj], x[ai], xij);
+ +    sik = pbc_rvec_sub(pbc, x[ak], x[ai], xik);
+ +    sil = pbc_rvec_sub(pbc, x[al], x[ai], xil);
+ +    /* 9 flops */
+ +
+ +    ra[XX] = a*xik[XX];
+ +    ra[YY] = a*xik[YY];
+ +    ra[ZZ] = a*xik[ZZ];
+ +
+ +    rb[XX] = b*xil[XX];
+ +    rb[YY] = b*xil[YY];
+ +    rb[ZZ] = b*xil[ZZ];
+ +
+ +    /* 6 flops */
+ +
+ +    rvec_sub(ra, xij, rja);
+ +    rvec_sub(rb, xij, rjb);
+ +    rvec_sub(rb, ra, rab);
+ +    /* 9 flops */
+ +
+ +    cprod(rja, rjb, rm);
+ +    /* 9 flops */
+ +
+ +    invrm = gmx_invsqrt(norm2(rm));
+ +    denom = invrm*invrm;
+ +    /* 5+5+2 flops */
+ +
+ +    cfx = c*invrm*fv[XX];
+ +    cfy = c*invrm*fv[YY];
+ +    cfz = c*invrm*fv[ZZ];
+ +    /* 6 Flops */
+ +
+ +    cprod(rm, rab, rt);
+ +    /* 9 flops */
   
       rt[XX] *= denom;
       rt[YY] *= denom;
       rt[ZZ] *= denom;
       /* 3flops */
- -    
+ +
       fj[XX] = (        -rm[XX]*rt[XX]) * cfx + ( rab[ZZ]-rm[YY]*rt[XX]) * cfy + (-rab[YY]-rm[ZZ]*rt[XX]) * cfz;
       fj[YY] = (-rab[ZZ]-rm[XX]*rt[YY]) * cfx + (        -rm[YY]*rt[YY]) * cfy + ( rab[XX]-rm[ZZ]*rt[YY]) * cfz;
       fj[ZZ] = ( rab[YY]-rm[XX]*rt[ZZ]) * cfx + (-rab[XX]-rm[YY]*rt[ZZ]) * cfy + (        -rm[ZZ]*rt[ZZ]) * cfz;
       /* 30 flops */
- -        
- -    cprod(rjb,rm,rt);
+ +
+ +    cprod(rjb, rm, rt);
       /* 9 flops */
   
       rt[XX] *= denom*a;
       rt[YY] *= denom*a;
       rt[ZZ] *= denom*a;
       /* 3flops */
- -    
+ +
       fk[XX] = (          -rm[XX]*rt[XX]) * cfx + (-a*rjb[ZZ]-rm[YY]*rt[XX]) * cfy + ( a*rjb[YY]-rm[ZZ]*rt[XX]) * cfz;
       fk[YY] = ( a*rjb[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + (-a*rjb[XX]-rm[ZZ]*rt[YY]) * cfz;
       fk[ZZ] = (-a*rjb[YY]-rm[XX]*rt[ZZ]) * cfx + ( a*rjb[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
       /* 36 flops */
- -    
- -    cprod(rm,rja,rt);
+ +
+ +    cprod(rm, rja, rt);
       /* 9 flops */
- -    
+ +
       rt[XX] *= denom*b;
       rt[YY] *= denom*b;
       rt[ZZ] *= denom*b;
       /* 3flops */
- -    
+ +
       fl[XX] = (          -rm[XX]*rt[XX]) * cfx + ( b*rja[ZZ]-rm[YY]*rt[XX]) * cfy + (-b*rja[YY]-rm[ZZ]*rt[XX]) * cfz;
       fl[YY] = (-b*rja[ZZ]-rm[XX]*rt[YY]) * cfx + (          -rm[YY]*rt[YY]) * cfy + ( b*rja[XX]-rm[ZZ]*rt[YY]) * cfz;
       fl[ZZ] = ( b*rja[YY]-rm[XX]*rt[ZZ]) * cfx + (-b*rja[XX]-rm[YY]*rt[ZZ]) * cfy + (          -rm[ZZ]*rt[ZZ]) * cfz;
@@@ -1323,862 -1195,489 +1329,862 @@@
       f[ai][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
       f[ai][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
       f[ai][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
- -    rvec_inc(f[aj],fj);
- -    rvec_inc(f[ak],fk);
- -    rvec_inc(f[al],fl);
+ +    rvec_inc(f[aj], fj);
+ +    rvec_inc(f[ak], fk);
+ +    rvec_inc(f[al], fl);
       /* 21 flops */
   
- -    if (g) {
- -        ivec_sub(SHIFT_IVEC(g,av),SHIFT_IVEC(g,ai),di);
+ +    if (g)
+ +    {
+ +        ivec_sub(SHIFT_IVEC(g, av), SHIFT_IVEC(g, ai), di);
           svi = IVEC2IS(di);
- -        ivec_sub(SHIFT_IVEC(g,aj),SHIFT_IVEC(g,ai),di);
+ +        ivec_sub(SHIFT_IVEC(g, aj), SHIFT_IVEC(g, ai), di);
           sij = IVEC2IS(di);
- -        ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,ai),di);
+ +        ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, ai), di);
           sik = IVEC2IS(di);
- -        ivec_sub(SHIFT_IVEC(g,al),SHIFT_IVEC(g,ai),di);
+ +        ivec_sub(SHIFT_IVEC(g, al), SHIFT_IVEC(g, ai), di);
           sil = IVEC2IS(di);
- -    } else if (pbc) {
- -        svi = pbc_rvec_sub(pbc,x[av],x[ai],xvi);
- -    } else {
+ +    }
+ +    else if (pbc)
+ +    {
+ +        svi = pbc_rvec_sub(pbc, x[av], x[ai], xvi);
+ +    }
+ +    else
+ +    {
           svi = CENTRAL;
       }
- -    
- -    if (fshift && (svi!=CENTRAL || sij!=CENTRAL || sik!=CENTRAL || sil!=CENTRAL)) {
- -        rvec_dec(fshift[svi],fv);
+ +
+ +    if (fshift && (svi != CENTRAL || sij != CENTRAL || sik != CENTRAL || sil != CENTRAL))
+ +    {
+ +        rvec_dec(fshift[svi], fv);
           fshift[CENTRAL][XX] += fv[XX] - fj[XX] - fk[XX] - fl[XX];
           fshift[CENTRAL][YY] += fv[YY] - fj[YY] - fk[YY] - fl[YY];
           fshift[CENTRAL][ZZ] += fv[ZZ] - fj[ZZ] - fk[ZZ] - fl[ZZ];
- -        rvec_inc(fshift[sij],fj);
- -        rvec_inc(fshift[sik],fk);
- -        rvec_inc(fshift[sil],fl);
+ +        rvec_inc(fshift[sij], fj);
+ +        rvec_inc(fshift[sik], fk);
+ +        rvec_inc(fshift[sil], fl);
       }
   
       if (VirCorr)
       {
           rvec xiv;
- -        int  i,j;
+ +        int  i, j;
   
- -        pbc_rvec_sub(pbc,x[av],x[ai],xiv);
+ +        pbc_rvec_sub(pbc, x[av], x[ai], xiv);
   
- -        for(i=0; i<DIM; i++)
+ +        for (i = 0; i < DIM; i++)
           {
- -            for(j=0; j<DIM; j++)
+ +            for (j = 0; j < DIM; j++)
               {
                   dxdf[i][j] += -xiv[i]*fv[j] + xij[i]*fj[j] + xik[i]*fk[j] + xil[i]*fl[j];
               }
           }
       }
- -    
+ +
       /* Total: 207 flops (Yuck!) */
   }
   
   
- -static int spread_vsiten(t_iatom ia[],t_iparams ip[],
- -                       rvec x[],rvec f[],rvec fshift[],
- -                       t_pbc *pbc,t_graph *g)
+ +static int spread_vsiten(t_iatom ia[], t_iparams ip[],
+ +                         rvec x[], rvec f[], rvec fshift[],
+ +                         t_pbc *pbc, t_graph *g)
   {
- -  rvec xv,dx,fi;
- -  int  n3,av,i,ai;
- -  real a;
- -  ivec di;
- -  int  siv;
- -
- -  n3 = 3*ip[ia[0]].vsiten.n;
- -  av = ia[1];
- -  copy_rvec(x[av],xv);
- -  
- -  for(i=0; i<n3; i+=3) {
- -    ai = ia[i+2];
- -    if (g) {
- -      ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,av),di);
- -      siv = IVEC2IS(di);
- -    } else if (pbc) {
- -      siv = pbc_dx_aiuc(pbc,x[ai],xv,dx);
- -    } else {
- -      siv = CENTRAL;
- -    }
- -    a = ip[ia[i]].vsiten.a;
- -    svmul(a,f[av],fi);
- -    rvec_inc(f[ai],fi);
- -    if (fshift && siv != CENTRAL) {
- -      rvec_inc(fshift[siv],fi);
- -      rvec_dec(fshift[CENTRAL],fi);
+ +    rvec xv, dx, fi;
+ +    int  n3, av, i, ai;
+ +    real a;
+ +    ivec di;
+ +    int  siv;
+ +
+ +    n3 = 3*ip[ia[0]].vsiten.n;
+ +    av = ia[1];
+ +    copy_rvec(x[av], xv);
+ +
+ +    for (i = 0; i < n3; i += 3)
+ +    {
+ +        ai = ia[i+2];
+ +        if (g)
+ +        {
+ +            ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, av), di);
+ +            siv = IVEC2IS(di);
+ +        }
+ +        else if (pbc)
+ +        {
+ +            siv = pbc_dx_aiuc(pbc, x[ai], xv, dx);
+ +        }
+ +        else
+ +        {
+ +            siv = CENTRAL;
+ +        }
+ +        a = ip[ia[i]].vsiten.a;
+ +        svmul(a, f[av], fi);
+ +        rvec_inc(f[ai], fi);
+ +        if (fshift && siv != CENTRAL)
+ +        {
+ +            rvec_inc(fshift[siv], fi);
+ +            rvec_dec(fshift[CENTRAL], fi);
+ +        }
+ +        /* 6 Flops */
       }
- -    /* 6 Flops */
- -  }
   
- -  return n3;
+ +    return n3;
   }
   
   
- -void spread_vsite_f(FILE *log,gmx_vsite_t *vsite,
- -                    rvec x[],rvec f[],rvec *fshift,
- -                    gmx_bool VirCorr,matrix vir,
- -                    t_nrnb *nrnb,t_idef *idef,
- -                    int ePBC,gmx_bool bMolPBC,t_graph *g,matrix box,
- -                    t_commrec *cr)
+ +static int vsite_count(const t_ilist *ilist, int ftype)
   {
- -  real      a1,b1,c1;
- -  int       i,inc,m,nra,nr,tp,ftype;
- -  int       nd2,nd3,nd3FD,nd3FAD,nd3OUT,nd4FD,nd4FDN,ndN;
- -  t_iatom   *ia;
- -  t_iparams *ip;
- -  t_pbc     pbc,*pbc_null,*pbc_null2;
- -  int       *vsite_pbc;
- -  matrix    dxdf;
- -
- -  /* We only need to do pbc when we have inter-cg vsites */
- -  if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite) {
- -    /* This is wasting some CPU time as we now do this multiple times
- -     * per MD step. But how often do we have vsites with full pbc?
- -     */
- -    pbc_null = set_pbc_dd(&pbc,ePBC,cr->dd,FALSE,box);
- -  } else {
- -    pbc_null = NULL;
- -  }
- -  
- -  if (DOMAINDECOMP(cr)) 
- -  {
- -    dd_clear_f_vsites(cr->dd,f);
- -  } 
- -  else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
- -  {
- -    pd_clear_nonlocal_constructs(vsite->vsitecomm,f);
- -  }
- -
- -    if (vir != NULL)
+ +    if (ftype == F_VSITEN)
+ +    {
+ +        return ilist[ftype].nr/3;
+ +    }
+ +    else
+ +    {
+ +        return ilist[ftype].nr/(1 + interaction_function[ftype].nratoms);
+ +    }
+ +}
+ +
+ +static void spread_vsite_f_thread(gmx_vsite_t *vsite,
+ +                                  rvec x[], rvec f[], rvec *fshift,
+ +                                  gmx_bool VirCorr, matrix dxdf,
+ +                                  t_iparams ip[], t_ilist ilist[],
+ +                                  t_graph *g, t_pbc *pbc_null)
+ +{
+ +    gmx_bool   bPBCAll;
+ +    real       a1, b1, c1;
+ +    int        i, inc, m, nra, nr, tp, ftype;
+ +    t_iatom   *ia;
+ +    t_pbc     *pbc_null2;
+ +    int       *vsite_pbc;
+ +
+ +    if (VirCorr)
       {
           clear_mat(dxdf);
       }
- -      
- -  ip     = idef->iparams;
- -
- -  nd2        = 0;
- -  nd3        = 0;
- -  nd3FD      = 0;
- -  nd3FAD     = 0;
- -  nd3OUT     = 0;
- -  nd4FD      = 0;
- -  nd4FDN     = 0;
- -  ndN        = 0;
- -   
- -  /* this loop goes backwards to be able to build *
- -   * higher type vsites from lower types         */
- -  pbc_null2 = NULL;
- -  for(ftype=F_NRE-1; (ftype>=0); ftype--) {
- -    if (interaction_function[ftype].flags & IF_VSITE) {
- -      nra    = interaction_function[ftype].nratoms;
- -      nr     = idef->il[ftype].nr;
- -      ia     = idef->il[ftype].iatoms;
- -
- -      if (pbc_null) {
- -      vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
- -      } else {
- -      vsite_pbc = NULL;
- -      }
- -     
- -      for(i=0; (i<nr); ) {
- -      /* Check if we need to apply pbc for this vsite */
- -      if (vsite_pbc) {
- -        if (vsite_pbc[i/(1+nra)] > -2)
- -          pbc_null2 = pbc_null;
- -        else
- -          pbc_null2 = NULL;
- -      }
- -
- -      tp   = ia[0];
- -      if (ftype != idef->functype[tp])
- -        gmx_incons("Functiontypes for vsites wrong");
- -
- -      /* Constants for constructing */
- -      a1   = ip[tp].vsite.a; 
- -      /* Construct the vsite depending on type */
- -      inc = nra+1;
- -      switch (ftype) {
- -      case F_VSITE2:
- -        spread_vsite2(ia,a1,x,f,fshift,pbc_null2,g);
- -        nd2++;
- -        break;
- -      case F_VSITE3:
- -        b1 = ip[tp].vsite.b;
- -        spread_vsite3(ia,a1,b1,x,f,fshift,pbc_null2,g);
- -        nd3++;
- -        break;
- -      case F_VSITE3FD:
- -        b1 = ip[tp].vsite.b;
- -        spread_vsite3FD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
- -        nd3FD++;
- -        break;
- -      case F_VSITE3FAD:
- -        b1 = ip[tp].vsite.b;
- -        spread_vsite3FAD(ia,a1,b1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
- -        nd3FAD++;
- -        break;
- -      case F_VSITE3OUT:
- -        b1 = ip[tp].vsite.b;
- -        c1 = ip[tp].vsite.c;
- -        spread_vsite3OUT(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
- -        nd3OUT++;
- -        break;
- -      case F_VSITE4FD:
- -        b1 = ip[tp].vsite.b;
- -        c1 = ip[tp].vsite.c;
- -        spread_vsite4FD(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
- -        nd4FD++;
- -        break;
- -      case F_VSITE4FDN:
- -        b1 = ip[tp].vsite.b;
- -        c1 = ip[tp].vsite.c;
- -        spread_vsite4FDN(ia,a1,b1,c1,x,f,fshift,VirCorr,dxdf,pbc_null2,g);
- -        nd4FDN++;
- -        break;
- -      case F_VSITEN:
- -        inc = spread_vsiten(ia,ip,x,f,fshift,pbc_null2,g);
- -        ndN += inc;
- -        break;
- -      default:
- -        gmx_fatal(FARGS,"No such vsite type %d in %s, line %d",
- -                    ftype,__FILE__,__LINE__);
- -      }
- -      clear_rvec(f[ia[1]]);
- -      
- -      /* Increment loop variables */
- -      i  += inc;
- -      ia += inc;
- -      }
- -    }
- -  }
+ +
+ +    bPBCAll = (pbc_null != NULL && !vsite->bHaveChargeGroups);
+ +
+ +    /* this loop goes backwards to be able to build *
+ +     * higher type vsites from lower types         */
+ +    pbc_null2 = NULL;
+ +    vsite_pbc = NULL;
+ +    for (ftype = F_NRE-1; (ftype >= 0); ftype--)
+ +    {
+ +        if ((interaction_function[ftype].flags & IF_VSITE) &&
+ +            ilist[ftype].nr > 0)
+ +        {
+ +            nra    = interaction_function[ftype].nratoms;
+ +            inc    = 1 + nra;
+ +            nr     = ilist[ftype].nr;
+ +            ia     = ilist[ftype].iatoms;
+ +
+ +            if (bPBCAll)
+ +            {
+ +                pbc_null2 = pbc_null;
+ +            }
+ +            else if (pbc_null != NULL)
+ +            {
+ +                vsite_pbc = vsite->vsite_pbc_loc[ftype-F_VSITE2];
+ +            }
+ +
+ +            for (i = 0; i < nr; )
+ +            {
+ +                if (vsite_pbc != NULL)
+ +                {
+ +                    if (vsite_pbc[i/(1+nra)] > -2)
+ +                    {
+ +                        pbc_null2 = pbc_null;
+ +                    }
+ +                    else
+ +                    {
+ +                        pbc_null2 = NULL;
+ +                    }
+ +                }
+ +
+ +                tp   = ia[0];
+ +
+ +                /* Constants for constructing */
+ +                a1   = ip[tp].vsite.a;
+ +                /* Construct the vsite depending on type */
+ +                switch (ftype)
+ +                {
+ +                    case F_VSITE2:
+ +                        spread_vsite2(ia, a1, x, f, fshift, pbc_null2, g);
+ +                        break;
+ +                    case F_VSITE3:
+ +                        b1 = ip[tp].vsite.b;
+ +                        spread_vsite3(ia, a1, b1, x, f, fshift, pbc_null2, g);
+ +                        break;
+ +                    case F_VSITE3FD:
+ +                        b1 = ip[tp].vsite.b;
+ +                        spread_vsite3FD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
+ +                        break;
+ +                    case F_VSITE3FAD:
+ +                        b1 = ip[tp].vsite.b;
+ +                        spread_vsite3FAD(ia, a1, b1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
+ +                        break;
+ +                    case F_VSITE3OUT:
+ +                        b1 = ip[tp].vsite.b;
+ +                        c1 = ip[tp].vsite.c;
+ +                        spread_vsite3OUT(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
+ +                        break;
+ +                    case F_VSITE4FD:
+ +                        b1 = ip[tp].vsite.b;
+ +                        c1 = ip[tp].vsite.c;
+ +                        spread_vsite4FD(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
+ +                        break;
+ +                    case F_VSITE4FDN:
+ +                        b1 = ip[tp].vsite.b;
+ +                        c1 = ip[tp].vsite.c;
+ +                        spread_vsite4FDN(ia, a1, b1, c1, x, f, fshift, VirCorr, dxdf, pbc_null2, g);
+ +                        break;
+ +                    case F_VSITEN:
+ +                        inc = spread_vsiten(ia, ip, x, f, fshift, pbc_null2, g);
+ +                        break;
+ +                    default:
+ +                        gmx_fatal(FARGS, "No such vsite type %d in %s, line %d",
+ +                                  ftype, __FILE__, __LINE__);
+ +                }
+ +                clear_rvec(f[ia[1]]);
+ +
+ +                /* Increment loop variables */
+ +                i  += inc;
+ +                ia += inc;
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +void spread_vsite_f(FILE *log, gmx_vsite_t *vsite,
+ +                    rvec x[], rvec f[], rvec *fshift,
+ +                    gmx_bool VirCorr, matrix vir,
+ +                    t_nrnb *nrnb, t_idef *idef,
+ +                    int ePBC, gmx_bool bMolPBC, t_graph *g, matrix box,
+ +                    t_commrec *cr)
+ +{
+ +    t_pbc pbc, *pbc_null;
+ +    int   th;
+ +
+ +    /* We only need to do pbc when we have inter-cg vsites */
+ +    if ((DOMAINDECOMP(cr) || bMolPBC) && vsite->n_intercg_vsite)
+ +    {
+ +        /* This is wasting some CPU time as we now do this multiple times
+ +         * per MD step. But how often do we have vsites with full pbc?
+ +         */
+ +        pbc_null = set_pbc_dd(&pbc, ePBC, cr->dd, FALSE, box);
+ +    }
+ +    else
+ +    {
+ +        pbc_null = NULL;
+ +    }
+ +
+ +    if (DOMAINDECOMP(cr))
+ +    {
+ +        dd_clear_f_vsites(cr->dd, f);
+ +    }
+ +    else if (PARTDECOMP(cr) && vsite->vsitecomm != NULL)
+ +    {
+ +        pd_clear_nonlocal_constructs(vsite->vsitecomm, f);
+ +    }
+ +
+ +    if (vsite->nthreads == 1)
+ +    {
+ +        spread_vsite_f_thread(vsite,
+ +                              x, f, fshift,
+ +                              VirCorr, vsite->tdata[0].dxdf,
+ +                              idef->iparams, idef->il,
+ +                              g, pbc_null);
+ +    }
+ +    else
+ +    {
+ +        /* First spread the vsites that might depend on other vsites */
+ +        spread_vsite_f_thread(vsite,
+ +                              x, f, fshift,
+ +                              VirCorr, vsite->tdata[vsite->nthreads].dxdf,
+ +                              idef->iparams,
+ +                              vsite->tdata[vsite->nthreads].ilist,
+ +                              g, pbc_null);
+ +
+ +#pragma omp parallel num_threads(vsite->nthreads)
+ +        {
+ +            int   thread;
+ +            rvec *fshift_t;
+ +
+ +            thread = gmx_omp_get_thread_num();
+ +
+ +            if (thread == 0 || fshift == NULL)
+ +            {
+ +                fshift_t = fshift;
+ +            }
+ +            else
+ +            {
+ +                int i;
+ +
+ +                fshift_t = vsite->tdata[thread].fshift;
+ +
+ +                for (i = 0; i < SHIFTS; i++)
+ +                {
+ +                    clear_rvec(fshift_t[i]);
+ +                }
+ +            }
+ +
+ +            spread_vsite_f_thread(vsite,
+ +                                  x, f, fshift_t,
+ +                                  VirCorr, vsite->tdata[thread].dxdf,
+ +                                  idef->iparams,
+ +                                  vsite->tdata[thread].ilist,
+ +                                  g, pbc_null);
+ +        }
+ +
+ +        if (fshift != NULL)
+ +        {
+ +            int i;
+ +
+ +            for (th = 1; th < vsite->nthreads; th++)
+ +            {
+ +                for (i = 0; i < SHIFTS; i++)
+ +                {
+ +                    rvec_inc(fshift[i], vsite->tdata[th].fshift[i]);
+ +                }
+ +            }
+ +        }
+ +    }
   
       if (VirCorr)
       {
- -        int i,j;
+ +        int i, j;
   
- -        for(i=0; i<DIM; i++)
+ +        for (th = 0; th < (vsite->nthreads == 1 ? 1 : vsite->nthreads+1); th++)
           {
- -            for(j=0; j<DIM; j++)
+ +            for (i = 0; i < DIM; i++)
               {
- -                vir[i][j] += -0.5*dxdf[i][j];
+ +                for (j = 0; j < DIM; j++)
+ +                {
+ +                    vir[i][j] += -0.5*vsite->tdata[th].dxdf[i][j];
+ +                }
               }
           }
       }
   
- -  inc_nrnb(nrnb,eNR_VSITE2,   nd2     );
- -  inc_nrnb(nrnb,eNR_VSITE3,   nd3     );
- -  inc_nrnb(nrnb,eNR_VSITE3FD, nd3FD   );
- -  inc_nrnb(nrnb,eNR_VSITE3FAD,nd3FAD  );
- -  inc_nrnb(nrnb,eNR_VSITE3OUT,nd3OUT  );
- -  inc_nrnb(nrnb,eNR_VSITE4FD, nd4FD   );
- -  inc_nrnb(nrnb,eNR_VSITE4FDN,nd4FDN  );
- -  inc_nrnb(nrnb,eNR_VSITEN,   ndN     );
- -
- -  if (DOMAINDECOMP(cr)) {
- -    dd_move_f_vsites(cr->dd,f,fshift);
- -  } else if (vsite->bPDvsitecomm) {
- -    /* We only move forces here, and they are independent of shifts */
- -    move_construct_f(vsite->vsitecomm,f,cr);
- -  }
+ +    if (DOMAINDECOMP(cr))
+ +    {
+ +        dd_move_f_vsites(cr->dd, f, fshift);
+ +    }
+ +    else if (vsite->bPDvsitecomm)
+ +    {
+ +        /* We only move forces here, and they are independent of shifts */
+ +        move_construct_f(vsite->vsitecomm, f, cr);
+ +    }
+ +
+ +    inc_nrnb(nrnb, eNR_VSITE2,   vsite_count(idef->il, F_VSITE2));
+ +    inc_nrnb(nrnb, eNR_VSITE3,   vsite_count(idef->il, F_VSITE3));
+ +    inc_nrnb(nrnb, eNR_VSITE3FD, vsite_count(idef->il, F_VSITE3FD));
+ +    inc_nrnb(nrnb, eNR_VSITE3FAD, vsite_count(idef->il, F_VSITE3FAD));
+ +    inc_nrnb(nrnb, eNR_VSITE3OUT, vsite_count(idef->il, F_VSITE3OUT));
+ +    inc_nrnb(nrnb, eNR_VSITE4FD, vsite_count(idef->il, F_VSITE4FD));
+ +    inc_nrnb(nrnb, eNR_VSITE4FDN, vsite_count(idef->il, F_VSITE4FDN));
+ +    inc_nrnb(nrnb, eNR_VSITEN,   vsite_count(idef->il, F_VSITEN));
   }
   
   static int *atom2cg(t_block *cgs)
   {
- -  int *a2cg,cg,i;
- -  
- -  snew(a2cg,cgs->index[cgs->nr]);
- -  for(cg=0; cg<cgs->nr; cg++) {
- -    for(i=cgs->index[cg]; i<cgs->index[cg+1]; i++)
- -      a2cg[i] = cg;
- -  }
- -  
- -  return a2cg;
+ +    int *a2cg, cg, i;
+ +
+ +    snew(a2cg, cgs->index[cgs->nr]);
+ +    for (cg = 0; cg < cgs->nr; cg++)
+ +    {
+ +        for (i = cgs->index[cg]; i < cgs->index[cg+1]; i++)
+ +        {
+ +            a2cg[i] = cg;
+ +        }
+ +    }
+ +
+ +    return a2cg;
   }
   
- -static int count_intercg_vsite(gmx_mtop_t *mtop)
+ +static int count_intercg_vsite(gmx_mtop_t *mtop,
+ +                               gmx_bool   *bHaveChargeGroups)
   {
- -  int  mb,mt,ftype,nral,i,cg,a;
- -  gmx_molblock_t *molb;
- -  gmx_moltype_t *molt;
- -  int  *a2cg;
- -  t_ilist *il;
- -  t_iatom *ia;
- -  int  n_intercg_vsite;
- -
- -  n_intercg_vsite = 0;
- -  for(mb=0; mb<mtop->nmolblock; mb++) {
- -    molb = &mtop->molblock[mb];
- -    molt = &mtop->moltype[molb->type];
- -    a2cg = atom2cg(&molt->cgs);
- -    for(ftype=0; ftype<F_NRE; ftype++) {
- -      if (interaction_function[ftype].flags & IF_VSITE) {
- -      nral = NRAL(ftype);
- -      il = &molt->ilist[ftype];
- -      ia  = il->iatoms;
- -      for(i=0; i<il->nr; i+=1+nral) {
- -        cg = a2cg[ia[1+i]];
- -        for(a=1; a<nral; a++) {
- -          if (a2cg[ia[1+a]] != cg) {
- -            n_intercg_vsite += molb->nmol;
- -            break;
- -          }
- -        }
- -      }
- -      }
- -    }
- -    sfree(a2cg);
- -  }
- -
- -  return n_intercg_vsite;
+ +    int             mb, mt, ftype, nral, i, cg, a;
+ +    gmx_molblock_t *molb;
+ +    gmx_moltype_t  *molt;
+ +    int            *a2cg;
+ +    t_ilist        *il;
+ +    t_iatom        *ia;
+ +    int             n_intercg_vsite;
+ +
+ +    *bHaveChargeGroups = FALSE;
+ +
+ +    n_intercg_vsite = 0;
+ +    for (mb = 0; mb < mtop->nmolblock; mb++)
+ +    {
+ +        molb = &mtop->molblock[mb];
+ +        molt = &mtop->moltype[molb->type];
+ +
+ +        if (molt->cgs.nr < molt->atoms.nr)
+ +        {
+ +            *bHaveChargeGroups = TRUE;
+ +        }
+ +
+ +        a2cg = atom2cg(&molt->cgs);
+ +        for (ftype = 0; ftype < F_NRE; ftype++)
+ +        {
+ +            if (interaction_function[ftype].flags & IF_VSITE)
+ +            {
+ +                nral = NRAL(ftype);
+ +                il   = &molt->ilist[ftype];
+ +                ia   = il->iatoms;
+ +                for (i = 0; i < il->nr; i += 1+nral)
+ +                {
+ +                    cg = a2cg[ia[1+i]];
+ +                    for (a = 1; a < nral; a++)
+ +                    {
+ +                        if (a2cg[ia[1+a]] != cg)
+ +                        {
+ +                            n_intercg_vsite += molb->nmol;
+ +                            break;
+ +                        }
+ +                    }
+ +                }
+ +            }
+ +        }
+ +        sfree(a2cg);
+ +    }
+ +
+ +    return n_intercg_vsite;
   }
   
- -static int **get_vsite_pbc(t_iparams *iparams,t_ilist *ilist,
- -                         t_atom *atom,t_mdatoms *md,
- -                         t_block *cgs,int *a2cg)
+ +static int **get_vsite_pbc(t_iparams *iparams, t_ilist *ilist,
+ +                           t_atom *atom, t_mdatoms *md,
+ +                           t_block *cgs, int *a2cg)
   {
- -  int  ftype,nral,i,j,vsi,vsite,cg_v,cg_c,a,nc3=0;
- -  t_ilist *il;
- -  t_iatom *ia;
- -  int  **vsite_pbc,*vsite_pbc_f;
- -  char *pbc_set;
- -  gmx_bool bViteOnlyCG_and_FirstAtom;
- -
- -  /* Make an array that tells if the pbc of an atom is set */
- -  snew(pbc_set,cgs->index[cgs->nr]);
- -  /* PBC is set for all non vsites */
- -  for(a=0; a<cgs->index[cgs->nr]; a++) {
- -    if ((atom && atom[a].ptype != eptVSite) ||
- -      (md   && md->ptype[a]  != eptVSite)) {
- -      pbc_set[a] = 1;
- -    }
- -  }
- -
- -  snew(vsite_pbc,F_VSITEN-F_VSITE2+1);
- -  
- -  for(ftype=0; ftype<F_NRE; ftype++) {
- -    if (interaction_function[ftype].flags & IF_VSITE) {
- -      nral = NRAL(ftype);
- -      il = &ilist[ftype];
- -      ia  = il->iatoms;
- -
- -      snew(vsite_pbc[ftype-F_VSITE2],il->nr/(1+nral));
- -      vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
- -
- -      i = 0;
- -      while (i < il->nr) {
- -      vsi = i/(1+nral);
- -      vsite = ia[i+1];
- -      cg_v = a2cg[vsite];
- -      /* A value of -2 signals that this vsite and its contructing
- -       * atoms are all within the same cg, so no pbc is required.
- -       */
- -      vsite_pbc_f[vsi] = -2;
- -      /* Check if constructing atoms are outside the vsite's cg */
- -      nc3 = 0;
- -      if (ftype == F_VSITEN) {
- -        nc3 = 3*iparams[ia[i]].vsiten.n;
- -        for(j=0; j<nc3; j+=3) {
- -          if (a2cg[ia[i+j+2]] != cg_v)
- -            vsite_pbc_f[vsi] = -1;
- -        }
- -      } else {
- -        for(a=1; a<nral; a++) {
- -          if (a2cg[ia[i+1+a]] != cg_v)
- -            vsite_pbc_f[vsi] = -1;
- -        }
- -      }
- -      if (vsite_pbc_f[vsi] == -1) {
- -        /* Check if this is the first processed atom of a vsite only cg */
- -        bViteOnlyCG_and_FirstAtom = TRUE;
- -        for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
- -          /* Non-vsites already have pbc set, so simply check for pbc_set */
- -          if (pbc_set[a]) {
- -            bViteOnlyCG_and_FirstAtom = FALSE;
- -            break;
- -          }
- -        }
- -        if (bViteOnlyCG_and_FirstAtom) {
- -          /* First processed atom of a vsite only charge group.
- -           * The pbc of the input coordinates to construct_vsites
- -           * should be preserved.
- -           */
- -          vsite_pbc_f[vsi] = vsite;
- -        } else if (cg_v != a2cg[ia[1+i+1]]) {
- -          /* This vsite has a different charge group index
- -           * than it's first constructing atom
- -           * and the charge group has more than one atom,
- -           * search for the first normal particle
- -           * or vsite that already had its pbc defined.
- -           * If nothing is found, use full pbc for this vsite.
- -           */
- -          for(a=cgs->index[cg_v]; a<cgs->index[cg_v+1]; a++) {
- -            if (a != vsite && pbc_set[a]) {
- -              vsite_pbc_f[vsi] = a;
- -              if (gmx_debug_at)
- -                fprintf(debug,"vsite %d match pbc with atom %d\n",
- -                        vsite+1,a+1);
- -              break;
- -            }
- -          }
- -          if (gmx_debug_at)
- -            fprintf(debug,"vsite atom %d  cg %d - %d pbc atom %d\n",
- -                    vsite+1,cgs->index[cg_v]+1,cgs->index[cg_v+1],
- -                    vsite_pbc_f[vsi]+1);
- -        }
- -      }
- -      if (ftype == F_VSITEN) {
- -        /* The other entries in vsite_pbc_f are not used for center vsites */
- -        i += nc3;
- -      } else {
- -        i += 1+nral;
- -      }
- -      
- -      /* This vsite now has its pbc defined */
- -      pbc_set[vsite] = 1;
- -      }
- -    }
- -  }
- -
- -  sfree(pbc_set);
- -
- -  return vsite_pbc;
+ +    int      ftype, nral, i, j, vsi, vsite, cg_v, cg_c, a, nc3 = 0;
+ +    t_ilist *il;
+ +    t_iatom *ia;
+ +    int    **vsite_pbc, *vsite_pbc_f;
+ +    char    *pbc_set;
+ +    gmx_bool bViteOnlyCG_and_FirstAtom;
+ +
+ +    /* Make an array that tells if the pbc of an atom is set */
+ +    snew(pbc_set, cgs->index[cgs->nr]);
+ +    /* PBC is set for all non vsites */
+ +    for (a = 0; a < cgs->index[cgs->nr]; a++)
+ +    {
+ +        if ((atom && atom[a].ptype != eptVSite) ||
+ +            (md   && md->ptype[a]  != eptVSite))
+ +        {
+ +            pbc_set[a] = 1;
+ +        }
+ +    }
+ +
+ +    snew(vsite_pbc, F_VSITEN-F_VSITE2+1);
+ +
+ +    for (ftype = 0; ftype < F_NRE; ftype++)
+ +    {
+ +        if (interaction_function[ftype].flags & IF_VSITE)
+ +        {
+ +            nral = NRAL(ftype);
+ +            il   = &ilist[ftype];
+ +            ia   = il->iatoms;
+ +
+ +            snew(vsite_pbc[ftype-F_VSITE2], il->nr/(1+nral));
+ +            vsite_pbc_f = vsite_pbc[ftype-F_VSITE2];
+ +
+ +            i = 0;
+ +            while (i < il->nr)
+ +            {
+ +                vsi   = i/(1+nral);
+ +                vsite = ia[i+1];
+ +                cg_v  = a2cg[vsite];
+ +                /* A value of -2 signals that this vsite and its contructing
+ +                 * atoms are all within the same cg, so no pbc is required.
+ +                 */
+ +                vsite_pbc_f[vsi] = -2;
+ +                /* Check if constructing atoms are outside the vsite's cg */
+ +                nc3 = 0;
+ +                if (ftype == F_VSITEN)
+ +                {
+ +                    nc3 = 3*iparams[ia[i]].vsiten.n;
+ +                    for (j = 0; j < nc3; j += 3)
+ +                    {
+ +                        if (a2cg[ia[i+j+2]] != cg_v)
+ +                        {
+ +                            vsite_pbc_f[vsi] = -1;
+ +                        }
+ +                    }
+ +                }
+ +                else
+ +                {
+ +                    for (a = 1; a < nral; a++)
+ +                    {
+ +                        if (a2cg[ia[i+1+a]] != cg_v)
+ +                        {
+ +                            vsite_pbc_f[vsi] = -1;
+ +                        }
+ +                    }
+ +                }
+ +                if (vsite_pbc_f[vsi] == -1)
+ +                {
+ +                    /* Check if this is the first processed atom of a vsite only cg */
+ +                    bViteOnlyCG_and_FirstAtom = TRUE;
+ +                    for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
+ +                    {
+ +                        /* Non-vsites already have pbc set, so simply check for pbc_set */
+ +                        if (pbc_set[a])
+ +                        {
+ +                            bViteOnlyCG_and_FirstAtom = FALSE;
+ +                            break;
+ +                        }
+ +                    }
+ +                    if (bViteOnlyCG_and_FirstAtom)
+ +                    {
+ +                        /* First processed atom of a vsite only charge group.
+ +                         * The pbc of the input coordinates to construct_vsites
+ +                         * should be preserved.
+ +                         */
+ +                        vsite_pbc_f[vsi] = vsite;
+ +                    }
+ +                    else if (cg_v != a2cg[ia[1+i+1]])
+ +                    {
+ +                        /* This vsite has a different charge group index
+ +                         * than it's first constructing atom
+ +                         * and the charge group has more than one atom,
+ +                         * search for the first normal particle
+ +                         * or vsite that already had its pbc defined.
+ +                         * If nothing is found, use full pbc for this vsite.
+ +                         */
+ +                        for (a = cgs->index[cg_v]; a < cgs->index[cg_v+1]; a++)
+ +                        {
+ +                            if (a != vsite && pbc_set[a])
+ +                            {
+ +                                vsite_pbc_f[vsi] = a;
+ +                                if (gmx_debug_at)
+ +                                {
+ +                                    fprintf(debug, "vsite %d match pbc with atom %d\n",
+ +                                            vsite+1, a+1);
+ +                                }
+ +                                break;
+ +                            }
+ +                        }
+ +                        if (gmx_debug_at)
+ +                        {
+ +                            fprintf(debug, "vsite atom %d  cg %d - %d pbc atom %d\n",
+ +                                    vsite+1, cgs->index[cg_v]+1, cgs->index[cg_v+1],
+ +                                    vsite_pbc_f[vsi]+1);
+ +                        }
+ +                    }
+ +                }
+ +                if (ftype == F_VSITEN)
+ +                {
+ +                    /* The other entries in vsite_pbc_f are not used for center vsites */
+ +                    i += nc3;
+ +                }
+ +                else
+ +                {
+ +                    i += 1+nral;
+ +                }
+ +
+ +                /* This vsite now has its pbc defined */
+ +                pbc_set[vsite] = 1;
+ +            }
+ +        }
+ +    }
+ +
+ +    sfree(pbc_set);
+ +
+ +    return vsite_pbc;
   }
   
- -gmx_vsite_t *init_vsite(gmx_mtop_t *mtop,t_commrec *cr)
+ +
+ +gmx_vsite_t *init_vsite(gmx_mtop_t *mtop, t_commrec *cr,
+ +                        gmx_bool bSerial_NoPBC)
   {
- -  int nvsite,i;
- -  int *a2cg,cg;
- -  gmx_vsite_t *vsite;
- -  int mt;
- -  gmx_moltype_t *molt;
- -  
- -  /* check if there are vsites */
- -  nvsite = 0;
- -  for(i=0; i<F_NRE; i++) {
- -    if (interaction_function[i].flags & IF_VSITE) {
- -      nvsite += gmx_mtop_ftype_count(mtop,i);
- -    }
- -  }
- -
- -  if (nvsite == 0) {
- -    return NULL;
- -  }
- -
- -  snew(vsite,1);
- -
- -  vsite->n_intercg_vsite = count_intercg_vsite(mtop);
- -
- -  if (vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr)) {
- -    vsite->nvsite_pbc_molt = mtop->nmoltype;
- -    snew(vsite->vsite_pbc_molt,vsite->nvsite_pbc_molt);
- -    for(mt=0; mt<mtop->nmoltype; mt++) {
- -      molt = &mtop->moltype[mt];
- -      /* Make an atom to charge group index */
- -      a2cg = atom2cg(&molt->cgs);
- -      vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
- -                                              molt->ilist,
- -                                              molt->atoms.atom,NULL,
- -                                              &molt->cgs,a2cg);
- -      sfree(a2cg);
- -    }
- -
- -    snew(vsite->vsite_pbc_loc_nalloc,F_VSITEN-F_VSITE2+1);
- -    snew(vsite->vsite_pbc_loc       ,F_VSITEN-F_VSITE2+1);
- -  }
- -
- -
- -  return vsite;
+ +    int            nvsite, i;
+ +    int           *a2cg, cg;
+ +    gmx_vsite_t   *vsite;
+ +    int            mt;
+ +    gmx_moltype_t *molt;
+ +    int            nthreads;
+ +
+ +    /* check if there are vsites */
+ +    nvsite = 0;
+ +    for (i = 0; i < F_NRE; i++)
+ +    {
+ +        if (interaction_function[i].flags & IF_VSITE)
+ +        {
+ +            nvsite += gmx_mtop_ftype_count(mtop, i);
+ +        }
+ +    }
+ +
+ +    if (nvsite == 0)
+ +    {
+ +        return NULL;
+ +    }
+ +
+ +    snew(vsite, 1);
+ +
+ +    vsite->n_intercg_vsite = count_intercg_vsite(mtop,
+ +                                                 &vsite->bHaveChargeGroups);
+ +
+ +    /* If we don't have charge groups, the vsite follows its own pbc */
+ +    if (!bSerial_NoPBC &&
+ +        vsite->bHaveChargeGroups &&
+ +        vsite->n_intercg_vsite > 0 && DOMAINDECOMP(cr))
+ +    {
+ +        vsite->nvsite_pbc_molt = mtop->nmoltype;
+ +        snew(vsite->vsite_pbc_molt, vsite->nvsite_pbc_molt);
+ +        for (mt = 0; mt < mtop->nmoltype; mt++)
+ +        {
+ +            molt = &mtop->moltype[mt];
+ +            /* Make an atom to charge group index */
+ +            a2cg = atom2cg(&molt->cgs);
+ +            vsite->vsite_pbc_molt[mt] = get_vsite_pbc(mtop->ffparams.iparams,
+ +                                                      molt->ilist,
+ +                                                      molt->atoms.atom, NULL,
+ +                                                      &molt->cgs, a2cg);
+ +            sfree(a2cg);
+ +        }
+ +
+ +        snew(vsite->vsite_pbc_loc_nalloc, F_VSITEN-F_VSITE2+1);
+ +        snew(vsite->vsite_pbc_loc, F_VSITEN-F_VSITE2+1);
+ +    }
+ +
+ +    if (bSerial_NoPBC)
+ +    {
+ +        vsite->nthreads = 1;
+ +    }
+ +    else
+ +    {
+ +        vsite->nthreads = gmx_omp_nthreads_get(emntVSITE);
+ +    }
+ +    if (!bSerial_NoPBC)
+ +    {
+ +        /* We need one extra thread data structure for the overlap vsites */
+ +        snew(vsite->tdata, vsite->nthreads+1);
+ +    }
+ +
+ +    vsite->th_ind        = NULL;
+ +    vsite->th_ind_nalloc = 0;
+ +
+ +    return vsite;
   }
   
- -void set_vsite_top(gmx_vsite_t *vsite,gmx_localtop_t *top,t_mdatoms *md,
- -                 t_commrec *cr)
+ +static void prepare_vsite_thread(const t_ilist      *ilist,
+ +                                 gmx_vsite_thread_t *vsite_th)
   {
- -  int *a2cg;
+ +    int ftype;
+ +
+ +    for (ftype = 0; ftype < F_NRE; ftype++)
+ +    {
+ +        if (interaction_function[ftype].flags & IF_VSITE)
+ +        {
+ +            if (ilist[ftype].nr > vsite_th->ilist[ftype].nalloc)
+ +            {
+ +                vsite_th->ilist[ftype].nalloc = over_alloc_large(ilist[ftype].nr);
+ +                srenew(vsite_th->ilist[ftype].iatoms, vsite_th->ilist[ftype].nalloc);
+ +            }
+ +
+ +            vsite_th->ilist[ftype].nr = 0;
+ +        }
+ +    }
+ +}
+ +
+ +void split_vsites_over_threads(const t_ilist   *ilist,
+ +                               const t_mdatoms *mdatoms,
+ +                               gmx_bool         bLimitRange,
+ +                               gmx_vsite_t     *vsite)
+ +{
+ +    int      th;
+ +    int      vsite_atom_range, natperthread;
+ +    int     *th_ind;
+ +    int      ftype;
+ +    t_iatom *iat;
+ +    t_ilist *il_th;
+ +    int      nral1, inc, i, j;
+ +
+ +    if (vsite->nthreads == 1)
+ +    {
+ +        /* Nothing to do */
+ +        return;
+ +    }
+ +
+ +#pragma omp parallel for num_threads(vsite->nthreads) schedule(static)
+ +    for (th = 0; th < vsite->nthreads; th++)
+ +    {
+ +        prepare_vsite_thread(ilist, &vsite->tdata[th]);
+ +    }
+ +    /* Master threads does the (potential) overlap vsites */
+ +    prepare_vsite_thread(ilist, &vsite->tdata[vsite->nthreads]);
+ +
+ +    /* The current way of distributing the vsites over threads in primitive.
+ +     * We divide the atom range 0 - natoms_in_vsite uniformly over threads,
+ +     * without taking into account how the vsites are distributed.
+ +     * Without domain decomposition we bLimitRange=TRUE and we at least
+ +     * tighten the upper bound of the range (useful for common systems
+ +     * such as a vsite-protein in 3-site water).
+ +     */
+ +    if (bLimitRange)
+ +    {
+ +        vsite_atom_range = -1;
+ +        for (ftype = 0; ftype < F_NRE; ftype++)
+ +        {
+ +            if ((interaction_function[ftype].flags & IF_VSITE) &&
+ +                ftype != F_VSITEN)
+ +            {
+ +                nral1 = 1 + NRAL(ftype);
+ +                iat   = ilist[ftype].iatoms;
+ +                for (i = 0; i < ilist[ftype].nr; i += nral1)
+ +                {
+ +                    for (j = i+1; j < i+nral1; j++)
+ +                    {
+ +                        vsite_atom_range = max(vsite_atom_range, iat[j]);
+ +                    }
+ +                }
+ +            }
+ +        }
+ +        vsite_atom_range++;
+ +    }
+ +    else
+ +    {
+ +        vsite_atom_range = mdatoms->homenr;
+ +    }
+ +    natperthread = (vsite_atom_range + vsite->nthreads - 1)/vsite->nthreads;
   
- -  /* Make an atom to charge group index */
- -  a2cg = atom2cg(&top->cgs);
+ +    if (debug)
+ +    {
+ +        fprintf(debug, "virtual site thread dist: natoms %d, range %d, natperthread %d\n", mdatoms->nr, vsite_atom_range, natperthread);
+ +    }
   
- -  if (vsite->n_intercg_vsite > 0) {
- -    vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
- -                                       top->idef.il,NULL,md,
- -                                       &top->cgs,a2cg);
+ +    /* To simplify the vsite assignment, we make an index which tells us
+ +     * to which thread particles, both non-vsites and vsites, are assigned.
+ +     */
+ +    if (mdatoms->nr > vsite->th_ind_nalloc)
+ +    {
+ +        vsite->th_ind_nalloc = over_alloc_large(mdatoms->nr);
+ +        srenew(vsite->th_ind, vsite->th_ind_nalloc);
+ +    }
+ +    th_ind = vsite->th_ind;
+ +    th     = 0;
+ +    for (i = 0; i < mdatoms->nr; i++)
+ +    {
+ +        if (mdatoms->ptype[i] == eptVSite)
+ +        {
+ +            /* vsites are not assigned to a thread yet */
+ +            th_ind[i] = -1;
+ +        }
+ +        else
+ +        {
+ +            /* assign non-vsite particles to thread th */
+ +            th_ind[i] = th;
+ +        }
+ +        if (i == (th + 1)*natperthread && th < vsite->nthreads)
+ +        {
+ +            th++;
+ +        }
+ +    }
   
- -    if (PARTDECOMP(cr)) {
- -      snew(vsite->vsitecomm,1);
- -              vsite->bPDvsitecomm =
- -              setup_parallel_vsites(&(top->idef),cr,vsite->vsitecomm);
+ +    for (ftype = 0; ftype < F_NRE; ftype++)
+ +    {
+ +        if ((interaction_function[ftype].flags & IF_VSITE) &&
+ +            ftype != F_VSITEN)
+ +        {
+ +            nral1 = 1 + NRAL(ftype);
+ +            inc   = nral1;
+ +            iat   = ilist[ftype].iatoms;
+ +            for (i = 0; i < ilist[ftype].nr; )
+ +            {
+ +                th = iat[1+i]/natperthread;
+ +                /* We would like to assign this vsite the thread th,
+ +                 * but it might depend on atoms outside the atom range of th
+ +                 * or on another vsite not assigned to thread th.
+ +                 */
+ +                if (ftype != F_VSITEN)
+ +                {
+ +                    for (j = i+2; j < i+nral1; j++)
+ +                    {
+ +                        if (th_ind[iat[j]] != th)
+ +                        {
+ +                            /* Some constructing atoms are not assigned to
+ +                             * thread th, move this vsite to a separate batch.
+ +                             */
+ +                            th = vsite->nthreads;
+ +                        }
+ +                    }
+ +                }
+ +                else
+ +                {
+ +                    inc = iat[i];
+ +                    for (j = i+2; j < i+inc; j += 3)
+ +                    {
+ +                        if (th_ind[iat[j]] != th)
+ +                        {
+ +                            th = vsite->nthreads;
+ +                        }
+ +                    }
+ +                }
+ +                /* Copy this vsite to the thread data struct of thread th */
+ +                il_th = &vsite->tdata[th].ilist[ftype];
+ +                for (j = i; j < i+inc; j++)
+ +                {
+ +                    il_th->iatoms[il_th->nr++] = iat[j];
+ +                }
+ +                /* Update this vsite's thread index entry */
+ +                th_ind[iat[1+i]] = th;
+ +
+ +                i += inc;
+ +            }
+ +        }
       }
- -  }
   
- -  sfree(a2cg);
+ +    if (debug)
+ +    {
+ +        for (ftype = 0; ftype < F_NRE; ftype++)
+ +        {
+ +            if ((interaction_function[ftype].flags & IF_VSITE) &&
+ +                ilist[ftype].nr > 0)
+ +            {
+ +                fprintf(debug, "%-20s thread dist:",
+ +                        interaction_function[ftype].longname);
+ +                for (th = 0; th < vsite->nthreads+1; th++)
+ +                {
+ +                    fprintf(debug, " %4d", vsite->tdata[th].ilist[ftype].nr);
+ +                }
+ +                fprintf(debug, "\n");
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +void set_vsite_top(gmx_vsite_t *vsite, gmx_localtop_t *top, t_mdatoms *md,
+ +                   t_commrec *cr)
+ +{
+ +    int *a2cg;
+ +
+ +    if (vsite->n_intercg_vsite > 0)
+ +    {
+ +        if (vsite->bHaveChargeGroups)
+ +        {
+ +            /* Make an atom to charge group index */
+ +            a2cg                 = atom2cg(&top->cgs);
+ +            vsite->vsite_pbc_loc = get_vsite_pbc(top->idef.iparams,
+ +                                                 top->idef.il, NULL, md,
+ +                                                 &top->cgs, a2cg);
+ +            sfree(a2cg);
+ +        }
+ +
+ +        if (PARTDECOMP(cr))
+ +        {
+ +            snew(vsite->vsitecomm, 1);
+ +            vsite->bPDvsitecomm =
+ +                setup_parallel_vsites(&(top->idef), cr, vsite->vsitecomm);
+ +        }
+ +    }
+ +
+ +    if (vsite->nthreads > 1)
+ +    {
+ +        if (vsite->bHaveChargeGroups || PARTDECOMP(cr))
+ +        {
+ +            gmx_incons("Can not use threads virtual sites combined with charge groups or particle decomposition");
+ +        }
+ +
+ +        split_vsites_over_threads(top->idef.il, md, !DOMAINDECOMP(cr), vsite);
+ +    }
   }
author	Mark Abraham <mark.j.abraham@gmail.com>
	Tue, 19 Feb 2013 13:01:57 +0000 (14:01 +0100)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Wed, 20 Feb 2013 09:35:31 +0000 (10:35 +0100)
		1	2
src/gmxlib/copyrite.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/mdlib/vsite.c	patch \|	diff1 \|	diff2 \|	blob \| history