From: Roland Schulz <roland@utk.edu>
Date: Thu, 9 Jan 2014 18:48:12 +0000 (-0500)
Subject: Merge release-4-6 into master
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=00638ed3313bcf7cd5ebe37f807b7c62b858622a;p=alexxy%2Fgromacs.git

Merge release-4-6 into master

Conflicts:
	src/gromacs/gmxlib/checkpoint.c
	src/kernel/genalg.c
	src/programs/mdrun/md.c
             (moved change to trajectory_writing.c)

Change-Id: Ia9c8722c7fb80d48d8e40cec2af0cb93dc7e468e
---

00638ed3313bcf7cd5ebe37f807b7c62b858622a
diff --cc CMakeLists.txt
index f70661b19e,b95dab8bef..7d1bbadd8e
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@@ -843,10 -1055,21 +843,20 @@@ if (${CMAKE_SYSTEM_NAME} MATCHES "BlueG
      endif()
  endif()
  
+ 
+ option(GMX_NACL "Configure for Native Client builds" OFF)
+ if (GMX_NACL)
+   list(APPEND GMX_EXTRA_LIBRARIES nosys)
+   set(GMX_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lnosys")
+   set(GMX_NO_NICE 1)
+   set(GMX_NO_RENAME 1)
+ endif()
+ mark_as_advanced(GMX_NACL)
+ 
  if(GMX_FAHCORE)
 -  set(COREWRAP_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/../corewrap" CACHE STRING 
 +  set(COREWRAP_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/../corewrap" CACHE STRING
        "Path to swindirect.h")
    include_directories(${COREWRAP_INCLUDE_DIR})
 -  set_property(CACHE GMX_COOL_QUOTES PROPERTY VALUE OFF)
  endif(GMX_FAHCORE)
  
  # # # # # # # # # # NO MORE TESTS AFTER THIS LINE! # # # # # # # # # # #
diff --cc src/gromacs/fileio/futil.cpp
index 6cacc9562d,0000000000..4d5056676a
mode 100644,000000..100644
--- a/src/gromacs/fileio/futil.cpp
+++ b/src/gromacs/fileio/futil.cpp
@@@ -1,1158 -1,0 +1,1155 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * Copyright (c) 2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include "config.h"
 +#endif
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +
 +#include <sys/types.h>
 +#include <sys/stat.h>
 +#include <fcntl.h>
 +
 +#ifdef HAVE_DIRENT_H
 +/* POSIX */
 +#include <dirent.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <direct.h>
 +#include <io.h>
 +#endif
 +
 +/* Windows file stuff, only necessary for visual studio */
 +#ifdef _MSC_VER
 +#include <windows.h>
 +#endif
 +
 +#include "gromacs/legacyheaders/gmx_fatal.h"
 +#include "gromacs/legacyheaders/network.h"
 +#include "gromacs/legacyheaders/smalloc.h"
 +#include "gromacs/legacyheaders/string2.h"
 +
 +#include "gromacs/fileio/futil.h"
 +#include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/path.h"
 +#include "gromacs/utility/programinfo.h"
 +#include "gromacs/utility/stringutil.h"
 +
 +#include "gromacs/legacyheaders/thread_mpi/threads.h"
 +
 +/* we keep a linked list of all files opened through pipes (i.e.
 +   compressed or .gzipped files. This way we can distinguish between them
 +   without having to change the semantics of reading from/writing to files)
 + */
 +typedef struct t_pstack {
 +    FILE            *fp;
 +    struct t_pstack *prev;
 +} t_pstack;
 +
 +static t_pstack    *pstack      = NULL;
 +static gmx_bool     bUnbuffered = FALSE;
 +
 +/* this linked list is an intrinsically globally shared object, so we have
 +   to protect it with mutexes */
 +static tMPI_Thread_mutex_t pstack_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 +
 +void no_buffers(void)
 +{
 +    bUnbuffered = TRUE;
 +}
 +
 +void push_ps(FILE *fp)
 +{
 +    t_pstack *ps;
 +
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +
 +    snew(ps, 1);
 +    ps->fp   = fp;
 +    ps->prev = pstack;
 +    pstack   = ps;
 +
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +}
 +
 +#ifdef GMX_FAHCORE
 +/* don't use pipes!*/
 +#define popen fah_fopen
 +#define pclose fah_fclose
 +#define SKIP_FFOPS 1
 +#else
 +#ifdef ffclose
 +#undef ffclose
 +#endif
- #endif
- 
- #ifndef GMX_FAHCORE
- #ifndef HAVE_PIPES
++#if (!defined(HAVE_PIPES) && !defined(__native_client__))
 +static FILE *popen(const char *nm, const char *mode)
 +{
 +    gmx_impl("Sorry no pipes...");
 +
 +    return NULL;
 +}
 +
 +static int pclose(FILE *fp)
 +{
 +    gmx_impl("Sorry no pipes...");
 +
 +    return 0;
 +}
- #endif
- #endif
++#endif /* !defined(HAVE_PIPES) && !defined(__native_client__) */
++#endif /* GMX_FAHCORE */
 +
 +int ffclose(FILE *fp)
 +{
 +#ifdef SKIP_FFOPS
 +    return fclose(fp);
 +#else
 +    t_pstack *ps, *tmp;
 +    int       ret = 0;
 +
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +
 +    ps = pstack;
 +    if (ps == NULL)
 +    {
 +        if (fp != NULL)
 +        {
 +            ret = fclose(fp);
 +        }
 +    }
 +    else if (ps->fp == fp)
 +    {
 +        if (fp != NULL)
 +        {
 +            ret = pclose(fp);
 +        }
 +        pstack = pstack->prev;
 +        sfree(ps);
 +    }
 +    else
 +    {
 +        while ((ps->prev != NULL) && (ps->prev->fp != fp))
 +        {
 +            ps = ps->prev;
 +        }
 +        if ((ps->prev != NULL) && ps->prev->fp == fp)
 +        {
 +            if (ps->prev->fp != NULL)
 +            {
 +                ret = pclose(ps->prev->fp);
 +            }
 +            tmp      = ps->prev;
 +            ps->prev = ps->prev->prev;
 +            sfree(tmp);
 +        }
 +        else
 +        {
 +            if (fp != NULL)
 +            {
 +                ret = fclose(fp);
 +            }
 +        }
 +    }
 +
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +    return ret;
 +#endif
 +}
 +
 +
 +#ifdef rewind
 +#undef rewind
 +#endif
 +
 +void frewind(FILE *fp)
 +{
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +
 +    t_pstack *ps = pstack;
 +    while (ps != NULL)
 +    {
 +        if (ps->fp == fp)
 +        {
 +            fprintf(stderr, "Cannot rewind compressed file!\n");
 +            tMPI_Thread_mutex_unlock(&pstack_mutex);
 +            return;
 +        }
 +        ps = ps->prev;
 +    }
 +    rewind(fp);
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +}
 +
 +int gmx_fseek(FILE *stream, gmx_off_t offset, int whence)
 +{
 +#ifdef HAVE_FSEEKO
 +    return fseeko(stream, offset, whence);
 +#else
 +#ifdef HAVE__FSEEKI64
 +    return _fseeki64(stream, offset, whence);
 +#else
 +    return fseek(stream, offset, whence);
 +#endif
 +#endif
 +}
 +
 +gmx_off_t gmx_ftell(FILE *stream)
 +{
 +#ifdef HAVE_FSEEKO
 +    return ftello(stream);
 +#else
 +#ifdef HAVE__FSEEKI64
 +    return _ftelli64(stream);
 +#else
 +    return ftell(stream);
 +#endif
 +#endif
 +}
 +
 +
 +gmx_bool is_pipe(FILE *fp)
 +{
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +
 +    t_pstack *ps = pstack;
 +    while (ps != NULL)
 +    {
 +        if (ps->fp == fp)
 +        {
 +            tMPI_Thread_mutex_unlock(&pstack_mutex);
 +            return TRUE;
 +        }
 +        ps = ps->prev;
 +    }
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +    return FALSE;
 +}
 +
 +
 +static FILE *uncompress(const char *fn, const char *mode)
 +{
 +    FILE *fp;
 +    char  buf[256];
 +
 +    sprintf(buf, "uncompress -c < %s", fn);
 +    fprintf(stderr, "Going to execute '%s'\n", buf);
 +    if ((fp = popen(buf, mode)) == NULL)
 +    {
 +        gmx_open(fn);
 +    }
 +    push_ps(fp);
 +
 +    return fp;
 +}
 +
 +static FILE *gunzip(const char *fn, const char *mode)
 +{
 +    FILE *fp;
 +    char  buf[256];
 +
 +    sprintf(buf, "gunzip -c < %s", fn);
 +    fprintf(stderr, "Going to execute '%s'\n", buf);
 +    if ((fp = popen(buf, mode)) == NULL)
 +    {
 +        gmx_open(fn);
 +    }
 +    push_ps(fp);
 +
 +    return fp;
 +}
 +
 +gmx_bool gmx_fexist(const char *fname)
 +{
 +    FILE *test;
 +
 +    if (fname == NULL)
 +    {
 +        return FALSE;
 +    }
 +    test = fopen(fname, "r");
 +    if (test == NULL)
 +    {
 +        /*Windows doesn't allow fopen of directory - so we need to check this seperately */
 +        #ifdef GMX_NATIVE_WINDOWS
 +        DWORD attr = GetFileAttributes(fname);
 +        return (attr != INVALID_FILE_ATTRIBUTES) && (attr & FILE_ATTRIBUTE_DIRECTORY);
 +        #else
 +        return FALSE;
 +        #endif
 +    }
 +    else
 +    {
 +        fclose(test);
 +        return TRUE;
 +    }
 +}
 +
 +
 +gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr)
 +{
 +    gmx_bool bExist;
 +
 +    if (SIMMASTER(cr))
 +    {
 +        bExist = gmx_fexist(fname);
 +    }
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(bExist), &bExist, cr);
 +    }
 +    return bExist;
 +}
 +
 +gmx_bool gmx_eof(FILE *fp)
 +{
 +    char     data[4];
 +    gmx_bool beof;
 +
 +    if (is_pipe(fp))
 +    {
 +        return feof(fp);
 +    }
 +    else
 +    {
 +        if ((beof = fread(data, 1, 1, fp)) == 1)
 +        {
 +            gmx_fseek(fp, -1, SEEK_CUR);
 +        }
 +        return !beof;
 +    }
 +}
 +
 +static char *backup_fn(const char *file, int count_max)
 +{
 +    /* Use a reasonably low value for countmax; we might
 +     * generate 4-5 files in each round, and we dont
 +     * want to hit directory limits of 1024 or 2048 files.
 +     */
 +#define COUNTMAX 99
 +    int          i, count = 1;
 +    char        *directory, *fn;
 +    char        *buf;
 +
 +    if (count_max == -1)
 +    {
 +        count_max = COUNTMAX;
 +    }
 +
 +    smalloc(buf, GMX_PATH_MAX);
 +
 +    for (i = strlen(file)-1; ((i > 0) && (file[i] != DIR_SEPARATOR)); i--)
 +    {
 +        ;
 +    }
 +    /* Must check whether i > 0, i.e. whether there is a directory
 +     * in the file name. In that case we overwrite the / sign with
 +     * a '\0' to end the directory string .
 +     */
 +    if (i > 0)
 +    {
 +        directory    = gmx_strdup(file);
 +        directory[i] = '\0';
 +        fn           = gmx_strdup(file+i+1);
 +    }
 +    else
 +    {
 +        directory    = gmx_strdup(".");
 +        fn           = gmx_strdup(file);
 +    }
 +    do
 +    {
 +        sprintf(buf, "%s/#%s.%d#", directory, fn, count);
 +        count++;
 +    }
 +    while ((count <= count_max) && gmx_fexist(buf));
 +
 +    /* Arbitrarily bail out */
 +    if (count > count_max)
 +    {
 +        gmx_fatal(FARGS, "Won't make more than %d backups of %s for you.\n"
 +                  "The env.var. GMX_MAXBACKUP controls this maximum, -1 disables backups.",
 +                  count_max, fn);
 +    }
 +
 +    sfree(directory);
 +    sfree(fn);
 +
 +    return buf;
 +}
 +
 +gmx_bool make_backup(const char * name)
 +{
 +    char * env;
 +    int    count_max;
 +    char * backup;
 +
 +#ifdef GMX_FAHCORE
 +    return FALSE; /* skip making backups */
 +#else
 +
 +    if (gmx_fexist(name))
 +    {
 +        env = getenv("GMX_MAXBACKUP");
 +        if (env != NULL)
 +        {
 +            count_max = strtol(env, NULL, 10);
 +            if (count_max == -1)
 +            {
 +                /* Do not make backups and possibly overwrite old files */
 +                return TRUE;
 +            }
 +        }
 +        else
 +        {
 +            /* Use the default maximum */
 +            count_max = -1;
 +        }
 +        backup = backup_fn(name, count_max);
 +        if (rename(name, backup) == 0)
 +        {
 +            fprintf(stderr, "\nBack Off! I just backed up %s to %s\n",
 +                    name, backup);
 +        }
 +        else
 +        {
 +            fprintf(stderr, "Sorry couldn't backup %s to %s\n", name, backup);
 +            return FALSE;
 +        }
 +        sfree(backup);
 +    }
 +    return TRUE;
 +#endif
 +}
 +
 +FILE *ffopen(const char *file, const char *mode)
 +{
 +#ifdef SKIP_FFOPS
 +    return fopen(file, mode);
 +#else
 +    FILE    *ff = NULL;
 +    char     buf[256], *bufsize = 0, *ptr;
 +    gmx_bool bRead;
 +    int      bs;
 +
 +    if (file == NULL)
 +    {
 +        return NULL;
 +    }
 +
 +    if (mode[0] == 'w')
 +    {
 +        make_backup(file);
 +    }
 +    where();
 +
 +    bRead = (mode[0] == 'r' && mode[1] != '+');
 +    strcpy(buf, file);
 +    if (!bRead || gmx_fexist(buf))
 +    {
 +        if ((ff = fopen(buf, mode)) == NULL)
 +        {
 +            gmx_file(buf);
 +        }
 +        where();
 +        /* Check whether we should be using buffering (default) or not
 +         * (for debugging)
 +         */
 +        if (bUnbuffered || ((bufsize = getenv("LOG_BUFS")) != NULL))
 +        {
 +            /* Check whether to use completely unbuffered */
 +            if (bUnbuffered)
 +            {
 +                bs = 0;
 +            }
 +            else
 +            {
 +                bs = strtol(bufsize, NULL, 10);
 +            }
 +            if (bs <= 0)
 +            {
 +                setbuf(ff, NULL);
 +            }
 +            else
 +            {
 +                snew(ptr, bs+8);
 +                if (setvbuf(ff, ptr, _IOFBF, bs) != 0)
 +                {
 +                    gmx_file("Buffering File");
 +                }
 +            }
 +        }
 +        where();
 +    }
 +    else
 +    {
 +        sprintf(buf, "%s.Z", file);
 +        if (gmx_fexist(buf))
 +        {
 +            ff = uncompress(buf, mode);
 +        }
 +        else
 +        {
 +            sprintf(buf, "%s.gz", file);
 +            if (gmx_fexist(buf))
 +            {
 +                ff = gunzip(buf, mode);
 +            }
 +            else
 +            {
 +                gmx_file(file);
 +            }
 +        }
 +    }
 +    return ff;
 +#endif
 +}
 +
 +/* Our own implementation of dirent-like functionality to scan directories. */
 +struct gmx_directory
 +{
 +#ifdef HAVE_DIRENT_H
 +    DIR  *               dirent_handle;
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    intptr_t             windows_handle;
 +    struct _finddata_t   finddata;
 +    int                  first;
 +#else
 +    int                  dummy;
 +#endif
 +};
 +
 +
 +int
 +gmx_directory_open(gmx_directory_t *p_gmxdir, const char *dirname)
 +{
 +    struct gmx_directory *  gmxdir;
 +    int                     rc;
 +
 +    snew(gmxdir, 1);
 +
 +    *p_gmxdir = gmxdir;
 +
 +#ifdef HAVE_DIRENT_H
 +    if ( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
 +    {
 +        rc = 0;
 +    }
 +    else
 +    {
 +        sfree(gmxdir);
 +        *p_gmxdir = NULL;
 +        rc        = EINVAL;
 +    }
 +#elif (defined GMX_NATIVE_WINDOWS)
 +
 +    if (dirname != NULL && strlen(dirname) > 0)
 +    {
 +        char *     tmpname;
 +        size_t     namelength;
 +        int        len;
 +
 +        len = strlen(dirname);
 +        snew(tmpname, len+3);
 +
 +        strncpy(tmpname, dirname, len+1);
 +
 +        /* Remove possible trailing directory separator */
 +        if (tmpname[len] == '/' || tmpname[len] == '\\')
 +        {
 +            tmpname[len] = '\0';
 +        }
 +
 +        /* Add wildcard */
 +        strcat(tmpname, "/*");
 +
 +        gmxdir->first = 1;
 +        if ( (gmxdir->windows_handle = _findfirst(tmpname, &gmxdir->finddata)) > 0L)
 +        {
 +            rc = 0;
 +        }
 +        else
 +        {
 +            if (errno == EINVAL)
 +            {
 +                sfree(gmxdir);
 +                *p_gmxdir = NULL;
 +                rc        = EINVAL;
 +            }
 +            else
 +            {
 +                rc        = 0;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        rc = EINVAL;
 +    }
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n"
 +              "In the very unlikely event this is not a compile-time mistake you could consider\n"
 +              "implementing support for your platform in futil.c, but contact the developers\n"
 +              "to make sure it's really necessary!\n");
 +    rc = -1;
 +#endif
 +    return rc;
 +}
 +
 +
 +int
 +gmx_directory_nextfile(gmx_directory_t gmxdir, char *name, int maxlength_name)
 +{
 +    int                     rc;
 +
 +#ifdef HAVE_DIRENT_H
 +
 +    struct dirent *         direntp_large;
 +    struct dirent *         p;
 +
 +
 +    if (gmxdir != NULL && gmxdir->dirent_handle != NULL)
 +    {
 +        /* On some platforms no space is present for d_name in dirent.
 +         * Since d_name is guaranteed to be the last entry, allocating
 +         * extra space for dirent will allow more size for d_name.
 +         * GMX_MAX_PATH should always be >= the max possible d_name.
 +         */
 +        smalloc(direntp_large, sizeof(*direntp_large) + GMX_PATH_MAX);
 +        rc = readdir_r(gmxdir->dirent_handle, direntp_large, &p);
 +
 +        if (p != NULL && rc == 0)
 +        {
 +            strncpy(name, direntp_large->d_name, maxlength_name);
 +        }
 +        else
 +        {
 +            name[0] = '\0';
 +            rc      = ENOENT;
 +        }
 +        sfree(direntp_large);
 +    }
 +    else
 +    {
 +        name[0] = '\0';
 +        rc      = EINVAL;
 +    }
 +
 +#elif (defined GMX_NATIVE_WINDOWS)
 +
 +    if (gmxdir != NULL)
 +    {
 +        if (gmxdir->windows_handle <= 0)
 +        {
 +
 +            name[0] = '\0';
 +            rc      = ENOENT;
 +        }
 +        else if (gmxdir->first == 1)
 +        {
 +            strncpy(name, gmxdir->finddata.name, maxlength_name);
 +            rc            = 0;
 +            gmxdir->first = 0;
 +        }
 +        else
 +        {
 +            if (_findnext(gmxdir->windows_handle, &gmxdir->finddata) == 0)
 +            {
 +                strncpy(name, gmxdir->finddata.name, maxlength_name);
 +                rc      = 0;
 +            }
 +            else
 +            {
 +                name[0] = '\0';
 +                rc      = ENOENT;
 +            }
 +        }
 +    }
 +
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
 +    rc = -1;
 +#endif
 +    return rc;
 +}
 +
 +
 +int
 +gmx_directory_close(gmx_directory_t gmxdir)
 +{
 +    int                     rc;
 +#ifdef HAVE_DIRENT_H
 +    rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    rc = (gmxdir != NULL) ? _findclose(gmxdir->windows_handle) : EINVAL;
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
 +    rc = -1;
 +#endif
 +
 +    sfree(gmxdir);
 +    return rc;
 +}
 +
 +
 +static gmx_bool search_subdirs(const char *parent, char *libdir)
 +{
 +    char    *ptr;
 +    gmx_bool found;
 +
 +    /* Search a few common subdirectory names for the gromacs library dir */
 +    sprintf(libdir, "%s%cshare%ctop%cgurgle.dat", parent,
 +            DIR_SEPARATOR, DIR_SEPARATOR, DIR_SEPARATOR);
 +    found = gmx_fexist(libdir);
 +    if (!found)
 +    {
 +        sprintf(libdir, "%s%c%s%cgurgle.dat", parent,
 +                DIR_SEPARATOR, GMXLIB_SEARCH_DIR, DIR_SEPARATOR);
 +        found = gmx_fexist(libdir);
 +    }
 +
 +    /* Remove the gurgle.dat part from libdir if we found something */
 +    if (found)
 +    {
 +        ptr  = strrchr(libdir, DIR_SEPARATOR); /* slash or backslash always present, no check necessary */
 +        *ptr = '\0';
 +    }
 +    return found;
 +}
 +
 +
 +void get_libdir(char *libdir)
 +{
 +    // TODO: There is a potential buffer overrun in the way libdir is passed in.
 +    try
 +    {
 +        std::string fullPath = gmx::ProgramInfo::getInstance().fullBinaryPath();
 +
 +        // If running directly from the build tree, try to use the source
 +        // directory.
 +#if (defined CMAKE_SOURCE_DIR && defined CMAKE_BINARY_DIR)
 +        // TODO: Consider adding Path::startsWith(), as this may not work as
 +        // expected.
 +        if (gmx::startsWith(fullPath, CMAKE_BINARY_DIR))
 +        {
 +            if (search_subdirs(CMAKE_SOURCE_DIR, libdir))
 +            {
 +                return;
 +            }
 +        }
 +#endif
 +
 +        // Remove the executable name
 +        fullPath = gmx::Path::splitToPathAndFilename(fullPath).first;
 +        // Now we have the full path to the gromacs executable.
 +        // Use it to find the library dir.
 +        while (!fullPath.empty())
 +        {
 +            if (search_subdirs(fullPath.c_str(), libdir))
 +            {
 +                return;
 +            }
 +            fullPath = gmx::Path::splitToPathAndFilename(fullPath).first;
 +        }
 +    }
 +    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 +
 +    /* End of smart searching. If we didn't find it in our parent tree,
 +     * or if the program name wasn't set, at least try some standard
 +     * locations before giving up, in case we are running from e.g.
 +     * a users home directory. This only works on unix or cygwin...
 +     */
 +    bool found = false;
 +#ifndef GMX_NATIVE_WINDOWS
 +    if (!found)
 +    {
 +        found = search_subdirs("/usr/local", libdir);
 +    }
 +    if (!found)
 +    {
 +        found = search_subdirs("/usr", libdir);
 +    }
 +    if (!found)
 +    {
 +        found = search_subdirs("/opt", libdir);
 +    }
 +#endif
 +    if (!found)
 +    {
 +        strcpy(libdir, GMXLIB_FALLBACK);
 +    }
 +}
 +
 +
 +char *low_gmxlibfn(const char *file, gmx_bool bAddCWD, gmx_bool bFatal)
 +{
 +    char    *ret;
 +    char    *lib, *dir;
 +    char     buf[1024];
 +    char     libpath[GMX_PATH_MAX];
 +    gmx_bool env_is_set = FALSE;
 +    char    *s, tmppath[GMX_PATH_MAX];
 +
 +    /* GMXLIB can be a path now */
 +    lib = getenv("GMXLIB");
 +    if (lib != NULL)
 +    {
 +        env_is_set = TRUE;
 +        strncpy(libpath, lib, GMX_PATH_MAX);
 +    }
 +    else
 +    {
 +        get_libdir(libpath);
 +    }
 +
 +    ret = NULL;
 +    if (bAddCWD && gmx_fexist(file))
 +    {
 +        ret = gmx_strdup(file);
 +    }
 +    else
 +    {
 +        strncpy(tmppath, libpath, GMX_PATH_MAX);
 +        s = tmppath;
 +        while (ret == NULL && (dir = gmx_strsep(&s, PATH_SEPARATOR)) != NULL)
 +        {
 +            sprintf(buf, "%s%c%s", dir, DIR_SEPARATOR, file);
 +            if (gmx_fexist(buf))
 +            {
 +                ret = gmx_strdup(buf);
 +            }
 +        }
 +        if (ret == NULL && bFatal)
 +        {
 +            if (env_is_set)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Library file %s not found %sin your GMXLIB path.",
 +                          file, bAddCWD ? "in current dir nor " : "");
 +            }
 +            else
 +            {
 +                gmx_fatal(FARGS,
 +                          "Library file %s not found %sin default directories.\n"
 +                          "(You can set the directories to search with the GMXLIB path variable)",
 +                          file, bAddCWD ? "in current dir nor " : "");
 +            }
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +
 +
 +
 +
 +FILE *low_libopen(const char *file, gmx_bool bFatal)
 +{
 +    FILE *ff;
 +    char *fn;
 +
 +    fn = low_gmxlibfn(file, TRUE, bFatal);
 +
 +    if (fn == NULL)
 +    {
 +        ff = NULL;
 +    }
 +    else
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "Opening library file %s\n", fn);
 +        }
 +        ff = fopen(fn, "r");
 +    }
 +    sfree(fn);
 +
 +    return ff;
 +}
 +
 +char *gmxlibfn(const char *file)
 +{
 +    return low_gmxlibfn(file, TRUE, TRUE);
 +}
 +
 +FILE *libopen(const char *file)
 +{
 +    return low_libopen(file, TRUE);
 +}
 +
 +void gmx_tmpnam(char *buf)
 +{
 +    int i, len, fd;
 +
 +    if ((len = strlen(buf)) < 7)
 +    {
 +        gmx_fatal(FARGS, "Buf passed to gmx_tmpnam must be at least 7 bytes long");
 +    }
 +    for (i = len-6; (i < len); i++)
 +    {
 +        buf[i] = 'X';
 +    }
 +    /* mktemp is dangerous and we should use mkstemp instead, but
 +     * since windows doesnt support it we have to separate the cases.
 +     * 20090307: mktemp deprecated, use iso c++ _mktemp instead.
 +     */
 +#ifdef GMX_NATIVE_WINDOWS
 +    _mktemp(buf);
 +#else
 +    fd = mkstemp(buf);
 +
 +    switch (fd)
 +    {
 +        case EINVAL:
 +            gmx_fatal(FARGS, "Invalid template %s for mkstemp", buf);
 +            break;
 +        case EEXIST:
 +            gmx_fatal(FARGS, "mkstemp created existing file", buf);
 +            break;
 +        case EACCES:
 +            gmx_fatal(FARGS, "Permission denied for opening %s", buf);
 +            break;
 +        default:
 +            break;
 +    }
 +    close(fd);
 +#endif
 +    /* name in Buf should now be OK */
 +}
 +
 +int gmx_truncatefile(char *path, gmx_off_t length)
 +{
 +#ifdef _MSC_VER
 +    /* Microsoft visual studio does not have "truncate" */
 +    HANDLE        fh;
 +    LARGE_INTEGER win_length;
 +
 +    win_length.QuadPart = length;
 +
 +    fh = CreateFile(path, GENERIC_READ | GENERIC_WRITE, 0, NULL,
 +                    OPEN_EXISTING, 0, NULL);
 +    SetFilePointerEx(fh, win_length, NULL, FILE_BEGIN);
 +    SetEndOfFile(fh);
 +    CloseHandle(fh);
 +
 +    return 0;
 +#else
 +    return truncate(path, length);
 +#endif
 +}
 +
 +
 +int gmx_file_rename(const char *oldname, const char *newname)
 +{
 +#ifndef GMX_NATIVE_WINDOWS
 +    /* under unix, rename() is atomic (at least, it should be). */
 +    return rename(oldname, newname);
 +#else
 +    if (MoveFileEx(oldname, newname,
 +                   MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH))
 +    {
 +        return 0;
 +    }
 +    else
 +    {
 +        return 1;
 +    }
 +#endif
 +}
 +
 +int gmx_file_copy(const char *oldname, const char *newname, gmx_bool copy_if_empty)
 +{
 +/* the full copy buffer size: */
 +#define FILECOPY_BUFSIZE (1<<16)
 +    FILE *in  = NULL;
 +    FILE *out = NULL;
 +    char *buf;
 +
 +    snew(buf, FILECOPY_BUFSIZE);
 +
 +    in = fopen(oldname, "rb");
 +    if (!in)
 +    {
 +        goto error;
 +    }
 +
 +    /* If we don't copy when empty, we postpone opening the file
 +       until we're actually ready to write. */
 +    if (copy_if_empty)
 +    {
 +        out = fopen(newname, "wb");
 +        if (!out)
 +        {
 +            goto error;
 +        }
 +    }
 +
 +    while (!feof(in))
 +    {
 +        size_t nread;
 +
 +        nread = fread(buf, sizeof(char), FILECOPY_BUFSIZE, in);
 +        if (nread > 0)
 +        {
 +            size_t ret;
 +            if (!out)
 +            {
 +                /* so this is where we open when copy_if_empty is false:
 +                   here we know we read something. */
 +                out = fopen(newname, "wb");
 +                if (!out)
 +                {
 +                    goto error;
 +                }
 +            }
 +            ret = fwrite(buf, sizeof(char), nread, out);
 +            if (ret != nread)
 +            {
 +                goto error;
 +            }
 +        }
 +        if (ferror(in))
 +        {
 +            goto error;
 +        }
 +    }
 +    sfree(buf);
 +    fclose(in);
 +    fclose(out);
 +    return 0;
 +error:
 +    sfree(buf);
 +    if (in)
 +    {
 +        fclose(in);
 +    }
 +    if (out)
 +    {
 +        fclose(out);
 +    }
 +    return 1;
 +#undef FILECOPY_BUFSIZE
 +}
 +
 +
 +int gmx_fsync(FILE *fp)
 +{
 +    int rc = 0;
 +
 +#ifdef GMX_FAHCORE
 +    /* the fahcore defines its own os-independent fsync */
 +    rc = fah_fsync(fp);
 +#else /* GMX_FAHCORE */
 +    {
 +        int fn = -1;
 +
 +        /* get the file number */
 +#if defined(HAVE_FILENO)
 +        fn = fileno(fp);
 +#elif defined(HAVE__FILENO)
 +        fn = _fileno(fp);
 +#endif
 +
 +        /* do the actual fsync */
 +        if (fn >= 0)
 +        {
 +#if (defined(HAVE_FSYNC))
 +            rc = fsync(fn);
 +#elif (defined(HAVE__COMMIT))
 +            rc = _commit(fn);
 +#endif
 +        }
 +    }
 +#endif /* GMX_FAHCORE */
 +
 +    /* We check for these error codes this way because POSIX requires them
 +       to be defined, and using anything other than macros is unlikely: */
 +#ifdef EINTR
 +    /* we don't want to report an error just because fsync() caught a signal.
 +       For our purposes, we can just ignore this. */
 +    if (rc && errno == EINTR)
 +    {
 +        rc = 0;
 +    }
 +#endif
 +#ifdef EINVAL
 +    /* we don't want to report an error just because we tried to fsync()
 +       stdout, a socket or a pipe. */
 +    if (rc && errno == EINVAL)
 +    {
 +        rc = 0;
 +    }
 +#endif
 +    return rc;
 +}
 +
 +void gmx_chdir(const char *directory)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    int rc = _chdir(directory);
 +#else
 +    int rc = chdir(directory);
 +#endif
 +    if (rc != 0)
 +    {
 +        gmx_fatal(FARGS, "Cannot change directory to '%s'. Reason: %s",
 +                  directory, strerror(errno));
 +    }
 +}
 +
 +void gmx_getcwd(char *buffer, size_t size)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    char *pdum = _getcwd(buffer, size);
 +#else
 +    char *pdum = getcwd(buffer, size);
 +#endif
 +    if (pdum == NULL)
 +    {
 +        gmx_fatal(FARGS, "Cannot get working directory. Reason: %s",
 +                  strerror(errno));
 +    }
 +}
diff --cc src/gromacs/fileio/trajectory_writing.c
index e109fc75ed,0000000000..d148c03574
mode 100644,000000..100644
--- a/src/gromacs/fileio/trajectory_writing.c
+++ b/src/gromacs/fileio/trajectory_writing.c
@@@ -1,184 -1,0 +1,188 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "sim_util.h"
 +#include "mdrun.h"
 +#include "confio.h"
 +#include "trajectory_writing.h"
 +#include "mdoutf.h"
 +
 +#include "gromacs/timing/wallcycle.h"
 +
 +void
 +do_trajectory_writing(FILE           *fplog,
 +                      t_commrec      *cr,
 +                      int             nfile,
 +                      const t_filenm  fnm[],
 +                      gmx_int64_t     step,
 +                      gmx_int64_t     step_rel,
 +                      double          t,
 +                      t_inputrec     *ir,
 +                      t_state        *state,
 +                      t_state        *state_global,
 +                      gmx_mtop_t     *top_global,
 +                      t_forcerec     *fr,
 +                      gmx_update_t    upd,
 +                      gmx_mdoutf_t   *outf,
 +                      t_mdebin       *mdebin,
 +                      gmx_ekindata_t *ekind,
 +                      rvec           *f,
 +                      rvec           *f_global,
 +                      gmx_wallcycle_t wcycle,
 +                      gmx_rng_t       mcrng,
 +                      int            *nchkpt,
 +                      gmx_bool        bCPT,
 +                      gmx_bool        bRerunMD,
 +                      gmx_bool        bLastStep,
 +                      gmx_bool        bDoConfOut,
 +                      gmx_bool        bSumEkinhOld
 +                      )
 +{
 +    int   mdof_flags;
 +    int   n_xtc    = -1;
 +    rvec *x_xtc    = NULL;
 +
 +    mdof_flags = 0;
 +    if (do_per_step(step, ir->nstxout))
 +    {
 +        mdof_flags |= MDOF_X;
 +    }
 +    if (do_per_step(step, ir->nstvout))
 +    {
 +        mdof_flags |= MDOF_V;
 +    }
 +    if (do_per_step(step, ir->nstfout))
 +    {
 +        mdof_flags |= MDOF_F;
 +    }
 +    if (do_per_step(step, ir->nstxtcout))
 +    {
 +        mdof_flags |= MDOF_XTC;
 +    }
 +    if (bCPT)
 +    {
 +        mdof_flags |= MDOF_CPT;
 +    }
 +    ;
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +    if (bLastStep)
 +    {
 +        /* Enforce writing positions and velocities at end of run */
 +        mdof_flags |= (MDOF_X | MDOF_V);
 +    }
 +#endif
 +#ifdef GMX_FAHCORE
 +    if (MASTER(cr))
 +    {
 +        fcReportProgress( ir->nsteps, step );
 +    }
 +
++#if defined(__native_client__)
++    fcCheckin(MASTER(cr));
++#endif
++
 +    /* sync bCPT and fc record-keeping */
 +    if (bCPT && MASTER(cr))
 +    {
 +        fcRequestCheckPoint();
 +    }
 +#endif
 +
 +    if (mdof_flags != 0)
 +    {
 +        wallcycle_start(wcycle, ewcTRAJ);
 +        if (bCPT)
 +        {
 +            if (state->flags & (1<<estLD_RNG))
 +            {
 +                get_stochd_state(upd, state);
 +            }
 +            if (state->flags  & (1<<estMC_RNG))
 +            {
 +                get_mc_state(mcrng, state);
 +            }
 +            if (MASTER(cr))
 +            {
 +                if (bSumEkinhOld)
 +                {
 +                    state_global->ekinstate.bUpToDate = FALSE;
 +                }
 +                else
 +                {
 +                    update_ekinstate(&state_global->ekinstate, ekind);
 +                    state_global->ekinstate.bUpToDate = TRUE;
 +                }
 +                update_energyhistory(&state_global->enerhist, mdebin);
 +            }
 +        }
 +        write_traj(fplog, cr, outf, mdof_flags, top_global,
 +                   step, t, state, state_global, f, f_global, &n_xtc, &x_xtc);
 +        if (bCPT)
 +        {
 +            (*nchkpt)++;
 +            bCPT = FALSE;
 +        }
 +        debug_gmx();
 +        if (bLastStep && step_rel == ir->nsteps &&
 +            bDoConfOut && MASTER(cr) &&
 +            !bRerunMD)
 +        {
 +            /* x and v have been collected in write_traj,
 +             * because a checkpoint file will always be written
 +             * at the last step.
 +             */
 +            fprintf(stderr, "\nWriting final coordinates.\n");
 +            if (fr->bMolPBC)
 +            {
 +                /* Make molecules whole only for confout writing */
 +                do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, state_global->x);
 +            }
 +            write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm),
 +                                *top_global->name, top_global,
 +                                state_global->x, state_global->v,
 +                                ir->ePBC, state->box);
 +            debug_gmx();
 +        }
 +        wallcycle_stop(wcycle, ewcTRAJ);
 +    }
 +}
diff --cc src/gromacs/gmxlib/checkpoint.c
index fe19557760,0000000000..5a3fb148b0
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/checkpoint.c
+++ b/src/gromacs/gmxlib/checkpoint.c
@@@ -1,2561 -1,0 +1,2567 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 2008,2009,2010,2011,2012,2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +
 +/* The source code in this file should be thread-safe.
 +   Please keep it that way. */
 +
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +/* _chsize_s */
 +#include <io.h>
 +#include <sys/locking.h>
 +#endif
 +
 +#include "copyrite.h"
 +#include "names.h"
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "network.h"
 +#include "gmx_random.h"
 +#include "checkpoint.h"
++#include "main.h"
 +#include "string2.h"
 +#include <fcntl.h>
 +
 +#include "gromacs/fileio/filenm.h"
 +#include "gromacs/fileio/futil.h"
 +#include "gromacs/fileio/gmxfio.h"
 +#include "gromacs/fileio/xdrf.h"
 +#include "gromacs/fileio/xdr_datatype.h"
 +
 +#include "buildinfo.h"
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#define CPT_MAGIC1 171817
 +#define CPT_MAGIC2 171819
 +#define CPTSTRLEN 1024
 +
 +#ifdef GMX_DOUBLE
 +#define GMX_CPT_BUILD_DP 1
 +#else
 +#define GMX_CPT_BUILD_DP 0
 +#endif
 +
 +/* cpt_version should normally only be changed
 + * when the header of footer format changes.
 + * The state data format itself is backward and forward compatible.
 + * But old code can not read a new entry that is present in the file
 + * (but can read a new format when new entries are not present).
 + */
 +static const int cpt_version = 15;
 +
 +
 +const char *est_names[estNR] =
 +{
 +    "FE-lambda",
 +    "box", "box-rel", "box-v", "pres_prev",
 +    "nosehoover-xi", "thermostat-integral",
 +    "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
 +    "disre_initf", "disre_rm3tav",
 +    "orire_initf", "orire_Dtav",
 +    "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev", "fep_state", "MC-rng", "MC-rng-i"
 +};
 +
 +enum {
 +    eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR
 +};
 +
 +const char *eeks_names[eeksNR] =
 +{
 +    "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
 +    "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC", "Vscale_NHC", "Ekin_Total"
 +};
 +
 +enum {
 +    eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
 +    eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
 +    eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM,
 +    eenhENERGY_DELTA_H_NN,
 +    eenhENERGY_DELTA_H_LIST,
 +    eenhENERGY_DELTA_H_STARTTIME,
 +    eenhENERGY_DELTA_H_STARTLAMBDA,
 +    eenhNR
 +};
 +
 +const char *eenh_names[eenhNR] =
 +{
 +    "energy_n", "energy_aver", "energy_sum", "energy_nsum",
 +    "energy_sum_sim", "energy_nsum_sim",
 +    "energy_nsteps", "energy_nsteps_sim",
 +    "energy_delta_h_nn",
 +    "energy_delta_h_list",
 +    "energy_delta_h_start_time",
 +    "energy_delta_h_start_lambda"
 +};
 +
 +/* free energy history variables -- need to be preserved over checkpoint */
 +enum {
 +    edfhBEQUIL, edfhNATLAMBDA, edfhWLHISTO, edfhWLDELTA, edfhSUMWEIGHTS, edfhSUMDG, edfhSUMMINVAR, edfhSUMVAR,
 +    edfhACCUMP, edfhACCUMM, edfhACCUMP2, edfhACCUMM2, edfhTIJ, edfhTIJEMP, edfhNR
 +};
 +/* free energy history variable names  */
 +const char *edfh_names[edfhNR] =
 +{
 +    "bEquilibrated", "N_at_state", "Wang-Landau Histogram", "Wang-Landau Delta", "Weights", "Free Energies", "minvar", "variance",
 +    "accumulated_plus", "accumulated_minus", "accumulated_plus_2",  "accumulated_minus_2", "Tij", "Tij_empirical"
 +};
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +static int
 +gmx_wintruncate(const char *filename, __int64 size)
 +{
 +#ifdef GMX_FAHCORE
 +    /*we do this elsewhere*/
 +    return 0;
 +#else
 +    FILE *fp;
 +    int   rc;
 +
 +    fp = fopen(filename, "rb+");
 +
 +    if (fp == NULL)
 +    {
 +        return -1;
 +    }
 +
 +    return _chsize_s( fileno(fp), size);
 +#endif
 +}
 +#endif
 +
 +
 +enum {
 +    ecprREAL, ecprRVEC, ecprMATRIX
 +};
 +
 +enum {
 +    cptpEST, cptpEEKS, cptpEENH, cptpEDFH
 +};
 +/* enums for the different components of checkpoint variables, replacing the hard coded ones.
 +   cptpEST - state variables.
 +   cptpEEKS - Kinetic energy state variables.
 +   cptpEENH - Energy history state variables.
 +   cptpEDFH - free energy history variables.
 + */
 +
 +
 +static const char *st_names(int cptp, int ecpt)
 +{
 +    switch (cptp)
 +    {
 +        case cptpEST: return est_names [ecpt]; break;
 +        case cptpEEKS: return eeks_names[ecpt]; break;
 +        case cptpEENH: return eenh_names[ecpt]; break;
 +        case cptpEDFH: return edfh_names[ecpt]; break;
 +    }
 +
 +    return NULL;
 +}
 +
 +static void cp_warning(FILE *fp)
 +{
 +    fprintf(fp, "\nWARNING: Checkpoint file is corrupted or truncated\n\n");
 +}
 +
 +static void cp_error()
 +{
 +    gmx_fatal(FARGS, "Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
 +}
 +
 +static void do_cpt_string_err(XDR *xd, gmx_bool bRead, const char *desc, char **s, FILE *list)
 +{
 +    bool_t res = 0;
 +
 +    if (bRead)
 +    {
 +        snew(*s, CPTSTRLEN);
 +    }
 +    res = xdr_string(xd, s, CPTSTRLEN);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list, "%s = %s\n", desc, *s);
 +        sfree(*s);
 +    }
 +}
 +
 +static int do_cpt_int(XDR *xd, const char *desc, int *i, FILE *list)
 +{
 +    bool_t res = 0;
 +
 +    res = xdr_int(xd, i);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        fprintf(list, "%s = %d\n", desc, *i);
 +    }
 +    return 0;
 +}
 +
 +static int do_cpt_u_chars(XDR *xd, const char *desc, int n, unsigned char *i, FILE *list)
 +{
 +    bool_t res = 1;
 +    int    j;
 +    if (list)
 +    {
 +        fprintf(list, "%s = ", desc);
 +    }
 +    for (j = 0; j < n && res; j++)
 +    {
 +        res &= xdr_u_char(xd, &i[j]);
 +        if (list)
 +        {
 +            fprintf(list, "%02x", i[j]);
 +        }
 +    }
 +    if (list)
 +    {
 +        fprintf(list, "\n");
 +    }
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +
 +    return 0;
 +}
 +
 +static void do_cpt_int_err(XDR *xd, const char *desc, int *i, FILE *list)
 +{
 +    if (do_cpt_int(xd, desc, i, list) < 0)
 +    {
 +        cp_error();
 +    }
 +}
 +
 +static void do_cpt_step_err(XDR *xd, const char *desc, gmx_int64_t *i, FILE *list)
 +{
 +    bool_t res = 0;
 +    char   buf[STEPSTRSIZE];
 +
 +    res = xdr_int64(xd, i);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list, "%s = %s\n", desc, gmx_step_str(*i, buf));
 +    }
 +}
 +
 +static void do_cpt_double_err(XDR *xd, const char *desc, double *f, FILE *list)
 +{
 +    bool_t res = 0;
 +
 +    res = xdr_double(xd, f);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list, "%s = %f\n", desc, *f);
 +    }
 +}
 +
 +static void do_cpt_real_err(XDR *xd, real *f)
 +{
 +    bool_t res = 0;
 +
 +#ifdef GMX_DOUBLE
 +    res = xdr_double(xd, f);
 +#else
 +    res = xdr_float(xd, f);
 +#endif
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +}
 +
 +static void do_cpt_n_rvecs_err(XDR *xd, const char *desc, int n, rvec f[], FILE *list)
 +{
 +    int i, j;
 +
 +    for (i = 0; i < n; i++)
 +    {
 +        for (j = 0; j < DIM; j++)
 +        {
 +            do_cpt_real_err(xd, &f[i][j]);
 +        }
 +    }
 +
 +    if (list)
 +    {
 +        pr_rvecs(list, 0, desc, f, n);
 +    }
 +}
 +
 +/* If nval >= 0, nval is used; on read this should match the passed value.
 + * If nval n<0, *nptr is used; on read the value is stored in nptr
 + */
 +static int do_cpte_reals_low(XDR *xd, int cptp, int ecpt, int sflags,
 +                             int nval, int *nptr, real **v,
 +                             FILE *list, int erealtype)
 +{
 +    bool_t  res = 0;
 +#ifndef GMX_DOUBLE
 +    int     dtc = xdr_datatype_float;
 +#else
 +    int     dtc = xdr_datatype_double;
 +#endif
 +    real   *vp, *va = NULL;
 +    float  *vf;
 +    double *vd;
 +    int     nf, dt, i;
 +
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            nf = nval;
 +        }
 +        else
 +        {
 +            if (nptr == NULL)
 +            {
 +                gmx_incons("*ntpr=NULL in do_cpte_reals_low");
 +            }
 +            nf = *nptr;
 +        }
 +    }
 +    res = xdr_int(xd, &nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            if (nf != nval)
 +            {
 +                gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), nval, nf);
 +            }
 +        }
 +        else
 +        {
 +            *nptr = nf;
 +        }
 +    }
 +    dt  = dtc;
 +    res = xdr_int(xd, &dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        fprintf(stderr, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                st_names(cptp, ecpt), xdr_datatype_names[dtc],
 +                xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va, nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v, nf);
 +        }
 +        vp = *v;
 +    }
 +    if (dt == xdr_datatype_float)
 +    {
 +        if (dtc == xdr_datatype_float)
 +        {
 +            vf = (float *)vp;
 +        }
 +        else
 +        {
 +            snew(vf, nf);
 +        }
 +        res = xdr_vector(xd, (char *)vf, nf,
 +                         (unsigned int)sizeof(float), (xdrproc_t)xdr_float);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_float)
 +        {
 +            for (i = 0; i < nf; i++)
 +            {
 +                vp[i] = vf[i];
 +            }
 +            sfree(vf);
 +        }
 +    }
 +    else
 +    {
 +        if (dtc == xdr_datatype_double)
 +        {
 +            vd = (double *)vp;
 +        }
 +        else
 +        {
 +            snew(vd, nf);
 +        }
 +        res = xdr_vector(xd, (char *)vd, nf,
 +                         (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_double)
 +        {
 +            for (i = 0; i < nf; i++)
 +            {
 +                vp[i] = vd[i];
 +            }
 +            sfree(vd);
 +        }
 +    }
 +
 +    if (list)
 +    {
 +        switch (erealtype)
 +        {
 +            case ecprREAL:
 +                pr_reals(list, 0, st_names(cptp, ecpt), vp, nf);
 +                break;
 +            case ecprRVEC:
 +                pr_rvecs(list, 0, st_names(cptp, ecpt), (rvec *)vp, nf/3);
 +                break;
 +            default:
 +                gmx_incons("Unknown checkpoint real type");
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +
 +/* This function stores n along with the reals for reading,
 + * but on reading it assumes that n matches the value in the checkpoint file,
 + * a fatal error is generated when this is not the case.
 + */
 +static int do_cpte_reals(XDR *xd, int cptp, int ecpt, int sflags,
 +                         int n, real **v, FILE *list)
 +{
 +    return do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, v, list, ecprREAL);
 +}
 +
 +/* This function does the same as do_cpte_reals,
 + * except that on reading it ignores the passed value of *n
 + * and stored the value read from the checkpoint file in *n.
 + */
 +static int do_cpte_n_reals(XDR *xd, int cptp, int ecpt, int sflags,
 +                           int *n, real **v, FILE *list)
 +{
 +    return do_cpte_reals_low(xd, cptp, ecpt, sflags, -1, n, v, list, ecprREAL);
 +}
 +
 +static int do_cpte_real(XDR *xd, int cptp, int ecpt, int sflags,
 +                        real *r, FILE *list)
 +{
 +    int n;
 +
 +    return do_cpte_reals_low(xd, cptp, ecpt, sflags, 1, NULL, &r, list, ecprREAL);
 +}
 +
 +static int do_cpte_ints(XDR *xd, int cptp, int ecpt, int sflags,
 +                        int n, int **v, FILE *list)
 +{
 +    bool_t res = 0;
 +    int    dtc = xdr_datatype_int;
 +    int   *vp, *va = NULL;
 +    int    nf, dt, i;
 +
 +    nf  = n;
 +    res = xdr_int(xd, &nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && v != NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
 +    }
 +    dt  = dtc;
 +    res = xdr_int(xd, &dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS, "Type mismatch for state entry %s, code type is %s, file type is %s\n",
 +                  st_names(cptp, ecpt), xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)) || v == NULL)
 +    {
 +        snew(va, nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v, nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd, (char *)vp, nf,
 +                     (unsigned int)sizeof(int), (xdrproc_t)xdr_int);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_ivec(list, 0, st_names(cptp, ecpt), vp, nf, TRUE);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpte_int(XDR *xd, int cptp, int ecpt, int sflags,
 +                       int *i, FILE *list)
 +{
 +    return do_cpte_ints(xd, cptp, ecpt, sflags, 1, &i, list);
 +}
 +
 +static int do_cpte_doubles(XDR *xd, int cptp, int ecpt, int sflags,
 +                           int n, double **v, FILE *list)
 +{
 +    bool_t  res = 0;
 +    int     dtc = xdr_datatype_double;
 +    double *vp, *va = NULL;
 +    int     nf, dt, i;
 +
 +    nf  = n;
 +    res = xdr_int(xd, &nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
 +    }
 +    dt  = dtc;
 +    res = xdr_int(xd, &dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS, "Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                  st_names(cptp, ecpt), xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va, nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v, nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd, (char *)vp, nf,
 +                     (unsigned int)sizeof(double), (xdrproc_t)xdr_double);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_doubles(list, 0, st_names(cptp, ecpt), vp, nf);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpte_double(XDR *xd, int cptp, int ecpt, int sflags,
 +                          double *r, FILE *list)
 +{
 +    return do_cpte_doubles(xd, cptp, ecpt, sflags, 1, &r, list);
 +}
 +
 +
 +static int do_cpte_rvecs(XDR *xd, int cptp, int ecpt, int sflags,
 +                         int n, rvec **v, FILE *list)
 +{
 +    int n3;
 +
 +    return do_cpte_reals_low(xd, cptp, ecpt, sflags,
 +                             n*DIM, NULL, (real **)v, list, ecprRVEC);
 +}
 +
 +static int do_cpte_matrix(XDR *xd, int cptp, int ecpt, int sflags,
 +                          matrix v, FILE *list)
 +{
 +    real *vr;
 +    real  ret;
 +
 +    vr  = (real *)&(v[0][0]);
 +    ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
 +                            DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
 +
 +    if (list && ret == 0)
 +    {
 +        pr_rvecs(list, 0, st_names(cptp, ecpt), v, DIM);
 +    }
 +
 +    return ret;
 +}
 +
 +
 +static int do_cpte_nmatrix(XDR *xd, int cptp, int ecpt, int sflags,
 +                           int n, real **v, FILE *list)
 +{
 +    int   i;
 +    real *vr;
 +    real  ret, reti;
 +    char  name[CPTSTRLEN];
 +
 +    ret = 0;
 +    if (v == NULL)
 +    {
 +        snew(v, n);
 +    }
 +    for (i = 0; i < n; i++)
 +    {
 +        reti = 0;
 +        vr   = v[i];
 +        reti = do_cpte_reals_low(xd, cptp, ecpt, sflags, n, NULL, &(v[i]), NULL, ecprREAL);
 +        if (list && reti == 0)
 +        {
 +            sprintf(name, "%s[%d]", st_names(cptp, ecpt), i);
 +            pr_reals(list, 0, name, v[i], n);
 +        }
 +        if (reti == 0)
 +        {
 +            ret = 0;
 +        }
 +    }
 +    return ret;
 +}
 +
 +static int do_cpte_matrices(XDR *xd, int cptp, int ecpt, int sflags,
 +                            int n, matrix **v, FILE *list)
 +{
 +    bool_t  res = 0;
 +    matrix *vp, *va = NULL;
 +    real   *vr;
 +    int     nf, i, j, k;
 +    int     ret;
 +
 +    nf  = n;
 +    res = xdr_int(xd, &nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS, "Count mismatch for state entry %s, code count is %d, file count is %d\n", st_names(cptp, ecpt), n, nf);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va, nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v, nf);
 +        }
 +        vp = *v;
 +    }
 +    snew(vr, nf*DIM*DIM);
 +    for (i = 0; i < nf; i++)
 +    {
 +        for (j = 0; j < DIM; j++)
 +        {
 +            for (k = 0; k < DIM; k++)
 +            {
 +                vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
 +            }
 +        }
 +    }
 +    ret = do_cpte_reals_low(xd, cptp, ecpt, sflags,
 +                            nf*DIM*DIM, NULL, &vr, NULL, ecprMATRIX);
 +    for (i = 0; i < nf; i++)
 +    {
 +        for (j = 0; j < DIM; j++)
 +        {
 +            for (k = 0; k < DIM; k++)
 +            {
 +                vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
 +            }
 +        }
 +    }
 +    sfree(vr);
 +
 +    if (list && ret == 0)
 +    {
 +        for (i = 0; i < nf; i++)
 +        {
 +            pr_rvecs(list, 0, st_names(cptp, ecpt), vp[i], DIM);
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return ret;
 +}
 +
 +static void do_cpt_header(XDR *xd, gmx_bool bRead, int *file_version,
 +                          char **version, char **btime, char **buser, char **bhost,
 +                          int *double_prec,
 +                          char **fprog, char **ftime,
 +                          int *eIntegrator, int *simulation_part,
 +                          gmx_int64_t *step, double *t,
 +                          int *nnodes, int *dd_nc, int *npme,
 +                          int *natoms, int *ngtc, int *nnhpres, int *nhchainlength,
 +                          int *nlambda, int *flags_state,
 +                          int *flags_eks, int *flags_enh, int *flags_dfh,
 +                          int *nED,
 +                          FILE *list)
 +{
 +    bool_t res = 0;
 +    int    magic;
 +    int    idum = 0;
 +    int    i;
 +    char  *fhost;
 +
 +    if (bRead)
 +    {
 +        magic = -1;
 +    }
 +    else
 +    {
 +        magic = CPT_MAGIC1;
 +    }
 +    res = xdr_int(xd, &magic);
 +    if (res == 0)
 +    {
 +        gmx_fatal(FARGS, "The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
 +    }
 +    if (magic != CPT_MAGIC1)
 +    {
 +        gmx_fatal(FARGS, "Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
 +                  "The checkpoint file is corrupted or not a checkpoint file",
 +                  magic, CPT_MAGIC1);
 +    }
 +    if (!bRead)
 +    {
 +        snew(fhost, 255);
- #ifdef HAVE_UNISTD_H
-         if (gethostname(fhost, 255) != 0)
-         {
-             sprintf(fhost, "unknown");
-         }
- #else
-         sprintf(fhost, "unknown");
- #endif
++        gmx_gethostname(fhost, 255);
 +    }
 +    do_cpt_string_err(xd, bRead, "GROMACS version", version, list);
 +    do_cpt_string_err(xd, bRead, "GROMACS build time", btime, list);
 +    do_cpt_string_err(xd, bRead, "GROMACS build user", buser, list);
 +    do_cpt_string_err(xd, bRead, "GROMACS build host", bhost, list);
 +    do_cpt_string_err(xd, bRead, "generating program", fprog, list);
 +    do_cpt_string_err(xd, bRead, "generation time", ftime, list);
 +    *file_version = cpt_version;
 +    do_cpt_int_err(xd, "checkpoint file version", file_version, list);
 +    if (*file_version > cpt_version)
 +    {
 +        gmx_fatal(FARGS, "Attempting to read a checkpoint file of version %d with code of version %d\n", *file_version, cpt_version);
 +    }
 +    if (*file_version >= 13)
 +    {
 +        do_cpt_int_err(xd, "GROMACS double precision", double_prec, list);
 +    }
 +    else
 +    {
 +        *double_prec = -1;
 +    }
 +    if (*file_version >= 12)
 +    {
 +        do_cpt_string_err(xd, bRead, "generating host", &fhost, list);
 +        if (list == NULL)
 +        {
 +            sfree(fhost);
 +        }
 +    }
 +    do_cpt_int_err(xd, "#atoms", natoms, list);
 +    do_cpt_int_err(xd, "#T-coupling groups", ngtc, list);
 +    if (*file_version >= 10)
 +    {
 +        do_cpt_int_err(xd, "#Nose-Hoover T-chains", nhchainlength, list);
 +    }
 +    else
 +    {
 +        *nhchainlength = 1;
 +    }
 +    if (*file_version >= 11)
 +    {
 +        do_cpt_int_err(xd, "#Nose-Hoover T-chains for barostat ", nnhpres, list);
 +    }
 +    else
 +    {
 +        *nnhpres = 0;
 +    }
 +    if (*file_version >= 14)
 +    {
 +        do_cpt_int_err(xd, "# of total lambda states ", nlambda, list);
 +    }
 +    else
 +    {
 +        *nlambda = 0;
 +    }
 +    do_cpt_int_err(xd, "integrator", eIntegrator, list);
 +    if (*file_version >= 3)
 +    {
 +        do_cpt_int_err(xd, "simulation part #", simulation_part, list);
 +    }
 +    else
 +    {
 +        *simulation_part = 1;
 +    }
 +    if (*file_version >= 5)
 +    {
 +        do_cpt_step_err(xd, "step", step, list);
 +    }
 +    else
 +    {
 +        do_cpt_int_err(xd, "step", &idum, list);
 +        *step = idum;
 +    }
 +    do_cpt_double_err(xd, "t", t, list);
 +    do_cpt_int_err(xd, "#PP-nodes", nnodes, list);
 +    idum = 1;
 +    do_cpt_int_err(xd, "dd_nc[x]", dd_nc ? &(dd_nc[0]) : &idum, list);
 +    do_cpt_int_err(xd, "dd_nc[y]", dd_nc ? &(dd_nc[1]) : &idum, list);
 +    do_cpt_int_err(xd, "dd_nc[z]", dd_nc ? &(dd_nc[2]) : &idum, list);
 +    do_cpt_int_err(xd, "#PME-only nodes", npme, list);
 +    do_cpt_int_err(xd, "state flags", flags_state, list);
 +    if (*file_version >= 4)
 +    {
 +        do_cpt_int_err(xd, "ekin data flags", flags_eks, list);
 +        do_cpt_int_err(xd, "energy history flags", flags_enh, list);
 +    }
 +    else
 +    {
 +        *flags_eks   = 0;
 +        *flags_enh   = (*flags_state >> (estORIRE_DTAV+1));
 +        *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
 +                                         (1<<(estORIRE_DTAV+2)) |
 +                                         (1<<(estORIRE_DTAV+3))));
 +    }
 +    if (*file_version >= 14)
 +    {
 +        do_cpt_int_err(xd, "df history flags", flags_dfh, list);
 +    }
 +    else
 +    {
 +        *flags_dfh = 0;
 +    }
 +
 +    if (*file_version >= 15)
 +    {
 +        do_cpt_int_err(xd, "ED data sets", nED, list);
 +    }
 +    else
 +    {
 +        *nED = 0;
 +    }
 +}
 +
 +static int do_cpt_footer(XDR *xd, int file_version)
 +{
 +    bool_t res = 0;
 +    int    magic;
 +
 +    if (file_version >= 2)
 +    {
 +        magic = CPT_MAGIC2;
 +        res   = xdr_int(xd, &magic);
 +        if (res == 0)
 +        {
 +            cp_error();
 +        }
 +        if (magic != CPT_MAGIC2)
 +        {
 +            return -1;
 +        }
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpt_state(XDR *xd, gmx_bool bRead,
 +                        int fflags, t_state *state,
 +                        gmx_bool bReadRNG, FILE *list)
 +{
 +    int    sflags;
 +    int  **rng_p, **rngi_p;
 +    int    i;
 +    int    ret;
 +    int    nnht, nnhtp;
 +
 +    ret = 0;
 +
 +    nnht  = state->nhchainlength*state->ngtc;
 +    nnhtp = state->nhchainlength*state->nnhpres;
 +
 +    if (bReadRNG)
 +    {
 +        rng_p  = (int **)&state->ld_rng;
 +        rngi_p = &state->ld_rngi;
 +    }
 +    else
 +    {
 +        /* Do not read the RNG data */
 +        rng_p  = NULL;
 +        rngi_p = NULL;
 +    }
 +
 +    if (bRead) /* we need to allocate space for dfhist if we are reading */
 +    {
 +        init_df_history(&state->dfhist, state->dfhist.nlambda);
 +    }
 +
 +    /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
 +
 +    sflags = state->flags;
 +    for (i = 0; (i < estNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +                case estLAMBDA:  ret      = do_cpte_reals(xd, cptpEST, i, sflags, efptNR, &(state->lambda), list); break;
 +                case estFEPSTATE: ret     = do_cpte_int (xd, cptpEST, i, sflags, &state->fep_state, list); break;
 +                case estBOX:     ret      = do_cpte_matrix(xd, cptpEST, i, sflags, state->box, list); break;
 +                case estBOX_REL: ret      = do_cpte_matrix(xd, cptpEST, i, sflags, state->box_rel, list); break;
 +                case estBOXV:    ret      = do_cpte_matrix(xd, cptpEST, i, sflags, state->boxv, list); break;
 +                case estPRES_PREV: ret    = do_cpte_matrix(xd, cptpEST, i, sflags, state->pres_prev, list); break;
 +                case estSVIR_PREV:  ret   = do_cpte_matrix(xd, cptpEST, i, sflags, state->svir_prev, list); break;
 +                case estFVIR_PREV:  ret   = do_cpte_matrix(xd, cptpEST, i, sflags, state->fvir_prev, list); break;
 +                case estNH_XI:   ret      = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_xi, list); break;
 +                case estNH_VXI:  ret      = do_cpte_doubles(xd, cptpEST, i, sflags, nnht, &state->nosehoover_vxi, list); break;
 +                case estNHPRES_XI:   ret  = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_xi, list); break;
 +                case estNHPRES_VXI:  ret  = do_cpte_doubles(xd, cptpEST, i, sflags, nnhtp, &state->nhpres_vxi, list); break;
 +                case estTC_INT:  ret      = do_cpte_doubles(xd, cptpEST, i, sflags, state->ngtc, &state->therm_integral, list); break;
 +                case estVETA:    ret      = do_cpte_real(xd, cptpEST, i, sflags, &state->veta, list); break;
 +                case estVOL0:    ret      = do_cpte_real(xd, cptpEST, i, sflags, &state->vol0, list); break;
 +                case estX:       ret      = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->x, list); break;
 +                case estV:       ret      = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->v, list); break;
 +                case estSDX:     ret      = do_cpte_rvecs(xd, cptpEST, i, sflags, state->natoms, &state->sd_X, list); break;
 +                case estLD_RNG:  ret      = do_cpte_ints(xd, cptpEST, i, sflags, state->nrng, rng_p, list); break;
 +                case estLD_RNGI: ret      = do_cpte_ints(xd, cptpEST, i, sflags, state->nrngi, rngi_p, list); break;
 +                case estMC_RNG:  ret      = do_cpte_ints(xd, cptpEST, i, sflags, state->nmcrng, (int **)&state->mc_rng, list); break;
 +                case estMC_RNGI: ret      = do_cpte_ints(xd, cptpEST, i, sflags, 1, &state->mc_rngi, list); break;
 +                case estDISRE_INITF:  ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.disre_initf, list); break;
 +                case estDISRE_RM3TAV: ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.ndisrepairs, &state->hist.disre_rm3tav, list); break;
 +                case estORIRE_INITF:  ret = do_cpte_real (xd, cptpEST, i, sflags, &state->hist.orire_initf, list); break;
 +                case estORIRE_DTAV:   ret = do_cpte_n_reals(xd, cptpEST, i, sflags, &state->hist.norire_Dtav, &state->hist.orire_Dtav, list); break;
 +                default:
 +                    gmx_fatal(FARGS, "Unknown state entry %d\n"
 +                              "You are probably reading a new checkpoint file with old code", i);
 +            }
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +static int do_cpt_ekinstate(XDR *xd, int fflags, ekinstate_t *ekins,
 +                            FILE *list)
 +{
 +    int  i;
 +    int  ret;
 +
 +    ret = 0;
 +
 +    for (i = 0; (i < eeksNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +
 +                case eeksEKIN_N:     ret = do_cpte_int(xd, cptpEEKS, i, fflags, &ekins->ekin_n, list); break;
 +                case eeksEKINH:     ret  = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh, list); break;
 +                case eeksEKINF:      ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinf, list); break;
 +                case eeksEKINO:      ret = do_cpte_matrices(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinh_old, list); break;
 +                case eeksEKINTOTAL:  ret = do_cpte_matrix(xd, cptpEEKS, i, fflags, ekins->ekin_total, list); break;
 +                case eeksEKINSCALEF: ret = do_cpte_doubles(xd, cptpEEKS, i, fflags, ekins->ekin_n, &ekins->ekinscalef_nhc, list); break;
 +                case eeksVSCALE:     ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->vscale_nhc, list); break;
 +                case eeksEKINSCALEH: ret = do_cpte_doubles(xd, 1, cptpEEKS, fflags, ekins->ekin_n, &ekins->ekinscaleh_nhc, list); break;
 +                case eeksDEKINDL:   ret  = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->dekindl, list); break;
 +                case eeksMVCOS:      ret = do_cpte_real(xd, 1, cptpEEKS, fflags, &ekins->mvcos, list); break;
 +                default:
 +                    gmx_fatal(FARGS, "Unknown ekin data state entry %d\n"
 +                              "You are probably reading a new checkpoint file with old code", i);
 +            }
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +
 +static int do_cpt_enerhist(XDR *xd, gmx_bool bRead,
 +                           int fflags, energyhistory_t *enerhist,
 +                           FILE *list)
 +{
 +    int  i;
 +    int  j;
 +    int  ret;
 +
 +    ret = 0;
 +
 +    if (bRead)
 +    {
 +        enerhist->nsteps     = 0;
 +        enerhist->nsum       = 0;
 +        enerhist->nsteps_sim = 0;
 +        enerhist->nsum_sim   = 0;
 +        enerhist->dht        = NULL;
 +
 +        if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
 +        {
 +            snew(enerhist->dht, 1);
 +            enerhist->dht->ndh              = NULL;
 +            enerhist->dht->dh               = NULL;
 +            enerhist->dht->start_lambda_set = FALSE;
 +        }
 +    }
 +
 +    for (i = 0; (i < eenhNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +                case eenhENERGY_N:     ret = do_cpte_int(xd, cptpEENH, i, fflags, &enerhist->nener, list); break;
 +                case eenhENERGY_AVER:  ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_ave, list); break;
 +                case eenhENERGY_SUM:   ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum, list); break;
 +                case eenhENERGY_NSUM:  do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum, list); break;
 +                case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd, cptpEENH, i, fflags, enerhist->nener, &enerhist->ener_sum_sim, list); break;
 +                case eenhENERGY_NSUM_SIM:   do_cpt_step_err(xd, eenh_names[i], &enerhist->nsum_sim, list); break;
 +                case eenhENERGY_NSTEPS:     do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps, list); break;
 +                case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd, eenh_names[i], &enerhist->nsteps_sim, list); break;
 +                case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd, eenh_names[i], &(enerhist->dht->nndh), list);
 +                    if (bRead) /* now allocate memory for it */
 +                    {
 +                        snew(enerhist->dht->dh, enerhist->dht->nndh);
 +                        snew(enerhist->dht->ndh, enerhist->dht->nndh);
 +                        for (j = 0; j < enerhist->dht->nndh; j++)
 +                        {
 +                            enerhist->dht->ndh[j] = 0;
 +                            enerhist->dht->dh[j]  = NULL;
 +                        }
 +                    }
 +                    break;
 +                case eenhENERGY_DELTA_H_LIST:
 +                    for (j = 0; j < enerhist->dht->nndh; j++)
 +                    {
 +                        ret = do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
 +                    }
 +                    break;
 +                case eenhENERGY_DELTA_H_STARTTIME:
 +                    ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
 +                case eenhENERGY_DELTA_H_STARTLAMBDA:
 +                    ret = do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
 +                default:
 +                    gmx_fatal(FARGS, "Unknown energy history entry %d\n"
 +                              "You are probably reading a new checkpoint file with old code", i);
 +            }
 +        }
 +    }
 +
 +    if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
 +    {
 +        /* Assume we have an old file format and copy sum to sum_sim */
 +        srenew(enerhist->ener_sum_sim, enerhist->nener);
 +        for (i = 0; i < enerhist->nener; i++)
 +        {
 +            enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
 +        }
 +        fflags |= (1<<eenhENERGY_SUM_SIM);
 +    }
 +
 +    if ( (fflags & (1<<eenhENERGY_NSUM)) &&
 +         !(fflags & (1<<eenhENERGY_NSTEPS)))
 +    {
 +        /* Assume we have an old file format and copy nsum to nsteps */
 +        enerhist->nsteps = enerhist->nsum;
 +        fflags          |= (1<<eenhENERGY_NSTEPS);
 +    }
 +    if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
 +         !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
 +    {
 +        /* Assume we have an old file format and copy nsum to nsteps */
 +        enerhist->nsteps_sim = enerhist->nsum_sim;
 +        fflags              |= (1<<eenhENERGY_NSTEPS_SIM);
 +    }
 +
 +    return ret;
 +}
 +
 +static int do_cpt_df_hist(XDR *xd, int fflags, df_history_t *dfhist, FILE *list)
 +{
 +    int  i, nlambda;
 +    int  ret;
 +
 +    nlambda = dfhist->nlambda;
 +    ret     = 0;
 +
 +    for (i = 0; (i < edfhNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +                case edfhBEQUIL:       ret = do_cpte_int(xd, cptpEDFH, i, fflags, &dfhist->bEquil, list); break;
 +                case edfhNATLAMBDA:    ret = do_cpte_ints(xd, cptpEDFH, i, fflags, nlambda, &dfhist->n_at_lam, list); break;
 +                case edfhWLHISTO:      ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->wl_histo, list); break;
 +                case edfhWLDELTA:      ret = do_cpte_real(xd, cptpEDFH, i, fflags, &dfhist->wl_delta, list); break;
 +                case edfhSUMWEIGHTS:   ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_weights, list); break;
 +                case edfhSUMDG:        ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_dg, list); break;
 +                case edfhSUMMINVAR:    ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_minvar, list); break;
 +                case edfhSUMVAR:       ret = do_cpte_reals(xd, cptpEDFH, i, fflags, nlambda, &dfhist->sum_variance, list); break;
 +                case edfhACCUMP:       ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p, list); break;
 +                case edfhACCUMM:       ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m, list); break;
 +                case edfhACCUMP2:      ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_p2, list); break;
 +                case edfhACCUMM2:      ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->accum_m2, list); break;
 +                case edfhTIJ:          ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij, list); break;
 +                case edfhTIJEMP:       ret = do_cpte_nmatrix(xd, cptpEDFH, i, fflags, nlambda, dfhist->Tij_empirical, list); break;
 +
 +                default:
 +                    gmx_fatal(FARGS, "Unknown df history entry %d\n"
 +                              "You are probably reading a new checkpoint file with old code", i);
 +            }
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +
 +/* This function stores the last whole configuration of the reference and
 + * average structure in the .cpt file
 + */
 +static int do_cpt_EDstate(XDR *xd, gmx_bool bRead,
 +                          edsamstate_t *EDstate, FILE *list)
 +{
 +    int  i, j;
 +    int  ret = 0;
 +    char buf[STRLEN];
 +
 +
 +    EDstate->bFromCpt = bRead;
 +
 +    if (EDstate->nED <= 0)
 +    {
 +        return ret;
 +    }
 +
 +    /* When reading, init_edsam has not been called yet,
 +     * so we have to allocate memory first. */
 +    if (bRead)
 +    {
 +        snew(EDstate->nref, EDstate->nED);
 +        snew(EDstate->old_sref, EDstate->nED);
 +        snew(EDstate->nav, EDstate->nED);
 +        snew(EDstate->old_sav, EDstate->nED);
 +    }
 +
 +    /* Read/write the last whole conformation of SREF and SAV for each ED dataset (usually only one) */
 +    for (i = 0; i < EDstate->nED; i++)
 +    {
 +        /* Reference structure SREF */
 +        sprintf(buf, "ED%d # of atoms in reference structure", i+1);
 +        do_cpt_int_err(xd, buf, &EDstate->nref[i], list);
 +        sprintf(buf, "ED%d x_ref", i+1);
 +        if (bRead)
 +        {
 +            snew(EDstate->old_sref[i], EDstate->nref[i]);
 +            do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref[i], list);
 +        }
 +        else
 +        {
 +            do_cpt_n_rvecs_err(xd, buf, EDstate->nref[i], EDstate->old_sref_p[i], list);
 +        }
 +
 +        /* Average structure SAV */
 +        sprintf(buf, "ED%d # of atoms in average structure", i+1);
 +        do_cpt_int_err(xd, buf, &EDstate->nav[i], list);
 +        sprintf(buf, "ED%d x_av", i+1);
 +        if (bRead)
 +        {
 +            snew(EDstate->old_sav[i], EDstate->nav[i]);
 +            do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav[i], list);
 +        }
 +        else
 +        {
 +            do_cpt_n_rvecs_err(xd, buf, EDstate->nav[i], EDstate->old_sav_p[i], list);
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +
 +static int do_cpt_files(XDR *xd, gmx_bool bRead,
 +                        gmx_file_position_t **p_outputfiles, int *nfiles,
 +                        FILE *list, int file_version)
 +{
 +    int                  i, j;
 +    gmx_off_t            offset;
 +    gmx_off_t            mask = 0xFFFFFFFFL;
 +    int                  offset_high, offset_low;
 +    char                *buf;
 +    gmx_file_position_t *outputfiles;
 +
 +    if (do_cpt_int(xd, "number of output files", nfiles, list) != 0)
 +    {
 +        return -1;
 +    }
 +
 +    if (bRead)
 +    {
 +        snew(*p_outputfiles, *nfiles);
 +    }
 +
 +    outputfiles = *p_outputfiles;
 +
 +    for (i = 0; i < *nfiles; i++)
 +    {
 +        /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
 +        if (bRead)
 +        {
 +            do_cpt_string_err(xd, bRead, "output filename", &buf, list);
 +            strncpy(outputfiles[i].filename, buf, CPTSTRLEN-1);
 +            if (list == NULL)
 +            {
 +                sfree(buf);
 +            }
 +
 +            if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
 +            {
 +                return -1;
 +            }
 +            outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
 +        }
 +        else
 +        {
 +            buf = outputfiles[i].filename;
 +            do_cpt_string_err(xd, bRead, "output filename", &buf, list);
 +            /* writing */
 +            offset      = outputfiles[i].offset;
 +            if (offset == -1)
 +            {
 +                offset_low  = -1;
 +                offset_high = -1;
 +            }
 +            else
 +            {
 +                offset_low  = (int) (offset & mask);
 +                offset_high = (int) ((offset >> 32) & mask);
 +            }
 +            if (do_cpt_int(xd, "file_offset_high", &offset_high, list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd, "file_offset_low", &offset_low, list) != 0)
 +            {
 +                return -1;
 +            }
 +        }
 +        if (file_version >= 8)
 +        {
 +            if (do_cpt_int(xd, "file_checksum_size", &(outputfiles[i].chksum_size),
 +                           list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_u_chars(xd, "file_checksum", 16, outputfiles[i].chksum, list) != 0)
 +            {
 +                return -1;
 +            }
 +        }
 +        else
 +        {
 +            outputfiles[i].chksum_size = -1;
 +        }
 +    }
 +    return 0;
 +}
 +
 +
 +void write_checkpoint(const char *fn, gmx_bool bNumberAndKeep,
 +                      FILE *fplog, t_commrec *cr,
 +                      int eIntegrator, int simulation_part,
 +                      gmx_bool bExpanded, int elamstats,
 +                      gmx_int64_t step, double t, t_state *state)
 +{
 +    t_fileio            *fp;
 +    int                  file_version;
 +    char                *version;
 +    char                *btime;
 +    char                *buser;
 +    char                *bhost;
 +    int                  double_prec;
 +    char                *fprog;
 +    char                *fntemp; /* the temporary checkpoint file name */
 +    time_t               now;
 +    char                 timebuf[STRLEN];
 +    int                  nppnodes, npmenodes, flag_64bit;
 +    char                 buf[1024], suffix[5+STEPSTRSIZE], sbuf[STEPSTRSIZE];
 +    gmx_file_position_t *outputfiles;
 +    int                  noutputfiles;
 +    char                *ftime;
 +    int                  flags_eks, flags_enh, flags_dfh, i;
 +    t_fileio            *ret;
 +
 +    if (PAR(cr))
 +    {
 +        if (DOMAINDECOMP(cr))
 +        {
 +            nppnodes  = cr->dd->nnodes;
 +            npmenodes = cr->npmenodes;
 +        }
 +        else
 +        {
 +            nppnodes  = cr->nnodes;
 +            npmenodes = 0;
 +        }
 +    }
 +    else
 +    {
 +        nppnodes  = 1;
 +        npmenodes = 0;
 +    }
 +
++#ifndef GMX_NO_RENAME
 +    /* make the new temporary filename */
 +    snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
 +    strcpy(fntemp, fn);
 +    fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +    sprintf(suffix, "_%s%s", "step", gmx_step_str(step, sbuf));
 +    strcat(fntemp, suffix);
 +    strcat(fntemp, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
- 
++#else
++    /* if we can't rename, we just overwrite the cpt file.
++     * dangerous if interrupted.
++     */
++    snew(fntemp, strlen(fn));
++    strcpy(fntemp, fn);
++#endif
 +    time(&now);
 +    gmx_ctime_r(&now, timebuf, STRLEN);
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog, "Writing checkpoint, step %s at %s\n\n",
 +                gmx_step_str(step, buf), timebuf);
 +    }
 +
 +    /* Get offsets for open files */
 +    gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
 +
 +    fp = gmx_fio_open(fntemp, "w");
 +
 +    if (state->ekinstate.bUpToDate)
 +    {
 +        flags_eks =
 +            ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) |
 +             (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) |
 +             (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
 +    }
 +    else
 +    {
 +        flags_eks = 0;
 +    }
 +
 +    flags_enh = 0;
 +    if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
 +    {
 +        flags_enh |= (1<<eenhENERGY_N);
 +        if (state->enerhist.nsum > 0)
 +        {
 +            flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
 +                          (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
 +        }
 +        if (state->enerhist.nsum_sim > 0)
 +        {
 +            flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
 +                          (1<<eenhENERGY_NSUM_SIM));
 +        }
 +        if (state->enerhist.dht)
 +        {
 +            flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
 +                           (1<< eenhENERGY_DELTA_H_LIST) |
 +                           (1<< eenhENERGY_DELTA_H_STARTTIME) |
 +                           (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
 +        }
 +    }
 +
 +    if (bExpanded)
 +    {
 +        flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) |  (1<<edfhSUMDG)  |
 +                     (1<<edfhTIJ) | (1<<edfhTIJEMP));
 +        if (EWL(elamstats))
 +        {
 +            flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
 +        }
 +        if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
 +        {
 +            flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
 +                          | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
 +        }
 +    }
 +    else
 +    {
 +        flags_dfh = 0;
 +    }
 +
 +    /* We can check many more things now (CPU, acceleration, etc), but
 +     * it is highly unlikely to have two separate builds with exactly
 +     * the same version, user, time, and build host!
 +     */
 +
 +    version = gmx_strdup(VERSION);
 +    btime   = gmx_strdup(BUILD_TIME);
 +    buser   = gmx_strdup(BUILD_USER);
 +    bhost   = gmx_strdup(BUILD_HOST);
 +
 +    double_prec = GMX_CPT_BUILD_DP;
 +    fprog       = gmx_strdup(Program());
 +
 +    ftime   = &(timebuf[0]);
 +
 +    do_cpt_header(gmx_fio_getxdr(fp), FALSE, &file_version,
 +                  &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
 +                  &eIntegrator, &simulation_part, &step, &t, &nppnodes,
 +                  DOMAINDECOMP(cr) ? cr->dd->nc : NULL, &npmenodes,
 +                  &state->natoms, &state->ngtc, &state->nnhpres,
 +                  &state->nhchainlength, &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
 +                  &state->edsamstate.nED,
 +                  NULL);
 +
 +    sfree(version);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +    sfree(fprog);
 +
 +    if ((do_cpt_state(gmx_fio_getxdr(fp), FALSE, state->flags, state, TRUE, NULL) < 0)        ||
 +        (do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL) < 0) ||
 +        (do_cpt_enerhist(gmx_fio_getxdr(fp), FALSE, flags_enh, &state->enerhist, NULL) < 0)  ||
 +        (do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL) < 0)  ||
 +        (do_cpt_EDstate(gmx_fio_getxdr(fp), FALSE, &state->edsamstate, NULL) < 0)      ||
 +        (do_cpt_files(gmx_fio_getxdr(fp), FALSE, &outputfiles, &noutputfiles, NULL,
 +                      file_version) < 0))
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    do_cpt_footer(gmx_fio_getxdr(fp), file_version);
 +
 +    /* we really, REALLY, want to make sure to physically write the checkpoint,
 +       and all the files it depends on, out to disk. Because we've
 +       opened the checkpoint with gmx_fio_open(), it's in our list
 +       of open files.  */
 +    ret = gmx_fio_all_output_fsync();
 +
 +    if (ret)
 +    {
 +        char buf[STRLEN];
 +        sprintf(buf,
 +                "Cannot fsync '%s'; maybe you are out of disk space?",
 +                gmx_fio_getname(ret));
 +
 +        if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV) == NULL)
 +        {
 +            gmx_file(buf);
 +        }
 +        else
 +        {
 +            gmx_warning(buf);
 +        }
 +    }
 +
 +    if (gmx_fio_close(fp) != 0)
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    /* we don't move the checkpoint if the user specified they didn't want it,
 +       or if the fsyncs failed */
++#ifndef GMX_NO_RENAME
 +    if (!bNumberAndKeep && !ret)
 +    {
 +        if (gmx_fexist(fn))
 +        {
 +            /* Rename the previous checkpoint file */
 +            strcpy(buf, fn);
 +            buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +            strcat(buf, "_prev");
 +            strcat(buf, fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
 +#ifndef GMX_FAHCORE
 +            /* we copy here so that if something goes wrong between now and
 +             * the rename below, there's always a state.cpt.
 +             * If renames are atomic (such as in POSIX systems),
 +             * this copying should be unneccesary.
 +             */
 +            gmx_file_copy(fn, buf, FALSE);
 +            /* We don't really care if this fails:
 +             * there's already a new checkpoint.
 +             */
 +#else
 +            gmx_file_rename(fn, buf);
 +#endif
 +        }
 +        if (gmx_file_rename(fntemp, fn) != 0)
 +        {
 +            gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
 +        }
 +    }
++#endif  /* GMX_NO_RENAME */
 +
 +    sfree(outputfiles);
 +    sfree(fntemp);
 +
 +#ifdef GMX_FAHCORE
 +    /*code for alternate checkpointing scheme.  moved from top of loop over
 +       steps */
 +    fcRequestCheckPoint();
 +    if (fcCheckPointParallel( cr->nodeid, NULL, 0) == 0)
 +    {
 +        gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", step );
 +    }
 +#endif /* end GMX_FAHCORE block */
 +}
 +
 +static void print_flag_mismatch(FILE *fplog, int sflags, int fflags)
 +{
 +    int i;
 +
 +    fprintf(fplog, "\nState entry mismatch between the simulation and the checkpoint file\n");
 +    fprintf(fplog, "Entries which are not present in the checkpoint file will not be updated\n");
 +    fprintf(fplog, "  %24s    %11s    %11s\n", "", "simulation", "checkpoint");
 +    for (i = 0; i < estNR; i++)
 +    {
 +        if ((sflags & (1<<i)) || (fflags & (1<<i)))
 +        {
 +            fprintf(fplog, "  %24s    %11s    %11s\n",
 +                    est_names[i],
 +                    (sflags & (1<<i)) ? "  present  " : "not present",
 +                    (fflags & (1<<i)) ? "  present  " : "not present");
 +        }
 +    }
 +}
 +
 +static void check_int(FILE *fplog, const char *type, int p, int f, gmx_bool *mm)
 +{
 +    FILE *fp = fplog ? fplog : stderr;
 +
 +    if (p != f)
 +    {
 +        fprintf(fp, "  %s mismatch,\n", type);
 +        fprintf(fp, "    current program: %d\n", p);
 +        fprintf(fp, "    checkpoint file: %d\n", f);
 +        fprintf(fp, "\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_string(FILE *fplog, const char *type, const char *p,
 +                         const char *f, gmx_bool *mm)
 +{
 +    FILE *fp = fplog ? fplog : stderr;
 +
 +    if (strcmp(p, f) != 0)
 +    {
 +        fprintf(fp, "  %s mismatch,\n", type);
 +        fprintf(fp, "    current program: %s\n", p);
 +        fprintf(fp, "    checkpoint file: %s\n", f);
 +        fprintf(fp, "\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_match(FILE *fplog,
 +                        char *version,
 +                        char *btime, char *buser, char *bhost, int double_prec,
 +                        char *fprog,
 +                        t_commrec *cr, gmx_bool bPartDecomp, int npp_f, int npme_f,
 +                        ivec dd_nc, ivec dd_nc_f)
 +{
 +    int      npp;
 +    gmx_bool mm;
 +
 +    mm = FALSE;
 +
 +    check_string(fplog, "Version", VERSION, version, &mm);
 +    check_string(fplog, "Build time", BUILD_TIME, btime, &mm);
 +    check_string(fplog, "Build user", BUILD_USER, buser, &mm);
 +    check_string(fplog, "Build host", BUILD_HOST, bhost, &mm);
 +    check_int   (fplog, "Double prec.", GMX_CPT_BUILD_DP, double_prec, &mm);
 +    check_string(fplog, "Program name", Program(), fprog, &mm);
 +
 +    check_int   (fplog, "#nodes", cr->nnodes, npp_f+npme_f, &mm);
 +    if (bPartDecomp)
 +    {
 +        dd_nc[XX] = 1;
 +        dd_nc[YY] = 1;
 +        dd_nc[ZZ] = 1;
 +    }
 +    if (cr->nnodes > 1)
 +    {
 +        check_int (fplog, "#PME-nodes", cr->npmenodes, npme_f, &mm);
 +
 +        npp = cr->nnodes;
 +        if (cr->npmenodes >= 0)
 +        {
 +            npp -= cr->npmenodes;
 +        }
 +        if (npp == npp_f)
 +        {
 +            check_int (fplog, "#DD-cells[x]", dd_nc[XX], dd_nc_f[XX], &mm);
 +            check_int (fplog, "#DD-cells[y]", dd_nc[YY], dd_nc_f[YY], &mm);
 +            check_int (fplog, "#DD-cells[z]", dd_nc[ZZ], dd_nc_f[ZZ], &mm);
 +        }
 +    }
 +
 +    if (mm)
 +    {
 +        fprintf(stderr,
 +                "Gromacs binary or parallel settings not identical to previous run.\n"
 +                "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
 +                fplog ? ",\n see the log file for details" : "");
 +
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Gromacs binary or parallel settings not identical to previous run.\n"
 +                    "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
 +        }
 +    }
 +}
 +
 +static void read_checkpoint(const char *fn, FILE **pfplog,
 +                            t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
 +                            int eIntegrator, int *init_fep_state, gmx_int64_t *step, double *t,
 +                            t_state *state, gmx_bool *bReadRNG, gmx_bool *bReadEkin,
 +                            int *simulation_part,
 +                            gmx_bool bAppendOutputFiles, gmx_bool bForceAppend)
 +{
 +    t_fileio            *fp;
 +    int                  i, j, rc;
 +    int                  file_version;
 +    char                *version, *btime, *buser, *bhost, *fprog, *ftime;
 +    int                  double_prec;
 +    char                 filename[STRLEN], buf[STEPSTRSIZE];
 +    int                  nppnodes, eIntegrator_f, nppnodes_f, npmenodes_f;
 +    ivec                 dd_nc_f;
 +    int                  natoms, ngtc, nnhpres, nhchainlength, nlambda, fflags, flags_eks, flags_enh, flags_dfh;
 +    int                  d;
 +    int                  ret;
 +    gmx_file_position_t *outputfiles;
 +    int                  nfiles;
 +    t_fileio            *chksum_file;
 +    FILE               * fplog = *pfplog;
 +    unsigned char        digest[16];
- #ifndef GMX_NATIVE_WINDOWS
++#if !defined __native_client__ && !defined GMX_NATIVE_WINDOWS
 +    struct flock         fl; /* don't initialize here: the struct order is OS
 +                                dependent! */
 +#endif
 +
 +    const char *int_warn =
 +        "WARNING: The checkpoint file was generated with integrator %s,\n"
 +        "         while the simulation uses integrator %s\n\n";
 +    const char *sd_note =
 +        "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
 +        "      while the simulation uses %d SD or BD nodes,\n"
 +        "      continuation will be exact, except for the random state\n\n";
 +
- #ifndef GMX_NATIVE_WINDOWS
++#if !defined __native_client__ && !defined GMX_NATIVE_WINDOWS
 +    fl.l_type   = F_WRLCK;
 +    fl.l_whence = SEEK_SET;
 +    fl.l_start  = 0;
 +    fl.l_len    = 0;
 +    fl.l_pid    = 0;
 +#endif
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        gmx_fatal(FARGS,
 +                  "read_checkpoint not (yet) supported with particle decomposition");
 +    }
 +
 +    fp = gmx_fio_open(fn, "r");
 +    do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
 +                  &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
 +                  &eIntegrator_f, simulation_part, step, t,
 +                  &nppnodes_f, dd_nc_f, &npmenodes_f,
 +                  &natoms, &ngtc, &nnhpres, &nhchainlength, &nlambda,
 +                  &fflags, &flags_eks, &flags_enh, &flags_dfh,
 +                  &state->edsamstate.nED, NULL);
 +
 +    if (bAppendOutputFiles &&
 +        file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
 +    {
 +        gmx_fatal(FARGS, "Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
 +    }
 +
 +    if (cr == NULL || MASTER(cr))
 +    {
 +        fprintf(stderr, "\nReading checkpoint file %s generated: %s\n\n",
 +                fn, ftime);
 +    }
 +
 +    /* This will not be written if we do appending, since fplog is still NULL then */
 +    if (fplog)
 +    {
 +        fprintf(fplog, "\n");
 +        fprintf(fplog, "Reading checkpoint file %s\n", fn);
 +        fprintf(fplog, "  file generated by:     %s\n", fprog);
 +        fprintf(fplog, "  file generated at:     %s\n", ftime);
 +        fprintf(fplog, "  GROMACS build time:    %s\n", btime);
 +        fprintf(fplog, "  GROMACS build user:    %s\n", buser);
 +        fprintf(fplog, "  GROMACS build host:    %s\n", bhost);
 +        fprintf(fplog, "  GROMACS double prec.:  %d\n", double_prec);
 +        fprintf(fplog, "  simulation part #:     %d\n", *simulation_part);
 +        fprintf(fplog, "  step:                  %s\n", gmx_step_str(*step, buf));
 +        fprintf(fplog, "  time:                  %f\n", *t);
 +        fprintf(fplog, "\n");
 +    }
 +
 +    if (natoms != state->natoms)
 +    {
 +        gmx_fatal(FARGS, "Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms", natoms, state->natoms);
 +    }
 +    if (ngtc != state->ngtc)
 +    {
 +        gmx_fatal(FARGS, "Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups", ngtc, state->ngtc);
 +    }
 +    if (nnhpres != state->nnhpres)
 +    {
 +        gmx_fatal(FARGS, "Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables", nnhpres, state->nnhpres);
 +    }
 +
 +    if (nlambda != state->dfhist.nlambda)
 +    {
 +        gmx_fatal(FARGS, "Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states", nlambda, state->dfhist.nlambda);
 +    }
 +
 +    init_gtc_state(state, state->ngtc, state->nnhpres, nhchainlength); /* need to keep this here to keep the tpr format working */
 +    /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
 +
 +    if (eIntegrator_f != eIntegrator)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, int_warn, EI(eIntegrator_f), EI(eIntegrator));
 +        }
 +        if (bAppendOutputFiles)
 +        {
 +            gmx_fatal(FARGS,
 +                      "Output file appending requested, but input/checkpoint integrators do not match.\n"
 +                      "Stopping the run to prevent you from ruining all your data...\n"
 +                      "If you _really_ know what you are doing, try with the -noappend option.\n");
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog, int_warn, EI(eIntegrator_f), EI(eIntegrator));
 +        }
 +    }
 +
 +    if (!PAR(cr))
 +    {
 +        nppnodes      = 1;
 +        cr->npmenodes = 0;
 +    }
 +    else if (bPartDecomp)
 +    {
 +        nppnodes      = cr->nnodes;
 +        cr->npmenodes = 0;
 +    }
 +    else if (cr->nnodes == nppnodes_f + npmenodes_f)
 +    {
 +        if (cr->npmenodes < 0)
 +        {
 +            cr->npmenodes = npmenodes_f;
 +        }
 +        nppnodes = cr->nnodes - cr->npmenodes;
 +        if (nppnodes == nppnodes_f)
 +        {
 +            for (d = 0; d < DIM; d++)
 +            {
 +                if (dd_nc[d] == 0)
 +                {
 +                    dd_nc[d] = dd_nc_f[d];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* The number of PP nodes has not been set yet */
 +        nppnodes = -1;
 +    }
 +
 +    if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
 +    {
 +        /* Correct the RNG state size for the number of PP nodes.
 +         * Such assignments should all be moved to one central function.
 +         */
 +        state->nrng  = nppnodes*gmx_rng_n();
 +        state->nrngi = nppnodes;
 +    }
 +
 +    *bReadRNG = TRUE;
 +    if (fflags != state->flags)
 +    {
 +
 +        if (MASTER(cr))
 +        {
 +            if (bAppendOutputFiles)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Output file appending requested, but input and checkpoint states are not identical.\n"
 +                          "Stopping the run to prevent you from ruining all your data...\n"
 +                          "You can try with the -noappend option, and get more info in the log file.\n");
 +            }
 +
 +            if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
 +            {
 +                gmx_fatal(FARGS, "You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
 +            }
 +            else
 +            {
 +                fprintf(stderr,
 +                        "WARNING: The checkpoint state entries do not match the simulation,\n"
 +                        "         see the log file for details\n\n");
 +            }
 +        }
 +
 +        if (fplog)
 +        {
 +            print_flag_mismatch(fplog, state->flags, fflags);
 +        }
 +    }
 +    else
 +    {
 +        if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
 +            nppnodes != nppnodes_f)
 +        {
 +            *bReadRNG = FALSE;
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr, sd_note, nppnodes_f, nppnodes);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog, sd_note, nppnodes_f, nppnodes);
 +            }
 +        }
 +        if (MASTER(cr))
 +        {
 +            check_match(fplog, version, btime, buser, bhost, double_prec, fprog,
 +                        cr, bPartDecomp, nppnodes_f, npmenodes_f, dd_nc, dd_nc_f);
 +        }
 +    }
 +    ret             = do_cpt_state(gmx_fio_getxdr(fp), TRUE, fflags, state, *bReadRNG, NULL);
 +    *init_fep_state = state->fep_state;  /* there should be a better way to do this than setting it here.
 +                                            Investigate for 5.0. */
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
 +                  ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
 +
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
 +                          flags_enh, &state->enerhist, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    if (file_version < 6)
 +    {
 +        const char *warn = "Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
 +
 +        fprintf(stderr, "\nWARNING: %s\n\n", warn);
 +        if (fplog)
 +        {
 +            fprintf(fplog, "\nWARNING: %s\n\n", warn);
 +        }
 +        state->enerhist.nsum     = *step;
 +        state->enerhist.nsum_sim = *step;
 +    }
 +
 +    ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, NULL, file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    if (gmx_fio_close(fp) != 0)
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +
 +    /* If the user wants to append to output files,
 +     * we use the file pointer positions of the output files stored
 +     * in the checkpoint file and truncate the files such that any frames
 +     * written after the checkpoint time are removed.
 +     * All files are md5sum checked such that we can be sure that
 +     * we do not truncate other (maybe imprortant) files.
 +     */
 +    if (bAppendOutputFiles)
 +    {
 +        if (fn2ftp(outputfiles[0].filename) != efLOG)
 +        {
 +            /* make sure first file is log file so that it is OK to use it for
 +             * locking
 +             */
 +            gmx_fatal(FARGS, "The first output file should always be the log "
 +                      "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
 +        }
 +        for (i = 0; i < nfiles; i++)
 +        {
 +            if (outputfiles[i].offset < 0)
 +            {
 +                gmx_fatal(FARGS, "The original run wrote a file called '%s' which "
 +                          "is larger than 2 GB, but mdrun did not support large file"
 +                          " offsets. Can not append. Run mdrun with -noappend",
 +                          outputfiles[i].filename);
 +            }
 +#ifdef GMX_FAHCORE
 +            chksum_file = gmx_fio_open(outputfiles[i].filename, "a");
 +
 +#else
 +            chksum_file = gmx_fio_open(outputfiles[i].filename, "r+");
 +
 +            /* lock log file */
 +            if (i == 0)
 +            {
 +                /* Note that there are systems where the lock operation
 +                 * will succeed, but a second process can also lock the file.
 +                 * We should probably try to detect this.
 +                 */
- #ifndef GMX_NATIVE_WINDOWS
-                 if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
-                     == -1)
- #else
++#if defined __native_client__
++                errno = ENOSYS;
++                if (1)
++
++#elif defined GMX_NATIVE_WINDOWS
 +                if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX) == -1)
++#else
++                if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl) == -1)
 +#endif
 +                {
 +                    if (errno == ENOSYS)
 +                    {
 +                        if (!bForceAppend)
 +                        {
 +                            gmx_fatal(FARGS, "File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
 +                        }
 +                        else
 +                        {
 +                            fprintf(stderr, "\nNOTE: File locking is not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
 +                            if (fplog)
 +                            {
 +                                fprintf(fplog, "\nNOTE: File locking not supported on this system, will not lock %s\n\n", outputfiles[i].filename);
 +                            }
 +                        }
 +                    }
 +                    else if (errno == EACCES || errno == EAGAIN)
 +                    {
 +                        gmx_fatal(FARGS, "Failed to lock: %s. Already running "
 +                                  "simulation?", outputfiles[i].filename);
 +                    }
 +                    else
 +                    {
 +                        gmx_fatal(FARGS, "Failed to lock: %s. %s.",
 +                                  outputfiles[i].filename, strerror(errno));
 +                    }
 +                }
 +            }
 +
 +            /* compute md5 chksum */
 +            if (outputfiles[i].chksum_size != -1)
 +            {
 +                if (gmx_fio_get_file_md5(chksum_file, outputfiles[i].offset,
 +                                         digest) != outputfiles[i].chksum_size) /*at the end of the call the file position is at the end of the file*/
 +                {
 +                    gmx_fatal(FARGS, "Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                              outputfiles[i].chksum_size,
 +                              outputfiles[i].filename);
 +                }
 +            }
 +            if (i == 0)  /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
 +            {
 +                if (gmx_fio_seek(chksum_file, outputfiles[i].offset))
 +                {
 +                    gmx_fatal(FARGS, "Seek error! Failed to truncate log-file: %s.", strerror(errno));
 +                }
 +            }
 +#endif
 +
 +            if (i == 0) /*open log file here - so that lock is never lifted
 +                           after chksum is calculated */
 +            {
 +                *pfplog = gmx_fio_getfp(chksum_file);
 +            }
 +            else
 +            {
 +                gmx_fio_close(chksum_file);
 +            }
 +#ifndef GMX_FAHCORE
 +            /* compare md5 chksum */
 +            if (outputfiles[i].chksum_size != -1 &&
 +                memcmp(digest, outputfiles[i].chksum, 16) != 0)
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug, "chksum for %s: ", outputfiles[i].filename);
 +                    for (j = 0; j < 16; j++)
 +                    {
 +                        fprintf(debug, "%02x", digest[j]);
 +                    }
 +                    fprintf(debug, "\n");
 +                }
 +                gmx_fatal(FARGS, "Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                          outputfiles[i].filename);
 +            }
 +#endif
 +
 +
 +            if (i != 0) /*log file is already seeked to correct position */
 +            {
 +#ifdef GMX_NATIVE_WINDOWS
 +                rc = gmx_wintruncate(outputfiles[i].filename, outputfiles[i].offset);
 +#else
 +                rc = truncate(outputfiles[i].filename, outputfiles[i].offset);
 +#endif
 +                if (rc != 0)
 +                {
 +                    gmx_fatal(FARGS, "Truncation of file %s failed. Cannot do appending because of this failure.", outputfiles[i].filename);
 +                }
 +            }
 +        }
 +    }
 +
 +    sfree(outputfiles);
 +}
 +
 +
 +void load_checkpoint(const char *fn, FILE **fplog,
 +                     t_commrec *cr, gmx_bool bPartDecomp, ivec dd_nc,
 +                     t_inputrec *ir, t_state *state,
 +                     gmx_bool *bReadRNG, gmx_bool *bReadEkin,
 +                     gmx_bool bAppend, gmx_bool bForceAppend)
 +{
 +    gmx_int64_t     step;
 +    double          t;
 +
 +    if (SIMMASTER(cr))
 +    {
 +        /* Read the state from the checkpoint file */
 +        read_checkpoint(fn, fplog,
 +                        cr, bPartDecomp, dd_nc,
 +                        ir->eI, &(ir->fepvals->init_fep_state), &step, &t, state, bReadRNG, bReadEkin,
 +                        &ir->simulation_part, bAppend, bForceAppend);
 +    }
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(cr->npmenodes), &cr->npmenodes, cr);
 +        gmx_bcast(DIM*sizeof(dd_nc[0]), dd_nc, cr);
 +        gmx_bcast(sizeof(step), &step, cr);
 +        gmx_bcast(sizeof(*bReadRNG), bReadRNG, cr);
 +        gmx_bcast(sizeof(*bReadEkin), bReadEkin, cr);
 +    }
 +    ir->bContinuation    = TRUE;
 +    if (ir->nsteps >= 0)
 +    {
 +        ir->nsteps          += ir->init_step - step;
 +    }
 +    ir->init_step        = step;
 +    ir->simulation_part += 1;
 +}
 +
 +static void read_checkpoint_data(t_fileio *fp, int *simulation_part,
 +                                 gmx_int64_t *step, double *t, t_state *state,
 +                                 gmx_bool bReadRNG,
 +                                 int *nfiles, gmx_file_position_t **outputfiles)
 +{
 +    int                  file_version;
 +    char                *version, *btime, *buser, *bhost, *fprog, *ftime;
 +    int                  double_prec;
 +    int                  eIntegrator;
 +    int                  nppnodes, npme;
 +    ivec                 dd_nc;
 +    int                  flags_eks, flags_enh, flags_dfh;
 +    int                  nfiles_loc;
 +    gmx_file_position_t *files_loc = NULL;
 +    int                  ret;
 +
 +    do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
 +                  &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
 +                  &eIntegrator, simulation_part, step, t, &nppnodes, dd_nc, &npme,
 +                  &state->natoms, &state->ngtc, &state->nnhpres, &state->nhchainlength,
 +                  &(state->dfhist.nlambda), &state->flags, &flags_eks, &flags_enh, &flags_dfh,
 +                  &state->edsamstate.nED, NULL);
 +    ret =
 +        do_cpt_state(gmx_fio_getxdr(fp), TRUE, state->flags, state, bReadRNG, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state->ekinstate, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
 +                          flags_enh, &state->enerhist, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_df_hist(gmx_fio_getxdr(fp), flags_dfh, &state->dfhist, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state->edsamstate, NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    ret = do_cpt_files(gmx_fio_getxdr(fp), TRUE,
 +                       outputfiles != NULL ? outputfiles : &files_loc,
 +                       outputfiles != NULL ? nfiles : &nfiles_loc,
 +                       NULL, file_version);
 +    if (files_loc != NULL)
 +    {
 +        sfree(files_loc);
 +    }
 +
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +}
 +
 +void
 +read_checkpoint_state(const char *fn, int *simulation_part,
 +                      gmx_int64_t *step, double *t, t_state *state)
 +{
 +    t_fileio *fp;
 +
 +    fp = gmx_fio_open(fn, "r");
 +    read_checkpoint_data(fp, simulation_part, step, t, state, FALSE, NULL, NULL);
 +    if (gmx_fio_close(fp) != 0)
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +}
 +
 +void read_checkpoint_trxframe(t_fileio *fp, t_trxframe *fr)
 +{
 +    /* This next line is nasty because the sub-structures of t_state
 +     * cannot be assumed to be zeroed (or even initialized in ways the
 +     * rest of the code might assume). Using snew would be better, but
 +     * this will all go away for 5.0. */
 +    t_state         state;
 +    int             simulation_part;
 +    gmx_int64_t     step;
 +    double          t;
 +
 +    init_state(&state, 0, 0, 0, 0, 0);
 +
 +    read_checkpoint_data(fp, &simulation_part, &step, &t, &state, FALSE, NULL, NULL);
 +
 +    fr->natoms  = state.natoms;
 +    fr->bTitle  = FALSE;
 +    fr->bStep   = TRUE;
 +    fr->step    = gmx_int64_to_int(step,
 +                                   "conversion of checkpoint to trajectory");
 +    fr->bTime      = TRUE;
 +    fr->time       = t;
 +    fr->bLambda    = TRUE;
 +    fr->lambda     = state.lambda[efptFEP];
 +    fr->fep_state  = state.fep_state;
 +    fr->bAtoms     = FALSE;
 +    fr->bX         = (state.flags & (1<<estX));
 +    if (fr->bX)
 +    {
 +        fr->x     = state.x;
 +        state.x   = NULL;
 +    }
 +    fr->bV      = (state.flags & (1<<estV));
 +    if (fr->bV)
 +    {
 +        fr->v     = state.v;
 +        state.v   = NULL;
 +    }
 +    fr->bF      = FALSE;
 +    fr->bBox    = (state.flags & (1<<estBOX));
 +    if (fr->bBox)
 +    {
 +        copy_mat(state.box, fr->box);
 +    }
 +    done_state(&state);
 +}
 +
 +void list_checkpoint(const char *fn, FILE *out)
 +{
 +    t_fileio            *fp;
 +    int                  file_version;
 +    char                *version, *btime, *buser, *bhost, *fprog, *ftime;
 +    int                  double_prec;
 +    int                  eIntegrator, simulation_part, nppnodes, npme;
 +    gmx_int64_t          step;
 +    double               t;
 +    ivec                 dd_nc;
 +    t_state              state;
 +    int                  flags_eks, flags_enh, flags_dfh;
 +    int                  indent;
 +    int                  i, j;
 +    int                  ret;
 +    gmx_file_position_t *outputfiles;
 +    int                  nfiles;
 +
 +    init_state(&state, -1, -1, -1, -1, 0);
 +
 +    fp = gmx_fio_open(fn, "r");
 +    do_cpt_header(gmx_fio_getxdr(fp), TRUE, &file_version,
 +                  &version, &btime, &buser, &bhost, &double_prec, &fprog, &ftime,
 +                  &eIntegrator, &simulation_part, &step, &t, &nppnodes, dd_nc, &npme,
 +                  &state.natoms, &state.ngtc, &state.nnhpres, &state.nhchainlength,
 +                  &(state.dfhist.nlambda), &state.flags,
 +                  &flags_eks, &flags_enh, &flags_dfh, &state.edsamstate.nED, out);
 +    ret = do_cpt_state(gmx_fio_getxdr(fp), TRUE, state.flags, &state, TRUE, out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp), flags_eks, &state.ekinstate, out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp), TRUE,
 +                          flags_enh, &state.enerhist, out);
 +
 +    if (ret == 0)
 +    {
 +        ret = do_cpt_df_hist(gmx_fio_getxdr(fp),
 +                             flags_dfh, &state.dfhist, out);
 +    }
 +
 +    if (ret == 0)
 +    {
 +        ret = do_cpt_EDstate(gmx_fio_getxdr(fp), TRUE, &state.edsamstate, out);
 +    }
 +
 +    if (ret == 0)
 +    {
 +        do_cpt_files(gmx_fio_getxdr(fp), TRUE, &outputfiles, &nfiles, out, file_version);
 +    }
 +
 +    if (ret == 0)
 +    {
 +        ret = do_cpt_footer(gmx_fio_getxdr(fp), file_version);
 +    }
 +
 +    if (ret)
 +    {
 +        cp_warning(out);
 +    }
 +    if (gmx_fio_close(fp) != 0)
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    done_state(&state);
 +}
 +
 +
 +static gmx_bool exist_output_file(const char *fnm_cp, int nfile, const t_filenm fnm[])
 +{
 +    int i;
 +
 +    /* Check if the output file name stored in the checkpoint file
 +     * is one of the output file names of mdrun.
 +     */
 +    i = 0;
 +    while (i < nfile &&
 +           !(is_output(&fnm[i]) && strcmp(fnm_cp, fnm[i].fns[0]) == 0))
 +    {
 +        i++;
 +    }
 +
 +    return (i < nfile && gmx_fexist(fnm_cp));
 +}
 +
 +/* This routine cannot print tons of data, since it is called before the log file is opened. */
 +gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
 +                                         gmx_int64_t *cpt_step, t_commrec *cr,
 +                                         gmx_bool bAppendReq,
 +                                         int nfile, const t_filenm fnm[],
 +                                         const char *part_suffix, gmx_bool *bAddPart)
 +{
 +    t_fileio            *fp;
 +    gmx_int64_t          step = 0;
 +    double               t;
 +    /* This next line is nasty because the sub-structures of t_state
 +     * cannot be assumed to be zeroed (or even initialized in ways the
 +     * rest of the code might assume). Using snew would be better, but
 +     * this will all go away for 5.0. */
 +    t_state              state;
 +    int                  nfiles;
 +    gmx_file_position_t *outputfiles;
 +    int                  nexist, f;
 +    gmx_bool             bAppend;
 +    char                *fn, suf_up[STRLEN];
 +
 +    bAppend = FALSE;
 +
 +    if (SIMMASTER(cr))
 +    {
 +        if (!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename, "r")) ))
 +        {
 +            *simulation_part = 0;
 +        }
 +        else
 +        {
 +            init_state(&state, 0, 0, 0, 0, 0);
 +
 +            read_checkpoint_data(fp, simulation_part, &step, &t, &state, FALSE,
 +                                 &nfiles, &outputfiles);
 +            if (gmx_fio_close(fp) != 0)
 +            {
 +                gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +            }
 +            done_state(&state);
 +
 +            if (bAppendReq)
 +            {
 +                nexist = 0;
 +                for (f = 0; f < nfiles; f++)
 +                {
 +                    if (exist_output_file(outputfiles[f].filename, nfile, fnm))
 +                    {
 +                        nexist++;
 +                    }
 +                }
 +                if (nexist == nfiles)
 +                {
 +                    bAppend = bAppendReq;
 +                }
 +                else if (nexist > 0)
 +                {
 +                    fprintf(stderr,
 +                            "Output file appending has been requested,\n"
 +                            "but some output files listed in the checkpoint file %s\n"
 +                            "are not present or are named differently by the current program:\n",
 +                            filename);
 +                    fprintf(stderr, "output files present:");
 +                    for (f = 0; f < nfiles; f++)
 +                    {
 +                        if (exist_output_file(outputfiles[f].filename,
 +                                              nfile, fnm))
 +                        {
 +                            fprintf(stderr, " %s", outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr, "\n");
 +                    fprintf(stderr, "output files not present or named differently:");
 +                    for (f = 0; f < nfiles; f++)
 +                    {
 +                        if (!exist_output_file(outputfiles[f].filename,
 +                                               nfile, fnm))
 +                        {
 +                            fprintf(stderr, " %s", outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr, "\n");
 +
 +                    gmx_fatal(FARGS, "File appending requested, but only %d of the %d output files are present", nexist, nfiles);
 +                }
 +            }
 +
 +            if (bAppend)
 +            {
 +                if (nfiles == 0)
 +                {
 +                    gmx_fatal(FARGS, "File appending requested, but no output file information is stored in the checkpoint file");
 +                }
 +                fn = outputfiles[0].filename;
 +                if (strlen(fn) < 4 ||
 +                    gmx_strcasecmp(fn+strlen(fn)-4, ftp2ext(efLOG)) == 0)
 +                {
 +                    gmx_fatal(FARGS, "File appending requested, but the log file is not the first file listed in the checkpoint file");
 +                }
 +                /* Set bAddPart to whether the suffix string '.part' is present
 +                 * in the log file name.
 +                 */
 +                strcpy(suf_up, part_suffix);
 +                upstring(suf_up);
 +                *bAddPart = (strstr(fn, part_suffix) != NULL ||
 +                             strstr(fn, suf_up) != NULL);
 +            }
 +
 +            sfree(outputfiles);
 +        }
 +    }
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(*simulation_part), simulation_part, cr);
 +
 +        if (*simulation_part > 0 && bAppendReq)
 +        {
 +            gmx_bcast(sizeof(bAppend), &bAppend, cr);
 +            gmx_bcast(sizeof(*bAddPart), bAddPart, cr);
 +        }
 +    }
 +    if (NULL != cpt_step)
 +    {
 +        *cpt_step = step;
 +    }
 +
 +    return bAppend;
 +}
diff --cc src/gromacs/gmxlib/copyrite.cpp
index 33d50dbf35,0000000000..514757d63f
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/copyrite.cpp
+++ b/src/gromacs/gmxlib/copyrite.cpp
@@@ -1,730 -1,0 +1,730 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#include "copyrite.h"
 +
 +#ifdef HAVE_CONFIG_H
 +#include "config.h"
 +#endif
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <time.h>
 +
 +#ifdef HAVE_LIBMKL
 +#include <mkl.h>
 +#endif
 +
 +/* This file is completely threadsafe - keep it that way! */
 +
 +#include "gromacs/fileio/futil.h"
 +#include "gromacs/legacyheaders/macros.h"
 +#include "gromacs/legacyheaders/random.h"
 +#include "gromacs/legacyheaders/smalloc.h"
 +#include "gromacs/legacyheaders/strdb.h"
 +#include "gromacs/legacyheaders/string2.h"
 +#include "gromacs/legacyheaders/vec.h"
 +
 +#include "gromacs/fft/fft.h"
 +#include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/gmxassert.h"
 +#include "gromacs/utility/programinfo.h"
 +
 +#include "buildinfo.h"
 +
 +static gmx_bool be_cool(void)
 +{
 +    /* Yes, it is bad to check the environment variable every call,
 +     * but we dont call this routine often, and it avoids using
 +     * a mutex for locking the variable...
 +     */
 +#ifdef GMX_COOL_QUOTES
 +    return (getenv("GMX_NO_QUOTES") == NULL);
 +#else
 +    /*be uncool*/
 +    return FALSE;
 +#endif
 +}
 +
 +static void pukeit(const char *db, const char *defstring, char *retstring,
 +                   int retsize, int *cqnum)
 +{
 +    FILE  *fp;
 +    char **help;
 +    int    i, nhlp;
 +    int    seed;
 +
 +    if (be_cool() && ((fp = low_libopen(db, FALSE)) != NULL))
 +    {
 +        nhlp = fget_lines(fp, &help);
 +        /* for libraries we can use the low-level close routines */
 +        ffclose(fp);
 +        seed   = time(NULL);
 +        *cqnum = static_cast<int>(nhlp*rando(&seed));
 +        if (strlen(help[*cqnum]) >= STRLEN)
 +        {
 +            help[*cqnum][STRLEN-1] = '\0';
 +        }
 +        strncpy(retstring, help[*cqnum], retsize);
 +        for (i = 0; (i < nhlp); i++)
 +        {
 +            sfree(help[i]);
 +        }
 +        sfree(help);
 +    }
 +    else
 +    {
 +        *cqnum = -1;
 +        strncpy(retstring, defstring, retsize);
 +    }
 +}
 +
 +void bromacs(char *retstring, int retsize)
 +{
 +    int dum;
 +
 +    pukeit("bromacs.dat",
 +           "Groningen Machine for Chemical Simulation",
 +           retstring, retsize, &dum);
 +}
 +
 +void cool_quote(char *retstring, int retsize, int *cqnum)
 +{
 +    char *tmpstr;
 +    char *ptr;
 +    int   tmpcq, *p;
 +
 +    if (cqnum != NULL)
 +    {
 +        p = cqnum;
 +    }
 +    else
 +    {
 +        p = &tmpcq;
 +    }
 +
 +    /* protect audience from explicit lyrics */
 +    snew(tmpstr, retsize+1);
 +    pukeit("gurgle.dat", "Thanx for Using GROMACS - Have a Nice Day",
 +           tmpstr, retsize-2, p);
 +
 +    if ((ptr = strchr(tmpstr, '_')) != NULL)
 +    {
 +        *ptr = '\0';
 +        ptr++;
 +        sprintf(retstring, "\"%s\" %s", tmpstr, ptr);
 +    }
 +    else
 +    {
 +        strcpy(retstring, tmpstr);
 +    }
 +    sfree(tmpstr);
 +}
 +
 +static void printCopyright(FILE *fp)
 +{
 +    static const char * const CopyrightText[] = {
 +        "GROMACS is written by:",
 +        "Emile Apol, Rossen Apostolov, Herman J.C. Berendsen,",
 +        "Aldert van Buuren, PÃ¤r Bjelkmar, Rudi van Drunen, Anton Feenstra,",
 +        "Gerrit Groenhof, Christoph Junghans, Peter Kasson, Carsten Kutzner",
 +        "Per Larsson, Magnus Lundborg, Pieter Meulenhoff, Teemu Murtola,",
 +        "Szilard Pall, Sander Pronk, Roland Schulz,",
 +        "Michael Shirts, Alfons Sijbers, Peter Tieleman,\n",
 +        "Christian Wennberg, Mark Abraham\n",
 +        "Berk Hess, David van der Spoel, and Erik Lindahl.\n",
 +        "Copyright (c) 1991-2000, University of Groningen, The Netherlands.",
 +        "Copyright (c) 2001-2013, The GROMACS development team at",
 +        "Uppsala University & The Royal Institute of Technology, Sweden.",
 +        "check out http://www.gromacs.org for more information."
 +    };
 +
 +    static const char * const LicenseText[] = {
 +        "GROMACS is free software; you can redistribute it and/or modify it",
 +        "under the terms of the GNU Lesser General Public License",
 +        "as published by the Free Software Foundation; either version 2.1",
 +        "of the License, or (at your option) any later version."
 +    };
 +
 +#define NCR (int)asize(CopyrightText)
 +// FAH has an exception permission from LGPL to allow digital signatures in Gromacs.
 +#ifdef GMX_FAHCORE
 +#define NLICENSE 0
 +#else
 +#define NLICENSE (int)asize(LicenseText)
 +#endif
 +
 +    for (int i = 0; i < NCR; ++i)
 +    {
 +        fprintf(fp, "%s\n", CopyrightText[i]);
 +    }
 +    fprintf(fp, "\n");
 +    for (int i = 0; i < NLICENSE; ++i)
 +    {
 +        fprintf(fp, "%s\n", LicenseText[i]);
 +    }
 +}
 +
 +
 +void gmx_thanx(FILE *fp)
 +{
 +    char cq[1024];
 +    int  cqnum = -1;
 +
 +    /* protect the audience from suggestive discussions */
 +    cool_quote(cq, 1023, &cqnum);
 +
 +    if (cqnum >= 0)
 +    {
 +        fprintf(fp, "\ngcq#%d: %s\n\n", cqnum, cq);
 +    }
 +    else
 +    {
 +        fprintf(fp, "\n%s\n\n", cq);
 +    }
 +}
 +
 +typedef struct {
 +    const char *key;
 +    const char *author;
 +    const char *title;
 +    const char *journal;
 +    int         volume, year;
 +    const char *pages;
 +} t_citerec;
 +
 +void please_cite(FILE *fp, const char *key)
 +{
 +    static const t_citerec citedb[] = {
 +        { "Allen1987a",
 +          "M. P. Allen and D. J. Tildesley",
 +          "Computer simulation of liquids",
 +          "Oxford Science Publications",
 +          1, 1987, "1" },
 +        { "Berendsen95a",
 +          "H. J. C. Berendsen, D. van der Spoel and R. van Drunen",
 +          "GROMACS: A message-passing parallel molecular dynamics implementation",
 +          "Comp. Phys. Comm.",
 +          91, 1995, "43-56" },
 +        { "Berendsen84a",
 +          "H. J. C. Berendsen, J. P. M. Postma, A. DiNola and J. R. Haak",
 +          "Molecular dynamics with coupling to an external bath",
 +          "J. Chem. Phys.",
 +          81, 1984, "3684-3690" },
 +        { "Ryckaert77a",
 +          "J. P. Ryckaert and G. Ciccotti and H. J. C. Berendsen",
 +          "Numerical Integration of the Cartesian Equations of Motion of a System with Constraints; Molecular Dynamics of n-Alkanes",
 +          "J. Comp. Phys.",
 +          23, 1977, "327-341" },
 +        { "Miyamoto92a",
 +          "S. Miyamoto and P. A. Kollman",
 +          "SETTLE: An Analytical Version of the SHAKE and RATTLE Algorithms for Rigid Water Models",
 +          "J. Comp. Chem.",
 +          13, 1992, "952-962" },
 +        { "Cromer1968a",
 +          "D. T. Cromer & J. B. Mann",
 +          "X-ray scattering factors computed from numerical Hartree-Fock wave functions",
 +          "Acta Cryst. A",
 +          24, 1968, "321" },
 +        { "Barth95a",
 +          "E. Barth and K. Kuczera and B. Leimkuhler and R. D. Skeel",
 +          "Algorithms for Constrained Molecular Dynamics",
 +          "J. Comp. Chem.",
 +          16, 1995, "1192-1209" },
 +        { "Essmann95a",
 +          "U. Essmann, L. Perera, M. L. Berkowitz, T. Darden, H. Lee and L. G. Pedersen ",
 +          "A smooth particle mesh Ewald method",
 +          "J. Chem. Phys.",
 +          103, 1995, "8577-8592" },
 +        { "Torda89a",
 +          "A. E. Torda and R. M. Scheek and W. F. van Gunsteren",
 +          "Time-dependent distance restraints in molecular dynamics simulations",
 +          "Chem. Phys. Lett.",
 +          157, 1989, "289-294" },
 +        { "Tironi95a",
 +          "I. G. Tironi and R. Sperb and P. E. Smith and W. F. van Gunsteren",
 +          "Generalized reaction field method for molecular dynamics simulations",
 +          "J. Chem. Phys",
 +          102, 1995, "5451-5459" },
 +        { "Hess97a",
 +          "B. Hess and H. Bekker and H. J. C. Berendsen and J. G. E. M. Fraaije",
 +          "LINCS: A Linear Constraint Solver for molecular simulations",
 +          "J. Comp. Chem.",
 +          18, 1997, "1463-1472" },
 +        { "Hess2008a",
 +          "B. Hess",
 +          "P-LINCS: A Parallel Linear Constraint Solver for molecular simulation",
 +          "J. Chem. Theory Comput.",
 +          4, 2008, "116-122" },
 +        { "Hess2008b",
 +          "B. Hess and C. Kutzner and D. van der Spoel and E. Lindahl",
 +          "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation",
 +          "J. Chem. Theory Comput.",
 +          4, 2008, "435-447" },
 +        { "Hub2010",
 +          "J. S. Hub, B. L. de Groot and D. van der Spoel",
 +          "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates",
 +          "J. Chem. Theory Comput.",
 +          6, 2010, "3713-3720"},
 +        { "In-Chul99a",
 +          "Y. In-Chul and M. L. Berkowitz",
 +          "Ewald summation for systems with slab geometry",
 +          "J. Chem. Phys.",
 +          111, 1999, "3155-3162" },
 +        { "DeGroot97a",
 +          "B. L. de Groot and D. M. F. van Aalten and R. M. Scheek and A. Amadei and G. Vriend and H. J. C. Berendsen",
 +          "Prediction of Protein Conformational Freedom From Distance Constrains",
 +          "Proteins",
 +          29, 1997, "240-251" },
 +        { "Spoel98a",
 +          "D. van der Spoel and P. J. van Maaren and H. J. C. Berendsen",
 +          "A systematic study of water models for molecular simulation. Derivation of models optimized for use with a reaction-field.",
 +          "J. Chem. Phys.",
 +          108, 1998, "10220-10230" },
 +        { "Wishart98a",
 +          "D. S. Wishart and A. M. Nip",
 +          "Protein Chemical Shift Analysis: A Practical Guide",
 +          "Biochem. Cell Biol.",
 +          76, 1998, "153-163" },
 +        { "Maiorov95",
 +          "V. N. Maiorov and G. M. Crippen",
 +          "Size-Independent Comparison of Protein Three-Dimensional Structures",
 +          "PROTEINS: Struct. Funct. Gen.",
 +          22, 1995, "273-283" },
 +        { "Feenstra99",
 +          "K. A. Feenstra and B. Hess and H. J. C. Berendsen",
 +          "Improving Efficiency of Large Time-scale Molecular Dynamics Simulations of Hydrogen-rich Systems",
 +          "J. Comput. Chem.",
 +          20, 1999, "786-798" },
 +        { "Lourenco2013a",
 +          "Tuanan C. Lourenco and Mariny F. C. Coelho and Teodorico C. Ramalho and David van der Spoel and Luciano T. Costa",
 +          "Insights on the Solubility of CO2 in 1-Ethyl-3-methylimidazolium Bis(trifluoromethylsulfonyl)imide from the Microscopic Point of View",
 +          "Environ. Sci. Technol.",
 +          47, 2013, "7421-7429" },
 +        { "Timneanu2004a",
 +          "N. Timneanu and C. Caleman and J. Hajdu and D. van der Spoel",
 +          "Auger Electron Cascades in Water and Ice",
 +          "Chem. Phys.",
 +          299, 2004, "277-283" },
 +        { "Pascal2011a",
 +          "T. A. Pascal and S. T. Lin and W. A. Goddard III",
 +          "Thermodynamics of liquids: standard molar entropies and heat capacities of common solvents from 2PT molecular dynamics",
 +          "Phys. Chem. Chem. Phys.",
 +          13, 2011, "169-181" },
 +        { "Caleman2011b",
 +          "C. Caleman and P. J. van Maaren and M. Hong and J. S. Hub and L. T. da Costa and D. van der Spoel",
 +          "Force Field Benchmark of Organic Liquids: Density, Enthalpy of Vaporization, Heat Capacities, Surface Tension, Isothermal Compressibility, Volumetric Expansion Coefficient, and Dielectric Constant",
 +          "J. Chem. Theo. Comp.",
 +          8, 2012, "61" },
 +        { "Lindahl2001a",
 +          "E. Lindahl and B. Hess and D. van der Spoel",
 +          "GROMACS 3.0: A package for molecular simulation and trajectory analysis",
 +          "J. Mol. Mod.",
 +          7, 2001, "306-317" },
 +        { "Wang2001a",
 +          "J. Wang and W. Wang and S. Huo and M. Lee and P. A. Kollman",
 +          "Solvation model based on weighted solvent accessible surface area",
 +          "J. Phys. Chem. B",
 +          105, 2001, "5055-5067" },
 +        { "Eisenberg86a",
 +          "D. Eisenberg and A. D. McLachlan",
 +          "Solvation energy in protein folding and binding",
 +          "Nature",
 +          319, 1986, "199-203" },
 +        { "Bondi1964a",
 +          "A. Bondi",
 +          "van der Waals Volumes and Radii",
 +          "J. Phys. Chem.",
 +          68, 1964, "441-451" },
 +        { "Eisenhaber95",
 +          "Frank Eisenhaber and Philip Lijnzaad and Patrick Argos and Chris Sander and Michael Scharf",
 +          "The Double Cube Lattice Method: Efficient Approaches to Numerical Integration of Surface Area and Volume and to Dot Surface Contouring of Molecular Assemblies",
 +          "J. Comp. Chem.",
 +          16, 1995, "273-284" },
 +        { "Hess2002",
 +          "B. Hess, H. Saint-Martin and H.J.C. Berendsen",
 +          "Flexible constraints: an adiabatic treatment of quantum degrees of freedom, with application to the flexible and polarizable MCDHO model for water",
 +          "J. Chem. Phys.",
 +          116, 2002, "9602-9610" },
 +        { "Hetenyi2002b",
 +          "Csaba Hetenyi and David van der Spoel",
 +          "Efficient docking of peptides to proteins without prior knowledge of the binding site.",
 +          "Prot. Sci.",
 +          11, 2002, "1729-1737" },
 +        { "Hess2003",
 +          "B. Hess and R.M. Scheek",
 +          "Orientation restraints in molecular dynamics simulations using time and ensemble averaging",
 +          "J. Magn. Res.",
 +          164, 2003, "19-27" },
 +        { "Rappe1991a",
 +          "A. K. Rappe and W. A. Goddard III",
 +          "Charge Equillibration for Molecular Dynamics Simulations",
 +          "J. Phys. Chem.",
 +          95, 1991, "3358-3363" },
 +        { "Mu2005a",
 +          "Y. Mu, P. H. Nguyen and G. Stock",
 +          "Energy landscape of a small peptide revelaed by dihedral angle principal component analysis",
 +          "Prot. Struct. Funct. Bioinf.",
 +          58, 2005, "45-52" },
 +        { "Okabe2001a",
 +          "T. Okabe and M. Kawata and Y. Okamoto and M. Mikami",
 +          "Replica-exchange {M}onte {C}arlo method for the isobaric-isothermal ensemble",
 +          "Chem. Phys. Lett.",
 +          335, 2001, "435-439" },
 +        { "Hukushima96a",
 +          "K. Hukushima and K. Nemoto",
 +          "Exchange Monte Carlo Method and Application to Spin Glass Simulations",
 +          "J. Phys. Soc. Jpn.",
 +          65, 1996, "1604-1608" },
 +        { "Tropp80a",
 +          "J. Tropp",
 +          "Dipolar Relaxation and Nuclear Overhauser effects in nonrigid molecules: The effect of fluctuating internuclear distances",
 +          "J. Chem. Phys.",
 +          72, 1980, "6035-6043" },
 +        { "Bultinck2002a",
 +          "P. Bultinck and W. Langenaeker and P. Lahorte and F. De Proft and P. Geerlings and M. Waroquier and J. P. Tollenaere",
 +          "The electronegativity equalization method I: Parametrization and validation for atomic charge calculations",
 +          "J. Phys. Chem. A",
 +          106, 2002, "7887-7894" },
 +        { "Yang2006b",
 +          "Q. Y. Yang and K. A. Sharp",
 +          "Atomic charge parameters for the finite difference Poisson-Boltzmann method using electronegativity neutralization",
 +          "J. Chem. Theory Comput.",
 +          2, 2006, "1152-1167" },
 +        { "Spoel2005a",
 +          "D. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A. E. Mark and H. J. C. Berendsen",
 +          "GROMACS: Fast, Flexible and Free",
 +          "J. Comp. Chem.",
 +          26, 2005, "1701-1719" },
 +        { "Spoel2006b",
 +          "D. van der Spoel, P. J. van Maaren, P. Larsson and N. Timneanu",
 +          "Thermodynamics of hydrogen bonding in hydrophilic and hydrophobic media",
 +          "J. Phys. Chem. B",
 +          110, 2006, "4393-4398" },
 +        { "Spoel2006d",
 +          "D. van der Spoel and M. M. Seibert",
 +          "Protein folding kinetics and thermodynamics from atomistic simulations",
 +          "Phys. Rev. Letters",
 +          96, 2006, "238102" },
 +        { "Palmer94a",
 +          "B. J. Palmer",
 +          "Transverse-current autocorrelation-function calculations of the shear viscosity for molecular liquids",
 +          "Phys. Rev. E",
 +          49, 1994, "359-366" },
 +        { "Bussi2007a",
 +          "G. Bussi, D. Donadio and M. Parrinello",
 +          "Canonical sampling through velocity rescaling",
 +          "J. Chem. Phys.",
 +          126, 2007, "014101" },
 +        { "Hub2006",
 +          "J. S. Hub and B. L. de Groot",
 +          "Does CO2 permeate through Aquaporin-1?",
 +          "Biophys. J.",
 +          91, 2006, "842-848" },
 +        { "Hub2008",
 +          "J. S. Hub and B. L. de Groot",
 +          "Mechanism of selectivity in aquaporins and aquaglyceroporins",
 +          "PNAS",
 +          105, 2008, "1198-1203" },
 +        { "Friedrich2009",
 +          "M. S. Friedrichs, P. Eastman, V. Vaidyanathan, M. Houston, S. LeGrand, A. L. Beberg, D. L. Ensign, C. M. Bruns, and V. S. Pande",
 +          "Accelerating Molecular Dynamic Simulation on Graphics Processing Units",
 +          "J. Comp. Chem.",
 +          30, 2009, "864-872" },
 +        { "Engin2010",
 +          "O. Engin, A. Villa, M. Sayar and B. Hess",
 +          "Driving Forces for Adsorption of Amphiphilic Peptides to Air-Water Interface",
 +          "J. Phys. Chem. B",
 +          114, 2010, "11093" },
 +        { "Fritsch12",
 +          "S. Fritsch, C. Junghans and K. Kremer",
 +          "Adaptive molecular simulation study on structure formation of toluene around C60 using Gromacs",
 +          "J. Chem. Theo. Comp.",
 +          8, 2012, "398" },
 +        { "Junghans10",
 +          "C. Junghans and S. Poblete",
 +          "A reference implementation of the adaptive resolution scheme in ESPResSo",
 +          "Comp. Phys. Comm.",
 +          181, 2010, "1449" },
 +        { "Wang2010",
 +          "H. Wang, F. Dommert, C.Holm",
 +          "Optimizing working parameters of the smooth particle mesh Ewald algorithm in terms of accuracy and efficiency",
 +          "J. Chem. Phys. B",
 +          133, 2010, "034117" },
 +        { "Sugita1999a",
 +          "Y. Sugita, Y. Okamoto",
 +          "Replica-exchange molecular dynamics method for protein folding",
 +          "Chem. Phys. Lett.",
 +          314, 1999, "141-151" },
 +        { "Kutzner2011",
 +          "C. Kutzner and J. Czub and H. Grubmuller",
 +          "Keep it Flexible: Driving Macromolecular Rotary Motions in Atomistic Simulations with GROMACS",
 +          "J. Chem. Theory Comput.",
 +          7, 2011, "1381-1393" },
 +        { "Hoefling2011",
 +          "M. Hoefling, N. Lima, D. Haenni, C.A.M. Seidel, B. Schuler, H. Grubmuller",
 +          "Structural Heterogeneity and Quantitative FRET Efficiency Distributions of Polyprolines through a Hybrid Atomistic Simulation and Monte Carlo Approach",
 +          "PLoS ONE",
 +          6, 2011, "e19791" },
 +        { "Hockney1988",
 +          "R. W. Hockney and J. W. Eastwood",
 +          "Computer simulation using particles",
 +          "IOP, Bristol",
 +          1, 1988, "1" },
 +        { "Ballenegger2012",
 +          "V. Ballenegger, J.J. Cerda, and C. Holm",
 +          "How to Convert SPME to P3M: Influence Functions and Error Estimates",
 +          "J. Chem. Theory Comput.",
 +          8, 2012, "936-947" },
 +        { "Garmay2012",
 +          "Garmay Yu, Shvetsov A, Karelov D, Lebedev D, Radulescu A, Petukhov M, Isaev-Ivanov V",
 +          "Correlated motion of protein subdomains and large-scale conformational flexibility of RecA protein filament",
 +          "Journal of Physics: Conference Series",
 +          340, 2012, "012094" }
 +    };
 +#define NSTR (int)asize(citedb)
 +
 +    int   index;
 +    char *author;
 +    char *title;
 +#define LINE_WIDTH 79
 +
 +    if (fp == NULL)
 +    {
 +        return;
 +    }
 +
 +    for (index = 0; (index < NSTR) && (strcmp(citedb[index].key, key) != 0); index++)
 +    {
 +        ;
 +    }
 +
 +    fprintf(fp, "\n++++ PLEASE READ AND CITE THE FOLLOWING REFERENCE ++++\n");
 +    if (index < NSTR)
 +    {
 +        /* Insert newlines */
 +        author = wrap_lines(citedb[index].author, LINE_WIDTH, 0, FALSE);
 +        title  = wrap_lines(citedb[index].title, LINE_WIDTH, 0, FALSE);
 +        fprintf(fp, "%s\n%s\n%s %d (%d) pp. %s\n",
 +                author, title, citedb[index].journal,
 +                citedb[index].volume, citedb[index].year,
 +                citedb[index].pages);
 +        sfree(author);
 +        sfree(title);
 +    }
 +    else
 +    {
 +        fprintf(fp, "Entry %s not found in citation database\n", key);
 +    }
 +    fprintf(fp, "-------- -------- --- Thank You --- -------- --------\n\n");
 +    fflush(fp);
 +}
 +
 +#ifdef GMX_GIT_VERSION_INFO
 +/* Version information generated at compile time. */
 +#include "gromacs/utility/gitversion.h"
 +#else
 +/* Fall back to statically defined version. */
 +static const char _gmx_ver_string[] = "VERSION " VERSION;
 +#endif
 +
 +const char *GromacsVersion()
 +{
 +    return _gmx_ver_string;
 +}
 +
 +const char *ShortProgram(void)
 +{
 +    try
 +    {
 +        // TODO: Use the display name once it doesn't break anything.
 +        return gmx::ProgramInfo::getInstance().programName().c_str();
 +    }
 +    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 +}
 +
 +const char *Program(void)
 +{
 +    try
 +    {
 +        return gmx::ProgramInfo::getInstance().fullBinaryPath().c_str();
 +    }
 +    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 +}
 +
 +
 +extern void gmx_print_version_info_gpu(FILE *fp);
 +
 +static void gmx_print_version_info(FILE *fp)
 +{
 +    fprintf(fp, "Gromacs version:    %s\n", _gmx_ver_string);
 +#ifdef GMX_GIT_VERSION_INFO
 +    fprintf(fp, "GIT SHA1 hash:      %s\n", _gmx_full_git_hash);
 +    /* Only print out the branch information if present.
 +     * The generating script checks whether the branch point actually
 +     * coincides with the hash reported above, and produces an empty string
 +     * in such cases. */
 +    if (_gmx_central_base_hash[0] != 0)
 +    {
 +        fprintf(fp, "Branched from:      %s\n", _gmx_central_base_hash);
 +    }
 +#endif
 +
 +#ifdef GMX_DOUBLE
 +    fprintf(fp, "Precision:          double\n");
 +#else
 +    fprintf(fp, "Precision:          single\n");
 +#endif
-     fprintf(fp, "Memory model:       %lu bit\n", 8*sizeof(void *));
++    fprintf(fp, "Memory model:       %u bit\n", (unsigned)(8*sizeof(void *)));
 +
 +#ifdef GMX_THREAD_MPI
 +    fprintf(fp, "MPI library:        thread_mpi\n");
 +#elif defined(GMX_MPI)
 +    fprintf(fp, "MPI library:        MPI\n");
 +#else
 +    fprintf(fp, "MPI library:        none\n");
 +#endif
 +#ifdef GMX_OPENMP
 +    fprintf(fp, "OpenMP support:     enabled\n");
 +#else
 +    fprintf(fp, "OpenMP support:     disabled\n");
 +#endif
 +#ifdef GMX_GPU
 +    fprintf(fp, "GPU support:        enabled\n");
 +#else
 +    fprintf(fp, "GPU support:        disabled\n");
 +#endif
 +    /* A preprocessor trick to avoid duplicating logic from vec.h */
 +#define gmx_stringify2(x) #x
 +#define gmx_stringify(x) gmx_stringify2(x)
 +    fprintf(fp, "invsqrt routine:    %s\n", gmx_stringify(gmx_invsqrt(x)));
 +    fprintf(fp, "CPU acceleration:   %s\n", GMX_CPU_ACCELERATION_STRING);
 +
 +    fprintf(fp, "FFT library:        %s\n", gmx_fft_get_version_info());
 +#ifdef HAVE_RDTSCP
 +    fprintf(fp, "RDTSCP usage:       enabled\n");
 +#else
 +    fprintf(fp, "RDTSCP usage:       disabled\n");
 +#endif
 +
 +    fprintf(fp, "Built on:           %s\n", BUILD_TIME);
 +    fprintf(fp, "Built by:           %s\n", BUILD_USER);
 +    fprintf(fp, "Build OS/arch:      %s\n", BUILD_HOST);
 +    fprintf(fp, "Build CPU vendor:   %s\n", BUILD_CPU_VENDOR);
 +    fprintf(fp, "Build CPU brand:    %s\n", BUILD_CPU_BRAND);
 +    fprintf(fp, "Build CPU family:   %d   Model: %d   Stepping: %d\n",
 +            BUILD_CPU_FAMILY, BUILD_CPU_MODEL, BUILD_CPU_STEPPING);
 +    /* TODO: The below strings can be quite long, so it would be nice to wrap
 +     * them. Can wait for later, as the master branch has ready code to do all
 +     * that. */
 +    fprintf(fp, "Build CPU features: %s\n", BUILD_CPU_FEATURES);
 +    fprintf(fp, "C compiler:         %s\n", BUILD_C_COMPILER);
 +    fprintf(fp, "C compiler flags:   %s\n", BUILD_CFLAGS);
 +    fprintf(fp, "C++ compiler:       %s\n", BUILD_CXX_COMPILER);
 +    fprintf(fp, "C++ compiler flags: %s\n", BUILD_CXXFLAGS);
 +#ifdef HAVE_LIBMKL
 +    /* MKL might be used for LAPACK/BLAS even if FFTs use FFTW, so keep it separate */
 +    fprintf(fp, "Linked with Intel MKL version %d.%d.%d.\n",
 +            __INTEL_MKL__, __INTEL_MKL_MINOR__, __INTEL_MKL_UPDATE__);
 +#endif
 +#ifdef GMX_GPU
 +    gmx_print_version_info_gpu(fp);
 +#endif
 +}
 +
 +namespace gmx
 +{
 +
 +BinaryInformationSettings::BinaryInformationSettings()
 +    : bExtendedInfo_(false), bCopyright_(false),
 +      bGeneratedByHeader_(false), prefix_(""), suffix_("")
 +{
 +}
 +
 +void printBinaryInformation(FILE *fp, const ProgramInfo &programInfo)
 +{
 +    printBinaryInformation(fp, programInfo, BinaryInformationSettings());
 +}
 +
 +void printBinaryInformation(FILE *fp, const ProgramInfo &programInfo,
 +                            const BinaryInformationSettings &settings)
 +{
 +    const char *prefix          = settings.prefix_;
 +    const char *suffix          = settings.suffix_;
 +    const char *precisionString = "";
 +#ifdef GMX_DOUBLE
 +    precisionString = " (double precision)";
 +#endif
 +    const std::string &name = programInfo.displayName();
 +    if (settings.bGeneratedByHeader_)
 +    {
 +        fprintf(fp, "%sCreated by:%s\n", prefix, suffix);
 +    }
 +    if (settings.bCopyright_)
 +    {
 +        GMX_RELEASE_ASSERT(prefix[0] == '\0' && suffix[0] == '\0',
 +                           "Prefix/suffix not supported with copyright");
 +        // This line is printed again after the copyright notice to make it
 +        // appear together with all the other information, so that it is not
 +        // necessary to read stuff above the copyright notice.
 +        // The line above the copyright notice puts the copyright notice is
 +        // context, though.
 +        // TODO: It would be nice to know here whether we are really running a
 +        // Gromacs binary or some other binary that is calling Gromacs; we
 +        // could then print "%s is part of GROMACS" or some alternative text.
 +        fprintf(fp, "%sGROMACS:    %s, %s%s%s\n", prefix, name.c_str(),
 +                GromacsVersion(), precisionString, suffix);
 +        fprintf(fp, "\n");
 +        printCopyright(fp);
 +        fprintf(fp, "\n");
 +    }
 +    fprintf(fp, "%sGROMACS:    %s, %s%s%s\n", prefix, name.c_str(),
 +            GromacsVersion(), precisionString, suffix);
 +    fprintf(fp, "%sExecutable: %s%s\n", prefix,
 +            programInfo.fullBinaryPath().c_str(), suffix);
 +    fprintf(fp, "%sCommand line:%s\n%s  %s%s\n",
 +            prefix, suffix, prefix, programInfo.commandLine().c_str(), suffix);
 +    if (settings.bExtendedInfo_)
 +    {
 +        GMX_RELEASE_ASSERT(prefix[0] == '\0' && suffix[0] == '\0',
 +                           "Prefix/suffix not supported with extended info");
 +        fprintf(fp, "\n");
 +        gmx_print_version_info(fp);
 +    }
 +}
 +
 +} // namespace gmx
diff --cc src/gromacs/gmxlib/gmx_detect_hardware.c
index 26506521f7,0000000000..4332d1a701
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/gmx_detect_hardware.c
+++ b/src/gromacs/gmxlib/gmx_detect_hardware.c
@@@ -1,836 -1,0 +1,836 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdlib.h>
 +#include <assert.h>
 +#include <string.h>
 +
 +#ifdef HAVE_UNISTD_H
 +/* For sysconf */
 +#include <unistd.h>
 +#endif
 +
 +#include "types/enums.h"
 +#include "types/hw_info.h"
 +#include "types/commrec.h"
 +#include "gmx_fatal.h"
 +#include "gmx_fatal_collective.h"
 +#include "smalloc.h"
 +#include "gpu_utils.h"
 +#include "copyrite.h"
 +#include "gmx_detect_hardware.h"
 +#include "main.h"
 +#include "md_logging.h"
 +#include "gromacs/utility/gmxomp.h"
 +
 +#include "thread_mpi/threads.h"
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <windows.h>
 +#endif
 +
 +#ifdef GMX_GPU
 +const gmx_bool bGPUBinary = TRUE;
 +#else
 +const gmx_bool bGPUBinary = FALSE;
 +#endif
 +
 +static const char * invalid_gpuid_hint =
 +    "A delimiter-free sequence of valid numeric IDs of available GPUs is expected.";
 +
 +/* The globally shared hwinfo structure. */
 +static gmx_hw_info_t      *hwinfo_g;
 +/* A reference counter for the hwinfo structure */
 +static int                 n_hwinfo = 0;
 +/* A lock to protect the hwinfo structure */
 +static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER;
 +
 +
 +/* FW decl. */
 +static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count);
 +static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
 +                                    const gmx_gpu_opt_t  *gpu_opt);
 +
 +static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info)
 +{
 +    int      i, ndev;
 +    char     stmp[STRLEN];
 +
 +    ndev = gpu_info->ncuda_dev;
 +
 +    sbuf[0] = '\0';
 +    for (i = 0; i < ndev; i++)
 +    {
 +        get_gpu_device_info_string(stmp, gpu_info, i);
 +        strcat(sbuf, "  ");
 +        strcat(sbuf, stmp);
 +        if (i < ndev - 1)
 +        {
 +            strcat(sbuf, "\n");
 +        }
 +    }
 +}
 +
 +static void print_gpu_detection_stats(FILE                 *fplog,
 +                                      const gmx_gpu_info_t *gpu_info,
 +                                      const t_commrec      *cr)
 +{
 +    char onhost[266], stmp[STRLEN];
 +    int  ngpu;
 +
 +    if (!gpu_info->bDetectGPUs)
 +    {
 +        /* We skipped the detection, so don't print detection stats */
 +        return;
 +    }
 +
 +    ngpu = gpu_info->ncuda_dev;
 +
 +#if defined GMX_MPI && !defined GMX_THREAD_MPI
 +    /* We only print the detection on one, of possibly multiple, nodes */
 +    strncpy(onhost, " on host ", 10);
 +    gmx_gethostname(onhost+9, 256);
 +#else
 +    /* We detect all relevant GPUs */
 +    strncpy(onhost, "", 1);
 +#endif
 +
 +    if (ngpu > 0)
 +    {
 +        sprint_gpus(stmp, gpu_info);
 +        md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n",
 +                      ngpu, (ngpu > 1) ? "s" : "", onhost, stmp);
 +    }
 +    else
 +    {
 +        md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost);
 +    }
 +}
 +
 +static void print_gpu_use_stats(FILE                 *fplog,
 +                                const gmx_gpu_info_t *gpu_info,
 +                                const gmx_gpu_opt_t  *gpu_opt,
 +                                const t_commrec      *cr)
 +{
 +    char sbuf[STRLEN], stmp[STRLEN];
 +    int  i, ngpu_comp, ngpu_use;
 +
 +    ngpu_comp = gpu_info->ncuda_dev_compatible;
 +    ngpu_use  = gpu_opt->ncuda_dev_use;
 +
 +    /* Issue a note if GPUs are available but not used */
 +    if (ngpu_comp > 0 && ngpu_use < 1)
 +    {
 +        sprintf(sbuf,
 +                "%d compatible GPU%s detected in the system, but none will be used.\n"
 +                "Consider trying GPU acceleration with the Verlet scheme!",
 +                ngpu_comp, (ngpu_comp > 1) ? "s" : "");
 +    }
 +    else
 +    {
 +        int ngpu_use_uniq;
 +
 +        ngpu_use_uniq = gmx_count_gpu_dev_unique(gpu_info, gpu_opt);
 +
 +        sprintf(sbuf, "%d GPU%s %sselected for this run.\n"
 +                "Mapping of GPU%s to the %d PP rank%s in this node: ",
 +                ngpu_use_uniq, (ngpu_use_uniq > 1) ? "s" : "",
 +                gpu_opt->bUserSet ? "user-" : "auto-",
 +                (ngpu_use > 1) ? "s" : "",
 +                cr->nrank_pp_intranode,
 +                (cr->nrank_pp_intranode > 1) ? "s" : "");
 +
 +        for (i = 0; i < ngpu_use; i++)
 +        {
 +            sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, gpu_opt, i));
 +            if (i < ngpu_use - 1)
 +            {
 +                strcat(stmp, ", ");
 +            }
 +            strcat(sbuf, stmp);
 +        }
 +    }
 +    md_print_info(cr, fplog, "%s\n\n", sbuf);
 +}
 +
 +/* Parse a "plain" GPU ID string which contains a sequence of digits corresponding
 + * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment. */
 +static void parse_gpu_id_plain_string(const char *idstr, int *nid, int **idlist)
 +{
 +    int i;
 +
 +    *nid = strlen(idstr);
 +
 +    snew(*idlist, *nid);
 +
 +    for (i = 0; i < *nid; i++)
 +    {
 +        if (idstr[i] < '0' || idstr[i] > '9')
 +        {
 +            gmx_fatal(FARGS, "Invalid character in GPU ID string: '%c'\n%s\n",
 +                      idstr[i], invalid_gpuid_hint);
 +        }
 +        (*idlist)[i] = idstr[i] - '0';
 +    }
 +}
 +
 +static void parse_gpu_id_csv_string(const char gmx_unused *idstr, int gmx_unused *nid, int gmx_unused *idlist)
 +{
 +    /* XXX implement cvs format to support more than 10 different GPUs in a box. */
 +    gmx_incons("Not implemented yet");
 +}
 +
 +void gmx_check_hw_runconf_consistency(FILE                *fplog,
 +                                      const gmx_hw_info_t *hwinfo,
 +                                      const t_commrec     *cr,
 +                                      const gmx_hw_opt_t  *hw_opt,
 +                                      gmx_bool             bUseGPU)
 +{
 +    int      npppn, ntmpi_pp;
 +    char     sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN];
 +    gmx_bool btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU;
 +
 +    assert(hwinfo);
 +    assert(cr);
 +
 +    /* Below we only do consistency checks for PP and GPUs,
 +     * this is irrelevant for PME only nodes, so in that case we return
 +     * here.
 +     */
 +    if (!(cr->duty & DUTY_PP))
 +    {
 +        return;
 +    }
 +
 +    btMPI         = bMPI = FALSE;
 +    bNthreadsAuto = FALSE;
 +#if defined(GMX_THREAD_MPI)
 +    btMPI         = TRUE;
 +    bNthreadsAuto = (hw_opt->nthreads_tmpi < 1);
 +#elif defined(GMX_LIB_MPI)
 +    bMPI  = TRUE;
 +#endif
 +
 +    /* GPU emulation detection is done later, but we need here as well
 +     * -- uncool, but there's no elegant workaround */
 +    bEmulateGPU       = (getenv("GMX_EMULATE_GPU") != NULL);
 +    bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL);
 +
 +    /* check the acceleration mdrun is compiled with against hardware
 +       capabilities */
 +    /* TODO: Here we assume homogeneous hardware which is not necessarily
 +             the case! Might not hurt to add an extra check over MPI. */
 +    gmx_cpuid_acceleration_check(hwinfo->cpuid_info, fplog, SIMMASTER(cr));
 +
 +    /* NOTE: this print is only for and on one physical node */
 +    print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr);
 +
 +    if (hwinfo->gpu_info.ncuda_dev_compatible > 0)
 +    {
 +        /* NOTE: this print is only for and on one physical node */
 +        print_gpu_use_stats(fplog, &hwinfo->gpu_info, &hw_opt->gpu_opt, cr);
 +    }
 +
 +    /* Need to ensure that we have enough GPUs:
 +     * - need one GPU per PP node
 +     * - no GPU oversubscription with tMPI
 +     * */
 +    /* number of PP processes per node */
 +    npppn = cr->nrank_pp_intranode;
 +
 +    pernode[0]           = '\0';
 +    th_or_proc_plural[0] = '\0';
 +    if (btMPI)
 +    {
 +        sprintf(th_or_proc, "thread-MPI thread");
 +        if (npppn > 1)
 +        {
 +            sprintf(th_or_proc_plural, "s");
 +        }
 +    }
 +    else if (bMPI)
 +    {
 +        sprintf(th_or_proc, "MPI process");
 +        if (npppn > 1)
 +        {
 +            sprintf(th_or_proc_plural, "es");
 +        }
 +        sprintf(pernode, " per node");
 +    }
 +    else
 +    {
 +        /* neither MPI nor tMPI */
 +        sprintf(th_or_proc, "process");
 +    }
 +
 +    if (bUseGPU && hwinfo->gpu_info.ncuda_dev_compatible > 0 &&
 +        !bEmulateGPU)
 +    {
 +        int  ngpu_comp, ngpu_use;
 +        char gpu_comp_plural[2], gpu_use_plural[2];
 +
 +        ngpu_comp = hwinfo->gpu_info.ncuda_dev_compatible;
 +        ngpu_use  = hw_opt->gpu_opt.ncuda_dev_use;
 +
 +        sprintf(gpu_comp_plural, "%s", (ngpu_comp> 1) ? "s" : "");
 +        sprintf(gpu_use_plural,  "%s", (ngpu_use > 1) ? "s" : "");
 +
 +        /* number of tMPI threads auto-adjusted */
 +        if (btMPI && bNthreadsAuto)
 +        {
 +            if (hw_opt->gpu_opt.bUserSet && npppn < ngpu_use)
 +            {
 +                /* The user manually provided more GPUs than threads we
 +                   could automatically start. */
 +                gmx_fatal(FARGS,
 +                          "%d GPU%s provided, but only %d PP thread-MPI thread%s coud be started.\n"
 +                          "%s requires one PP tread-MPI thread per GPU; use fewer GPUs%s.",
 +                          ngpu_use, gpu_use_plural,
 +                          npppn, th_or_proc_plural,
 +                          ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : "");
 +            }
 +
 +            if (!hw_opt->gpu_opt.bUserSet && npppn < ngpu_comp)
 +            {
 +                /* There are more GPUs than tMPI threads; we have
 +                   limited the number GPUs used. */
 +                md_print_warn(cr, fplog,
 +                              "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n"
 +                              "      %s can use one GPU per PP tread-MPI thread, so only %d GPU%s will be used.%s\n",
 +                              ngpu_comp, gpu_comp_plural,
 +                              npppn, th_or_proc_plural,
 +                              ShortProgram(), npppn,
 +                              npppn > 1 ? "s" : "",
 +                              bMaxMpiThreadsSet ? "\n      Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : "");
 +            }
 +        }
 +
 +        if (hw_opt->gpu_opt.bUserSet)
 +        {
 +            if (ngpu_use != npppn)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
 +                          "%s was started with %d PP %s%s%s, but you provided %d GPU%s.",
 +                          th_or_proc, btMPI ? "s" : "es", pernode,
 +                          ShortProgram(), npppn, th_or_proc,
 +                          th_or_proc_plural, pernode,
 +                          ngpu_use, gpu_use_plural);
 +            }
 +        }
 +        else
 +        {
 +            if (ngpu_comp > npppn)
 +            {
 +                md_print_warn(cr, fplog,
 +                              "NOTE: potentially sub-optimal launch configuration, %s started with less\n"
 +                              "      PP %s%s%s than GPU%s available.\n"
 +                              "      Each PP %s can use only one GPU, %d GPU%s%s will be used.\n",
 +                              ShortProgram(), th_or_proc,
 +                              th_or_proc_plural, pernode, gpu_comp_plural,
 +                              th_or_proc, npppn, gpu_use_plural, pernode);
 +            }
 +
 +            if (ngpu_use != npppn)
 +            {
 +                /* Avoid duplicate error messages.
 +                 * Unfortunately we can only do this at the physical node
 +                 * level, since the hardware setup and MPI process count
 +                 * might differ between physical nodes.
 +                 */
 +                if (cr->rank_pp_intranode == 0)
 +                {
 +                    gmx_fatal(FARGS,
 +                              "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n"
 +                              "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.",
 +                              th_or_proc, btMPI ? "s" : "es", pernode,
 +                              ShortProgram(), npppn, th_or_proc,
 +                              th_or_proc_plural, pernode,
 +                              ngpu_use, gpu_use_plural);
 +                }
 +            }
 +        }
 +
 +        {
 +            int      same_count;
 +
 +            same_count = gmx_count_gpu_dev_shared(&hw_opt->gpu_opt);
 +
 +            if (same_count > 0)
 +            {
 +                md_print_info(cr, fplog,
 +                              "NOTE: You assigned %s to multiple %s%s.\n",
 +                              same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es");
 +            }
 +        }
 +    }
 +
 +#ifdef GMX_MPI
 +    if (PAR(cr))
 +    {
 +        /* Avoid other ranks to continue after
 +           inconsistency */
 +        MPI_Barrier(cr->mpi_comm_mygroup);
 +    }
 +#endif
 +
 +}
 +
 +/* Return 0 if none of the GPU (per node) are shared among PP ranks.
 + *
 + * Sharing GPUs among multiple PP ranks is possible when the user passes
 + * GPU IDs. Here we check for sharing and return a non-zero value when
 + * this is detected. Note that the return value represents the number of
 + * PP rank pairs that share a device.
 + */
 +int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt)
 +{
 +    int      same_count    = 0;
 +    int      ngpu          = gpu_opt->ncuda_dev_use;
 +
 +    if (gpu_opt->bUserSet)
 +    {
 +        int      i, j;
 +
 +        for (i = 0; i < ngpu - 1; i++)
 +        {
 +            for (j = i + 1; j < ngpu; j++)
 +            {
 +                same_count      += (gpu_opt->cuda_dev_use[i] ==
 +                                    gpu_opt->cuda_dev_use[j]);
 +            }
 +        }
 +    }
 +
 +    return same_count;
 +}
 +
 +/* Count and return the number of unique GPUs (per node) selected.
 + *
 + * As sharing GPUs among multiple PP ranks is possible when the user passes
 + * GPU IDs, the number of GPUs user (per node) can be different from the
 + * number of GPU IDs selected.
 + */
 +static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info,
 +                                    const gmx_gpu_opt_t  *gpu_opt)
 +{
 +    int  i, uniq_count, ngpu;
 +    int *uniq_ids;
 +
 +    assert(gpu_info);
 +    assert(gpu_opt);
 +
 +    ngpu        = gpu_info->ncuda_dev;
 +    uniq_count  = 0;
 +
 +    snew(uniq_ids, ngpu);
 +
 +    /* Each element in uniq_ids will be set to 0 or 1. The n-th element set
 +     * to 1 indicates that the respective GPU was selected to be used. */
 +    for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
 +    {
 +        uniq_ids[get_gpu_device_id(gpu_info, gpu_opt, i)] = 1;
 +    }
 +    /* Count the devices used. */
 +    for (i = 0; i < ngpu; i++)
 +    {
 +        uniq_count += uniq_ids[i];
 +    }
 +
 +    sfree(uniq_ids);
 +
 +    return uniq_count;
 +}
 +
 +
 +/* Return the number of hardware threads supported by the current CPU.
 + * We assume that this is equal with the number of CPUs reported to be
 + * online by the OS at the time of the call.
 + */
 +static int get_nthreads_hw_avail(FILE gmx_unused *fplog, const t_commrec gmx_unused *cr)
 +{
 +    int ret = 0;
 +
 +#if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__)))
 +    /* Windows */
 +    SYSTEM_INFO sysinfo;
 +    GetSystemInfo( &sysinfo );
 +    ret = sysinfo.dwNumberOfProcessors;
 +#elif defined HAVE_SYSCONF
 +    /* We are probably on Unix.
 +     * Now check if we have the argument to use before executing the call
 +     */
 +#if defined(_SC_NPROCESSORS_ONLN)
 +    ret = sysconf(_SC_NPROCESSORS_ONLN);
 +#elif defined(_SC_NPROC_ONLN)
 +    ret = sysconf(_SC_NPROC_ONLN);
 +#elif defined(_SC_NPROCESSORS_CONF)
 +    ret = sysconf(_SC_NPROCESSORS_CONF);
 +#elif defined(_SC_NPROC_CONF)
 +    ret = sysconf(_SC_NPROC_CONF);
 +#else
 +#warning "No valid sysconf argument value found. Executables will not be able to determine the number of hardware threads: mdrun will use 1 thread by default!"
 +#endif /* End of check for sysconf argument values */
 +
 +#else
 +    /* Neither windows nor Unix. No fscking idea how many CPUs we have! */
 +    ret = -1;
 +#endif
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "Detected %d processors, will use this as the number "
 +                "of supported hardware threads.\n", ret);
 +    }
 +
 +#ifdef GMX_OPENMP
 +    if (ret != gmx_omp_get_num_procs())
 +    {
 +        md_print_warn(cr, fplog,
 +                      "Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n"
 +                      "Consider setting the launch configuration manually!",
 +                      ret, gmx_omp_get_num_procs());
 +    }
 +#endif
 +
 +    return ret;
 +}
 +
 +static void gmx_detect_gpus(FILE *fplog, const t_commrec *cr)
 +{
 +#ifdef GMX_LIB_MPI
 +    int              rank_world;
 +    MPI_Comm         physicalnode_comm;
 +#endif
 +    int              rank_local;
 +
 +    /* Under certain circumstances MPI ranks on the same physical node
 +     * can not simultaneously access the same GPU(s). Therefore we run
 +     * the detection only on one MPI rank per node and broadcast the info.
 +     * Note that with thread-MPI only a single thread runs this code.
 +     *
 +     * TODO: We should also do CPU hardware detection only once on each
 +     * physical node and broadcast it, instead of do it on every MPI rank.
 +     */
 +#ifdef GMX_LIB_MPI
 +    /* A split of MPI_COMM_WORLD over physical nodes is only required here,
 +     * so we create and destroy it locally.
 +     */
 +    MPI_Comm_rank(MPI_COMM_WORLD, &rank_world);
 +    MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(),
 +                   rank_world, &physicalnode_comm);
 +    MPI_Comm_rank(physicalnode_comm, &rank_local);
 +#else
 +    /* Here there should be only one process, check this */
 +    assert(cr->nnodes == 1 && cr->sim_nodeid == 0);
 +
 +    rank_local = 0;
 +#endif
 +
 +    if (rank_local == 0)
 +    {
-         char detection_error[STRLEN], sbuf[STRLEN];
++        char detection_error[STRLEN] = "", sbuf[STRLEN];
 +
 +        if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0)
 +        {
 +            if (detection_error != NULL && detection_error[0] != '\0')
 +            {
 +                sprintf(sbuf, ":\n      %s\n", detection_error);
 +            }
 +            else
 +            {
 +                sprintf(sbuf, ".");
 +            }
 +            md_print_warn(cr, fplog,
 +                          "NOTE: Error occurred during GPU detection%s"
 +                          "      Can not use GPU acceleration, will fall back to CPU kernels.\n",
 +                          sbuf);
 +        }
 +    }
 +
 +#ifdef GMX_LIB_MPI
 +    /* Broadcast the GPU info to the other ranks within this node */
 +    MPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev, 1, MPI_INT, 0, physicalnode_comm);
 +
 +    if (hwinfo_g->gpu_info.ncuda_dev > 0)
 +    {
 +        int cuda_dev_size;
 +
 +        cuda_dev_size = hwinfo_g->gpu_info.ncuda_dev*sizeof_cuda_dev_info();
 +
 +        if (rank_local > 0)
 +        {
 +            hwinfo_g->gpu_info.cuda_dev =
 +                (cuda_dev_info_ptr_t)malloc(cuda_dev_size);
 +        }
 +        MPI_Bcast(hwinfo_g->gpu_info.cuda_dev, cuda_dev_size, MPI_BYTE,
 +                  0, physicalnode_comm);
 +        MPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev_compatible, 1, MPI_INT,
 +                  0, physicalnode_comm);
 +    }
 +
 +    MPI_Comm_free(&physicalnode_comm);
 +#endif
 +}
 +
 +gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr,
 +                                   gmx_bool bDetectGPUs)
 +{
 +    gmx_hw_info_t   *hw;
 +    int              ret;
 +
 +    /* make sure no one else is doing the same thing */
 +    ret = tMPI_Thread_mutex_lock(&hw_info_lock);
 +    if (ret != 0)
 +    {
 +        gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
 +    }
 +
 +    /* only initialize the hwinfo structure if it is not already initalized */
 +    if (n_hwinfo == 0)
 +    {
 +        snew(hwinfo_g, 1);
 +
 +        /* detect CPUID info; no fuss, we don't detect system-wide
 +         * -- sloppy, but that's it for now */
 +        if (gmx_cpuid_init(&hwinfo_g->cpuid_info) != 0)
 +        {
 +            gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!");
 +        }
 +
 +        /* detect number of hardware threads */
 +        hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr);
 +
 +        /* detect GPUs */
 +        hwinfo_g->gpu_info.ncuda_dev            = 0;
 +        hwinfo_g->gpu_info.cuda_dev             = NULL;
 +        hwinfo_g->gpu_info.ncuda_dev_compatible = 0;
 +
 +        /* Run the detection if the binary was compiled with GPU support
 +         * and we requested detection.
 +         */
 +        hwinfo_g->gpu_info.bDetectGPUs =
 +            (bGPUBinary && bDetectGPUs &&
 +             getenv("GMX_DISABLE_GPU_DETECTION") == NULL);
 +        if (hwinfo_g->gpu_info.bDetectGPUs)
 +        {
 +            gmx_detect_gpus(fplog, cr);
 +        }
 +    }
 +    /* increase the reference counter */
 +    n_hwinfo++;
 +
 +    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
 +    if (ret != 0)
 +    {
 +        gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
 +    }
 +
 +    return hwinfo_g;
 +}
 +
 +void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
 +{
 +    char *env;
 +
 +    if (gpu_opt->gpu_id != NULL && !bGPUBinary)
 +    {
 +        gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram());
 +    }
 +
 +    env = getenv("GMX_GPU_ID");
 +    if (env != NULL && gpu_opt->gpu_id != NULL)
 +    {
 +        gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time");
 +    }
 +    if (env == NULL)
 +    {
 +        env = gpu_opt->gpu_id;
 +    }
 +
 +    /* parse GPU IDs if the user passed any */
 +    if (env != NULL)
 +    {
 +        parse_gpu_id_plain_string(env,
 +                                  &gpu_opt->ncuda_dev_use,
 +                                  &gpu_opt->cuda_dev_use);
 +
 +        if (gpu_opt->ncuda_dev_use == 0)
 +        {
 +            gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
 +                      invalid_gpuid_hint);
 +        }
 +
 +        gpu_opt->bUserSet = TRUE;
 +    }
 +}
 +
 +void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr,
 +                        const gmx_gpu_info_t *gpu_info,
 +                        gmx_bool bForceUseGPU,
 +                        gmx_gpu_opt_t *gpu_opt)
 +{
 +    int              i;
 +    const char      *env;
 +    char             sbuf[STRLEN], stmp[STRLEN];
 +
 +    /* Bail if binary is not compiled with GPU acceleration, but this is either
 +     * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */
 +    if (bForceUseGPU && !bGPUBinary)
 +    {
 +        gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram());
 +    }
 +
 +    if (gpu_opt->bUserSet)
 +    {
 +        /* Check the GPU IDs passed by the user.
 +         * (GPU IDs have been parsed by gmx_parse_gpu_ids before)
 +         */
 +        int *checkres;
 +        int  res;
 +
 +        snew(checkres, gpu_opt->ncuda_dev_use);
 +
 +        res = check_selected_cuda_gpus(checkres, gpu_info, gpu_opt);
 +
 +        if (!res)
 +        {
 +            print_gpu_detection_stats(fplog, gpu_info, cr);
 +
 +            sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n");
 +            for (i = 0; i < gpu_opt->ncuda_dev_use; i++)
 +            {
 +                if (checkres[i] != egpuCompatible)
 +                {
 +                    sprintf(stmp, "    GPU #%d: %s\n",
 +                            gpu_opt->cuda_dev_use[i],
 +                            gpu_detect_res_str[checkres[i]]);
 +                    strcat(sbuf, stmp);
 +                }
 +            }
 +            gmx_fatal(FARGS, "%s", sbuf);
 +        }
 +
 +        sfree(checkres);
 +    }
 +    else
 +    {
 +        pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt);
 +
 +        if (gpu_opt->ncuda_dev_use > cr->nrank_pp_intranode)
 +        {
 +            /* We picked more GPUs than we can use: limit the number.
 +             * We print detailed messages about this later in
 +             * gmx_check_hw_runconf_consistency.
 +             */
 +            limit_num_gpus_used(gpu_opt, cr->nrank_pp_intranode);
 +        }
 +
 +        gpu_opt->bUserSet = FALSE;
 +    }
 +
 +    /* If the user asked for a GPU, check whether we have a GPU */
 +    if (bForceUseGPU && gpu_info->ncuda_dev_compatible == 0)
 +    {
 +        gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected.");
 +    }
 +}
 +
 +static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count)
 +{
 +    int ndev_use;
 +
 +    assert(gpu_opt);
 +
 +    ndev_use = gpu_opt->ncuda_dev_use;
 +
 +    if (count > ndev_use)
 +    {
 +        /* won't increase the # of GPUs */
 +        return;
 +    }
 +
 +    if (count < 1)
 +    {
 +        char sbuf[STRLEN];
 +        sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!",
 +                ndev_use, count);
 +        gmx_incons(sbuf);
 +    }
 +
 +    /* TODO: improve this implementation: either sort GPUs or remove the weakest here */
 +    gpu_opt->ncuda_dev_use = count;
 +}
 +
 +void gmx_hardware_info_free(gmx_hw_info_t *hwinfo)
 +{
 +    int ret;
 +
 +    ret = tMPI_Thread_mutex_lock(&hw_info_lock);
 +    if (ret != 0)
 +    {
 +        gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno));
 +    }
 +
 +    /* decrease the reference counter */
 +    n_hwinfo--;
 +
 +
 +    if (hwinfo != hwinfo_g)
 +    {
 +        gmx_incons("hwinfo < hwinfo_g");
 +    }
 +
 +    if (n_hwinfo < 0)
 +    {
 +        gmx_incons("n_hwinfo < 0");
 +    }
 +
 +    if (n_hwinfo == 0)
 +    {
 +        gmx_cpuid_done(hwinfo_g->cpuid_info);
 +        free_gpu_info(&hwinfo_g->gpu_info);
 +        sfree(hwinfo_g);
 +    }
 +
 +    ret = tMPI_Thread_mutex_unlock(&hw_info_lock);
 +    if (ret != 0)
 +    {
 +        gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno));
 +    }
 +}
diff --cc src/gromacs/gmxlib/main.cpp
index 69c0b752cd,0000000000..4ed60e6212
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/main.cpp
+++ b/src/gromacs/gmxlib/main.cpp
@@@ -1,471 -1,0 +1,472 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <limits.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "network.h"
 +#include "main.h"
 +#include "macros.h"
 +#include "gromacs/fileio/futil.h"
 +#include "gromacs/fileio/filenm.h"
 +#include "gromacs/fileio/gmxfio.h"
 +#include "string2.h"
 +#include "copyrite.h"
 +
 +#include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/gmxmpi.h"
 +#include "gromacs/utility/programinfo.h"
 +
 +/* The source code in this file should be thread-safe.
 +         Please keep it that way. */
 +
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <process.h>
 +#endif
 +
 +#define BUFSIZE 1024
 +
 +
 +static void par_fn(char *base, int ftp, const t_commrec *cr,
 +                   gmx_bool bAppendSimId, gmx_bool bAppendNodeId,
 +                   char buf[], int bufsize)
 +{
 +    if ((size_t)bufsize < (strlen(base)+10))
 +    {
 +        gmx_mem("Character buffer too small!");
 +    }
 +
 +    /* Copy to buf, and strip extension */
 +    strcpy(buf, base);
 +    buf[strlen(base) - strlen(ftp2ext(fn2ftp(base))) - 1] = '\0';
 +
 +    if (bAppendSimId)
 +    {
 +        sprintf(buf+strlen(buf), "%d", cr->ms->sim);
 +    }
 +    if (bAppendNodeId)
 +    {
 +        strcat(buf, "_node");
 +        sprintf(buf+strlen(buf), "%d", cr->nodeid);
 +    }
 +    strcat(buf, ".");
 +
 +    /* Add extension again */
 +    strcat(buf, (ftp == efTPX) ? "tpr" : (ftp == efEDR) ? "edr" : ftp2ext(ftp));
 +    if (debug)
 +    {
 +        fprintf(debug, "node %d par_fn '%s'\n", cr->nodeid, buf);
 +        if (fn2ftp(buf) == efLOG)
 +        {
 +            fprintf(debug, "log\n");
 +        }
 +    }
 +}
 +
 +void check_multi_int(FILE *log, const gmx_multisim_t *ms, int val,
 +                     const char *name,
 +                     gmx_bool bQuiet)
 +{
 +    int     *ibuf, p;
 +    gmx_bool bCompatible;
 +
 +    if (NULL != log && !bQuiet)
 +    {
 +        fprintf(log, "Multi-checking %s ... ", name);
 +    }
 +
 +    if (ms == NULL)
 +    {
 +        gmx_fatal(FARGS,
 +                  "check_multi_int called with a NULL communication pointer");
 +    }
 +
 +    snew(ibuf, ms->nsim);
 +    ibuf[ms->sim] = val;
 +    gmx_sumi_sim(ms->nsim, ibuf, ms);
 +
 +    bCompatible = TRUE;
 +    for (p = 1; p < ms->nsim; p++)
 +    {
 +        bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +    }
 +
 +    if (bCompatible)
 +    {
 +        if (NULL != log && !bQuiet)
 +        {
 +            fprintf(log, "OK\n");
 +        }
 +    }
 +    else
 +    {
 +        if (NULL != log)
 +        {
 +            fprintf(log, "\n%s is not equal for all subsystems\n", name);
 +            for (p = 0; p < ms->nsim; p++)
 +            {
 +                fprintf(log, "  subsystem %d: %d\n", p, ibuf[p]);
 +            }
 +        }
 +        gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->nsim);
 +    }
 +
 +    sfree(ibuf);
 +}
 +
 +void check_multi_int64(FILE *log, const gmx_multisim_t *ms,
 +                       gmx_int64_t val, const char *name,
 +                       gmx_bool bQuiet)
 +{
 +    gmx_int64_t      *ibuf;
 +    int               p;
 +    gmx_bool          bCompatible;
 +
 +    if (NULL != log && !bQuiet)
 +    {
 +        fprintf(log, "Multi-checking %s ... ", name);
 +    }
 +
 +    if (ms == NULL)
 +    {
 +        gmx_fatal(FARGS,
 +                  "check_multi_int called with a NULL communication pointer");
 +    }
 +
 +    snew(ibuf, ms->nsim);
 +    ibuf[ms->sim] = val;
 +    gmx_sumli_sim(ms->nsim, ibuf, ms);
 +
 +    bCompatible = TRUE;
 +    for (p = 1; p < ms->nsim; p++)
 +    {
 +        bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +    }
 +
 +    if (bCompatible)
 +    {
 +        if (NULL != log && !bQuiet)
 +        {
 +            fprintf(log, "OK\n");
 +        }
 +    }
 +    else
 +    {
 +        if (NULL != log)
 +        {
 +            fprintf(log, "\n%s is not equal for all subsystems\n", name);
 +            for (p = 0; p < ms->nsim; p++)
 +            {
 +                char strbuf[255];
 +                /* first make the format string */
 +                snprintf(strbuf, 255, "  subsystem %%d: %s\n",
 +                         "%" GMX_PRId64);
 +                fprintf(log, strbuf, p, ibuf[p]);
 +            }
 +        }
 +        gmx_fatal(FARGS, "The %d subsystems are not compatible\n", ms->nsim);
 +    }
 +
 +    sfree(ibuf);
 +}
 +
 +
- char *gmx_gethostname(char *name, size_t len)
++int gmx_gethostname(char *name, size_t len)
 +{
 +    if (len < 8)
 +    {
 +        gmx_incons("gmx_gethostname called with len<8");
 +    }
- #ifdef HAVE_UNISTD_H
++#if defined(HAVE_UNISTD_H) && !defined(__native_client__)
 +    if (gethostname(name, len-1) != 0)
 +    {
 +        strncpy(name, "unknown", 8);
++        return -1;
 +    }
++    return 0;
 +#else
 +    strncpy(name, "unknown", 8);
++    return -1;
 +#endif
- 
-     return name;
 +}
 +
 +
 +void gmx_log_open(const char *lognm, const t_commrec *cr, gmx_bool bMasterOnly,
 +                  gmx_bool bAppendFiles, FILE** fplog)
 +{
 +    int    len, pid;
 +    char   buf[256], host[256];
 +    time_t t;
 +    char   timebuf[STRLEN];
 +    FILE  *fp = *fplog;
 +    char  *tmpnm;
 +
 +    debug_gmx();
 +
 +    /* Communicate the filename for logfile */
 +    if (cr->nnodes > 1 && !bMasterOnly
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI the non-master log files are opened later
 +         * when the files names are already known on all nodes.
 +         */
 +        && FALSE
 +#endif
 +        )
 +    {
 +        if (MASTER(cr))
 +        {
 +            len = strlen(lognm) + 1;
 +        }
 +        gmx_bcast(sizeof(len), &len, cr);
 +        if (!MASTER(cr))
 +        {
 +            snew(tmpnm, len+8);
 +        }
 +        else
 +        {
 +            tmpnm = gmx_strdup(lognm);
 +        }
 +        gmx_bcast(len*sizeof(*tmpnm), tmpnm, cr);
 +    }
 +    else
 +    {
 +        tmpnm = gmx_strdup(lognm);
 +    }
 +
 +    debug_gmx();
 +
 +    if (!bMasterOnly && !MASTER(cr))
 +    {
 +        /* Since log always ends with '.log' let's use this info */
 +        par_fn(tmpnm, efLOG, cr, FALSE, !bMasterOnly, buf, 255);
 +        fp = gmx_fio_fopen(buf, bAppendFiles ? "a+" : "w+" );
 +    }
 +    else if (!bAppendFiles)
 +    {
 +        fp = gmx_fio_fopen(tmpnm, bAppendFiles ? "a+" : "w+" );
 +    }
 +
 +    sfree(tmpnm);
 +
 +    gmx_fatal_set_log_file(fp);
 +
 +    /* Get some machine parameters */
 +    gmx_gethostname(host, 256);
 +
 +    time(&t);
 +
 +#ifndef NO_GETPID
 +#   ifdef GMX_NATIVE_WINDOWS
 +    pid = _getpid();
 +#   else
 +    pid = getpid();
 +#   endif
 +#else
 +    pid = 0;
 +#endif
 +
 +    if (bAppendFiles)
 +    {
 +        fprintf(fp,
 +                "\n"
 +                "\n"
 +                "-----------------------------------------------------------\n"
 +                "Restarting from checkpoint, appending to previous log file.\n"
 +                "\n"
 +                );
 +    }
 +
 +    gmx_ctime_r(&t, timebuf, STRLEN);
 +
 +    fprintf(fp,
 +            "Log file opened on %s"
 +            "Host: %s  pid: %d  nodeid: %d  nnodes:  %d\n",
 +            timebuf, host, pid, cr->nodeid, cr->nnodes);
 +    try
 +    {
 +        gmx::BinaryInformationSettings settings;
 +        settings.extendedInfo(true);
 +        settings.copyright(!bAppendFiles);
 +        gmx::printBinaryInformation(fp, gmx::ProgramInfo::getInstance(), settings);
 +    }
 +    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 +    fprintf(fp, "\n\n");
 +
 +    fflush(fp);
 +    debug_gmx();
 +
 +    *fplog = fp;
 +}
 +
 +void gmx_log_close(FILE *fp)
 +{
 +    if (fp)
 +    {
 +        gmx_fatal_set_log_file(NULL);
 +        gmx_fio_fclose(fp);
 +    }
 +}
 +
 +void init_multisystem(t_commrec *cr, int nsim, char **multidirs,
 +                      int nfile, const t_filenm fnm[], gmx_bool bParFn)
 +{
 +    gmx_multisim_t *ms;
 +    int             nnodes, nnodpersim, sim, i, ftp;
 +    char            buf[256];
 +#ifdef GMX_MPI
 +    MPI_Group       mpi_group_world;
 +    int            *rank;
 +#endif
 +
 +#ifndef GMX_MPI
 +    if (nsim > 1)
 +    {
 +        gmx_fatal(FARGS, "This binary is compiled without MPI support, can not do multiple simulations.");
 +    }
 +#endif
 +
 +    nnodes  = cr->nnodes;
 +    if (nnodes % nsim != 0)
 +    {
 +        gmx_fatal(FARGS, "The number of nodes (%d) is not a multiple of the number of simulations (%d)", nnodes, nsim);
 +    }
 +
 +    nnodpersim = nnodes/nsim;
 +    sim        = cr->nodeid/nnodpersim;
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "We have %d simulations, %d nodes per simulation, local simulation is %d\n", nsim, nnodpersim, sim);
 +    }
 +
 +    snew(ms, 1);
 +    cr->ms   = ms;
 +    ms->nsim = nsim;
 +    ms->sim  = sim;
 +#ifdef GMX_MPI
 +    /* Create a communicator for the master nodes */
 +    snew(rank, ms->nsim);
 +    for (i = 0; i < ms->nsim; i++)
 +    {
 +        rank[i] = i*nnodpersim;
 +    }
 +    MPI_Comm_group(MPI_COMM_WORLD, &mpi_group_world);
 +    MPI_Group_incl(mpi_group_world, nsim, rank, &ms->mpi_group_masters);
 +    sfree(rank);
 +    MPI_Comm_create(MPI_COMM_WORLD, ms->mpi_group_masters,
 +                    &ms->mpi_comm_masters);
 +
 +#if !defined(MPI_IN_PLACE_EXISTS)
 +    /* initialize the MPI_IN_PLACE replacement buffers */
 +    snew(ms->mpb, 1);
 +    ms->mpb->ibuf        = NULL;
 +    ms->mpb->libuf       = NULL;
 +    ms->mpb->fbuf        = NULL;
 +    ms->mpb->dbuf        = NULL;
 +    ms->mpb->ibuf_alloc  = 0;
 +    ms->mpb->libuf_alloc = 0;
 +    ms->mpb->fbuf_alloc  = 0;
 +    ms->mpb->dbuf_alloc  = 0;
 +#endif
 +
 +#endif
 +
 +    /* Reduce the intra-simulation communication */
 +    cr->sim_nodeid = cr->nodeid % nnodpersim;
 +    cr->nnodes     = nnodpersim;
 +#ifdef GMX_MPI
 +    MPI_Comm_split(MPI_COMM_WORLD, sim, cr->sim_nodeid, &cr->mpi_comm_mysim);
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +    cr->nodeid           = cr->sim_nodeid;
 +#endif
 +
 +    if (debug)
 +    {
 +        fprintf(debug, "This is simulation %d", cr->ms->sim);
 +        if (PAR(cr))
 +        {
 +            fprintf(debug, ", local number of nodes %d, local nodeid %d",
 +                    cr->nnodes, cr->sim_nodeid);
 +        }
 +        fprintf(debug, "\n\n");
 +    }
 +
 +    if (multidirs)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug, "Changing to directory %s\n", multidirs[cr->ms->sim]);
 +        }
 +        gmx_chdir(multidirs[cr->ms->sim]);
 +    }
 +    else if (bParFn)
 +    {
 +        /* Patch output and tpx, cpt and rerun input file names */
 +        for (i = 0; (i < nfile); i++)
 +        {
 +            /* Because of possible multiple extensions per type we must look
 +             * at the actual file name
 +             */
 +            if (is_output(&fnm[i]) ||
 +                fnm[i].ftp == efTPX || fnm[i].ftp == efCPT ||
 +                strcmp(fnm[i].opt, "-rerun") == 0)
 +            {
 +                ftp = fn2ftp(fnm[i].fns[0]);
 +                par_fn(fnm[i].fns[0], ftp, cr, TRUE, FALSE, buf, 255);
 +                sfree(fnm[i].fns[0]);
 +                fnm[i].fns[0] = gmx_strdup(buf);
 +            }
 +        }
 +    }
 +}
diff --cc src/gromacs/gmxlib/string2.c
index 405111ee9d,0000000000..f3d79ada1f
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/string2.c
+++ b/src/gromacs/gmxlib/string2.c
@@@ -1,670 -1,0 +1,671 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +/* This file is completely threadsafe - keep it that way! */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <ctype.h>
 +#include <stdlib.h>
 +#include <errno.h>
 +#include <sys/types.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef HAVE_PWD_H
 +#include <pwd.h>
 +#endif
 +#include <time.h>
 +#include <assert.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
++#include "main.h"
 +#include "string2.h"
 +#include "gromacs/fileio/futil.h"
 +
 +int continuing(char *s)
 +{
 +    int sl;
 +    assert(s);
 +
 +    rtrim(s);
 +    sl = strlen(s);
 +    if ((sl > 0) && (s[sl-1] == CONTINUE))
 +    {
 +        s[sl-1] = 0;
 +        return TRUE;
 +    }
 +    else
 +    {
 +        return FALSE;
 +    }
 +}
 +
 +
 +
 +char *fgets2(char *line, int n, FILE *stream)
 +{
 +    char *c;
 +    if (fgets(line, n, stream) == NULL)
 +    {
 +        return NULL;
 +    }
 +    if ((c = strchr(line, '\n')) != NULL)
 +    {
 +        *c = '\0';
 +    }
 +    else
 +    {
 +        /* A line not ending in a newline can only occur at the end of a file,
 +         * or because of n being too small.
 +         * Since both cases occur very infrequently, we can check for EOF.
 +         */
 +        if (!gmx_eof(stream))
 +        {
 +            gmx_fatal(FARGS, "An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'", n, n, line);
 +        }
 +    }
 +    if ((c = strchr(line, '\r')) != NULL)
 +    {
 +        *c = '\0';
 +    }
 +
 +    return line;
 +}
 +
 +void strip_comment (char *line)
 +{
 +    char *c;
 +
 +    if (!line)
 +    {
 +        return;
 +    }
 +
 +    /* search for a comment mark and replace it by a zero */
 +    if ((c = strchr(line, COMMENTSIGN)) != NULL)
 +    {
 +        (*c) = 0;
 +    }
 +}
 +
 +void upstring (char *str)
 +{
 +    int i;
 +
 +    for (i = 0; (i < (int)strlen(str)); i++)
 +    {
 +        str[i] = toupper(str[i]);
 +    }
 +}
 +
 +void ltrim (char *str)
 +{
 +    char *tr;
 +    int   i, c;
 +
 +    if (NULL == str)
 +    {
 +        return;
 +    }
 +
 +    c = 0;
 +    while (('\0' != str[c]) && isspace(str[c]))
 +    {
 +        c++;
 +    }
 +    if (c > 0)
 +    {
 +        for (i = c; ('\0' != str[i]); i++)
 +        {
 +            str[i-c] = str[i];
 +        }
 +        str[i-c] = '\0';
 +    }
 +}
 +
 +void rtrim (char *str)
 +{
 +    int nul;
 +
 +    if (NULL == str)
 +    {
 +        return;
 +    }
 +
 +    nul = strlen(str)-1;
 +    while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) )
 +    {
 +        str[nul] = '\0';
 +        nul--;
 +    }
 +}
 +
 +void trim (char *str)
 +{
 +    ltrim (str);
 +    rtrim (str);
 +}
 +
 +char *
 +gmx_ctime_r(const time_t *clock, char *buf, int n)
 +{
 +    char tmpbuf[STRLEN];
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +    /* Windows */
 +    ctime_s( tmpbuf, STRLEN, clock );
 +#elif (defined(__sun))
 +    /*Solaris*/
 +    ctime_r(clock, tmpbuf, n);
 +#else
 +    ctime_r(clock, tmpbuf);
 +#endif
 +    strncpy(buf, tmpbuf, n-1);
 +    buf[n-1] = '\0';
 +
 +    return buf;
 +}
 +
 +void nice_header (FILE *out, const char *fn)
 +{
 +    const char    *unk = "onbekend";
 +    time_t         clock;
 +    const char    *user = unk;
 +    int            gh;
 +#ifdef HAVE_PWD_H
 +    uid_t          uid;
 +#else
 +    int            uid;
 +#endif
 +    char           buf[256] = "";
 +    char           timebuf[STRLEN];
 +#ifdef HAVE_PWD_H
 +    struct passwd *pw;
 +#endif
 +
 +    /* Print a nice header above the file */
 +    time(&clock);
 +    fprintf (out, "%c\n", COMMENTSIGN);
 +    fprintf (out, "%c\tFile '%s' was generated\n", COMMENTSIGN, fn ? fn : unk);
 +
 +#ifdef HAVE_PWD_H
 +    uid  = getuid();
 +    pw   = getpwuid(uid);
-     gh   = gethostname(buf, 255);
++    gh   = gmx_gethostname(buf, 255);
 +    /* pw returns null on error (e.g. compute nodes lack /etc/passwd) */
 +    user = pw ? pw->pw_name : unk;
 +#else
 +    uid = 0;
 +    gh  = -1;
 +#endif
 +
 +    gmx_ctime_r(&clock, timebuf, STRLEN);
 +    fprintf (out, "%c\tBy user: %s (%d)\n", COMMENTSIGN,
 +             user ? user : unk, (int) uid);
 +    fprintf(out, "%c\tOn host: %s\n", COMMENTSIGN, (gh == 0) ? buf : unk);
 +
 +    fprintf (out, "%c\tAt date: %s", COMMENTSIGN, timebuf);
 +    fprintf (out, "%c\n", COMMENTSIGN);
 +}
 +
 +
 +int gmx_strcasecmp_min(const char *str1, const char *str2)
 +{
 +    char ch1, ch2;
 +
 +    do
 +    {
 +        do
 +        {
 +            ch1 = toupper(*(str1++));
 +        }
 +        while ((ch1 == '-') || (ch1 == '_'));
 +        do
 +        {
 +            ch2 = toupper(*(str2++));
 +        }
 +        while ((ch2 == '-') || (ch2 == '_'));
 +
 +        if (ch1 != ch2)
 +        {
 +            return (ch1-ch2);
 +        }
 +    }
 +    while (ch1);
 +    return 0;
 +}
 +
 +int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 +{
 +    char  ch1, ch2;
 +    char *stri1, *stri2;
 +
 +    stri1 = (char *)str1;
 +    stri2 = (char *)str2;
 +    do
 +    {
 +        do
 +        {
 +            ch1 = toupper(*(str1++));
 +        }
 +        while ((ch1 == '-') || (ch1 == '_'));
 +        do
 +        {
 +            ch2 = toupper(*(str2++));
 +        }
 +        while ((ch2 == '-') || (ch2 == '_'));
 +
 +        if (ch1 != ch2)
 +        {
 +            return (ch1-ch2);
 +        }
 +    }
 +    while (ch1 && (str1-stri1 < n) && (str2-stri2 < n));
 +    return 0;
 +}
 +
 +int gmx_strcasecmp(const char *str1, const char *str2)
 +{
 +    char ch1, ch2;
 +
 +    do
 +    {
 +        ch1 = toupper(*(str1++));
 +        ch2 = toupper(*(str2++));
 +        if (ch1 != ch2)
 +        {
 +            return (ch1-ch2);
 +        }
 +    }
 +    while (ch1);
 +    return 0;
 +}
 +
 +int gmx_strncasecmp(const char *str1, const char *str2, int n)
 +{
 +    char ch1, ch2;
 +
 +    if (n == 0)
 +    {
 +        return 0;
 +    }
 +
 +    do
 +    {
 +        ch1 = toupper(*(str1++));
 +        ch2 = toupper(*(str2++));
 +        if (ch1 != ch2)
 +        {
 +            return (ch1-ch2);
 +        }
 +        n--;
 +    }
 +    while (ch1 && n);
 +    return 0;
 +}
 +
 +char *gmx_strdup(const char *src)
 +{
 +    char *dest;
 +
 +    snew(dest, strlen(src)+1);
 +    strcpy(dest, src);
 +
 +    return dest;
 +}
 +
 +char *
 +gmx_strndup(const char *src, int n)
 +{
 +    int   len;
 +    char *dest;
 +
 +    len = strlen(src);
 +    if (len > n)
 +    {
 +        len = n;
 +    }
 +    snew(dest, len+1);
 +    strncpy(dest, src, len);
 +    dest[len] = 0;
 +    return dest;
 +}
 +
 +/* Magic hash init number for Dan J. Bernsteins algorithm.
 + * Do NOT use any other value unless you really know what you are doing.
 + */
 +const unsigned int
 +    gmx_string_hash_init = 5381;
 +
 +
 +unsigned int
 +gmx_string_fullhash_func(const char *s, unsigned int hash_init)
 +{
 +    int c;
 +
 +    while ((c = (*s++)) != '\0')
 +    {
 +        hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 +    }
 +    return hash_init;
 +}
 +
 +unsigned int
 +gmx_string_hash_func(const char *s, unsigned int hash_init)
 +{
 +    int c;
 +
 +    while ((c = toupper(*s++)) != '\0')
 +    {
 +        if (isalnum(c))
 +        {
 +            hash_init = ((hash_init << 5) + hash_init) ^ c;            /* (hash * 33) xor c */
 +        }
 +    }
 +    return hash_init;
 +}
 +
 +int
 +gmx_wcmatch(const char *pattern, const char *str)
 +{
 +    while (*pattern)
 +    {
 +        if (*pattern == '*')
 +        {
 +            /* Skip multiple wildcards in a sequence */
 +            while (*pattern == '*' || *pattern == '?')
 +            {
 +                ++pattern;
 +                /* For ?, we need to check that there are characters left
 +                 * in str. */
 +                if (*pattern == '?')
 +                {
 +                    if (*str == 0)
 +                    {
 +                        return GMX_NO_WCMATCH;
 +                    }
 +                    else
 +                    {
 +                        ++str;
 +                    }
 +                }
 +            }
 +            /* If the pattern ends after the star, we have a match */
 +            if (*pattern == 0)
 +            {
 +                return 0;
 +            }
 +            /* Match the rest against each possible suffix of str */
 +            while (*str)
 +            {
 +                /* Only do the recursive call if the first character
 +                 * matches. We don't have to worry about wildcards here,
 +                 * since we have processed them above. */
 +                if (*pattern == *str)
 +                {
 +                    int rc;
 +                    /* Match the suffix, and return if a match or an error */
 +                    rc = gmx_wcmatch(pattern, str);
 +                    if (rc != GMX_NO_WCMATCH)
 +                    {
 +                        return rc;
 +                    }
 +                }
 +                ++str;
 +            }
 +            /* If no suffix of str matches, we don't have a match */
 +            return GMX_NO_WCMATCH;
 +        }
 +        else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 +        {
 +            ++str;
 +        }
 +        else
 +        {
 +            return GMX_NO_WCMATCH;
 +        }
 +        ++pattern;
 +    }
 +    /* When the pattern runs out, we have a match if the string has ended. */
 +    return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 +}
 +
 +char *wrap_lines(const char *buf, int line_width, int indent, gmx_bool bIndentFirst)
 +{
 +    char    *b2;
 +    int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
 +    gmx_bool bFirst, bFitsOnLine;
 +
 +    /* characters are copied from buf to b2 with possible spaces changed
 +     * into newlines and extra space added for indentation.
 +     * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
 +     * i0 points to the beginning of the current line (in buf, source)
 +     * lspace and l2space point to the last space on the current line
 +     * bFirst is set to prevent indentation of first line
 +     * bFitsOnLine says if the first space occurred before line_width, if
 +     * that is not the case, we have a word longer than line_width which
 +     * will also not fit on the next line, so we might as well keep it on
 +     * the current line (where it also won't fit, but looks better)
 +     */
 +
 +    b2    = NULL;
 +    b2len = strlen(buf)+1+indent;
 +    snew(b2, b2len);
 +    i0 = i2 = 0;
 +    if (bIndentFirst)
 +    {
 +        for (i2 = 0; (i2 < indent); i2++)
 +        {
 +            b2[i2] = ' ';
 +        }
 +    }
 +    bFirst = TRUE;
 +    do
 +    {
 +        l2space = -1;
 +        /* find the last space before end of line */
 +        for (i = i0; ((i-i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
 +        {
 +            b2[i2++] = buf[i];
 +            /* remember the position of a space */
 +            if (buf[i] == ' ')
 +            {
 +                lspace  = i;
 +                l2space = i2-1;
 +            }
 +            /* if we have a newline before the line is full, reset counters */
 +            if (buf[i] == '\n' && buf[i+1])
 +            {
 +                i0     = i+1;
 +                b2len += indent;
 +                srenew(b2, b2len);
 +                /* add indentation after the newline */
 +                for (j = 0; (j < indent); j++)
 +                {
 +                    b2[i2++] = ' ';
 +                }
 +            }
 +        }
 +        /* If we are at the last newline, copy it */
 +        if (buf[i] == '\n' && !buf[i+1])
 +        {
 +            b2[i2++] = buf[i++];
 +        }
 +        /* if we're not at the end of the string */
 +        if (buf[i])
 +        {
 +            /* check if one word does not fit on the line */
 +            bFitsOnLine = (i-i0 <= line_width);
 +            /* reset line counters to just after the space */
 +            i0 = lspace+1;
 +            i2 = l2space+1;
 +            /* if the words fit on the line, and we're beyond the indentation part */
 +            if ( (bFitsOnLine) && (l2space >= indent) )
 +            {
 +                /* start a new line */
 +                b2[l2space] = '\n';
 +                /* and add indentation */
 +                if (indent)
 +                {
 +                    if (bFirst)
 +                    {
 +                        line_width -= indent;
 +                        bFirst      = FALSE;
 +                    }
 +                    b2len += indent;
 +                    srenew(b2, b2len);
 +                    for (j = 0; (j < indent); j++)
 +                    {
 +                        b2[i2++] = ' ';
 +                    }
 +                    /* no extra spaces after indent; */
 +                    while (buf[i0] == ' ')
 +                    {
 +                        i0++;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    while (buf[i]);
 +    b2[i2] = '\0';
 +
 +    return b2;
 +}
 +
 +char **split(char sep, const char *str)
 +{
 +    char **ptr = NULL;
 +    int    n, nn, nptr = 0;
 +
 +    if (str == NULL)
 +    {
 +        return NULL;
 +    }
 +    nn = strlen(str);
 +    for (n = 0; (n < nn); n++)
 +    {
 +        if (str[n] == sep)
 +        {
 +            nptr++;
 +        }
 +    }
 +    snew(ptr, nptr+2);
 +    nptr = 0;
 +    while (*str != '\0')
 +    {
 +        while ((*str != '\0') && (*str == sep))
 +        {
 +            str++;
 +        }
 +        if (*str != '\0')
 +        {
 +            snew(ptr[nptr], 1+strlen(str));
 +            n = 0;
 +            while ((*str != '\0') && (*str != sep))
 +            {
 +                ptr[nptr][n] = *str;
 +                str++;
 +                n++;
 +            }
 +            ptr[nptr][n] = '\0';
 +            nptr++;
 +        }
 +    }
 +    ptr[nptr] = NULL;
 +
 +    return ptr;
 +}
 +
 +gmx_int64_t
 +str_to_int64_t(const char *str, char **endptr)
 +{
 +#ifndef _MSC_VER
 +    return strtoll(str, endptr, 10);
 +#else
 +    return _strtoi64(str, endptr, 10);
 +#endif
 +}
 +
 +char *gmx_strsep(char **stringp, const char *delim)
 +{
 +    char *ret;
 +    int   len = strlen(delim);
 +    int   i, j = 0;
 +    int   found = 0;
 +
 +    if (!*stringp)
 +    {
 +        return NULL;
 +    }
 +    ret = *stringp;
 +    do
 +    {
 +        if ( (*stringp)[j] == '\0')
 +        {
 +            found    = 1;
 +            *stringp = NULL;
 +            break;
 +        }
 +        for (i = 0; i < len; i++)
 +        {
 +            if ( (*stringp)[j] == delim[i])
 +            {
 +                (*stringp)[j] = '\0';
 +                *stringp      = *stringp+j+1;
 +                found         = 1;
 +                break;
 +            }
 +        }
 +        j++;
 +    }
 +    while (!found);
 +
 +    return ret;
 +}
diff --cc src/gromacs/gmxlib/thread_mpi/pthreads.c
index 1c62caf0e3,0000000000..8da1df6751
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/thread_mpi/pthreads.c
+++ b/src/gromacs/gmxlib/thread_mpi/pthreads.c
@@@ -1,943 -1,0 +1,946 @@@
 +/*
 +   This source code file is part of thread_mpi.
 +   Written by Sander Pronk, Erik Lindahl, and possibly others.
 +
 +   Copyright (c) 2009, Sander Pronk, Erik Lindahl.
 +   All rights reserved.
 +
 +   Redistribution and use in source and binary forms, with or without
 +   modification, are permitted provided that the following conditions are met:
 +   1) Redistributions of source code must retain the above copyright
 +   notice, this list of conditions and the following disclaimer.
 +   2) Redistributions in binary form must reproduce the above copyright
 +   notice, this list of conditions and the following disclaimer in the
 +   documentation and/or other materials provided with the distribution.
 +   3) Neither the name of the copyright holders nor the
 +   names of its contributors may be used to endorse or promote products
 +   derived from this software without specific prior written permission.
 +
 +   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
 +   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 +   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 +   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
 +   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 +   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 +   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 +   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 +   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 +
 +   If you want to redistribute modifications, please consider that
 +   scientific software is very special. Version control is crucial -
 +   bugs must be traceable. We will be happy to consider code for
 +   inclusion in the official distribution, but derived work should not
 +   be called official thread_mpi. Details are found in the README & COPYING
 +   files.
 + */
 +
 +
 +
 +/* Include the defines that determine which thread library to use.
 + * We do not use HAVE_PTHREAD_H directly, since we might want to
 + * turn off thread support explicity (e.g. for debugging).
 + */
 +#ifdef HAVE_TMPI_CONFIG_H
 +#include "tmpi_config.h"
 +#endif
 +
 +#ifdef HAVE_CONFIG_H
 +#include "config.h"
 +#endif
 +
 +
 +#ifdef THREAD_PTHREADS
 +
 +#ifdef HAVE_PTHREAD_SETAFFINITY
 +#define _GNU_SOURCE
 +#endif
 +
 +/* pthread.h must be the first header, apart from the defines in config.h */
 +#include <pthread.h>
 +
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#include <errno.h>
 +#include <stdlib.h>
 +#include <stdio.h>
 +#include <stdarg.h>
 +
 +#include "thread_mpi/atomic.h"
 +#include "thread_mpi/threads.h"
 +#include "impl.h"
 +#include "unused.h"
 +
 +#include "pthreads.h"
 +
 +/* mutex for initializing mutexes */
 +static pthread_mutex_t mutex_init = PTHREAD_MUTEX_INITIALIZER;
 +/* mutex for initializing barriers */
 +static pthread_mutex_t once_init = PTHREAD_MUTEX_INITIALIZER;
 +/* mutex for initializing thread_conds */
 +static pthread_mutex_t cond_init = PTHREAD_MUTEX_INITIALIZER;
 +/* mutex for initializing barriers */
 +static pthread_mutex_t barrier_init = PTHREAD_MUTEX_INITIALIZER;
 +
 +/* mutex for managing  thread IDs */
 +static pthread_mutex_t thread_id_mutex = PTHREAD_MUTEX_INITIALIZER;
 +static pthread_key_t   thread_id_key;
 +static int             thread_id_key_initialized = 0;
 +
 +
 +
 +
 +enum tMPI_Thread_support tMPI_Thread_support(void)
 +{
 +    return TMPI_THREAD_SUPPORT_YES;
 +}
 +
 +
 +int tMPI_Thread_get_hw_number(void)
 +{
 +    int ret = 0;
 +#ifdef HAVE_SYSCONF
 +#if defined(_SC_NPROCESSORS_ONLN)
 +    ret = sysconf(_SC_NPROCESSORS_ONLN);
 +#elif defined(_SC_NPROC_ONLN)
 +    ret = sysconf(_SC_NPROC_ONLN);
 +#elif defined(_SC_NPROCESSORS_CONF)
 +    ret = sysconf(_SC_NPROCESSORS_CONF);
 +#elif defined(_SC_NPROC_CONF)
 +    ret = sysconf(_SC_NPROC_CONF);
 +#endif
 +#endif
 +
 +    return ret;
 +}
 +
 +/* destructor for thread ids */
 +static void tMPI_Destroy_thread_id(void* thread_id)
 +{
 +    struct tMPI_Thread *thread = (struct tMPI_Thread*)thread_id;
 +    if (!thread->started_by_tmpi)
 +    {
 +        /* if the thread is started by tMPI, it must be freed in the join()
 +           call. */
 +        free(thread_id);
 +    }
 +}
 +
 +
 +/* Set the thread id var for this thread
 +    Returns a pointer to the thread object if succesful, NULL if ENOMEM */
 +static struct tMPI_Thread* tMPI_Set_thread_id_key(int started_by_tmpi)
 +{
 +    struct tMPI_Thread *th;
 +
 +    th = (struct tMPI_Thread*)malloc(sizeof(struct tMPI_Thread)*1);
 +    if (th == NULL)
 +    {
 +        return NULL;
 +    }
 +    th->th              = pthread_self();
 +    th->started_by_tmpi = started_by_tmpi;
 +    /* we ignore errors because any thread that needs this value will
 +       re-generate it in the next iteration. */
 +    pthread_setspecific(thread_id_key, th);
 +    return th;
 +}
 +
 +/* initialize the thread id vars if not already initialized */
 +static int tMPI_Init_thread_ids(void)
 +{
 +    int ret;
 +    ret = pthread_mutex_lock( &thread_id_mutex );
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    if (!thread_id_key_initialized)
 +    {
 +        /* initialize and set the thread id thread-specific variable */
 +        struct tMPI_Thread *th;
 +
 +        thread_id_key_initialized = 1;
 +        ret = pthread_key_create(&thread_id_key, tMPI_Destroy_thread_id);
 +        if (ret != 0)
 +        {
 +            goto err;
 +        }
 +        th = tMPI_Set_thread_id_key(0);
 +        if (th == NULL)
 +        {
 +            ret = ENOMEM;
 +            goto err;
 +        }
 +    }
 +
 +    ret = pthread_mutex_unlock( &thread_id_mutex );
 +    return ret;
 +err:
 +    pthread_mutex_unlock( &thread_id_mutex );
 +    return ret;
 +}
 +
 +/* structure to hold the arguments for the thread_starter function */
 +struct tMPI_Thread_starter
 +{
 +    struct tMPI_Thread *thread;
 +    void               *(*start_routine)(void*);
 +    void               *arg;
 +    pthread_mutex_t     cond_lock; /* lock for initialization of thread
 +                                      structure */
 +};
 +
 +/* the thread_starter function that sets the thread id */
 +static void *tMPI_Thread_starter(void *arg)
 +{
 +    struct tMPI_Thread_starter *starter = (struct tMPI_Thread_starter *)arg;
 +    void *(*start_routine)(void*);
 +    void *parg;
 +    int   ret;
 +
 +    /* first wait for the parent thread to signal that the starter->thread
 +       structure is ready. That's done by unlocking the starter->cond_lock */
 +    ret = pthread_mutex_lock(&(starter->cond_lock));
 +    if (ret != 0)
 +    {
 +        return NULL;
 +    }
 +    ret = pthread_mutex_unlock(&(starter->cond_lock));
 +    if (ret != 0)
 +    {
 +        return NULL;
 +    }
 +
 +    /* now remember the tMPI_thread_t structure for this thread */
 +    ret = pthread_setspecific(thread_id_key, starter->thread);
 +    if (ret != 0)
 +    {
 +        return NULL;
 +    }
 +    start_routine = starter->start_routine;
 +    parg          = starter->arg;
 +
 +    /* deallocate the starter structure. Errors here are non-fatal. */
 +    pthread_mutex_destroy(&(starter->cond_lock));
 +    free(starter);
 +    return (*start_routine)(parg);
 +}
 +
 +int tMPI_Thread_create(tMPI_Thread_t *thread, void *(*start_routine)(void *),
 +                       void *arg)
 +{
 +    int ret;
 +    struct tMPI_Thread_starter *starter;
 +
 +    if (thread == NULL)
 +    {
 +        return EINVAL;
 +    }
 +    tMPI_Init_thread_ids();
 +
 +    *thread = (struct tMPI_Thread*)malloc(sizeof(struct tMPI_Thread)*1);
 +    if (*thread == NULL)
 +    {
 +        return ENOMEM;
 +    }
 +    (*thread)->started_by_tmpi = 1;
 +    starter                    = (struct tMPI_Thread_starter*)
 +        malloc(sizeof(struct tMPI_Thread_starter)*1);
 +    if (starter == NULL)
 +    {
 +        return ENOMEM;
 +    }
 +    /* fill the starter structure */
 +    starter->thread        = *thread;
 +    starter->start_routine = start_routine;
 +    starter->arg           = arg;
 +
 +    ret = pthread_mutex_init(&(starter->cond_lock), NULL);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    /* now lock the mutex so we can unlock it once we know the data in
 +       thread->th is safe. */
 +    ret = pthread_mutex_lock(&(starter->cond_lock));
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    ret = pthread_create(&((*thread)->th), NULL, tMPI_Thread_starter,
 +                         (void*)starter);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    /* Here we know thread->th is safe. */
 +    ret = pthread_mutex_unlock(&(starter->cond_lock));
 +
 +    return ret;
 +}
 +
 +
 +
 +int tMPI_Thread_join(tMPI_Thread_t thread, void **value_ptr)
 +{
 +    int       ret;
 +    pthread_t th = thread->th;
 +
 +    ret = pthread_join( th, value_ptr );
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    free(thread);
 +    return 0;
 +}
 +
 +
 +tMPI_Thread_t tMPI_Thread_self(void)
 +{
 +    tMPI_Thread_t th;
 +    int           ret;
 +
 +    /* make sure the key var is set */
 +    ret = tMPI_Init_thread_ids();
 +    if (ret != 0)
 +    {
 +        return NULL;
 +    }
 +
 +    th = pthread_getspecific(thread_id_key);
 +
 +    /* check if it is already in our list */
 +    if (th == NULL)
 +    {
 +        th = tMPI_Set_thread_id_key(0);
 +    }
 +    return th;
 +}
 +
 +int tMPI_Thread_equal(tMPI_Thread_t t1, tMPI_Thread_t t2)
 +{
 +    return pthread_equal(t1->th, t2->th);
 +}
 +
 +
 +enum tMPI_Thread_setaffinity_support tMPI_Thread_setaffinity_support(void)
 +{
 +#ifdef HAVE_PTHREAD_SETAFFINITY
 +    cpu_set_t set;
 +    int       ret;
 +
 +    /* run getaffinity to check whether we get back ENOSYS */
 +    ret = pthread_getaffinity_np(pthread_self(), sizeof(set), &set);
 +    if (ret == 0)
 +    {
 +        return TMPI_SETAFFINITY_SUPPORT_YES;
 +    }
 +    else
 +    {
 +        return TMPI_SETAFFINITY_SUPPORT_NO;
 +    }
 +#else
 +    return TMPI_SETAFFINITY_SUPPORT_NO;
 +#endif
 +}
 +
 +
 +/* set thread's own affinity to a processor number n */
 +int tMPI_Thread_setaffinity_single(tMPI_Thread_t tmpi_unused thread,
 +                                   unsigned int  tmpi_unused nr)
 +{
 +#ifdef HAVE_PTHREAD_SETAFFINITY
 +    int       nt = tMPI_Thread_get_hw_number();
 +    cpu_set_t set;
 +
 +    if (nt < nr)
 +    {
 +        return TMPI_ERR_PROCNR;
 +    }
 +
 +    CPU_ZERO(&set);
 +    CPU_SET(nr, &set);
 +    return pthread_setaffinity_np(thread->th, sizeof(set), &set);
 +#endif
 +    return 0;
 +}
 +
 +
 +
 +
 +int tMPI_Thread_mutex_init(tMPI_Thread_mutex_t *mtx)
 +{
 +    int ret;
 +
 +    if (mtx == NULL)
 +    {
 +        return EINVAL;
 +    }
 +
 +    mtx->mutex = (struct tMPI_Mutex*)malloc(sizeof(struct tMPI_Mutex)*1);
 +    if (mtx->mutex == NULL)
 +    {
 +        return ENOMEM;
 +    }
 +    ret = pthread_mutex_init(&(mtx->mutex->mtx), NULL);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +#ifndef TMPI_NO_ATOMICS
 +    tMPI_Atomic_set(&(mtx->initialized), 1);
 +#else
 +    mtx->initialized.value = 1;
 +#endif
 +    return 0;
 +}
 +
 +static inline int tMPI_Thread_mutex_init_once(tMPI_Thread_mutex_t *mtx)
 +{
 +    int ret = 0;
 +
 +#ifndef TMPI_NO_ATOMICS
 +    /* check whether the mutex is initialized */
 +    if (tMPI_Atomic_get( &(mtx->initialized)  ) == 0)
 +#endif
 +    {
 +        /* we're relying on the memory barrier semantics of mutex_lock/unlock
 +           for the check preceding this function call to have worked */
 +        ret = pthread_mutex_lock( &(mutex_init) );
 +        if (ret != 0)
 +        {
 +            return ret;
 +        }
 +
 +        if (mtx->mutex == NULL)
 +        {
 +            mtx->mutex = (struct tMPI_Mutex*)malloc(sizeof(struct tMPI_Mutex));
 +            if (mtx->mutex == NULL)
 +            {
 +                ret = ENOMEM;
 +                goto err;
 +            }
 +            ret = pthread_mutex_init( &(mtx->mutex->mtx), NULL);
 +            if (ret != 0)
 +            {
 +                goto err;
 +            }
 +        }
++        ret = pthread_mutex_unlock( &(mutex_init) );
 +    }
-     ret = pthread_mutex_unlock( &(mutex_init) );
 +    return ret;
 +err:
 +    pthread_mutex_unlock( &(mutex_init) );
 +    return ret;
 +}
 +
 +
 +int tMPI_Thread_mutex_destroy(tMPI_Thread_mutex_t *mtx)
 +{
 +    int ret;
 +
 +    if (mtx == NULL)
 +    {
 +        return EINVAL;
 +    }
 +
 +    ret = pthread_mutex_destroy( &(mtx->mutex->mtx) );
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    free(mtx->mutex);
 +    return ret;
 +}
 +
 +
 +
 +int tMPI_Thread_mutex_lock(tMPI_Thread_mutex_t *mtx)
 +{
 +    int ret;
 +
 +    /* check whether the mutex is initialized */
 +    ret = tMPI_Thread_mutex_init_once(mtx);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    ret = pthread_mutex_lock(&(mtx->mutex->mtx));
 +    return ret;
 +}
 +
 +
 +
 +
 +int tMPI_Thread_mutex_trylock(tMPI_Thread_mutex_t *mtx)
 +{
 +    int ret;
 +
 +    /* check whether the mutex is initialized */
 +    ret = tMPI_Thread_mutex_init_once(mtx);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    ret = pthread_mutex_trylock(&(mtx->mutex->mtx));
 +    return ret;
 +}
 +
 +
 +
 +int tMPI_Thread_mutex_unlock(tMPI_Thread_mutex_t *mtx)
 +{
 +    int ret;
 +
 +    /* check whether the mutex is initialized */
 +    ret = tMPI_Thread_mutex_init_once(mtx);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    ret = pthread_mutex_unlock(&(mtx->mutex->mtx));
 +    return ret;
 +}
 +
 +
 +
 +int tMPI_Thread_key_create(tMPI_Thread_key_t *key, void (*destructor)(void *))
 +{
 +    int ret;
 +
 +    if (key == NULL)
 +    {
 +        return EINVAL;
 +    }
 +
 +
 +    key->key = (struct tMPI_Thread_key*)malloc(sizeof(struct
 +                                                      tMPI_Thread_key)*1);
 +    if (key->key == NULL)
 +    {
 +        return ENOMEM;
 +    }
 +    ret = pthread_key_create(&((key)->key->pkey), destructor);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    tMPI_Atomic_set(&(key->initialized), 1);
 +    return 0;
 +}
 +
 +
 +int tMPI_Thread_key_delete(tMPI_Thread_key_t key)
 +{
 +    int ret;
 +
 +    ret = pthread_key_delete((key.key->pkey));
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    free(key.key);
 +
 +    return 0;
 +}
 +
 +
 +
 +void * tMPI_Thread_getspecific(tMPI_Thread_key_t key)
 +{
 +    void *p = NULL;
 +
 +    p = pthread_getspecific((key.key->pkey));
 +
 +    return p;
 +}
 +
 +
 +int tMPI_Thread_setspecific(tMPI_Thread_key_t key, void *value)
 +{
 +    int ret;
 +
 +    ret = pthread_setspecific((key.key->pkey), value);
 +
 +    return ret;
 +}
 +
 +
 +int tMPI_Thread_once(tMPI_Thread_once_t *once_control,
 +                     void                (*init_routine)(void))
 +{
 +    int ret;
 +    if (!once_control || !init_routine)
 +    {
 +        return EINVAL;
 +    }
 +
 +    /* really ugly hack - and it's slow... */
 +    ret = pthread_mutex_lock( &once_init );
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    if (tMPI_Atomic_get(&(once_control->once)) == 0)
 +    {
 +        (*init_routine)();
 +        tMPI_Atomic_set(&(once_control->once), 1);
 +    }
 +    ret = pthread_mutex_unlock( &once_init );
 +
 +    return ret;
 +}
 +
 +
 +
 +
 +int tMPI_Thread_cond_init(tMPI_Thread_cond_t *cond)
 +{
 +    int ret;
 +
 +    if (cond == NULL)
 +    {
 +        return EINVAL;
 +    }
 +
 +    cond->condp = (struct tMPI_Thread_cond*)malloc(
 +                sizeof(struct tMPI_Thread_cond));
 +    if (cond->condp == NULL)
 +    {
 +        return ENOMEM;
 +    }
 +
 +    ret = pthread_cond_init(&(cond->condp->cond), NULL);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    tMPI_Atomic_set(&(cond->initialized), 1);
 +    tMPI_Atomic_memory_barrier();
 +
 +    return 0;
 +}
 +
 +
 +static int tMPI_Thread_cond_init_once(tMPI_Thread_cond_t *cond)
 +{
 +    int ret = 0;
 +
 +    /* we're relying on the memory barrier semantics of mutex_lock/unlock
 +       for the check preceding this function call to have worked */
 +    ret = pthread_mutex_lock( &(cond_init) );
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    if (cond->condp == NULL)
 +    {
 +        cond->condp = (struct tMPI_Thread_cond*)
 +            malloc(sizeof(struct tMPI_Thread_cond)*1);
 +        if (cond->condp == NULL)
 +        {
 +            ret = ENOMEM;
 +            goto err;
 +        }
 +        ret = pthread_cond_init( &(cond->condp->cond), NULL);
 +        if (ret != 0)
 +        {
 +            goto err;
 +        }
 +    }
 +    ret = pthread_mutex_unlock( &(cond_init) );
 +    return ret;
 +err:
 +    /* try to unlock anyway */
 +    pthread_mutex_unlock( &(cond_init) );
 +    return ret;
 +}
 +
 +
 +
 +int tMPI_Thread_cond_destroy(tMPI_Thread_cond_t *cond)
 +{
 +    int ret;
 +
 +    if (cond == NULL)
 +    {
 +        return EINVAL;
 +    }
 +
 +    ret = pthread_cond_destroy(&(cond->condp->cond));
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    free(cond->condp);
 +
 +    return 0;
 +}
 +
 +
 +int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
 +{
 +    int ret;
 +
 +    /* check whether the condition is initialized */
 +    if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
 +    {
 +        ret = tMPI_Thread_cond_init_once(cond);
 +        if (ret != 0)
 +        {
 +            return ret;
 +        }
 +    }
 +    /* the mutex must have been initialized because it should be locked here */
 +
 +    ret = pthread_cond_wait( &(cond->condp->cond), &(mtx->mutex->mtx) );
 +
 +    return ret;
 +}
 +
 +
 +
 +
 +int tMPI_Thread_cond_signal(tMPI_Thread_cond_t *cond)
 +{
 +    int ret;
 +
 +    /* check whether the condition is initialized */
 +    if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
 +    {
 +        ret = tMPI_Thread_cond_init_once(cond);
 +        if (ret != 0)
 +        {
 +            return ret;
 +        }
 +    }
 +
 +    ret = pthread_cond_signal( &(cond->condp->cond) );
 +
 +    return ret;
 +}
 +
 +
 +
 +int tMPI_Thread_cond_broadcast(tMPI_Thread_cond_t *cond)
 +{
 +    int ret;
 +
 +    /* check whether the condition is initialized */
 +    if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
 +    {
 +        ret = tMPI_Thread_cond_init_once(cond);
 +        if (ret != 0)
 +        {
 +            return ret;
 +        }
 +    }
 +
 +    ret = pthread_cond_broadcast( &(cond->condp->cond) );
 +
 +    return ret;
 +}
 +
 +
 +
 +
 +void tMPI_Thread_exit(void *value_ptr)
 +{
 +    pthread_exit(value_ptr);
 +}
 +
 +
 +int tMPI_Thread_cancel(tMPI_Thread_t thread)
 +{
++    #ifdef __native_client__
++    return ENOSYS;
++    #endif
 +    return pthread_cancel(thread->th);
 +}
 +
 +
 +
 +
 +int tMPI_Thread_barrier_init(tMPI_Thread_barrier_t *barrier, int n)
 +{
 +    int ret;
 +    /*tMPI_Thread_pthread_barrier_t *p;*/
 +
 +    if (barrier == NULL)
 +    {
 +        return EINVAL;
 +    }
 +
 +    barrier->barrierp = (struct tMPI_Thread_barrier*)
 +        malloc(sizeof(struct tMPI_Thread_barrier)*1);
 +    if (barrier->barrierp == NULL)
 +    {
 +        return ENOMEM;
 +    }
 +
 +    ret = pthread_mutex_init(&(barrier->barrierp->mutex), NULL);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    ret = pthread_cond_init(&(barrier->barrierp->cv), NULL);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    barrier->threshold = n;
 +    barrier->count     = n;
 +    barrier->cycle     = 0;
 +
 +    tMPI_Atomic_set(&(barrier->initialized), 1);
 +    return 0;
 +}
 +
 +static int tMPI_Thread_barrier_init_once(tMPI_Thread_barrier_t *barrier)
 +{
 +    int ret = 0;
 +
 +    /* we're relying on the memory barrier semantics of mutex_lock/unlock
 +       for the check preceding this function call to have worked */
 +    ret = pthread_mutex_lock( &(barrier_init) );
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    if (barrier->barrierp == NULL)
 +    {
 +        barrier->barrierp = (struct tMPI_Thread_barrier*)
 +            malloc(sizeof(struct tMPI_Thread_barrier)*1);
 +        if (barrier->barrierp == NULL)
 +        {
 +            ret = ENOMEM;
 +            goto err;
 +        }
 +
 +        ret = pthread_mutex_init(&(barrier->barrierp->mutex), NULL);
 +
 +        if (ret != 0)
 +        {
 +            goto err;
 +        }
 +
 +        ret = pthread_cond_init(&(barrier->barrierp->cv), NULL);
 +
 +        if (ret != 0)
 +        {
 +            goto err;
 +        }
 +    }
 +    ret = pthread_mutex_unlock( &(barrier_init) );
 +    return ret;
 +err:
 +    pthread_mutex_unlock( &(barrier_init) );
 +    return ret;
 +}
 +
 +
 +
 +
 +int tMPI_Thread_barrier_destroy(tMPI_Thread_barrier_t *barrier)
 +{
 +    int ret;
 +
 +    if (barrier == NULL)
 +    {
 +        return EINVAL;
 +    }
 +
 +    ret = pthread_mutex_destroy(&(barrier->barrierp->mutex));
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +    ret = pthread_cond_destroy(&(barrier->barrierp->cv));
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    free(barrier->barrierp);
 +
 +    return 0;
 +}
 +
 +
 +int tMPI_Thread_barrier_wait(tMPI_Thread_barrier_t * barrier)
 +{
 +    int cycle;
 +    int ret;
 +
 +    /* check whether the barrier is initialized */
 +    if (tMPI_Atomic_get( &(barrier->initialized)  ) == 0)
 +    {
 +        tMPI_Thread_barrier_init_once(barrier);
 +    }
 +
 +
 +    ret = pthread_mutex_lock(&barrier->barrierp->mutex);
 +    if (ret != 0)
 +    {
 +        return ret;
 +    }
 +
 +    cycle = barrier->cycle;
 +
 +    /* Decrement the count atomically and check if it is zero.
 +     * This will only be true for the last thread calling us.
 +     */
 +    if (--barrier->count <= 0)
 +    {
 +        barrier->cycle = !barrier->cycle;
 +        barrier->count = barrier->threshold;
 +        ret            = pthread_cond_broadcast(&barrier->barrierp->cv);
 +
 +        if (ret == 0)
 +        {
 +            goto err;
 +        }
 +    }
 +    else
 +    {
 +        while (cycle == barrier->cycle)
 +        {
 +            ret = pthread_cond_wait(&barrier->barrierp->cv,
 +                                    &barrier->barrierp->mutex);
 +            if (ret != 0)
 +            {
 +                goto err;
 +            }
 +        }
 +    }
 +
 +    ret = pthread_mutex_unlock(&barrier->barrierp->mutex);
 +    return ret;
 +err:
 +    pthread_mutex_unlock(&barrier->barrierp->mutex);
 +    return ret;
 +
 +}
 +
 +#else
 +
 +/* just to have some symbols */
 +int tMPI_Thread_pthreads = 0;
 +
 +#endif /* THREAD_PTHREADS */
diff --cc src/gromacs/legacyheaders/main.h
index 3089e00237,0000000000..81011e23f9
mode 100644,000000..100644
--- a/src/gromacs/legacyheaders/main.h
+++ b/src/gromacs/legacyheaders/main.h
@@@ -1,89 -1,0 +1,89 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +
 +#ifndef _main_h
 +#define _main_h
 +
 +
 +#include <stdio.h>
 +#include "network.h"
 +#include "../fileio/filenm.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +
- char *gmx_gethostname(char *name, size_t len);
++int gmx_gethostname(char *name, size_t len);
 +/* Sets the hostname to the value given by gethostname, if available,
 + * and to "unknown" otherwise. name should have at least size len.
-  * Returns name.
++ * Returns 0 on success, -1 on error.
 + */
 +
 +void gmx_log_open(const char *fn, const t_commrec *cr,
 +                  gmx_bool bMasterOnly, gmx_bool bAppendFiles, FILE**);
 +/* Open the log file, if necessary (nprocs > 1) the logfile name is
 + * communicated around the ring.
 + */
 +
 +void gmx_log_close(FILE *fp);
 +/* Close the log file */
 +
 +void check_multi_int(FILE *log, const gmx_multisim_t *ms,
 +                     int val, const char *name,
 +                     gmx_bool bQuiet);
 +void check_multi_int64(FILE *log, const gmx_multisim_t *ms,
 +                       gmx_int64_t val, const char *name,
 +                       gmx_bool bQuiet);
 +/* Check if val is the same on all processors for a mdrun -multi run
 + * The string name is used to print to the log file and in a fatal error
 + * if the val's don't match. If bQuiet is true and the check passes,
 + * no output is written.
 + */
 +
 +void init_multisystem(t_commrec *cr, int nsim, char **multidirs,
 +                      int nfile, const t_filenm fnm[], gmx_bool bParFn);
 +/* Splits the communication into nsim separate simulations
 + * and creates a communication structure between the master
 + * these simulations.
 + * If bParFn is set, the nodeid is appended to the tpx and each output file.
 + */
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +
 +#endif  /* _main_h */
diff --cc src/programs/mdrun/runner.c
index a11bd54a4e,0000000000..ed32d5f099
mode 100644,000000..100644
--- a/src/programs/mdrun/runner.c
+++ b/src/programs/mdrun/runner.c
@@@ -1,1817 -1,0 +1,1820 @@@
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2011,2012,2013, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include <signal.h>
 +#include <stdlib.h>
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +#include <string.h>
 +#include <assert.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "copyrite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "md_logging.h"
 +#include "md_support.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "names.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "txtdump.h"
 +#include "gmx_detect_hardware.h"
 +#include "gmx_omp_nthreads.h"
 +#include "pull_rotation.h"
 +#include "calc_verletbuf.h"
 +#include "gmx_fatal_collective.h"
 +#include "membed.h"
 +#include "macros.h"
 +#include "gmx_thread_affinity.h"
 +
 +#include "gromacs/fileio/tpxio.h"
 +#include "gromacs/mdlib/nbnxn_search.h"
 +#include "gromacs/mdlib/nbnxn_consts.h"
 +#include "gromacs/timing/wallcycle.h"
 +#include "gromacs/utility/gmxmpi.h"
 +#include "gromacs/utility/gmxomp.h"
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#include "gpu_utils.h"
 +#include "nbnxn_cuda_data_mgmt.h"
 +
 +typedef struct {
 +    gmx_integrator_t *func;
 +} gmx_intp_t;
 +
 +/* The array should match the eI array in include/types/enums.h */
 +const gmx_intp_t    integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md}};
 +
 +gmx_int64_t         deform_init_init_step_tpx;
 +matrix              deform_init_box_tpx;
 +tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 +
 +
 +#ifdef GMX_THREAD_MPI
 +struct mdrunner_arglist
 +{
 +    gmx_hw_opt_t    hw_opt;
 +    FILE           *fplog;
 +    t_commrec      *cr;
 +    int             nfile;
 +    const t_filenm *fnm;
 +    output_env_t    oenv;
 +    gmx_bool        bVerbose;
 +    gmx_bool        bCompact;
 +    int             nstglobalcomm;
 +    ivec            ddxyz;
 +    int             dd_node_order;
 +    real            rdd;
 +    real            rconstr;
 +    const char     *dddlb_opt;
 +    real            dlb_scale;
 +    const char     *ddcsx;
 +    const char     *ddcsy;
 +    const char     *ddcsz;
 +    const char     *nbpu_opt;
 +    int             nstlist_cmdline;
 +    gmx_int64_t     nsteps_cmdline;
 +    int             nstepout;
 +    int             resetstep;
 +    int             nmultisim;
 +    int             repl_ex_nst;
 +    int             repl_ex_nex;
 +    int             repl_ex_seed;
 +    real            pforce;
 +    real            cpt_period;
 +    real            max_hours;
 +    const char     *deviceOptions;
 +    unsigned long   Flags;
 +    int             ret; /* return value */
 +};
 +
 +
 +/* The function used for spawning threads. Extracts the mdrunner()
 +   arguments from its one argument and calls mdrunner(), after making
 +   a commrec. */
 +static void mdrunner_start_fn(void *arg)
 +{
 +    struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg;
 +    struct mdrunner_arglist  mc  = *mda; /* copy the arg list to make sure
 +                                            that it's thread-local. This doesn't
 +                                            copy pointed-to items, of course,
 +                                            but those are all const. */
 +    t_commrec *cr;                       /* we need a local version of this */
 +    FILE      *fplog = NULL;
 +    t_filenm  *fnm;
 +
 +    fnm = dup_tfn(mc.nfile, mc.fnm);
 +
 +    cr = reinitialize_commrec_for_this_thread(mc.cr);
 +
 +    if (MASTER(cr))
 +    {
 +        fplog = mc.fplog;
 +    }
 +
 +    mda->ret = mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
 +                        mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
 +                        mc.ddxyz, mc.dd_node_order, mc.rdd,
 +                        mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
 +                        mc.ddcsx, mc.ddcsy, mc.ddcsz,
 +                        mc.nbpu_opt, mc.nstlist_cmdline,
 +                        mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
 +                        mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
 +                        mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
 +}
 +
 +/* called by mdrunner() to start a specific number of threads (including
 +   the main thread) for thread-parallel runs. This in turn calls mdrunner()
 +   for each thread.
 +   All options besides nthreads are the same as for mdrunner(). */
 +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
 +                                         FILE *fplog, t_commrec *cr, int nfile,
 +                                         const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +                                         gmx_bool bCompact, int nstglobalcomm,
 +                                         ivec ddxyz, int dd_node_order, real rdd, real rconstr,
 +                                         const char *dddlb_opt, real dlb_scale,
 +                                         const char *ddcsx, const char *ddcsy, const char *ddcsz,
 +                                         const char *nbpu_opt, int nstlist_cmdline,
 +                                         gmx_int64_t nsteps_cmdline,
 +                                         int nstepout, int resetstep,
 +                                         int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                                         real pforce, real cpt_period, real max_hours,
 +                                         const char *deviceOptions, unsigned long Flags)
 +{
 +    int                      ret;
 +    struct mdrunner_arglist *mda;
 +    t_commrec               *crn; /* the new commrec */
 +    t_filenm                *fnmn;
 +
 +    /* first check whether we even need to start tMPI */
 +    if (hw_opt->nthreads_tmpi < 2)
 +    {
 +        return cr;
 +    }
 +
 +    /* a few small, one-time, almost unavoidable memory leaks: */
 +    snew(mda, 1);
 +    fnmn = dup_tfn(nfile, fnm);
 +
 +    /* fill the data structure to pass as void pointer to thread start fn */
 +    /* hw_opt contains pointers, which should all be NULL at this stage */
 +    mda->hw_opt          = *hw_opt;
 +    mda->fplog           = fplog;
 +    mda->cr              = cr;
 +    mda->nfile           = nfile;
 +    mda->fnm             = fnmn;
 +    mda->oenv            = oenv;
 +    mda->bVerbose        = bVerbose;
 +    mda->bCompact        = bCompact;
 +    mda->nstglobalcomm   = nstglobalcomm;
 +    mda->ddxyz[XX]       = ddxyz[XX];
 +    mda->ddxyz[YY]       = ddxyz[YY];
 +    mda->ddxyz[ZZ]       = ddxyz[ZZ];
 +    mda->dd_node_order   = dd_node_order;
 +    mda->rdd             = rdd;
 +    mda->rconstr         = rconstr;
 +    mda->dddlb_opt       = dddlb_opt;
 +    mda->dlb_scale       = dlb_scale;
 +    mda->ddcsx           = ddcsx;
 +    mda->ddcsy           = ddcsy;
 +    mda->ddcsz           = ddcsz;
 +    mda->nbpu_opt        = nbpu_opt;
 +    mda->nstlist_cmdline = nstlist_cmdline;
 +    mda->nsteps_cmdline  = nsteps_cmdline;
 +    mda->nstepout        = nstepout;
 +    mda->resetstep       = resetstep;
 +    mda->nmultisim       = nmultisim;
 +    mda->repl_ex_nst     = repl_ex_nst;
 +    mda->repl_ex_nex     = repl_ex_nex;
 +    mda->repl_ex_seed    = repl_ex_seed;
 +    mda->pforce          = pforce;
 +    mda->cpt_period      = cpt_period;
 +    mda->max_hours       = max_hours;
 +    mda->deviceOptions   = deviceOptions;
 +    mda->Flags           = Flags;
 +
 +    /* now spawn new threads that start mdrunner_start_fn(), while
 +       the main thread returns, we set thread affinity later */
 +    ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE,
 +                       mdrunner_start_fn, (void*)(mda) );
 +    if (ret != TMPI_SUCCESS)
 +    {
 +        return NULL;
 +    }
 +
 +    crn = reinitialize_commrec_for_this_thread(cr);
 +    return crn;
 +}
 +
 +
 +static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
 +                                        const gmx_hw_opt_t  *hw_opt,
 +                                        int                  nthreads_tot,
 +                                        int                  ngpu)
 +{
 +    int nthreads_tmpi;
 +
 +    /* There are no separate PME nodes here, as we ensured in
 +     * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
 +     * and a conditional ensures we would not have ended up here.
 +     * Note that separate PME nodes might be switched on later.
 +     */
 +    if (ngpu > 0)
 +    {
 +        nthreads_tmpi = ngpu;
 +        if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
 +        {
 +            nthreads_tmpi = nthreads_tot;
 +        }
 +    }
 +    else if (hw_opt->nthreads_omp > 0)
 +    {
 +        /* Here we could oversubscribe, when we do, we issue a warning later */
 +        nthreads_tmpi = max(1, nthreads_tot/hw_opt->nthreads_omp);
 +    }
 +    else
 +    {
 +        /* TODO choose nthreads_omp based on hardware topology
 +           when we have a hardware topology detection library */
 +        /* In general, when running up to 4 threads, OpenMP should be faster.
 +         * Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
 +         * On Intel>=Nehalem running OpenMP on a single CPU is always faster,
 +         * even on two CPUs it's usually faster (but with many OpenMP threads
 +         * it could be faster not to use HT, currently we always use HT).
 +         * On Nehalem/Westmere we want to avoid running 16 threads over
 +         * two CPUs with HT, so we need a limit<16; thus we use 12.
 +         * A reasonable limit for Intel Sandy and Ivy bridge,
 +         * not knowing the topology, is 16 threads.
 +         */
 +        const int nthreads_omp_always_faster             =  4;
 +        const int nthreads_omp_always_faster_Nehalem     = 12;
 +        const int nthreads_omp_always_faster_SandyBridge = 16;
 +        const int first_model_Nehalem                    = 0x1A;
 +        const int first_model_SandyBridge                = 0x2A;
 +        gmx_bool  bIntel_Family6;
 +
 +        bIntel_Family6 =
 +            (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
 +             gmx_cpuid_family(hwinfo->cpuid_info) == 6);
 +
 +        if (nthreads_tot <= nthreads_omp_always_faster ||
 +            (bIntel_Family6 &&
 +             ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
 +              (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
 +        {
 +            /* Use pure OpenMP parallelization */
 +            nthreads_tmpi = 1;
 +        }
 +        else
 +        {
 +            /* Don't use OpenMP parallelization */
 +            nthreads_tmpi = nthreads_tot;
 +        }
 +    }
 +
 +    return nthreads_tmpi;
 +}
 +
 +
 +/* Get the number of threads to use for thread-MPI based on how many
 + * were requested, which algorithms we're using,
 + * and how many particles there are.
 + * At the point we have already called check_and_update_hw_opt.
 + * Thus all options should be internally consistent and consistent
 + * with the hardware, except that ntmpi could be larger than #GPU.
 + */
 +static int get_nthreads_mpi(const gmx_hw_info_t *hwinfo,
 +                            gmx_hw_opt_t *hw_opt,
 +                            t_inputrec *inputrec, gmx_mtop_t *mtop,
 +                            const t_commrec *cr,
 +                            FILE *fplog)
 +{
 +    int      nthreads_hw, nthreads_tot_max, nthreads_tmpi, nthreads_new, ngpu;
 +    int      min_atoms_per_mpi_thread;
 +    char    *env;
 +    char     sbuf[STRLEN];
 +    gmx_bool bCanUseGPU;
 +
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        /* Trivial, return right away */
 +        return hw_opt->nthreads_tmpi;
 +    }
 +
 +    nthreads_hw = hwinfo->nthreads_hw_avail;
 +
 +    /* How many total (#tMPI*#OpenMP) threads can we start? */
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        nthreads_tot_max = hw_opt->nthreads_tot;
 +    }
 +    else
 +    {
 +        nthreads_tot_max = nthreads_hw;
 +    }
 +
 +    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET &&
 +                  hwinfo->gpu_info.ncuda_dev_compatible > 0);
 +    if (bCanUseGPU)
 +    {
 +        ngpu = hwinfo->gpu_info.ncuda_dev_compatible;
 +    }
 +    else
 +    {
 +        ngpu = 0;
 +    }
 +
 +    if (inputrec->cutoff_scheme == ecutsGROUP)
 +    {
 +        /* We checked this before, but it doesn't hurt to do it once more */
 +        assert(hw_opt->nthreads_omp == 1);
 +    }
 +
 +    nthreads_tmpi =
 +        get_tmpi_omp_thread_division(hwinfo, hw_opt, nthreads_tot_max, ngpu);
 +
 +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
 +    {
 +        /* Dims/steps are divided over the nodes iso splitting the atoms */
 +        min_atoms_per_mpi_thread = 0;
 +    }
 +    else
 +    {
 +        if (bCanUseGPU)
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
 +        }
 +        else
 +        {
 +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
 +        }
 +    }
 +
 +    /* Check if an algorithm does not support parallel simulation.  */
 +    if (nthreads_tmpi != 1 &&
 +        ( inputrec->eI == eiLBFGS ||
 +          inputrec->coulombtype == eelEWALD ) )
 +    {
 +        nthreads_tmpi = 1;
 +
 +        md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
 +        if (hw_opt->nthreads_tmpi > nthreads_tmpi)
 +        {
 +            gmx_fatal(FARGS, "You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
 +        }
 +    }
 +    else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
 +    {
 +        /* the thread number was chosen automatically, but there are too many
 +           threads (too few atoms per thread) */
 +        nthreads_new = max(1, mtop->natoms/min_atoms_per_mpi_thread);
 +
 +        /* Avoid partial use of Hyper-Threading */
 +        if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
 +            nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw)
 +        {
 +            nthreads_new = nthreads_hw/2;
 +        }
 +
 +        /* Avoid large prime numbers in the thread count */
 +        if (nthreads_new >= 6)
 +        {
 +            /* Use only 6,8,10 with additional factors of 2 */
 +            int fac;
 +
 +            fac = 2;
 +            while (3*fac*2 <= nthreads_new)
 +            {
 +                fac *= 2;
 +            }
 +
 +            nthreads_new = (nthreads_new/fac)*fac;
 +        }
 +        else
 +        {
 +            /* Avoid 5 */
 +            if (nthreads_new == 5)
 +            {
 +                nthreads_new = 4;
 +            }
 +        }
 +
 +        nthreads_tmpi = nthreads_new;
 +
 +        fprintf(stderr, "\n");
 +        fprintf(stderr, "NOTE: Parallelization is limited by the small number of atoms,\n");
 +        fprintf(stderr, "      only starting %d thread-MPI threads.\n", nthreads_tmpi);
 +        fprintf(stderr, "      You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
 +    }
 +
 +    return nthreads_tmpi;
 +}
 +#endif /* GMX_THREAD_MPI */
 +
 +
 +/* We determine the extra cost of the non-bonded kernels compared to
 + * a reference nstlist value of 10 (which is the default in grompp).
 + */
 +static const int    nbnxn_reference_nstlist = 10;
 +/* The values to try when switching  */
 +const int           nstlist_try[] = { 20, 25, 40 };
 +#define NNSTL  sizeof(nstlist_try)/sizeof(nstlist_try[0])
 +/* Increase nstlist until the non-bonded cost increases more than listfac_ok,
 + * but never more than listfac_max.
 + * A standard (protein+)water system at 300K with PME ewald_rtol=1e-5
 + * needs 1.28 at rcoulomb=0.9 and 1.24 at rcoulomb=1.0 to get to nstlist=40.
 + * Note that both CPU and GPU factors are conservative. Performance should
 + * not go down due to this tuning, except with a relatively slow GPU.
 + * On the other hand, at medium/high parallelization or with fast GPUs
 + * nstlist will not be increased enough to reach optimal performance.
 + */
 +/* CPU: pair-search is about a factor 1.5 slower than the non-bonded kernel */
 +static const float  nbnxn_cpu_listfac_ok    = 1.05;
 +static const float  nbnxn_cpu_listfac_max   = 1.09;
 +/* GPU: pair-search is a factor 1.5-3 slower than the non-bonded kernel */
 +static const float  nbnxn_gpu_listfac_ok    = 1.20;
 +static const float  nbnxn_gpu_listfac_max   = 1.30;
 +
 +/* Try to increase nstlist when using the Verlet cut-off scheme */
 +static void increase_nstlist(FILE *fp, t_commrec *cr,
 +                             t_inputrec *ir, int nstlist_cmdline,
 +                             const gmx_mtop_t *mtop, matrix box,
 +                             gmx_bool bGPU)
 +{
 +    float                  listfac_ok, listfac_max;
 +    int                    nstlist_orig, nstlist_prev;
 +    verletbuf_list_setup_t ls;
 +    real                   rlist_nstlist10, rlist_inc, rlist_ok, rlist_max;
 +    real                   rlist_new, rlist_prev;
 +    int                    nstlist_ind = 0;
 +    t_state                state_tmp;
 +    gmx_bool               bBox, bDD, bCont;
 +    const char            *nstl_gpu = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
 +    const char            *vbd_err  = "Can not increase nstlist because verlet-buffer-tolerance is not set or used";
 +    const char            *box_err  = "Can not increase nstlist because the box is too small";
 +    const char            *dd_err   = "Can not increase nstlist because of domain decomposition limitations";
 +    char                   buf[STRLEN];
 +
 +    if (nstlist_cmdline <= 0)
 +    {
 +        if (fp != NULL && bGPU && ir->nstlist < nstlist_try[0])
 +        {
 +            fprintf(fp, nstl_gpu, ir->nstlist);
 +        }
 +        nstlist_ind = 0;
 +        while (nstlist_ind < NNSTL && ir->nstlist >= nstlist_try[nstlist_ind])
 +        {
 +            nstlist_ind++;
 +        }
 +        if (nstlist_ind == NNSTL)
 +        {
 +            /* There are no larger nstlist value to try */
 +            return;
 +        }
 +    }
 +
 +    if (ir->verletbuf_tol == 0 && bGPU)
 +    {
 +        gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
 +    }
 +
 +    if (ir->verletbuf_tol < 0)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "%s\n", vbd_err);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "%s\n", vbd_err);
 +        }
 +
 +        return;
 +    }
 +
 +    if (bGPU)
 +    {
 +        listfac_ok  = nbnxn_gpu_listfac_ok;
 +        listfac_max = nbnxn_gpu_listfac_max;
 +    }
 +    else
 +    {
 +        listfac_ok  = nbnxn_cpu_listfac_ok;
 +        listfac_max = nbnxn_cpu_listfac_max;
 +    }
 +
 +    nstlist_orig = ir->nstlist;
 +    if (nstlist_cmdline > 0)
 +    {
 +        if (fp)
 +        {
 +            sprintf(buf, "Getting nstlist=%d from command line option",
 +                    nstlist_cmdline);
 +        }
 +        ir->nstlist = nstlist_cmdline;
 +    }
 +
 +    verletbuf_get_list_setup(bGPU, &ls);
 +
 +    /* Allow rlist to make the list a given factor larger than the list
 +     * would be with nstlist=10.
 +     */
 +    nstlist_prev = ir->nstlist;
 +    ir->nstlist  = 10;
 +    calc_verlet_buffer_size(mtop, det(box), ir, &ls, NULL, &rlist_nstlist10);
 +    ir->nstlist  = nstlist_prev;
 +
 +    /* Determine the pair list size increase due to zero interactions */
 +    rlist_inc = nbnxn_get_rlist_effective_inc(ls.cluster_size_j,
 +                                              mtop->natoms/det(box));
 +    rlist_ok  = (rlist_nstlist10 + rlist_inc)*pow(listfac_ok, 1.0/3.0) - rlist_inc;
 +    rlist_max = (rlist_nstlist10 + rlist_inc)*pow(listfac_max, 1.0/3.0) - rlist_inc;
 +    if (debug)
 +    {
 +        fprintf(debug, "nstlist tuning: rlist_inc %.3f rlist_ok %.3f rlist_max %.3f\n",
 +                rlist_inc, rlist_ok, rlist_max);
 +    }
 +
 +    nstlist_prev = nstlist_orig;
 +    rlist_prev   = ir->rlist;
 +    do
 +    {
 +        if (nstlist_cmdline <= 0)
 +        {
 +            ir->nstlist = nstlist_try[nstlist_ind];
 +        }
 +
 +        /* Set the pair-list buffer size in ir */
 +        calc_verlet_buffer_size(mtop, det(box), ir, &ls, NULL, &rlist_new);
 +
 +        /* Does rlist fit in the box? */
 +        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box));
 +        bDD  = TRUE;
 +        if (bBox && DOMAINDECOMP(cr))
 +        {
 +            /* Check if rlist fits in the domain decomposition */
 +            if (inputrec2nboundeddim(ir) < DIM)
 +            {
 +                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
 +            }
 +            copy_mat(box, state_tmp.box);
 +            bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new);
 +        }
 +
 +        if (debug)
 +        {
 +            fprintf(debug, "nstlist %d rlist %.3f bBox %d bDD %d\n",
 +                    ir->nstlist, rlist_new, bBox, bDD);
 +        }
 +
 +        bCont = FALSE;
 +
 +        if (nstlist_cmdline <= 0)
 +        {
 +            if (bBox && bDD && rlist_new <= rlist_max)
 +            {
 +                /* Increase nstlist */
 +                nstlist_prev = ir->nstlist;
 +                rlist_prev   = rlist_new;
 +                bCont        = (nstlist_ind+1 < NNSTL && rlist_new < rlist_ok);
 +            }
 +            else
 +            {
 +                /* Stick with the previous nstlist */
 +                ir->nstlist = nstlist_prev;
 +                rlist_new   = rlist_prev;
 +                bBox        = TRUE;
 +                bDD         = TRUE;
 +            }
 +        }
 +
 +        nstlist_ind++;
 +    }
 +    while (bCont);
 +
 +    if (!bBox || !bDD)
 +    {
 +        gmx_warning(!bBox ? box_err : dd_err);
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "\n%s\n", bBox ? box_err : dd_err);
 +        }
 +        ir->nstlist = nstlist_orig;
 +    }
 +    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
 +    {
 +        sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g",
 +                nstlist_orig, ir->nstlist,
 +                ir->rlist, rlist_new);
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "%s\n\n", buf);
 +        }
 +        if (fp != NULL)
 +        {
 +            fprintf(fp, "%s\n\n", buf);
 +        }
 +        ir->rlist     = rlist_new;
 +        ir->rlistlong = rlist_new;
 +    }
 +}
 +
 +static void prepare_verlet_scheme(FILE                           *fplog,
 +                                  t_commrec                      *cr,
 +                                  t_inputrec                     *ir,
 +                                  int                             nstlist_cmdline,
 +                                  const gmx_mtop_t               *mtop,
 +                                  matrix                          box,
 +                                  gmx_bool                        bUseGPU)
 +{
 +    if (ir->verletbuf_tol > 0)
 +    {
 +        /* Update the Verlet buffer size for the current run setup */
 +        verletbuf_list_setup_t ls;
 +        real                   rlist_new;
 +
 +        /* Here we assume CPU acceleration is on. But as currently
 +         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
 +         * and 4x2 gives a larger buffer than 4x4, this is ok.
 +         */
 +        verletbuf_get_list_setup(bUseGPU, &ls);
 +
 +        calc_verlet_buffer_size(mtop, det(box), ir, &ls, NULL, &rlist_new);
 +
 +        if (rlist_new != ir->rlist)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
 +                        ir->rlist, rlist_new,
 +                        ls.cluster_size_i, ls.cluster_size_j);
 +            }
 +            ir->rlist     = rlist_new;
 +            ir->rlistlong = rlist_new;
 +        }
 +    }
 +
 +    if (nstlist_cmdline > 0 && (!EI_DYNAMICS(ir->eI) || ir->verletbuf_tol <= 0))
 +    {
 +        gmx_fatal(FARGS, "Can not set nstlist without %s",
 +                  !EI_DYNAMICS(ir->eI) ? "dynamics" : "verlet-buffer-tolerance");
 +    }
 +
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        /* Set or try nstlist values */
 +        increase_nstlist(fplog, cr, ir, nstlist_cmdline, mtop, box, bUseGPU);
 +    }
 +}
 +
 +static void convert_to_verlet_scheme(FILE *fplog,
 +                                     t_inputrec *ir,
 +                                     gmx_mtop_t *mtop, real box_vol)
 +{
 +    char *conv_mesg = "Converting input file with group cut-off scheme to the Verlet cut-off scheme";
 +
 +    md_print_warn(NULL, fplog, "%s\n", conv_mesg);
 +
 +    ir->cutoff_scheme = ecutsVERLET;
 +    ir->verletbuf_tol = 0.005;
 +
 +    if (ir->rcoulomb != ir->rvdw)
 +    {
 +        gmx_fatal(FARGS, "The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
 +    }
 +
 +    if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
 +    {
 +        gmx_fatal(FARGS, "User non-bonded potentials are not (yet) supported with the Verlet scheme");
 +    }
 +    else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
 +    {
 +        md_print_warn(NULL, fplog, "Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials");
 +
 +        if (EVDW_SWITCHED(ir->vdwtype))
 +        {
 +            ir->vdwtype = evdwCUT;
 +        }
 +        if (EEL_SWITCHED(ir->coulombtype))
 +        {
 +            if (EEL_FULL(ir->coulombtype))
 +            {
 +                /* With full electrostatic only PME can be switched */
 +                ir->coulombtype = eelPME;
 +            }
 +            else
 +            {
 +                md_print_warn(NULL, fplog, "NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n", eel_names[ir->coulombtype]);
 +                ir->coulombtype = eelRF;
 +                ir->epsilon_rf  = 0.0;
 +            }
 +        }
 +
 +        /* We set the pair energy error tolerance to a small number.
 +         * Note that this is only for testing. For production the user
 +         * should think about this and set the mdp options.
 +         */
 +        ir->verletbuf_tol = 1e-4;
 +    }
 +
 +    if (inputrec2nboundeddim(ir) != 3)
 +    {
 +        gmx_fatal(FARGS, "Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
 +    }
 +
 +    if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
 +    {
 +        gmx_fatal(FARGS, "Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
 +    }
 +
 +    if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
 +    {
 +        verletbuf_list_setup_t ls;
 +
 +        verletbuf_get_list_setup(FALSE, &ls);
 +        calc_verlet_buffer_size(mtop, box_vol, ir, &ls, NULL, &ir->rlist);
 +    }
 +    else
 +    {
 +        ir->verletbuf_tol   = -1;
 +        ir->rlist           = 1.05*max(ir->rvdw, ir->rcoulomb);
 +    }
 +
 +    gmx_mtop_remove_chargegroups(mtop);
 +}
 +
 +static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)
 +{
 +    fprintf(fp, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
 +            hw_opt->nthreads_tot,
 +            hw_opt->nthreads_tmpi,
 +            hw_opt->nthreads_omp,
 +            hw_opt->nthreads_omp_pme,
 +            hw_opt->gpu_opt.gpu_id != NULL ? hw_opt->gpu_opt.gpu_id : "");
 +}
 +
 +/* Checks we can do when we don't (yet) know the cut-off scheme */
 +static void check_and_update_hw_opt_1(gmx_hw_opt_t *hw_opt,
 +                                      gmx_bool      bIsSimMaster)
 +{
 +    gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster);
 +
 +#ifndef GMX_THREAD_MPI
 +    if (hw_opt->nthreads_tot > 0)
 +    {
 +        gmx_fatal(FARGS, "Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +    if (hw_opt->nthreads_tmpi > 0)
 +    {
 +        gmx_fatal(FARGS, "Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
 +    }
 +#endif
 +
 +#ifndef GMX_OPENMP
 +    if (hw_opt->nthreads_omp > 1)
 +    {
 +        gmx_fatal(FARGS, "More than 1 OpenMP thread requested, but Gromacs was compiled without OpenMP support");
 +    }
 +    hw_opt->nthreads_omp = 1;
 +#endif
 +
 +    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
 +    {
 +        /* We have the same number of OpenMP threads for PP and PME processes,
 +         * thus we can perform several consistency checks.
 +         */
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
 +        {
 +            gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
 +                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp);
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
 +        {
 +            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
 +                      hw_opt->nthreads_tot, hw_opt->nthreads_tmpi);
 +        }
 +
 +        if (hw_opt->nthreads_omp > 0 &&
 +            hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
 +        {
 +            gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
 +                      hw_opt->nthreads_tot, hw_opt->nthreads_omp);
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 0 &&
 +            hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +    }
 +
 +#ifndef GMX_OPENMP
 +    if (hw_opt->nthreads_omp > 1)
 +    {
 +        gmx_fatal(FARGS, "OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
 +    }
 +#endif
 +
 +    if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
 +    {
 +        gmx_fatal(FARGS, "You need to specify -ntomp in addition to -ntomp_pme");
 +    }
 +
 +    if (hw_opt->nthreads_tot == 1)
 +    {
 +        hw_opt->nthreads_tmpi = 1;
 +
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            gmx_fatal(FARGS, "You requested %d OpenMP threads with %d total threads",
 +                      hw_opt->nthreads_tmpi, hw_opt->nthreads_tot);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
 +    {
 +        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
 +    }
 +
 +    /* Parse GPU IDs, if provided.
 +     * We check consistency with the tMPI thread count later.
 +     */
 +    gmx_parse_gpu_ids(&hw_opt->gpu_opt);
 +
 +#ifdef GMX_THREAD_MPI
 +    if (hw_opt->gpu_opt.ncuda_dev_use > 0 && hw_opt->nthreads_tmpi == 0)
 +    {
 +        /* Set the number of MPI threads equal to the number of GPUs */
 +        hw_opt->nthreads_tmpi = hw_opt->gpu_opt.ncuda_dev_use;
 +
 +        if (hw_opt->nthreads_tot > 0 &&
 +            hw_opt->nthreads_tmpi > hw_opt->nthreads_tot)
 +        {
 +            /* We have more GPUs than total threads requested.
 +             * We choose to (later) generate a mismatch error,
 +             * instead of launching more threads than requested.
 +             */
 +            hw_opt->nthreads_tmpi = hw_opt->nthreads_tot;
 +        }
 +    }
 +#endif
 +
 +    if (debug)
 +    {
 +        print_hw_opt(debug, hw_opt);
 +    }
 +}
 +
 +/* Checks we can do when we know the cut-off scheme */
 +static void check_and_update_hw_opt_2(gmx_hw_opt_t *hw_opt,
 +                                      int           cutoff_scheme)
 +{
 +    if (cutoff_scheme == ecutsGROUP)
 +    {
 +        /* We only have OpenMP support for PME only nodes */
 +        if (hw_opt->nthreads_omp > 1)
 +        {
 +            gmx_fatal(FARGS, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
 +                      ecutscheme_names[cutoff_scheme],
 +                      ecutscheme_names[ecutsVERLET]);
 +        }
 +        hw_opt->nthreads_omp = 1;
 +    }
 +
 +    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
 +    {
 +        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
 +    }
 +
 +    if (debug)
 +    {
 +        print_hw_opt(debug, hw_opt);
 +    }
 +}
 +
 +
 +/* Override the value in inputrec with value passed on the command line (if any) */
 +static void override_nsteps_cmdline(FILE            *fplog,
 +                                    gmx_int64_t      nsteps_cmdline,
 +                                    t_inputrec      *ir,
 +                                    const t_commrec *cr)
 +{
 +    char sbuf[STEPSTRSIZE];
 +
 +    assert(ir);
 +    assert(cr);
 +
 +    /* override with anything else than the default -2 */
 +    if (nsteps_cmdline > -2)
 +    {
 +        char stmp[STRLEN];
 +
 +        ir->nsteps = nsteps_cmdline;
 +        if (EI_DYNAMICS(ir->eI))
 +        {
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps, %.3f ps",
 +                    gmx_step_str(nsteps_cmdline, sbuf),
 +                    nsteps_cmdline*ir->delta_t);
 +        }
 +        else
 +        {
 +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps",
 +                    gmx_step_str(nsteps_cmdline, sbuf));
 +        }
 +
 +        md_print_warn(cr, fplog, "%s\n", stmp);
 +    }
 +}
 +
 +/* Frees GPU memory and destroys the CUDA context.
 + *
 + * Note that this function needs to be called even if GPUs are not used
 + * in this run because the PME ranks have no knowledge of whether GPUs
 + * are used or not, but all ranks need to enter the barrier below.
 + */
 +static void free_gpu_resources(const t_forcerec *fr,
 +                               const t_commrec  *cr)
 +{
 +    gmx_bool bIsPPrankUsingGPU;
 +    char     gpu_err_str[STRLEN];
 +
 +    bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU;
 +
 +    if (bIsPPrankUsingGPU)
 +    {
 +        /* free nbnxn data in GPU memory */
 +        nbnxn_cuda_free(fr->nbv->cu_nbv);
 +
 +        /* With tMPI we need to wait for all ranks to finish deallocation before
 +         * destroying the context in free_gpu() as some ranks may be sharing
 +         * GPU and context.
 +         * Note: as only PP ranks need to free GPU resources, so it is safe to
 +         * not call the barrier on PME ranks.
 +         */
 +#ifdef GMX_THREAD_MPI
 +        if (PAR(cr))
 +        {
 +            gmx_barrier(cr);
 +        }
 +#endif  /* GMX_THREAD_MPI */
 +
 +        /* uninitialize GPU (by destroying the context) */
 +        if (!free_gpu(gpu_err_str))
 +        {
 +            gmx_warning("On node %d failed to free GPU #%d: %s",
 +                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
 +        }
 +    }
 +}
 +
 +int mdrunner(gmx_hw_opt_t *hw_opt,
 +             FILE *fplog, t_commrec *cr, int nfile,
 +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +             gmx_bool bCompact, int nstglobalcomm,
 +             ivec ddxyz, int dd_node_order, real rdd, real rconstr,
 +             const char *dddlb_opt, real dlb_scale,
 +             const char *ddcsx, const char *ddcsy, const char *ddcsz,
 +             const char *nbpu_opt, int nstlist_cmdline,
 +             gmx_int64_t nsteps_cmdline, int nstepout, int resetstep,
 +             int gmx_unused nmultisim, int repl_ex_nst, int repl_ex_nex,
 +             int repl_ex_seed, real pforce, real cpt_period, real max_hours,
 +             const char *deviceOptions, unsigned long Flags)
 +{
 +    gmx_bool                  bForceUseGPU, bTryUseGPU;
 +    double                    nodetime = 0, realtime;
 +    t_inputrec               *inputrec;
 +    t_state                  *state = NULL;
 +    matrix                    box;
 +    gmx_ddbox_t               ddbox = {0};
 +    int                       npme_major, npme_minor;
 +    real                      tmpr1, tmpr2;
 +    t_nrnb                   *nrnb;
 +    gmx_mtop_t               *mtop          = NULL;
 +    t_mdatoms                *mdatoms       = NULL;
 +    t_forcerec               *fr            = NULL;
 +    t_fcdata                 *fcd           = NULL;
 +    real                      ewaldcoeff_q  = 0;
 +    real                      ewaldcoeff_lj = 0;
 +    gmx_pme_t                *pmedata       = NULL;
 +    gmx_vsite_t              *vsite         = NULL;
 +    gmx_constr_t              constr;
 +    int                       i, m, nChargePerturbed = -1, nTypePerturbed = 0, status, nalloc;
 +    char                     *gro;
 +    gmx_wallcycle_t           wcycle;
 +    gmx_bool                  bReadRNG, bReadEkin;
 +    int                       list;
 +    gmx_walltime_accounting_t walltime_accounting = NULL;
 +    int                       rc;
 +    gmx_int64_t               reset_counters;
 +    gmx_edsam_t               ed           = NULL;
 +    t_commrec                *cr_old       = cr;
 +    int                       nthreads_pme = 1;
 +    int                       nthreads_pp  = 1;
 +    gmx_membed_t              membed       = NULL;
 +    gmx_hw_info_t            *hwinfo       = NULL;
 +    /* The master rank decides early on bUseGPU and broadcasts this later */
 +    gmx_bool                  bUseGPU      = FALSE;
 +
 +    /* CAUTION: threads may be started later on in this function, so
 +       cr doesn't reflect the final parallel state right now */
 +    snew(inputrec, 1);
 +    snew(mtop, 1);
 +
 +    if (Flags & MD_APPENDFILES)
 +    {
 +        fplog = NULL;
 +    }
 +
 +    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
 +    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
 +
 +    /* Detect hardware, gather information. This is an operation that is
 +     * global for this process (MPI rank). */
 +    hwinfo = gmx_detect_hardware(fplog, cr, bTryUseGPU);
 +
 +
 +    snew(state, 1);
 +    if (SIMMASTER(cr))
 +    {
 +        /* Read (nearly) all data required for the simulation */
 +        read_tpx_state(ftp2fn(efTPX, nfile, fnm), inputrec, state, NULL, mtop);
 +
 +        if (inputrec->cutoff_scheme != ecutsVERLET &&
 +            ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
 +        {
 +            convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box));
 +        }
 +
 +        if (inputrec->cutoff_scheme == ecutsVERLET)
 +        {
 +            /* Here the master rank decides if all ranks will use GPUs */
 +            bUseGPU = (hwinfo->gpu_info.ncuda_dev_compatible > 0 ||
 +                       getenv("GMX_EMULATE_GPU") != NULL);
 +
 +            prepare_verlet_scheme(fplog, cr,
 +                                  inputrec, nstlist_cmdline, mtop, state->box,
 +                                  bUseGPU);
 +        }
 +        else
 +        {
 +            if (nstlist_cmdline > 0)
 +            {
 +                gmx_fatal(FARGS, "Can not set nstlist with the group cut-off scheme");
 +            }
 +
 +            if (hwinfo->gpu_info.ncuda_dev_compatible > 0)
 +            {
 +                md_print_warn(cr, fplog,
 +                              "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
 +                              "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
 +                              "      (for quick performance testing you can use the -testverlet option)\n");
 +            }
 +
 +            if (bForceUseGPU)
 +            {
 +                gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet");
 +            }
 +
 +#ifdef GMX_TARGET_BGQ
 +            md_print_warn(cr, fplog,
 +                          "NOTE: There is no SIMD implementation of the group scheme kernels on\n"
 +                          "      BlueGene/Q. You will observe better performance from using the\n"
 +                          "      Verlet cut-off scheme.\n");
 +#endif
 +        }
 +    }
 +
 +    /* Check for externally set OpenMP affinity and turn off internal
 +     * pinning if any is found. We need to do this check early to tell
 +     * thread-MPI whether it should do pinning when spawning threads.
 +     * TODO: the above no longer holds, we should move these checks down
 +     */
 +    gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
 +
 +    /* Check and update the hardware options for internal consistency */
 +    check_and_update_hw_opt_1(hw_opt, SIMMASTER(cr));
 +
 +    if (SIMMASTER(cr))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        /* Early check for externally set process affinity.
 +         * With thread-MPI this is needed as pinning might get turned off,
 +         * which needs to be known before starting thread-MPI.
 +         * With thread-MPI hw_opt is processed here on the master rank
 +         * and passed to the other ranks later, so we only do this on master.
 +         */
 +        gmx_check_thread_affinity_set(fplog,
 +                                      NULL,
 +                                      hw_opt, hwinfo->nthreads_hw_avail, FALSE);
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
 +        {
 +            gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
 +        }
 +#endif
 +
 +        if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
 +            cr->npmenodes <= 0)
 +        {
 +            gmx_fatal(FARGS, "You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
 +        }
 +    }
 +
 +#ifdef GMX_THREAD_MPI
 +    if (SIMMASTER(cr))
 +    {
 +        /* Since the master knows the cut-off scheme, update hw_opt for this.
 +         * This is done later for normal MPI and also once more with tMPI
 +         * for all tMPI ranks.
 +         */
 +        check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
 +
 +        /* NOW the threads will be started: */
 +        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
 +                                                 hw_opt,
 +                                                 inputrec, mtop,
 +                                                 cr, fplog);
 +        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
 +        {
 +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
 +        }
 +
 +        if (hw_opt->nthreads_tmpi > 1)
 +        {
 +            /* now start the threads. */
 +            cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
 +                                        oenv, bVerbose, bCompact, nstglobalcomm,
 +                                        ddxyz, dd_node_order, rdd, rconstr,
 +                                        dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
 +                                        nbpu_opt, nstlist_cmdline,
 +                                        nsteps_cmdline, nstepout, resetstep, nmultisim,
 +                                        repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
 +                                        cpt_period, max_hours, deviceOptions,
 +                                        Flags);
 +            /* the main thread continues here with a new cr. We don't deallocate
 +               the old cr because other threads may still be reading it. */
 +            if (cr == NULL)
 +            {
 +                gmx_comm("Failed to spawn threads");
 +            }
 +        }
 +    }
 +#endif
 +    /* END OF CAUTION: cr is now reliable */
 +
 +    /* g_membed initialisation *
 +     * Because we change the mtop, init_membed is called before the init_parallel *
 +     * (in case we ever want to make it run in parallel) */
 +    if (opt2bSet("-membed", nfile, fnm))
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr, "Initializing membed");
 +        }
 +        membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* now broadcast everything to the non-master nodes/threads: */
 +        init_parallel(cr, inputrec, mtop);
 +
 +        /* This check needs to happen after get_nthreads_mpi() */
 +        if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal_collective(FARGS, cr, NULL,
 +                                 "The Verlet cut-off scheme is not supported with particle decomposition.\n"
 +                                 "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
 +        }
 +    }
 +    if (fplog != NULL)
 +    {
 +        pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE);
 +    }
 +
 +    /* now make sure the state is initialized and propagated */
 +    set_state_entries(state, inputrec, cr->nnodes);
 +
 +    /* A parallel command line option consistency check that we can
 +       only do after any threads have started. */
 +    if (!PAR(cr) &&
 +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
 +    {
 +        gmx_fatal(FARGS,
 +                  "The -dd or -npme option request a parallel simulation, "
 +#ifndef GMX_MPI
 +                  "but %s was compiled without threads or MPI enabled"
 +#else
 +#ifdef GMX_THREAD_MPI
 +                  "but the number of threads (option -nt) is 1"
 +#else
 +                  "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
 +#endif
 +#endif
 +                  , ShortProgram()
 +                  );
 +    }
 +
 +    if ((Flags & MD_RERUN) &&
 +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 +    {
 +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 +    }
 +
 +    if (can_use_allvsall(inputrec, TRUE, cr, fplog) && PAR(cr))
 +    {
 +        /* Simple neighbour searching and (also?) all-vs-all loops
 +         * do not work with domain decomposition. */
 +        Flags |= MD_PARTDEC;
 +    }
 +
 +    if (!(EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype)) || (Flags & MD_PARTDEC))
 +    {
 +        if (cr->npmenodes > 0)
 +        {
 +            if (!EEL_PME(inputrec->coulombtype) && !EVDW_PME(inputrec->vdwtype))
 +            {
 +                gmx_fatal_collective(FARGS, cr, NULL,
 +                                     "PME nodes are requested, but the system does not use PME electrostatics or LJ-PME");
 +            }
 +            if (Flags & MD_PARTDEC)
 +            {
 +                gmx_fatal_collective(FARGS, cr, NULL,
 +                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
 +            }
 +        }
 +
 +        cr->npmenodes = 0;
 +    }
 +
 +#ifdef GMX_FAHCORE
-     fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
++    if (MASTER(cr))
++    {
++        fcRegisterSteps(inputrec->nsteps, inputrec->init_step);
++    }
 +#endif
 +
 +    /* NMR restraints must be initialized before load_checkpoint,
 +     * since with time averaging the history is added to t_state.
 +     * For proper consistency check we therefore need to extend
 +     * t_state here.
 +     * So the PME-only nodes (if present) will also initialize
 +     * the distance restraints.
 +     */
 +    snew(fcd, 1);
 +
 +    /* This needs to be called before read_checkpoint to extend the state */
 +    init_disres(fplog, mtop, inputrec, cr, Flags & MD_PARTDEC, fcd, state, repl_ex_nst > 0);
 +
 +    if (gmx_mtop_ftype_count(mtop, F_ORIRES) > 0)
 +    {
 +        if (PAR(cr) && !(Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal(FARGS, "Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
 +        }
 +        /* Orientation restraints */
 +        if (MASTER(cr))
 +        {
 +            init_orires(fplog, mtop, state->x, inputrec, cr->ms, &(fcd->orires),
 +                        state);
 +        }
 +    }
 +
 +    if (DEFORM(*inputrec))
 +    {
 +        /* Store the deform reference box before reading the checkpoint */
 +        if (SIMMASTER(cr))
 +        {
 +            copy_mat(state->box, box);
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(box), box, cr);
 +        }
 +        /* Because we do not have the update struct available yet
 +         * in which the reference values should be stored,
 +         * we store them temporarily in static variables.
 +         * This should be thread safe, since they are only written once
 +         * and with identical values.
 +         */
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +        deform_init_init_step_tpx = inputrec->init_step;
 +        copy_mat(box, deform_init_box_tpx);
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +    }
 +
 +    if (opt2bSet("-cpi", nfile, fnm))
 +    {
 +        /* Check if checkpoint file exists before doing continuation.
 +         * This way we can use identical input options for the first and subsequent runs...
 +         */
 +        if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) )
 +        {
 +            load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog,
 +                            cr, Flags & MD_PARTDEC, ddxyz,
 +                            inputrec, state, &bReadRNG, &bReadEkin,
 +                            (Flags & MD_APPENDFILES),
 +                            (Flags & MD_APPENDFILESSET));
 +
 +            if (bReadRNG)
 +            {
 +                Flags |= MD_READ_RNG;
 +            }
 +            if (bReadEkin)
 +            {
 +                Flags |= MD_READ_EKIN;
 +            }
 +        }
 +    }
 +
 +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI only the master node/thread exists in mdrun.c,
 +         * therefore non-master nodes need to open the "seppot" log file here.
 +         */
 +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
 +#endif
 +        )
 +    {
 +        gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, !(Flags & MD_SEPPOT),
 +                     Flags, &fplog);
 +    }
 +
 +    /* override nsteps with value from cmdline */
 +    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
 +
 +    if (SIMMASTER(cr))
 +    {
 +        copy_mat(state->box, box);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(box), box, cr);
 +    }
 +
 +    /* Essential dynamics */
 +    if (opt2bSet("-ei", nfile, fnm))
 +    {
 +        /* Open input and output files, allocate space for ED data structure */
 +        ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr);
 +    }
 +
 +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
 +                     EI_TPI(inputrec->eI) ||
 +                     inputrec->eI == eiNM))
 +    {
 +        cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr,
 +                                           dddlb_opt, dlb_scale,
 +                                           ddcsx, ddcsy, ddcsz,
 +                                           mtop, inputrec,
 +                                           box, state->x,
 +                                           &ddbox, &npme_major, &npme_minor);
 +
 +        make_dd_communicators(fplog, cr, dd_node_order);
 +
 +        /* Set overallocation to avoid frequent reallocation of arrays */
 +        set_over_alloc_dd(TRUE);
 +    }
 +    else
 +    {
 +        /* PME, if used, is done on all nodes with 1D decomposition */
 +        cr->npmenodes = 0;
 +        cr->duty      = (DUTY_PP | DUTY_PME);
 +        npme_major    = 1;
 +        npme_minor    = 1;
 +        /* NM and TPI perform single node energy calculations in parallel */
 +        if (!(inputrec->eI == eiNM || EI_TPI(inputrec->eI)))
 +        {
 +            npme_major = cr->nnodes;
 +        }
 +
 +        if (inputrec->ePBC == epbcSCREW)
 +        {
 +            gmx_fatal(FARGS,
 +                      "pbc=%s is only implemented with domain decomposition",
 +                      epbc_names[inputrec->ePBC]);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* After possible communicator splitting in make_dd_communicators.
 +         * we can set up the intra/inter node communication.
 +         */
 +        gmx_setup_nodecomm(fplog, cr);
 +    }
 +
 +    /* Initialize per-physical-node MPI process/thread ID and counters. */
 +    gmx_init_intranode_counters(cr);
 +
 +#ifdef GMX_MPI
 +    md_print_info(cr, fplog, "Using %d MPI %s\n",
 +                  cr->nnodes,
 +#ifdef GMX_THREAD_MPI
 +                  cr->nnodes == 1 ? "thread" : "threads"
 +#else
 +                  cr->nnodes == 1 ? "process" : "processes"
 +#endif
 +                  );
 +    fflush(stderr);
 +#endif
 +
 +    /* Check and update hw_opt for the cut-off scheme */
 +    check_and_update_hw_opt_2(hw_opt, inputrec->cutoff_scheme);
 +
 +    gmx_omp_nthreads_init(fplog, cr,
 +                          hwinfo->nthreads_hw_avail,
 +                          hw_opt->nthreads_omp,
 +                          hw_opt->nthreads_omp_pme,
 +                          (cr->duty & DUTY_PP) == 0,
 +                          inputrec->cutoff_scheme == ecutsVERLET);
 +
 +    if (PAR(cr))
 +    {
 +        /* The master rank decided on the use of GPUs,
 +         * broadcast this information to all ranks.
 +         */
 +        gmx_bcast_sim(sizeof(bUseGPU), &bUseGPU, cr);
 +    }
 +
 +    if (bUseGPU)
 +    {
 +        if (cr->npmenodes == -1)
 +        {
 +            /* Don't automatically use PME-only nodes with GPUs */
 +            cr->npmenodes = 0;
 +        }
 +
 +        /* Select GPU id's to use */
 +        gmx_select_gpu_ids(fplog, cr, &hwinfo->gpu_info, bForceUseGPU,
 +                           &hw_opt->gpu_opt);
 +    }
 +    else
 +    {
 +        /* Ignore (potentially) manually selected GPUs */
 +        hw_opt->gpu_opt.ncuda_dev_use = 0;
 +    }
 +
 +    /* check consistency of CPU acceleration and number of GPUs selected */
 +    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt, bUseGPU);
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* When we share GPUs over ranks, we need to know this for the DLB */
 +        dd_setup_dlb_resource_sharing(cr, hwinfo, hw_opt);
 +    }
 +
 +    /* getting number of PP/PME threads
 +       PME: env variable should be read only on one node to make sure it is
 +       identical everywhere;
 +     */
 +    /* TODO nthreads_pp is only used for pinning threads.
 +     * This is a temporary solution until we have a hw topology library.
 +     */
 +    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
 +    nthreads_pme = gmx_omp_nthreads_get(emntPME);
 +
 +    wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme);
 +
 +    if (PAR(cr))
 +    {
 +        /* Master synchronizes its value of reset_counters with all nodes
 +         * including PME only nodes */
 +        reset_counters = wcycle_get_reset_counters(wcycle);
 +        gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr);
 +        wcycle_set_reset_counters(wcycle, reset_counters);
 +    }
 +
 +    snew(nrnb, 1);
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* For domain decomposition we allocate dynamically
 +         * in dd_partition_system.
 +         */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            bcast_state_setup(cr, state);
 +        }
 +        else
 +        {
 +            if (PAR(cr))
 +            {
 +                bcast_state(cr, state, TRUE);
 +            }
 +        }
 +
 +        /* Initiate forcerecord */
 +        fr          = mk_forcerec();
 +        fr->hwinfo  = hwinfo;
 +        fr->gpu_opt = &hw_opt->gpu_opt;
 +        init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box,
 +                      opt2fn("-table", nfile, fnm),
 +                      opt2fn("-tabletf", nfile, fnm),
 +                      opt2fn("-tablep", nfile, fnm),
 +                      opt2fn("-tableb", nfile, fnm),
 +                      nbpu_opt,
 +                      FALSE,
 +                      pforce);
 +
 +        /* version for PCA_NOT_READ_NODE (see md.c) */
 +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +           "nofile","nofile","nofile","nofile",FALSE,pforce);
 +         */
 +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
 +
 +        /* Initialize QM-MM */
 +        if (fr->bQMMM)
 +        {
 +            init_QMMMrec(cr, mtop, inputrec, fr);
 +        }
 +
 +        /* Initialize the mdatoms structure.
 +         * mdatoms is not filled with atom data,
 +         * as this can not be done now with domain decomposition.
 +         */
 +        mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO);
 +
 +        if (mdatoms->nPerturbed > 0 && inputrec->cutoff_scheme == ecutsVERLET)
 +        {
 +            gmx_fatal(FARGS, "The Verlet cut-off scheme does not (yet) support free-energy calculations with perturbed atoms, only perturbed interactions. This will be implemented soon. Use the group scheme for now.");
 +        }
 +
 +        /* Initialize the virtual site communication */
 +        vsite = init_vsite(mtop, cr, FALSE);
 +
 +        calc_shifts(box, fr->shift_vec);
 +
 +        /* With periodic molecules the charge groups should be whole at start up
 +         * and the virtual sites should not be far from their proper positions.
 +         */
 +        if (!inputrec->bContinuation && MASTER(cr) &&
 +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        {
 +            /* Make molecules whole at start of run */
 +            if (fr->ePBC != epbcNONE)
 +            {
 +                do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x);
 +            }
 +            if (vsite)
 +            {
 +                /* Correct initial vsite positions are required
 +                 * for the initial distribution in the domain decomposition
 +                 * and for the initial shell prediction.
 +                 */
 +                construct_vsites_mtop(vsite, mtop, state->x);
 +            }
 +        }
 +
 +        if (EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype))
 +        {
 +            ewaldcoeff_q  = fr->ewaldcoeff_q;
 +            ewaldcoeff_lj = fr->ewaldcoeff_lj;
 +            pmedata       = &fr->pmedata;
 +        }
 +        else
 +        {
 +            pmedata = NULL;
 +        }
 +    }
 +    else
 +    {
 +        /* This is a PME only node */
 +
 +        /* We don't need the state */
 +        done_state(state);
 +
 +        ewaldcoeff_q  = calc_ewaldcoeff_q(inputrec->rcoulomb, inputrec->ewald_rtol);
 +        ewaldcoeff_lj = calc_ewaldcoeff_lj(inputrec->rvdw, inputrec->ewald_rtol_lj);
 +        snew(pmedata, 1);
 +    }
 +
 +    if (hw_opt->thread_affinity != threadaffOFF)
 +    {
 +        /* Before setting affinity, check whether the affinity has changed
 +         * - which indicates that probably the OpenMP library has changed it
 +         * since we first checked).
 +         */
 +        gmx_check_thread_affinity_set(fplog, cr,
 +                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);
 +
 +        /* Set the CPU affinity */
 +        gmx_set_thread_affinity(fplog, cr, hw_opt, hwinfo);
 +    }
 +
 +    /* Initiate PME if necessary,
 +     * either on all nodes or on dedicated PME nodes only. */
 +    if (EEL_PME(inputrec->coulombtype) || EVDW_PME(inputrec->vdwtype))
 +    {
 +        if (mdatoms)
 +        {
 +            nChargePerturbed = mdatoms->nChargePerturbed;
 +            if (EVDW_PME(inputrec->vdwtype))
 +            {
 +                nTypePerturbed   = mdatoms->nTypePerturbed;
 +            }
 +        }
 +        if (cr->npmenodes > 0)
 +        {
 +            /* The PME only nodes need to know nChargePerturbed(FEP on Q) and nTypePerturbed(FEP on LJ)*/
 +            gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr);
 +            gmx_bcast_sim(sizeof(nTypePerturbed), &nTypePerturbed, cr);
 +        }
 +
 +        if (cr->duty & DUTY_PME)
 +        {
 +            status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec,
 +                                  mtop ? mtop->natoms : 0, nChargePerturbed, nTypePerturbed,
 +                                  (Flags & MD_REPRODUCIBLE), nthreads_pme);
 +            if (status != 0)
 +            {
 +                gmx_fatal(FARGS, "Error %d initializing PME", status);
 +            }
 +        }
 +    }
 +
 +
 +    if (integrator[inputrec->eI].func == do_md)
 +    {
 +        /* Turn on signal handling on all nodes */
 +        /*
 +         * (A user signal from the PME nodes (if any)
 +         * is communicated to the PP nodes.
 +         */
 +        signal_handler_install();
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* Assumes uniform use of the number of OpenMP threads */
 +        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntDefault));
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            /* Initialize pull code */
 +            init_pull(fplog, inputrec, nfile, fnm, mtop, cr, oenv, inputrec->fepvals->init_lambda,
 +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags);
 +        }
 +
 +        if (inputrec->bRot)
 +        {
 +            /* Initialize enforced rotation code */
 +            init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv,
 +                     bVerbose, Flags);
 +        }
 +
 +        constr = init_constraints(fplog, mtop, inputrec, ed, state, cr);
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_init_bondeds(fplog, cr->dd, mtop, vsite, inputrec,
 +                            Flags & MD_DDBONDCHECK, fr->cginfo_mb);
 +
 +            set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, &ddbox);
 +
 +            setup_dd_grid(fplog, cr->dd);
 +        }
 +
 +        /* Now do whatever the user wants us to do (how flexible...) */
 +        integrator[inputrec->eI].func(fplog, cr, nfile, fnm,
 +                                      oenv, bVerbose, bCompact,
 +                                      nstglobalcomm,
 +                                      vsite, constr,
 +                                      nstepout, inputrec, mtop,
 +                                      fcd, state,
 +                                      mdatoms, nrnb, wcycle, ed, fr,
 +                                      repl_ex_nst, repl_ex_nex, repl_ex_seed,
 +                                      membed,
 +                                      cpt_period, max_hours,
 +                                      deviceOptions,
 +                                      Flags,
 +                                      walltime_accounting);
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            finish_pull(inputrec->pull);
 +        }
 +
 +        if (inputrec->bRot)
 +        {
 +            finish_rot(inputrec->rot);
 +        }
 +
 +    }
 +    else
 +    {
 +        /* do PME only */
 +        walltime_accounting = walltime_accounting_init(gmx_omp_nthreads_get(emntPME));
 +        gmx_pmeonly(*pmedata, cr, nrnb, wcycle, walltime_accounting, ewaldcoeff_q, ewaldcoeff_lj, inputrec);
 +    }
 +
 +    wallcycle_stop(wcycle, ewcRUN);
 +
 +    /* Finish up, write some stuff
 +     * if rerunMD, don't write last frame again
 +     */
 +    finish_run(fplog, cr,
 +               inputrec, nrnb, wcycle, walltime_accounting,
 +               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
 +               nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
 +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
 +
 +
 +    /* Free GPU memory and context */
 +    free_gpu_resources(fr, cr);
 +
 +    if (opt2bSet("-membed", nfile, fnm))
 +    {
 +        sfree(membed);
 +    }
 +
 +    gmx_hardware_info_free(hwinfo);
 +
 +    /* Does what it says */
 +    print_date_and_time(fplog, cr->nodeid, "Finished mdrun", walltime_accounting);
 +    walltime_accounting_destroy(walltime_accounting);
 +
 +    /* Close logfile already here if we were appending to it */
 +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
 +    {
 +        gmx_log_close(fplog);
 +    }
 +
 +    rc = (int)gmx_get_stop_condition();
 +
 +#ifdef GMX_THREAD_MPI
 +    /* we need to join all threads. The sub-threads join when they
 +       exit this function, but the master thread needs to be told to
 +       wait for that. */
 +    if (PAR(cr) && MASTER(cr))
 +    {
 +        tMPI_Finalize();
 +    }
 +#endif
 +
 +    return rc;
 +}