From: Mark Abraham Date: Fri, 14 Mar 2014 18:08:48 +0000 (+0100) Subject: Merge branch release-4-6 X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=eab39e78f3f45662bb25fa047510b05a0149880a;p=alexxy%2Fgromacs.git Merge branch release-4-6 Conflicts: src/gromacs/pulling/pull_rotation.c Adopted the bugfixes from release-4-6 Uncrustified incoming code Change-Id: I7d90bb49ef83750b9607f677e97a94f2b5793eeb --- eab39e78f3f45662bb25fa047510b05a0149880a diff --cc src/gromacs/gmxana/gmx_tune_pme.c index 25f4c30065,0000000000..eeed729591 mode 100644,000000..100644 --- a/src/gromacs/gmxana/gmx_tune_pme.c +++ b/src/gromacs/gmxana/gmx_tune_pme.c @@@ -1,2508 -1,0 +1,2516 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. 
Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + + +#include +#ifdef HAVE_SYS_TIME_H +#include +#endif + + + +#include "gromacs/commandline/pargs.h" +#include "typedefs.h" +#include "smalloc.h" +#include "vec.h" +#include "copyrite.h" +#include "gromacs/fileio/tpxio.h" +#include "string2.h" +#include "readinp.h" +#include "calcgrid.h" +#include "checkpoint.h" +#include "macros.h" +#include "gmx_ana.h" +#include "names.h" +#include "perf_est.h" +#include "inputrec.h" +#include "gromacs/timing/walltime_accounting.h" + + +/* Enum for situations that can occur during log file parsing, the + * corresponding string entries can be found in do_the_tests() in + * const char* ParseLog[] */ +enum { + eParselogOK, + eParselogNotFound, + eParselogNoPerfData, + eParselogTerm, + eParselogResetProblem, + eParselogNoDDGrid, + eParselogTPXVersion, + eParselogNotParallel, + eParselogLargePrimeFactor, + eParselogFatal, + eParselogNr +}; + + +typedef struct +{ + int nPMEnodes; /* number of PME-only nodes used in this test */ + int nx, ny, nz; /* DD grid */ + int guessPME; /* if nPMEnodes == -1, this is the guessed number of PME nodes */ + double *Gcycles; /* This can contain more than one value if doing multiple tests */ + double Gcycles_Av; + float *ns_per_day; + float ns_per_day_Av; + float *PME_f_load; /* PME mesh/force load average*/ + float PME_f_load_Av; /* Average average ;) ... 
*/ + char *mdrun_cmd_line; /* Mdrun command line used for this test */ +} t_perf; + + +typedef struct +{ + int nr_inputfiles; /* The number of tpr and mdp input files */ + gmx_int64_t orig_sim_steps; /* Number of steps to be done in the real simulation */ + gmx_int64_t orig_init_step; /* Init step for the real simulation */ + real *rcoulomb; /* The coulomb radii [0...nr_inputfiles] */ + real *rvdw; /* The vdW radii */ + real *rlist; /* Neighbourlist cutoff radius */ + real *rlistlong; + int *nkx, *nky, *nkz; + real *fsx, *fsy, *fsz; /* Fourierspacing in x,y,z dimension */ +} t_inputinfo; + + +static void sep_line(FILE *fp) +{ + fprintf(fp, "\n------------------------------------------------------------\n"); +} + + +/* Wrapper for system calls */ +static int gmx_system_call(char *command) +{ +#ifdef GMX_NO_SYSTEM + gmx_fatal(FARGS, "No calls to system(3) supported on this platform. Attempted to call:\n'%s'\n", command); +#else + return ( system(command) ); +#endif +} + + +/* Check if string starts with substring */ +static gmx_bool str_starts(const char *string, const char *substring) +{ + return ( strncmp(string, substring, strlen(substring)) == 0); +} + + +static void cleandata(t_perf *perfdata, int test_nr) +{ + perfdata->Gcycles[test_nr] = 0.0; + perfdata->ns_per_day[test_nr] = 0.0; + perfdata->PME_f_load[test_nr] = 0.0; + + return; +} + + +static gmx_bool is_equal(real a, real b) +{ + real diff, eps = 1.0e-7; + + + diff = a - b; + + if (diff < 0.0) + { + diff = -diff; + } + + if (diff < eps) + { + return TRUE; + } + else + { + return FALSE; + } +} + + +static void finalize(const char *fn_out) +{ + char buf[STRLEN]; + FILE *fp; + + + fp = fopen(fn_out, "r"); + fprintf(stdout, "\n\n"); + + while (fgets(buf, STRLEN-1, fp) != NULL) + { + fprintf(stdout, "%s", buf); + } + fclose(fp); + fprintf(stdout, "\n\n"); +} + + +enum { + eFoundNothing, eFoundDDStr, eFoundAccountingStr, eFoundCycleStr +}; + +static int parse_logfile(const char *logfile, const char *errfile, + 
t_perf *perfdata, int test_nr, int presteps, gmx_int64_t cpt_steps, + int nnodes) +{ + FILE *fp; + char line[STRLEN], dumstring[STRLEN], dumstring2[STRLEN]; + const char matchstrdd[] = "Domain decomposition grid"; + const char matchstrcr[] = "resetting all time and cycle counters"; + const char matchstrbal[] = "Average PME mesh/force load:"; + const char matchstring[] = "R E A L C Y C L E A N D T I M E A C C O U N T I N G"; + const char errSIG[] = "signal, stopping at the next"; + int iFound; + int procs; + float dum1, dum2, dum3, dum4; + int ndum; + int npme; + gmx_int64_t resetsteps = -1; + gmx_bool bFoundResetStr = FALSE; + gmx_bool bResetChecked = FALSE; + + + if (!gmx_fexist(logfile)) + { + fprintf(stderr, "WARNING: Could not find logfile %s.\n", logfile); + cleandata(perfdata, test_nr); + return eParselogNotFound; + } + + fp = fopen(logfile, "r"); + perfdata->PME_f_load[test_nr] = -1.0; + perfdata->guessPME = -1; + + iFound = eFoundNothing; + if (1 == nnodes) + { + iFound = eFoundDDStr; /* Skip some case statements */ + } + + while (fgets(line, STRLEN, fp) != NULL) + { + /* Remove leading spaces */ + ltrim(line); + + /* Check for TERM and INT signals from user: */ + if (strstr(line, errSIG) != NULL) + { + fclose(fp); + cleandata(perfdata, test_nr); + return eParselogTerm; + } + + /* Check whether cycle resetting worked */ + if (presteps > 0 && !bFoundResetStr) + { + if (strstr(line, matchstrcr) != NULL) + { + sprintf(dumstring, "step %s", "%"GMX_SCNd64); + sscanf(line, dumstring, &resetsteps); + bFoundResetStr = TRUE; + if (resetsteps == presteps+cpt_steps) + { + bResetChecked = TRUE; + } + else + { + sprintf(dumstring, "%"GMX_PRId64, resetsteps); + sprintf(dumstring2, "%"GMX_PRId64, presteps+cpt_steps); + fprintf(stderr, "WARNING: Time step counters were reset at step %s,\n" + " though they were supposed to be reset at step %s!\n", + dumstring, dumstring2); + } + } + } + + /* Look for strings that appear in a certain order in the log file: */ + switch 
(iFound) + { + case eFoundNothing: + /* Look for domain decomp grid and separate PME nodes: */ + if (str_starts(line, matchstrdd)) + { + sscanf(line, "Domain decomposition grid %d x %d x %d, separate PME nodes %d", + &(perfdata->nx), &(perfdata->ny), &(perfdata->nz), &npme); + if (perfdata->nPMEnodes == -1) + { + perfdata->guessPME = npme; + } + else if (perfdata->nPMEnodes != npme) + { + gmx_fatal(FARGS, "PME nodes from command line and output file are not identical"); + } + iFound = eFoundDDStr; + } + /* Catch a few errors that might have occured: */ + else if (str_starts(line, "There is no domain decomposition for")) + { + fclose(fp); + return eParselogNoDDGrid; + } + else if (str_starts(line, "The number of nodes you selected")) + { + fclose(fp); + return eParselogLargePrimeFactor; + } + else if (str_starts(line, "reading tpx file")) + { + fclose(fp); + return eParselogTPXVersion; + } + else if (str_starts(line, "The -dd or -npme option request a parallel simulation")) + { + fclose(fp); + return eParselogNotParallel; + } + break; + case eFoundDDStr: + /* Look for PME mesh/force balance (not necessarily present, though) */ + if (str_starts(line, matchstrbal)) + { + sscanf(&line[strlen(matchstrbal)], "%f", &(perfdata->PME_f_load[test_nr])); + } + /* Look for matchstring */ + if (str_starts(line, matchstring)) + { + iFound = eFoundAccountingStr; + } + break; + case eFoundAccountingStr: + /* Already found matchstring - look for cycle data */ + if (str_starts(line, "Total ")) + { + sscanf(line, "Total %d %lf", &procs, &(perfdata->Gcycles[test_nr])); + iFound = eFoundCycleStr; + } + break; + case eFoundCycleStr: + /* Already found cycle data - look for remaining performance info and return */ + if (str_starts(line, "Performance:")) + { + ndum = sscanf(line, "%s %f %f %f %f", dumstring, &dum1, &dum2, &dum3, &dum4); + /* (ns/day) is the second last entry, depending on whether GMX_DETAILED_PERF_STATS was set in print_perf(), nrnb.c */ + perfdata->ns_per_day[test_nr] = 
(ndum == 5) ? dum3 : dum1; + fclose(fp); + if (bResetChecked || presteps == 0) + { + return eParselogOK; + } + else + { + return eParselogResetProblem; + } + } + break; + } + } /* while */ + + /* Close the log file */ + fclose(fp); + + /* Check why there is no performance data in the log file. + * Did a fatal errors occur? */ + if (gmx_fexist(errfile)) + { + fp = fopen(errfile, "r"); + while (fgets(line, STRLEN, fp) != NULL) + { + if (str_starts(line, "Fatal error:") ) + { + if (fgets(line, STRLEN, fp) != NULL) + { + fprintf(stderr, "\nWARNING: An error occured during this benchmark:\n" + "%s\n", line); + } + fclose(fp); + cleandata(perfdata, test_nr); + return eParselogFatal; + } + } + fclose(fp); + } + else + { + fprintf(stderr, "WARNING: Could not find stderr file %s.\n", errfile); + } + + /* Giving up ... we could not find out why there is no performance data in + * the log file. */ + fprintf(stdout, "No performance data in log file.\n"); + cleandata(perfdata, test_nr); + + return eParselogNoPerfData; +} + + +static gmx_bool analyze_data( + FILE *fp, + const char *fn, + t_perf **perfdata, + int nnodes, + int ntprs, + int ntests, + int nrepeats, + t_inputinfo *info, + int *index_tpr, /* OUT: Nr of mdp file with best settings */ + int *npme_optimal) /* OUT: Optimal number of PME nodes */ +{ + int i, j, k; + int line = 0, line_win = -1; + int k_win = -1, i_win = -1, winPME; + double s = 0.0; /* standard deviation */ + t_perf *pd; + char strbuf[STRLEN]; + char str_PME_f_load[13]; + gmx_bool bCanUseOrigTPR; + gmx_bool bRefinedCoul, bRefinedVdW, bRefinedGrid; + + + if (nrepeats > 1) + { + sep_line(fp); + fprintf(fp, "Summary of successful runs:\n"); + fprintf(fp, "Line tpr PME nodes Gcycles Av. Std.dev. 
ns/day PME/f"); + if (nnodes > 1) + { + fprintf(fp, " DD grid"); + } + fprintf(fp, "\n"); + } + + + for (k = 0; k < ntprs; k++) + { + for (i = 0; i < ntests; i++) + { + /* Select the right dataset: */ + pd = &(perfdata[k][i]); + + pd->Gcycles_Av = 0.0; + pd->PME_f_load_Av = 0.0; + pd->ns_per_day_Av = 0.0; + + if (pd->nPMEnodes == -1) + { + sprintf(strbuf, "(%3d)", pd->guessPME); + } + else + { + sprintf(strbuf, " "); + } + + /* Get the average run time of a setting */ + for (j = 0; j < nrepeats; j++) + { + pd->Gcycles_Av += pd->Gcycles[j]; + pd->PME_f_load_Av += pd->PME_f_load[j]; + } + pd->Gcycles_Av /= nrepeats; + pd->PME_f_load_Av /= nrepeats; + + for (j = 0; j < nrepeats; j++) + { + if (pd->ns_per_day[j] > 0.0) + { + pd->ns_per_day_Av += pd->ns_per_day[j]; + } + else + { + /* Somehow the performance number was not aquired for this run, + * therefor set the average to some negative value: */ + pd->ns_per_day_Av = -1.0f*nrepeats; + break; + } + } + pd->ns_per_day_Av /= nrepeats; + + /* Nicer output: */ + if (pd->PME_f_load_Av > 0.0) + { + sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load_Av); + } + else + { + sprintf(str_PME_f_load, "%s", " - "); + } + + + /* We assume we had a successful run if both averages are positive */ + if (pd->Gcycles_Av > 0.0 && pd->ns_per_day_Av > 0.0) + { + /* Output statistics if repeats were done */ + if (nrepeats > 1) + { + /* Calculate the standard deviation */ + s = 0.0; + for (j = 0; j < nrepeats; j++) + { + s += pow( pd->Gcycles[j] - pd->Gcycles_Av, 2 ); + } + s /= (nrepeats - 1); + s = sqrt(s); + + fprintf(fp, "%4d %3d %4d%s %12.3f %12.3f %12.3f %s", + line, k, pd->nPMEnodes, strbuf, pd->Gcycles_Av, s, + pd->ns_per_day_Av, str_PME_f_load); + if (nnodes > 1) + { + fprintf(fp, " %3d %3d %3d", pd->nx, pd->ny, pd->nz); + } + fprintf(fp, "\n"); + } + /* Store the index of the best run found so far in 'winner': */ + if ( (k_win == -1) || (pd->Gcycles_Av < perfdata[k_win][i_win].Gcycles_Av) ) + { + k_win = k; + i_win = i; + line_win = 
line; + } + line++; + } + } + } + + if (k_win == -1) + { + gmx_fatal(FARGS, "None of the runs was successful! Check %s for problems.", fn); + } + + sep_line(fp); + + winPME = perfdata[k_win][i_win].nPMEnodes; + + if (1 == ntests) + { + /* We stuck to a fixed number of PME-only nodes */ + sprintf(strbuf, "settings No. %d", k_win); + } + else + { + /* We have optimized the number of PME-only nodes */ + if (winPME == -1) + { + sprintf(strbuf, "%s", "the automatic number of PME nodes"); + } + else + { + sprintf(strbuf, "%d PME nodes", winPME); + } + } + fprintf(fp, "Best performance was achieved with %s", strbuf); + if ((nrepeats > 1) && (ntests > 1)) + { + fprintf(fp, " (see line %d)", line_win); + } + fprintf(fp, "\n"); + + /* Only mention settings if they were modified: */ + bRefinedCoul = !is_equal(info->rcoulomb[k_win], info->rcoulomb[0]); + bRefinedVdW = !is_equal(info->rvdw[k_win], info->rvdw[0] ); + bRefinedGrid = !(info->nkx[k_win] == info->nkx[0] && + info->nky[k_win] == info->nky[0] && + info->nkz[k_win] == info->nkz[0]); + + if (bRefinedCoul || bRefinedVdW || bRefinedGrid) + { + fprintf(fp, "Optimized PME settings:\n"); + bCanUseOrigTPR = FALSE; + } + else + { + bCanUseOrigTPR = TRUE; + } + + if (bRefinedCoul) + { + fprintf(fp, " New Coulomb radius: %f nm (was %f nm)\n", info->rcoulomb[k_win], info->rcoulomb[0]); + } + + if (bRefinedVdW) + { + fprintf(fp, " New Van der Waals radius: %f nm (was %f nm)\n", info->rvdw[k_win], info->rvdw[0]); + } + + if (bRefinedGrid) + { + fprintf(fp, " New Fourier grid xyz: %d %d %d (was %d %d %d)\n", info->nkx[k_win], info->nky[k_win], info->nkz[k_win], + info->nkx[0], info->nky[0], info->nkz[0]); + } + + if (bCanUseOrigTPR && ntprs > 1) + { + fprintf(fp, "and original PME settings.\n"); + } + + fflush(fp); + + /* Return the index of the mdp file that showed the highest performance + * and the optimal number of PME nodes */ + *index_tpr = k_win; + *npme_optimal = winPME; + + return bCanUseOrigTPR; +} + + +/* Get the commands 
we need to set up the runs from environment variables */ +static void get_program_paths(gmx_bool bThreads, char *cmd_mpirun[], char *cmd_mdrun[]) +{ + char *cp; + FILE *fp; + const char def_mpirun[] = "mpirun"; + const char def_mdrun[] = "mdrun"; + + const char empty_mpirun[] = ""; + + /* Get the commands we need to set up the runs from environment variables */ + if (!bThreads) + { + if ( (cp = getenv("MPIRUN")) != NULL) + { + *cmd_mpirun = strdup(cp); + } + else + { + *cmd_mpirun = strdup(def_mpirun); + } + } + else + { + *cmd_mpirun = strdup(empty_mpirun); + } + + if ( (cp = getenv("MDRUN" )) != NULL) + { + *cmd_mdrun = strdup(cp); + } + else + { + *cmd_mdrun = strdup(def_mdrun); + } +} + +/* Check that the commands will run mdrun (perhaps via mpirun) by + * running a very quick test simulation. Requires MPI environment to + * be available if applicable. */ +static void check_mdrun_works(gmx_bool bThreads, + const char *cmd_mpirun, + const char *cmd_np, + const char *cmd_mdrun) +{ + char *command = NULL; + char *cp; + char line[STRLEN]; + FILE *fp; + const char filename[] = "benchtest.log"; + + /* This string should always be identical to the one in copyrite.c, + * gmx_print_version_info() in the defined(GMX_MPI) section */ + const char match_mpi[] = "MPI library: MPI"; + const char match_mdrun[] = "Program: "; + gmx_bool bMdrun = FALSE; + gmx_bool bMPI = FALSE; + + /* Run a small test to see whether mpirun + mdrun work */ + fprintf(stdout, "Making sure that mdrun can be executed. "); + if (bThreads) + { + snew(command, strlen(cmd_mdrun) + strlen(cmd_np) + strlen(filename) + 50); + sprintf(command, "%s%s-version -maxh 0.001 1> %s 2>&1", cmd_mdrun, cmd_np, filename); + } + else + { + snew(command, strlen(cmd_mpirun) + strlen(cmd_np) + strlen(cmd_mdrun) + strlen(filename) + 50); + sprintf(command, "%s%s%s -version -maxh 0.001 1> %s 2>&1", cmd_mpirun, cmd_np, cmd_mdrun, filename); + } + fprintf(stdout, "Trying '%s' ... 
", command); + make_backup(filename); + gmx_system_call(command); + + /* Check if we find the characteristic string in the output: */ + if (!gmx_fexist(filename)) + { + gmx_fatal(FARGS, "Output from test run could not be found."); + } + + fp = fopen(filename, "r"); + /* We need to scan the whole output file, since sometimes the queuing system + * also writes stuff to stdout/err */ + while (!feof(fp) ) + { + cp = fgets(line, STRLEN, fp); + if (cp != NULL) + { + if (str_starts(line, match_mdrun) ) + { + bMdrun = TRUE; + } + if (str_starts(line, match_mpi) ) + { + bMPI = TRUE; + } + } + } + fclose(fp); + + if (bThreads) + { + if (bMPI) + { + gmx_fatal(FARGS, "Need a threaded version of mdrun. This one\n" + "(%s)\n" + "seems to have been compiled with MPI instead.", + *cmd_mdrun); + } + } + else + { + if (bMdrun && !bMPI) + { + gmx_fatal(FARGS, "Need an MPI-enabled version of mdrun. This one\n" + "(%s)\n" + "seems to have been compiled without MPI support.", + *cmd_mdrun); + } + } + + if (!bMdrun) + { + gmx_fatal(FARGS, "Cannot execute mdrun. Please check %s for problems!", + filename); + } + + fprintf(stdout, "passed.\n"); + + /* Clean up ... */ + remove(filename); + sfree(command); +} + + +static void launch_simulation( + gmx_bool bLaunch, /* Should the simulation be launched? */ + FILE *fp, /* General log file */ + gmx_bool bThreads, /* whether to use threads */ + char *cmd_mpirun, /* Command for mpirun */ + char *cmd_np, /* Switch for -np or -ntmpi or empty */ + char *cmd_mdrun, /* Command for mdrun */ + char *args_for_mdrun, /* Arguments for mdrun */ + const char *simulation_tpr, /* This tpr will be simulated */ + int nPMEnodes) /* Number of PME nodes to use */ +{ + char *command; + + + /* Make enough space for the system call command, + * (100 extra chars for -npme ... etc. 
options should suffice): */ + snew(command, strlen(cmd_mpirun)+strlen(cmd_mdrun)+strlen(cmd_np)+strlen(args_for_mdrun)+strlen(simulation_tpr)+100); + + /* Note that the -passall options requires args_for_mdrun to be at the end + * of the command line string */ + if (bThreads) + { + sprintf(command, "%s%s-npme %d -s %s %s", + cmd_mdrun, cmd_np, nPMEnodes, simulation_tpr, args_for_mdrun); + } + else + { + sprintf(command, "%s%s%s -npme %d -s %s %s", + cmd_mpirun, cmd_np, cmd_mdrun, nPMEnodes, simulation_tpr, args_for_mdrun); + } + + fprintf(fp, "%s this command line to launch the simulation:\n\n%s", bLaunch ? "Using" : "Please use", command); + sep_line(fp); + fflush(fp); + + /* Now the real thing! */ + if (bLaunch) + { + fprintf(stdout, "\nLaunching simulation with best parameters now.\nExecuting '%s'", command); + sep_line(stdout); + fflush(stdout); + gmx_system_call(command); + } +} + + +static void modify_PMEsettings( + gmx_int64_t simsteps, /* Set this value as number of time steps */ + gmx_int64_t init_step, /* Set this value as init_step */ + const char *fn_best_tpr, /* tpr file with the best performance */ + const char *fn_sim_tpr) /* name of tpr file to be launched */ +{ + t_inputrec *ir; + t_state state; + gmx_mtop_t mtop; + char buf[200]; + + snew(ir, 1); + read_tpx_state(fn_best_tpr, ir, &state, NULL, &mtop); + + /* Reset nsteps and init_step to the value of the input .tpr file */ + ir->nsteps = simsteps; + ir->init_step = init_step; + + /* Write the tpr file which will be launched */ + sprintf(buf, "Writing optimized simulation file %s with nsteps=%s.\n", fn_sim_tpr, "%"GMX_PRId64); + fprintf(stdout, buf, ir->nsteps); + fflush(stdout); + write_tpx_state(fn_sim_tpr, ir, &state, &mtop); + + sfree(ir); +} + + +#define EPME_SWITCHED(e) ((e) == eelPMESWITCH || (e) == eelPMEUSERSWITCH) + +/* Make additional TPR files with more computational load for the + * direct space processors: */ +static void make_benchmark_tprs( + const char *fn_sim_tpr, /* READ : 
User-provided tpr file */ + char *fn_bench_tprs[], /* WRITE: Names of benchmark tpr files */ + gmx_int64_t benchsteps, /* Number of time steps for benchmark runs */ + gmx_int64_t statesteps, /* Step counter in checkpoint file */ + real rmin, /* Minimal Coulomb radius */ + real rmax, /* Maximal Coulomb radius */ + real bScaleRvdw, /* Scale rvdw along with rcoulomb */ + int *ntprs, /* No. of TPRs to write, each with a different + rcoulomb and fourierspacing */ + t_inputinfo *info, /* Contains information about mdp file options */ + FILE *fp) /* Write the output here */ +{ + int i, j, d; + t_inputrec *ir; + t_state state; + gmx_mtop_t mtop; + real nlist_buffer; /* Thickness of the buffer regions for PME-switch potentials */ + char buf[200]; + rvec box_size; + gmx_bool bNote = FALSE; + real add; /* Add this to rcoul for the next test */ + real fac = 1.0; /* Scaling factor for Coulomb radius */ + real fourierspacing; /* Basic fourierspacing from tpr */ + + + sprintf(buf, "Making benchmark tpr file%s with %s time step%s", + *ntprs > 1 ? "s" : "", "%"GMX_PRId64, benchsteps > 1 ? "s" : ""); + fprintf(stdout, buf, benchsteps); + if (statesteps > 0) + { + sprintf(buf, " (adding %s steps from checkpoint file)", "%"GMX_PRId64); + fprintf(stdout, buf, statesteps); + benchsteps += statesteps; + } + fprintf(stdout, ".\n"); + + + snew(ir, 1); + read_tpx_state(fn_sim_tpr, ir, &state, NULL, &mtop); + + /* Check if some kind of PME was chosen */ + if (EEL_PME(ir->coulombtype) == FALSE) + { + gmx_fatal(FARGS, "Can only do optimizations for simulations with %s electrostatics.", + EELTYPE(eelPME)); + } + + /* Check if rcoulomb == rlist, which is necessary for plain PME. 
*/ + if ( (ir->cutoff_scheme != ecutsVERLET) && + (eelPME == ir->coulombtype) && !(ir->rcoulomb == ir->rlist)) + { + gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to rlist (%f).", + EELTYPE(eelPME), ir->rcoulomb, ir->rlist); + } + /* For other PME types, rcoulomb is allowed to be smaller than rlist */ + else if (ir->rcoulomb > ir->rlist) + { + gmx_fatal(FARGS, "%s requires rcoulomb (%f) to be equal to or smaller than rlist (%f)", + EELTYPE(ir->coulombtype), ir->rcoulomb, ir->rlist); + } + + if (bScaleRvdw && ir->rvdw != ir->rcoulomb) + { + fprintf(stdout, "NOTE: input rvdw != rcoulomb, will not scale rvdw\n"); + bScaleRvdw = FALSE; + } + + /* Reduce the number of steps for the benchmarks */ + info->orig_sim_steps = ir->nsteps; + ir->nsteps = benchsteps; + /* We must not use init_step from the input tpr file for the benchmarks */ + info->orig_init_step = ir->init_step; + ir->init_step = 0; + + /* For PME-switch potentials, keep the radial distance of the buffer region */ + nlist_buffer = ir->rlist - ir->rcoulomb; + + /* Determine length of triclinic box vectors */ + for (d = 0; d < DIM; d++) + { + box_size[d] = 0; + for (i = 0; i < DIM; i++) + { + box_size[d] += state.box[d][i]*state.box[d][i]; + } + box_size[d] = sqrt(box_size[d]); + } + + if (ir->fourier_spacing > 0) + { + info->fsx[0] = ir->fourier_spacing; + info->fsy[0] = ir->fourier_spacing; + info->fsz[0] = ir->fourier_spacing; + } + else + { + /* Reconstruct fourierspacing per dimension from the number of grid points and box size */ + info->fsx[0] = box_size[XX]/ir->nkx; + info->fsy[0] = box_size[YY]/ir->nky; + info->fsz[0] = box_size[ZZ]/ir->nkz; + } + + /* If no value for the fourierspacing was provided on the command line, we + * use the reconstruction from the tpr file */ + if (ir->fourier_spacing > 0) + { + /* Use the spacing from the tpr */ + fourierspacing = ir->fourier_spacing; + } + else + { + /* Use the maximum observed spacing */ + fourierspacing = max(max(info->fsx[0], info->fsy[0]), 
info->fsz[0]); + } + + fprintf(stdout, "Calculating PME grid points on the basis of a fourierspacing of %f nm\n", fourierspacing); + + /* For performance comparisons the number of particles is useful to have */ + fprintf(fp, " Number of particles : %d\n", mtop.natoms); + + /* Print information about settings of which some are potentially modified: */ + fprintf(fp, " Coulomb type : %s\n", EELTYPE(ir->coulombtype)); + fprintf(fp, " Grid spacing x y z : %f %f %f\n", + box_size[XX]/ir->nkx, box_size[YY]/ir->nky, box_size[ZZ]/ir->nkz); + fprintf(fp, " Van der Waals type : %s\n", EVDWTYPE(ir->vdwtype)); + if (ir_vdw_switched(ir)) + { + fprintf(fp, " rvdw_switch : %f nm\n", ir->rvdw_switch); + } + if (EPME_SWITCHED(ir->coulombtype)) + { + fprintf(fp, " rlist : %f nm\n", ir->rlist); + } + if (ir->rlistlong != max_cutoff(ir->rvdw, ir->rcoulomb)) + { + fprintf(fp, " rlistlong : %f nm\n", ir->rlistlong); + } + + /* Print a descriptive line about the tpr settings tested */ + fprintf(fp, "\nWill try these real/reciprocal workload settings:\n"); + fprintf(fp, " No. 
scaling rcoulomb"); + fprintf(fp, " nkx nky nkz"); + fprintf(fp, " spacing"); + if (evdwCUT == ir->vdwtype) + { + fprintf(fp, " rvdw"); + } + if (EPME_SWITCHED(ir->coulombtype)) + { + fprintf(fp, " rlist"); + } + if (ir->rlistlong != max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)) ) + { + fprintf(fp, " rlistlong"); + } + fprintf(fp, " tpr file\n"); + + /* Loop to create the requested number of tpr input files */ + for (j = 0; j < *ntprs; j++) + { + /* The first .tpr is the provided one, just need to modify nsteps, + * so skip the following block */ + if (j != 0) + { + /* Determine which Coulomb radii rc to use in the benchmarks */ + add = (rmax-rmin)/(*ntprs-1); + if (is_equal(rmin, info->rcoulomb[0])) + { + ir->rcoulomb = rmin + j*add; + } + else if (is_equal(rmax, info->rcoulomb[0])) + { + ir->rcoulomb = rmin + (j-1)*add; + } + else + { + /* rmin != rcoul != rmax, ergo test between rmin and rmax */ + add = (rmax-rmin)/(*ntprs-2); + ir->rcoulomb = rmin + (j-1)*add; + } + + /* Determine the scaling factor fac */ + fac = ir->rcoulomb/info->rcoulomb[0]; + + /* Scale the Fourier grid spacing */ + ir->nkx = ir->nky = ir->nkz = 0; + calc_grid(NULL, state.box, fourierspacing*fac, &ir->nkx, &ir->nky, &ir->nkz); + - /* Adjust other radii since various conditions neet to be fulfilled */ ++ /* Adjust other radii since various conditions need to be fulfilled */ + if (eelPME == ir->coulombtype) + { + /* plain PME, rcoulomb must be equal to rlist */ + ir->rlist = ir->rcoulomb; + } + else + { + /* rlist must be >= rcoulomb, we keep the size of the buffer region */ + ir->rlist = ir->rcoulomb + nlist_buffer; + } + + if (bScaleRvdw && evdwCUT == ir->vdwtype) + { - /* For vdw cutoff, rvdw >= rlist */ - ir->rvdw = max(info->rvdw[0], ir->rlist); ++ if (ecutsVERLET == ir->cutoff_scheme) ++ { ++ /* With Verlet, the van der Waals radius must always equal the Coulomb radius */ ++ ir->rvdw = ir->rcoulomb; ++ } ++ else ++ { ++ /* For vdw cutoff, rvdw >= rlist */ ++ ir->rvdw = 
max(info->rvdw[0], ir->rlist); ++ } + } + + ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)); + + } /* end of "if (j != 0)" */ + + /* for j==0: Save the original settings + * for j >0: Save modified radii and Fourier grids */ + info->rcoulomb[j] = ir->rcoulomb; + info->rvdw[j] = ir->rvdw; + info->nkx[j] = ir->nkx; + info->nky[j] = ir->nky; + info->nkz[j] = ir->nkz; + info->rlist[j] = ir->rlist; + info->rlistlong[j] = ir->rlistlong; + info->fsx[j] = fac*fourierspacing; + info->fsy[j] = fac*fourierspacing; + info->fsz[j] = fac*fourierspacing; + + /* Write the benchmark tpr file */ + strncpy(fn_bench_tprs[j], fn_sim_tpr, strlen(fn_sim_tpr)-strlen(".tpr")); + sprintf(buf, "_bench%.2d.tpr", j); + strcat(fn_bench_tprs[j], buf); + fprintf(stdout, "Writing benchmark tpr %s with nsteps=", fn_bench_tprs[j]); + fprintf(stdout, "%"GMX_PRId64, ir->nsteps); + if (j > 0) + { + fprintf(stdout, ", scaling factor %f\n", fac); + } + else + { + fprintf(stdout, ", unmodified settings\n"); + } + + write_tpx_state(fn_bench_tprs[j], ir, &state, &mtop); + + /* Write information about modified tpr settings to log file */ + fprintf(fp, "%4d%10f%10f", j, fac, ir->rcoulomb); + fprintf(fp, "%5d%5d%5d", ir->nkx, ir->nky, ir->nkz); + fprintf(fp, " %9f ", info->fsx[j]); + if (evdwCUT == ir->vdwtype) + { + fprintf(fp, "%10f", ir->rvdw); + } + if (EPME_SWITCHED(ir->coulombtype)) + { + fprintf(fp, "%10f", ir->rlist); + } + if (info->rlistlong[0] != max_cutoff(info->rlist[0], max_cutoff(info->rvdw[0], info->rcoulomb[0])) ) + { + fprintf(fp, "%10f", ir->rlistlong); + } + fprintf(fp, " %-14s\n", fn_bench_tprs[j]); + + /* Make it clear to the user that some additional settings were modified */ + if (!is_equal(ir->rvdw, info->rvdw[0]) + || !is_equal(ir->rlistlong, info->rlistlong[0]) ) + { + bNote = TRUE; + } + } + if (bNote) + { + fprintf(fp, "\nNote that in addition to the Coulomb radius and the Fourier grid\n" + "other input settings were also changed (see table above).\n" + 
"Please check if the modified settings are appropriate.\n"); + } + fflush(stdout); + fflush(fp); + sfree(ir); +} + + +/* Rename the files we want to keep to some meaningful filename and + * delete the rest */ +static void cleanup(const t_filenm *fnm, int nfile, int k, int nnodes, + int nPMEnodes, int nr, gmx_bool bKeepStderr) +{ + char numstring[STRLEN]; + char newfilename[STRLEN]; + const char *fn = NULL; + int i; + const char *opt; + + + fprintf(stdout, "Cleaning up, deleting benchmark temp files ...\n"); + + for (i = 0; i < nfile; i++) + { + opt = (char *)fnm[i].opt; + if (strcmp(opt, "-p") == 0) + { + /* do nothing; keep this file */ + ; + } + else if (strcmp(opt, "-bg") == 0) + { + /* Give the log file a nice name so one can later see which parameters were used */ + numstring[0] = '\0'; + if (nr > 0) + { + sprintf(numstring, "_%d", nr); + } + sprintf(newfilename, "%s_no%d_np%d_npme%d%s", opt2fn("-bg", nfile, fnm), k, nnodes, nPMEnodes, numstring); + if (gmx_fexist(opt2fn("-bg", nfile, fnm))) + { + fprintf(stdout, "renaming log file to %s\n", newfilename); + make_backup(newfilename); + rename(opt2fn("-bg", nfile, fnm), newfilename); + } + } + else if (strcmp(opt, "-err") == 0) + { + /* This file contains the output of stderr. We want to keep it in + * cases where there have been problems. 
*/ + fn = opt2fn(opt, nfile, fnm); + numstring[0] = '\0'; + if (nr > 0) + { + sprintf(numstring, "_%d", nr); + } + sprintf(newfilename, "%s_no%d_np%d_npme%d%s", fn, k, nnodes, nPMEnodes, numstring); + if (gmx_fexist(fn)) + { + if (bKeepStderr) + { + fprintf(stdout, "Saving stderr output in %s\n", newfilename); + make_backup(newfilename); + rename(fn, newfilename); + } + else + { + fprintf(stdout, "Deleting %s\n", fn); + remove(fn); + } + } + } + /* Delete the files which are created for each benchmark run: (options -b*) */ + else if ( (0 == strncmp(opt, "-b", 2)) && (opt2bSet(opt, nfile, fnm) || !is_optional(&fnm[i])) ) + { + fn = opt2fn(opt, nfile, fnm); + if (gmx_fexist(fn)) + { + fprintf(stdout, "Deleting %s\n", fn); + remove(fn); + } + } + } +} + + +/* Returns the largest common factor of n1 and n2 */ +static int largest_common_factor(int n1, int n2) +{ + int factor, nmax; + + nmax = min(n1, n2); + for (factor = nmax; factor > 0; factor--) + { + if (0 == (n1 % factor) && 0 == (n2 % factor) ) + { + return(factor); + } + } + return 0; /* one for the compiler */ +} + +enum { + eNpmeAuto, eNpmeAll, eNpmeReduced, eNpmeSubset, eNpmeNr +}; + +/* Create a list of numbers of PME nodes to test */ +static void make_npme_list( + const char *npmevalues_opt, /* Make a complete list with all + * possibilities or a short list that keeps only + * reasonable numbers of PME nodes */ + int *nentries, /* Number of entries we put in the nPMEnodes list */ + int *nPMEnodes[], /* Each entry contains the value for -npme */ + int nnodes, /* Total number of nodes to do the tests on */ + int minPMEnodes, /* Minimum number of PME nodes */ + int maxPMEnodes) /* Maximum number of PME nodes */ +{ + int i, npme, npp; + int min_factor = 1; /* We request that npp and npme have this minimal + * largest common factor (depends on npp) */ + int nlistmax; /* Max. 
list size */ + int nlist; /* Actual number of entries in list */ + int eNPME = 0; + + + /* Do we need to check all possible values for -npme or is a reduced list enough? */ + if (0 == strcmp(npmevalues_opt, "all") ) + { + eNPME = eNpmeAll; + } + else if (0 == strcmp(npmevalues_opt, "subset") ) + { + eNPME = eNpmeSubset; + } + else /* "auto" or "range" */ + { + if (nnodes <= 64) + { + eNPME = eNpmeAll; + } + else if (nnodes < 128) + { + eNPME = eNpmeReduced; + } + else + { + eNPME = eNpmeSubset; + } + } + + /* Calculate how many entries we could possibly have (in case of -npme all) */ + if (nnodes > 2) + { + nlistmax = maxPMEnodes - minPMEnodes + 3; + if (0 == minPMEnodes) + { + nlistmax--; + } + } + else + { + nlistmax = 1; + } + + /* Now make the actual list which is at most of size nlist */ + snew(*nPMEnodes, nlistmax); + nlist = 0; /* start counting again, now the real entries in the list */ + for (i = 0; i < nlistmax - 2; i++) + { + npme = maxPMEnodes - i; + npp = nnodes-npme; + switch (eNPME) + { + case eNpmeAll: + min_factor = 1; + break; + case eNpmeReduced: + min_factor = 2; + break; + case eNpmeSubset: + /* For 2d PME we want a common largest factor of at least the cube + * root of the number of PP nodes */ + min_factor = (int) pow(npp, 1.0/3.0); + break; + default: + gmx_fatal(FARGS, "Unknown option for eNPME in make_npme_list"); + break; + } + if (largest_common_factor(npp, npme) >= min_factor) + { + (*nPMEnodes)[nlist] = npme; + nlist++; + } + } + /* We always test 0 PME nodes and the automatic number */ + *nentries = nlist + 2; + (*nPMEnodes)[nlist ] = 0; + (*nPMEnodes)[nlist+1] = -1; + + fprintf(stderr, "Will try the following %d different values for -npme:\n", *nentries); + for (i = 0; i < *nentries-1; i++) + { + fprintf(stderr, "%d, ", (*nPMEnodes)[i]); + } + fprintf(stderr, "and %d (auto).\n", (*nPMEnodes)[*nentries-1]); +} + + +/* Allocate memory to store the performance data */ +static void init_perfdata(t_perf *perfdata[], int ntprs, int 
datasets, int repeats) +{ + int i, j, k; + + + for (k = 0; k < ntprs; k++) + { + snew(perfdata[k], datasets); + for (i = 0; i < datasets; i++) + { + for (j = 0; j < repeats; j++) + { + snew(perfdata[k][i].Gcycles, repeats); + snew(perfdata[k][i].ns_per_day, repeats); + snew(perfdata[k][i].PME_f_load, repeats); + } + } + } +} + + +/* Check for errors on mdrun -h */ +static void make_sure_it_runs(char *mdrun_cmd_line, int length, FILE *fp) +{ + char *command, *msg; + int ret; + + + snew(command, length + 15); + snew(msg, length + 500); + + fprintf(stdout, "Making sure the benchmarks can be executed ...\n"); + /* FIXME: mdrun -h no longer actually does anything useful. + * It unconditionally prints the help, ignoring all other options. */ + sprintf(command, "%s-h -quiet", mdrun_cmd_line); + ret = gmx_system_call(command); + + if (0 != ret) + { + /* To prevent confusion, do not again issue a gmx_fatal here since we already + * get the error message from mdrun itself */ + sprintf(msg, "Cannot run the benchmark simulations! Please check the error message of\n" + "mdrun for the source of the problem. Did you provide a command line\n" + "argument that neither g_tune_pme nor mdrun understands? 
Offending command:\n"
                "\n%s\n\n", command);

        fprintf(stderr, "%s", msg);
        sep_line(fp);
        fprintf(fp, "%s", msg);

        exit(ret);
    }

    sfree(command);
    sfree(msg    );
}


/* Run all benchmark simulations: for each tpr file, for each entry in the
 * -npme list, for each repeat, launch mdrun via system(), parse its log
 * file for timings, record the results in perfdata, and clean up the
 * per-run temporary files. */
static void do_the_tests(
        FILE           *fp,             /* General g_tune_pme output file         */
        char          **tpr_names,      /* Filenames of the input files to test   */
        int             maxPMEnodes,    /* Max fraction of nodes to use for PME   */
        int             minPMEnodes,    /* Min fraction of nodes to use for PME   */
        int             npme_fixed,     /* If >= -1, test fixed number of PME
                                         * nodes only                             */
        const char     *npmevalues_opt, /* Which -npme values should be tested    */
        t_perf        **perfdata,       /* Here the performace data is stored     */
        int            *pmeentries,     /* Entries in the nPMEnodes list          */
        int             repeats,        /* Repeat each test this often            */
        int             nnodes,         /* Total number of nodes = nPP + nPME     */
        int             nr_tprs,        /* Total number of tpr files to test      */
        gmx_bool        bThreads,       /* Threads or MPI?                        */
        char           *cmd_mpirun,     /* mpirun command string                  */
        char           *cmd_np,         /* "-np", "-n", whatever mpirun needs     */
        char           *cmd_mdrun,      /* mdrun command string                   */
        char           *cmd_args_bench, /* arguments for mdrun in a string        */
        const t_filenm *fnm,            /* List of filenames from command line    */
        int             nfile,          /* Number of files specified on the cmdl. */
        int             presteps,       /* DLB equilibration steps, is checked    */
        gmx_int64_t     cpt_steps)      /* Time step counter in the checkpoint    */
{
    int      i, nr, k, ret, count = 0, totaltests;
    int     *nPMEnodes = NULL;
    t_perf  *pd        = NULL;
    int      cmdline_length;
    char    *command, *cmd_stub;
    char     buf[STRLEN];
    gmx_bool bResetProblem = FALSE;
    gmx_bool bFirst        = TRUE;


    /* This string array corresponds to the eParselog enum type at the start
     * of this file */
    const char* ParseLog[] = {
        "OK.",
        "Logfile not found!",
        "No timings, logfile truncated?",
        "Run was terminated.",
        "Counters were not reset properly.",
        "No DD grid found for these settings.",
        "TPX version conflict!",
        "mdrun was not started in parallel!",
        "Number of PP nodes has a prime factor that is too large.",
        "An error occured."
    };
    char str_PME_f_load[13];


    /* Allocate space for the mdrun command line. 100 extra characters should
       be more than enough for the -npme etcetera arguments */
    cmdline_length = strlen(cmd_mpirun)
        + strlen(cmd_np)
        + strlen(cmd_mdrun)
        + strlen(cmd_args_bench)
        + strlen(tpr_names[0]) + 100;
    snew(command, cmdline_length);
    snew(cmd_stub, cmdline_length);

    /* Construct the part of the command line that stays the same for all tests: */
    if (bThreads)
    {
        sprintf(cmd_stub, "%s%s", cmd_mdrun, cmd_np);
    }
    else
    {
        sprintf(cmd_stub, "%s%s%s ", cmd_mpirun, cmd_np, cmd_mdrun);
    }

    /* Create a list of numbers of PME nodes to test */
    if (npme_fixed < -1)
    {
        make_npme_list(npmevalues_opt, pmeentries, &nPMEnodes,
                       nnodes, minPMEnodes, maxPMEnodes);
    }
    else
    {
        *pmeentries  = 1;
        snew(nPMEnodes, 1);
        nPMEnodes[0] = npme_fixed;
        fprintf(stderr, "Will use a fixed number of %d PME-only nodes.\n", nPMEnodes[0]);
    }

    if (0 == repeats)
    {
        /* -r 0 means: only write the tuned tpr files, do not benchmark */
        fprintf(fp, "\nNo benchmarks done since number of repeats (-r) is 0.\n");
        gmx_ffclose(fp);
        finalize(opt2fn("-p", nfile, fnm));
        exit(0);
    }

    /* Allocate one dataset for each tpr input file: */
    init_perfdata(perfdata, nr_tprs, *pmeentries, repeats);

    /*****************************************/
    /* Main loop over all tpr files to test: */
    /*****************************************/
    totaltests = nr_tprs*(*pmeentries)*repeats;
    for (k = 0; k < nr_tprs; k++)
    {
        fprintf(fp, "\nIndividual timings for input file %d (%s):\n", k, tpr_names[k]);
        fprintf(fp, "PME nodes Gcycles ns/day PME/f Remark\n");
        /* Loop over various numbers of PME nodes: */
        for (i = 0; i < *pmeentries; i++)
        {
            pd = &perfdata[k][i];

            /* Loop over the repeats for each scenario: */
            for (nr = 0; nr < repeats; nr++)
            {
                pd->nPMEnodes = nPMEnodes[i];

                /* Add -npme and -s to the command line and save it. Note that
                 * the -passall (if set) options requires cmd_args_bench to be
                 * at the end of the command line string */
                snew(pd->mdrun_cmd_line, cmdline_length);
                sprintf(pd->mdrun_cmd_line, "%s-npme %d -s %s %s",
                        cmd_stub, pd->nPMEnodes, tpr_names[k], cmd_args_bench);

                /* To prevent that all benchmarks fail due to a show-stopper argument
                 * on the mdrun command line, we make a quick check with mdrun -h first */
                if (bFirst)
                {
                    make_sure_it_runs(pd->mdrun_cmd_line, cmdline_length, fp);
                }
                bFirst = FALSE;

                /* Do a benchmark simulation: */
                if (repeats > 1)
                {
                    sprintf(buf, ", pass %d/%d", nr+1, repeats);
                }
                else
                {
                    buf[0] = '\0';
                }
                fprintf(stdout, "\n=== Progress %2.0f%%, tpr %d/%d, run %d/%d%s:\n",
                        (100.0*count)/totaltests,
                        k+1, nr_tprs, i+1, *pmeentries, buf);
                make_backup(opt2fn("-err", nfile, fnm));
                /* stdout of mdrun is discarded, stderr is captured in the -err file */
                sprintf(command, "%s 1> /dev/null 2>%s", pd->mdrun_cmd_line, opt2fn("-err", nfile, fnm));
                fprintf(stdout, "%s\n", pd->mdrun_cmd_line);
                gmx_system_call(command);

                /* Collect the performance data from the log file; also check stderr
                 * for fatal errors */
                ret = parse_logfile(opt2fn("-bg", nfile, fnm), opt2fn("-err", nfile, fnm),
                                    pd, nr, presteps, cpt_steps, nnodes);
                if ((presteps > 0) && (ret == eParselogResetProblem))
                {
                    bResetProblem = TRUE;
                }

                if (-1 == pd->nPMEnodes)
                {
                    /* Automatic npme choice: report the number mdrun guessed */
                    sprintf(buf, "(%3d)", pd->guessPME);
                }
                else
                {
                    sprintf(buf, " ");
                }

                /* Nicer output */
                if (pd->PME_f_load[nr] > 0.0)
                {
                    sprintf(str_PME_f_load, "%12.3f", pd->PME_f_load[nr]);
                }
                else
                {
                    sprintf(str_PME_f_load, "%s", " - ");
                }

                /* Write the data we got to disk */
                fprintf(fp, "%4d%s %12.3f %12.3f %s %s", pd->nPMEnodes,
                        buf, pd->Gcycles[nr], pd->ns_per_day[nr], str_PME_f_load, ParseLog[ret]);
                if (!(ret == eParselogOK || ret == eParselogNoDDGrid || ret == eParselogNotFound) )
                {
                    fprintf(fp, " Check %s file for problems.", ret == eParselogFatal ? "err" : "log");
                }
                fprintf(fp, "\n");
                fflush(fp);
                count++;

                /* Do some cleaning up and delete the files we do not need any more */
                cleanup(fnm, nfile, k, nnodes, pd->nPMEnodes, nr, ret == eParselogFatal);

                /* If the first run with this number of processors already failed, do not try again: */
                if (pd->Gcycles[0] <= 0.0 && repeats > 1)
                {
                    fprintf(stdout, "Skipping remaining passes of unsuccessful setting, see log file for details.\n");
                    count += repeats-(nr+1);
                    break;
                }
            }   /* end of repeats loop */
        }       /* end of -npme loop */
    }           /* end of tpr file loop */

    if (bResetProblem)
    {
        sep_line(fp);
        fprintf(fp, "WARNING: The cycle and time step counters could not be reset properly. ");
        sep_line(fp);
    }
    sfree(command);
    sfree(cmd_stub);
}


/* Sanity-check the command line parameters before any benchmark is run;
 * adjusts *ntprs, *rmin, *rmax in place where needed. */
static void check_input(
        int             nnodes,
        int             repeats,
        int            *ntprs,
        real           *rmin,
        real            rcoulomb,
        real           *rmax,
        real            maxPMEfraction,
        real            minPMEfraction,
        int             npme_fixed,
        gmx_int64_t     bench_nsteps,
        const t_filenm *fnm,
        int             nfile,
        int             sim_part,
        int             presteps,
        int             npargs,
        t_pargs        *pa)
{
    int old;


    /* Make sure the input file exists */
    if (!gmx_fexist(opt2fn("-s", nfile, fnm)))
    {
        gmx_fatal(FARGS, "File %s not found.", opt2fn("-s", nfile, fnm));
    }

    /* Make sure that the checkpoint file is not overwritten during benchmarking */
    if ( (0 == strcmp(opt2fn("-cpi", nfile, fnm), opt2fn("-bcpo", nfile, fnm)) ) && (sim_part > 1) )
    {
        gmx_fatal(FARGS, "Checkpoint input (-cpi) and benchmark checkpoint output (-bcpo) files must not be identical.\n"
                  "The checkpoint input file must not be overwritten during the benchmarks.\n");
    }

    /* Make sure that repeats is >= 0 (if == 0, only write tpr files) */
    if (repeats < 0)
    {
        gmx_fatal(FARGS, "Number of repeats < 0!");
    }

    /* Check number of nodes */
    if (nnodes < 1)
    {
        gmx_fatal(FARGS, "Number of nodes/threads must be a positive integer.");
    }

    /* Automatically choose -ntpr if not set */
    if (*ntprs < 1)
    {
        if (nnodes < 16)
        {
            *ntprs = 1;
        }
        else
        {
            *ntprs = 3;
            /* Set a reasonable scaling factor for rcoulomb */
            if (*rmax <= 0)
            {
                *rmax = rcoulomb * 1.2;
            }
        }
        fprintf(stderr, "Will test %d tpr file%s.\n", *ntprs, *ntprs == 1 ?
"" : "s"); + } + else + { + if (1 == *ntprs) + { + fprintf(stderr, "Note: Choose ntpr>1 to shift PME load between real and reciprocal space.\n"); + } + } + + /* Make shure that rmin <= rcoulomb <= rmax */ + if (*rmin <= 0) + { + *rmin = rcoulomb; + } + if (*rmax <= 0) + { + *rmax = rcoulomb; + } + if (!(*rmin <= *rmax) ) + { + gmx_fatal(FARGS, "Please choose the Coulomb radii such that rmin <= rmax.\n" + "rmin = %g, rmax = %g, actual rcoul from .tpr file = %g\n", *rmin, *rmax, rcoulomb); + } + /* Add test scenarios if rmin or rmax were set */ + if (*ntprs <= 2) + { + if (!is_equal(*rmin, rcoulomb) && (*ntprs == 1) ) + { + (*ntprs)++; + fprintf(stderr, "NOTE: Setting -rmin to %g changed -ntpr to %d\n", + *rmin, *ntprs); + } + if (!is_equal(*rmax, rcoulomb) && (*ntprs == 1) ) + { + (*ntprs)++; + fprintf(stderr, "NOTE: Setting -rmax to %g changed -ntpr to %d\n", + *rmax, *ntprs); + } + } + old = *ntprs; + /* If one of rmin, rmax is set, we need 2 tpr files at minimum */ + if (!is_equal(*rmax, rcoulomb) || !is_equal(*rmin, rcoulomb) ) + { + *ntprs = max(*ntprs, 2); + } + + /* If both rmin, rmax are set, we need 3 tpr files at minimum */ + if (!is_equal(*rmax, rcoulomb) && !is_equal(*rmin, rcoulomb) ) + { + *ntprs = max(*ntprs, 3); + } + + if (old != *ntprs) + { + fprintf(stderr, "NOTE: Your rmin, rmax setting changed -ntpr to %d\n", *ntprs); + } + + if (*ntprs > 1) + { + if (is_equal(*rmin, rcoulomb) && is_equal(rcoulomb, *rmax)) /* We have just a single rc */ + { + fprintf(stderr, "WARNING: Resetting -ntpr to 1 since no Coulomb radius scaling is requested.\n" + "Please set rmin < rmax to test Coulomb radii in the [rmin, rmax] interval\n" + "with correspondingly adjusted PME grid settings\n"); + *ntprs = 1; + } + } + + /* Check whether max and min fraction are within required values */ + if (maxPMEfraction > 0.5 || maxPMEfraction < 0) + { + gmx_fatal(FARGS, "-max must be between 0 and 0.5"); + } + if (minPMEfraction > 0.5 || minPMEfraction < 0) + { + gmx_fatal(FARGS, 
"-min must be between 0 and 0.5"); + } + if (maxPMEfraction < minPMEfraction) + { + gmx_fatal(FARGS, "-max must be larger or equal to -min"); + } + + /* Check whether the number of steps - if it was set - has a reasonable value */ + if (bench_nsteps < 0) + { + gmx_fatal(FARGS, "Number of steps must be positive."); + } + + if (bench_nsteps > 10000 || bench_nsteps < 100) + { + fprintf(stderr, "WARNING: steps="); + fprintf(stderr, "%"GMX_PRId64, bench_nsteps); + fprintf(stderr, ". Are you sure you want to perform so %s steps for each benchmark?\n", (bench_nsteps < 100) ? "few" : "many"); + } + + if (presteps < 0) + { + gmx_fatal(FARGS, "Cannot have a negative number of presteps.\n"); + } + + /* Check for rcoulomb scaling if more than one .tpr file is tested */ + if (*ntprs > 1) + { + if (*rmin/rcoulomb < 0.75 || *rmax/rcoulomb > 1.25) + { + fprintf(stderr, "WARNING: Applying extreme scaling factor. I hope you know what you are doing.\n"); + } + } + + /* If a fixed number of PME nodes is set we do rcoulomb and PME gird tuning + * only. We need to check whether the requested number of PME-only nodes + * makes sense. */ + if (npme_fixed > -1) + { + /* No more than 50% of all nodes can be assigned as PME-only nodes. 
*/ + if (2*npme_fixed > nnodes) + { + gmx_fatal(FARGS, "Cannot have more than %d PME-only nodes for a total of %d nodes (you chose %d).\n", + nnodes/2, nnodes, npme_fixed); + } + if ((npme_fixed > 0) && (5*npme_fixed < nnodes)) + { + fprintf(stderr, "WARNING: Only %g percent of the nodes are assigned as PME-only nodes.\n", + 100.0*((real)npme_fixed / (real)nnodes)); + } + if (opt2parg_bSet("-min", npargs, pa) || opt2parg_bSet("-max", npargs, pa)) + { + fprintf(stderr, "NOTE: The -min, -max, and -npme options have no effect when a\n" + " fixed number of PME-only nodes is requested with -fix.\n"); + } + } +} + + +/* Returns TRUE when "opt" is needed at launch time */ +static gmx_bool is_launch_file(char *opt, gmx_bool bSet) +{ + if (0 == strncmp(opt, "-swap", 5)) + { + return bSet; + } + + /* Apart from the input .tpr and the output log files we need all options that + * were set on the command line and that do not start with -b */ + if (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2) + || 0 == strncmp(opt, "-err", 4) || 0 == strncmp(opt, "-p", 2) ) + { + return FALSE; + } + + return bSet; +} + + +/* Returns TRUE when "opt" defines a file which is needed for the benchmarks runs */ +static gmx_bool is_bench_file(char *opt, gmx_bool bSet, gmx_bool bOptional, gmx_bool bIsOutput) +{ + /* Apart from the input .tpr, all files starting with "-b" are for + * _b_enchmark files exclusively */ + if (0 == strncmp(opt, "-s", 2)) + { + return FALSE; + } + + if (0 == strncmp(opt, "-b", 2) || 0 == strncmp(opt, "-s", 2)) + { + if (!bOptional || bSet) + { + return TRUE; + } + else + { + return FALSE; + } + } + else + { + if (bIsOutput) + { + return FALSE; + } + else + { + if (bSet) /* These are additional input files like -cpi -ei */ + { + return TRUE; + } + else + { + return FALSE; + } + } + } +} + + +/* Adds 'buf' to 'str' */ +static void add_to_string(char **str, char *buf) +{ + int len; + + + len = strlen(*str) + strlen(buf) + 1; + srenew(*str, len); + strcat(*str, buf); 
}


/* Create the command line for the benchmark as well as for the real run */
static void create_command_line_snippets(
        gmx_bool  bAppendFiles,
        gmx_bool  bKeepAndNumCPT,
        gmx_bool  bResetHWay,
        int       presteps,
        int       nfile,
        t_filenm  fnm[],
        char     *cmd_args_bench[],  /* command line arguments for benchmark runs */
        char     *cmd_args_launch[], /* command line arguments for simulation run */
        char      extra_args[])      /* Add this to the end of the command line */
{
    int         i;
    char       *opt;
    const char *name;
    char        strbuf[STRLEN];


    /* strlen needs at least '\0' as a string: */
    snew(*cmd_args_bench, 1);
    snew(*cmd_args_launch, 1);
    /* NOTE: these parse as *(cmd_args_launch[0]) — writing the first char
     * of the freshly allocated string, which is the intended behavior */
    *cmd_args_launch[0] = '\0';
    *cmd_args_bench[0]  = '\0';


    /*******************************************/
    /* 1. Process other command line arguments */
    /*******************************************/
    if (presteps > 0)
    {
        /* Add equilibration steps to benchmark options */
        sprintf(strbuf, "-resetstep %d ", presteps);
        add_to_string(cmd_args_bench, strbuf);
    }
    /* These switches take effect only at launch time */
    if (FALSE == bAppendFiles)
    {
        add_to_string(cmd_args_launch, "-noappend ");
    }
    if (bKeepAndNumCPT)
    {
        add_to_string(cmd_args_launch, "-cpnum ");
    }
    if (bResetHWay)
    {
        add_to_string(cmd_args_launch, "-resethway ");
    }

    /********************/
    /* 2. Process files */
    /********************/
    for (i = 0; i < nfile; i++)
    {
        opt  = (char *)fnm[i].opt;
        name = opt2fn(opt, nfile, fnm);

        /* Strbuf contains the options, now let's sort out where we need that */
        sprintf(strbuf, "%s %s ", opt, name);

        if (is_bench_file(opt, opt2bSet(opt, nfile, fnm), is_optional(&fnm[i]), is_output(&fnm[i])) )
        {
            /* All options starting with -b* need the 'b' removed,
             * therefore overwrite strbuf */
            if (0 == strncmp(opt, "-b", 2))
            {
                sprintf(strbuf, "-%s %s ", &opt[2], name);
            }

            add_to_string(cmd_args_bench, strbuf);
        }

        if (is_launch_file(opt, opt2bSet(opt, nfile, fnm)) )
        {
            add_to_string(cmd_args_launch, strbuf);
        }
    }

    add_to_string(cmd_args_bench, extra_args);
    add_to_string(cmd_args_launch, extra_args);
}


/* Set option opt: mark every matching entry in fnm[] as explicitly set */
static void setopt(const char *opt, int nfile, t_filenm fnm[])
{
    int i;

    for (i = 0; (i < nfile); i++)
    {
        if (strcmp(opt, fnm[i].opt) == 0)
        {
            fnm[i].flag |= ffSET;
        }
    }
}


/* This routine inspects the tpr file and ...
 * 1. checks for output files that get triggered by a tpr option. These output
 *    files are marked as 'set' to allow for a proper cleanup after each
 *    tuning run.
 * 2. returns the PME:PP load ratio
 * 3. returns rcoulomb from the tpr */
static float inspect_tpr(int nfile, t_filenm fnm[], real *rcoulomb)
{
    gmx_bool bPull; /* Is pulling requested in .tpr file?             */
    gmx_bool bTpi;  /* Is test particle insertion requested?          */
    gmx_bool bFree; /* Is a free energy simulation requested?         */
    gmx_bool bNM;   /* Is a normal mode analysis requested?           */
    gmx_bool bSwap; /* Is water/ion position swapping requested?
*/ + t_inputrec ir; + t_state state; + gmx_mtop_t mtop; + + + /* Check tpr file for options that trigger extra output files */ + read_tpx_state(opt2fn("-s", nfile, fnm), &ir, &state, NULL, &mtop); + bPull = (epullNO != ir.ePull); + bFree = (efepNO != ir.efep ); + bNM = (eiNM == ir.eI ); + bSwap = (eswapNO != ir.eSwapCoords); + bTpi = EI_TPI(ir.eI); + + /* Set these output files on the tuning command-line */ + if (bPull) + { + setopt("-pf", nfile, fnm); + setopt("-px", nfile, fnm); + } + if (bFree) + { + setopt("-dhdl", nfile, fnm); + } + if (bTpi) + { + setopt("-tpi", nfile, fnm); + setopt("-tpid", nfile, fnm); + } + if (bNM) + { + setopt("-mtx", nfile, fnm); + } + if (bSwap) + { + setopt("-swap", nfile, fnm); + } + + *rcoulomb = ir.rcoulomb; + + /* Return the estimate for the number of PME nodes */ + return pme_load_estimate(&mtop, &ir, state.box); +} + + +static void couple_files_options(int nfile, t_filenm fnm[]) +{ + int i; + gmx_bool bSet, bBench; + char *opt; + char buf[20]; + + + for (i = 0; i < nfile; i++) + { + opt = (char *)fnm[i].opt; + bSet = ((fnm[i].flag & ffSET) != 0); + bBench = (0 == strncmp(opt, "-b", 2)); + + /* Check optional files */ + /* If e.g. -eo is set, then -beo also needs to be set */ + if (is_optional(&fnm[i]) && bSet && !bBench) + { + sprintf(buf, "-b%s", &opt[1]); + setopt(buf, nfile, fnm); + } + /* If -beo is set, then -eo also needs to be! */ + if (is_optional(&fnm[i]) && bSet && bBench) + { + sprintf(buf, "-%s", &opt[2]); + setopt(buf, nfile, fnm); + } + } +} + + +#define BENCHSTEPS (1000) + +int gmx_tune_pme(int argc, char *argv[]) +{ + const char *desc[] = { + "For a given number [TT]-np[tt] or [TT]-ntmpi[tt] of processors/threads, [THISMODULE] systematically", + "times [gmx-mdrun] with various numbers of PME-only nodes and determines", + "which setting is fastest. It will also test whether performance can", + "be enhanced by shifting load from the reciprocal to the real space", + "part of the Ewald sum. 
", + "Simply pass your [TT].tpr[tt] file to [THISMODULE] together with other options", + "for [gmx-mdrun] as needed.[PAR]", + "Which executables are used can be set in the environment variables", + "MPIRUN and MDRUN. If these are not present, 'mpirun' and 'mdrun'", + "will be used as defaults. Note that for certain MPI frameworks you", + "need to provide a machine- or hostfile. This can also be passed", + "via the MPIRUN variable, e.g.[PAR]", + "[TT]export MPIRUN=\"/usr/local/mpirun -machinefile hosts\"[tt][PAR]", + "Please call [THISMODULE] with the normal options you would pass to", + "[gmx-mdrun] and add [TT]-np[tt] for the number of processors to perform the", + "tests on, or [TT]-ntmpi[tt] for the number of threads. You can also add [TT]-r[tt]", + "to repeat each test several times to get better statistics. [PAR]", + "[THISMODULE] can test various real space / reciprocal space workloads", + "for you. With [TT]-ntpr[tt] you control how many extra [TT].tpr[tt] files will be", + "written with enlarged cutoffs and smaller Fourier grids respectively.", + "Typically, the first test (number 0) will be with the settings from the input", + "[TT].tpr[tt] file; the last test (number [TT]ntpr[tt]) will have the Coulomb cutoff", + "specified by [TT]-rmax[tt] with a somwhat smaller PME grid at the same time. ", + "In this last test, the Fourier spacing is multiplied with [TT]rmax[tt]/rcoulomb. ", + "The remaining [TT].tpr[tt] files will have equally-spaced Coulomb radii (and Fourier " + "spacings) between these extremes. [BB]Note[bb] that you can set [TT]-ntpr[tt] to 1", + "if you just seek the optimal number of PME-only nodes; in that case", + "your input [TT].tpr[tt] file will remain unchanged.[PAR]", + "For the benchmark runs, the default of 1000 time steps should suffice for most", + "MD systems. The dynamic load balancing needs about 100 time steps", + "to adapt to local load imbalances, therefore the time step counters", + "are by default reset after 100 steps. 
For large systems (>1M atoms), as well as ", + "for a higher accuarcy of the measurements, you should set [TT]-resetstep[tt] to a higher value.", + "From the 'DD' load imbalance entries in the md.log output file you", + "can tell after how many steps the load is sufficiently balanced. Example call:[PAR]" + "[TT]gmx tune_pme -np 64 -s protein.tpr -launch[tt][PAR]", + "After calling [gmx-mdrun] several times, detailed performance information", + "is available in the output file [TT]perf.out[tt].", + "[BB]Note[bb] that during the benchmarks, a couple of temporary files are written", + "(options [TT]-b[tt]*), these will be automatically deleted after each test.[PAR]", + "If you want the simulation to be started automatically with the", + "optimized parameters, use the command line option [TT]-launch[tt].[PAR]", + }; + + int nnodes = 1; + int repeats = 2; + int pmeentries = 0; /* How many values for -npme do we actually test for each tpr file */ + real maxPMEfraction = 0.50; + real minPMEfraction = 0.25; + int maxPMEnodes, minPMEnodes; + float guessPMEratio; /* guessed PME:PP ratio based on the tpr file */ + float guessPMEnodes; + int npme_fixed = -2; /* If >= -1, use only this number + * of PME-only nodes */ + int ntprs = 0; + real rmin = 0.0, rmax = 0.0; /* min and max value for rcoulomb if scaling is requested */ + real rcoulomb = -1.0; /* Coulomb radius as set in .tpr file */ + gmx_bool bScaleRvdw = TRUE; + gmx_int64_t bench_nsteps = BENCHSTEPS; + gmx_int64_t new_sim_nsteps = -1; /* -1 indicates: not set by the user */ + gmx_int64_t cpt_steps = 0; /* Step counter in .cpt input file */ + int presteps = 100; /* Do a full cycle reset after presteps steps */ + gmx_bool bOverwrite = FALSE, bKeepTPR; + gmx_bool bLaunch = FALSE; + char *ExtraArgs = NULL; + char **tpr_names = NULL; + const char *simulation_tpr = NULL; + int best_npme, best_tpr; + int sim_part = 1; /* For benchmarks with checkpoint files */ + char bbuf[STRLEN]; + + /* Default program names if nothing else is 
found */ + char *cmd_mpirun = NULL, *cmd_mdrun = NULL; + char *cmd_args_bench, *cmd_args_launch; + char *cmd_np = NULL; + + t_perf **perfdata = NULL; + t_inputinfo *info; + int i; + FILE *fp; + t_commrec *cr; + + /* Print out how long the tuning took */ + double seconds; + + static t_filenm fnm[] = { + /* g_tune_pme */ + { efOUT, "-p", "perf", ffWRITE }, + { efLOG, "-err", "bencherr", ffWRITE }, + { efTPX, "-so", "tuned", ffWRITE }, + /* mdrun: */ + { efTPX, NULL, NULL, ffREAD }, + { efTRN, "-o", NULL, ffWRITE }, + { efCOMPRESSED, "-x", NULL, ffOPTWR }, + { efCPT, "-cpi", NULL, ffOPTRD }, + { efCPT, "-cpo", NULL, ffOPTWR }, + { efSTO, "-c", "confout", ffWRITE }, + { efEDR, "-e", "ener", ffWRITE }, + { efLOG, "-g", "md", ffWRITE }, + { efXVG, "-dhdl", "dhdl", ffOPTWR }, + { efXVG, "-field", "field", ffOPTWR }, + { efXVG, "-table", "table", ffOPTRD }, + { efXVG, "-tabletf", "tabletf", ffOPTRD }, + { efXVG, "-tablep", "tablep", ffOPTRD }, + { efXVG, "-tableb", "table", ffOPTRD }, + { efTRX, "-rerun", "rerun", ffOPTRD }, + { efXVG, "-tpi", "tpi", ffOPTWR }, + { efXVG, "-tpid", "tpidist", ffOPTWR }, + { efEDI, "-ei", "sam", ffOPTRD }, + { efXVG, "-eo", "edsam", ffOPTWR }, + { efXVG, "-devout", "deviatie", ffOPTWR }, + { efXVG, "-runav", "runaver", ffOPTWR }, + { efXVG, "-px", "pullx", ffOPTWR }, + { efXVG, "-pf", "pullf", ffOPTWR }, + { efXVG, "-ro", "rotation", ffOPTWR }, + { efLOG, "-ra", "rotangles", ffOPTWR }, + { efLOG, "-rs", "rotslabs", ffOPTWR }, + { efLOG, "-rt", "rottorque", ffOPTWR }, + { efMTX, "-mtx", "nm", ffOPTWR }, + { efNDX, "-dn", "dipole", ffOPTWR }, + { efXVG, "-swap", "swapions", ffOPTWR }, + /* Output files that are deleted after each benchmark run */ + { efTRN, "-bo", "bench", ffWRITE }, + { efXTC, "-bx", "bench", ffWRITE }, + { efCPT, "-bcpo", "bench", ffWRITE }, + { efSTO, "-bc", "bench", ffWRITE }, + { efEDR, "-be", "bench", ffWRITE }, + { efLOG, "-bg", "bench", ffWRITE }, + { efXVG, "-beo", "benchedo", ffOPTWR }, + { efXVG, "-bdhdl", 
"benchdhdl", ffOPTWR }, + { efXVG, "-bfield", "benchfld", ffOPTWR }, + { efXVG, "-btpi", "benchtpi", ffOPTWR }, + { efXVG, "-btpid", "benchtpid", ffOPTWR }, + { efXVG, "-bdevout", "benchdev", ffOPTWR }, + { efXVG, "-brunav", "benchrnav", ffOPTWR }, + { efXVG, "-bpx", "benchpx", ffOPTWR }, + { efXVG, "-bpf", "benchpf", ffOPTWR }, + { efXVG, "-bro", "benchrot", ffOPTWR }, + { efLOG, "-bra", "benchrota", ffOPTWR }, + { efLOG, "-brs", "benchrots", ffOPTWR }, + { efLOG, "-brt", "benchrott", ffOPTWR }, + { efMTX, "-bmtx", "benchn", ffOPTWR }, + { efNDX, "-bdn", "bench", ffOPTWR }, + { efXVG, "-bswap", "benchswp", ffOPTWR } + }; + + gmx_bool bThreads = FALSE; + + int nthreads = 1; + + const char *procstring[] = + { NULL, "-np", "-n", "none", NULL }; + const char *npmevalues_opt[] = + { NULL, "auto", "all", "subset", NULL }; + + gmx_bool bAppendFiles = TRUE; + gmx_bool bKeepAndNumCPT = FALSE; + gmx_bool bResetCountersHalfWay = FALSE; + gmx_bool bBenchmark = TRUE; + + output_env_t oenv = NULL; + + t_pargs pa[] = { + /***********************/ + /* g_tune_pme options: */ + /***********************/ + { "-np", FALSE, etINT, {&nnodes}, + "Number of nodes to run the tests on (must be > 2 for separate PME nodes)" }, + { "-npstring", FALSE, etENUM, {procstring}, + "Specify the number of processors to [TT]$MPIRUN[tt] using this string"}, + { "-ntmpi", FALSE, etINT, {&nthreads}, + "Number of MPI-threads to run the tests on (turns MPI & mpirun off)"}, + { "-r", FALSE, etINT, {&repeats}, + "Repeat each test this often" }, + { "-max", FALSE, etREAL, {&maxPMEfraction}, + "Max fraction of PME nodes to test with" }, + { "-min", FALSE, etREAL, {&minPMEfraction}, + "Min fraction of PME nodes to test with" }, + { "-npme", FALSE, etENUM, {npmevalues_opt}, + "Within -min and -max, benchmark all possible values for [TT]-npme[tt], or just a reasonable subset. 
" + "Auto neglects -min and -max and chooses reasonable values around a guess for npme derived from the .tpr"}, + { "-fix", FALSE, etINT, {&npme_fixed}, + "If >= -1, do not vary the number of PME-only nodes, instead use this fixed value and only vary rcoulomb and the PME grid spacing."}, + { "-rmax", FALSE, etREAL, {&rmax}, + "If >0, maximal rcoulomb for -ntpr>1 (rcoulomb upscaling results in fourier grid downscaling)" }, + { "-rmin", FALSE, etREAL, {&rmin}, + "If >0, minimal rcoulomb for -ntpr>1" }, + { "-scalevdw", FALSE, etBOOL, {&bScaleRvdw}, + "Scale rvdw along with rcoulomb"}, + { "-ntpr", FALSE, etINT, {&ntprs}, + "Number of [TT].tpr[tt] files to benchmark. Create this many files with different rcoulomb scaling factors depending on -rmin and -rmax. " + "If < 1, automatically choose the number of [TT].tpr[tt] files to test" }, + { "-steps", FALSE, etINT64, {&bench_nsteps}, + "Take timings for this many steps in the benchmark runs" }, + { "-resetstep", FALSE, etINT, {&presteps}, + "Let dlb equilibrate this many steps before timings are taken (reset cycle counters after this many steps)" }, + { "-simsteps", FALSE, etINT64, {&new_sim_nsteps}, + "If non-negative, perform this many steps in the real run (overwrites nsteps from [TT].tpr[tt], add [TT].cpt[tt] steps)" }, + { "-launch", FALSE, etBOOL, {&bLaunch}, + "Launch the real simulation after optimization" }, + { "-bench", FALSE, etBOOL, {&bBenchmark}, + "Run the benchmarks or just create the input [TT].tpr[tt] files?" 
}, + /******************/ + /* mdrun options: */ + /******************/ + /* We let g_tune_pme parse and understand these options, because we need to + * prevent that they appear on the mdrun command line for the benchmarks */ + { "-append", FALSE, etBOOL, {&bAppendFiles}, + "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names (for launch only)" }, + { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT}, + "Keep and number checkpoint files (launch only)" }, + { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay}, + "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt] (launch only)" } + }; + +#define NFILE asize(fnm) + + seconds = gmx_gettime(); + + if (!parse_common_args(&argc, argv, PCA_NOEXIT_ON_ARGS, + NFILE, fnm, asize(pa), pa, asize(desc), desc, + 0, NULL, &oenv)) + { + return 0; + } + + /* Store the remaining unparsed command line entries in a string which + * is then attached to the mdrun command line */ + snew(ExtraArgs, 1); + ExtraArgs[0] = '\0'; + for (i = 1; i < argc; i++) /* argc will now be 1 if everything was understood */ + { + add_to_string(&ExtraArgs, argv[i]); + add_to_string(&ExtraArgs, " "); + } + + if (opt2parg_bSet("-ntmpi", asize(pa), pa)) + { + bThreads = TRUE; + if (opt2parg_bSet("-npstring", asize(pa), pa)) + { + fprintf(stderr, "WARNING: -npstring has no effect when using threads.\n"); + } + + if (nnodes > 1) + { + gmx_fatal(FARGS, "Can't run multi-threaded MPI simulation yet!"); + } + /* and now we just set this; a bit of an ugly hack*/ + nnodes = nthreads; + } + /* Check for PME:PP ratio and whether tpr triggers additional output files */ + guessPMEratio = inspect_tpr(NFILE, fnm, &rcoulomb); + + /* Automatically set -beo options if -eo is set etc. 
*/ + couple_files_options(NFILE, fnm); + + /* Construct the command line arguments for benchmark runs + * as well as for the simulation run */ + if (bThreads) + { + sprintf(bbuf, " -ntmpi %d ", nthreads); + } + else + { + /* This string will be used for MPI runs and will appear after the + * mpirun command. */ + if (strcmp(procstring[0], "none") != 0) + { + sprintf(bbuf, " %s %d ", procstring[0], nnodes); + } + else + { + sprintf(bbuf, " "); + } + } + + cmd_np = bbuf; + + create_command_line_snippets(bAppendFiles, bKeepAndNumCPT, bResetCountersHalfWay, presteps, + NFILE, fnm, &cmd_args_bench, &cmd_args_launch, ExtraArgs); + + /* Read in checkpoint file if requested */ + sim_part = 1; + if (opt2bSet("-cpi", NFILE, fnm)) + { + snew(cr, 1); + cr->duty = DUTY_PP; /* makes the following routine happy */ + read_checkpoint_simulation_part(opt2fn("-cpi", NFILE, fnm), + &sim_part, &cpt_steps, cr, + FALSE, NFILE, fnm, NULL, NULL); + sfree(cr); + sim_part++; + /* sim_part will now be 1 if no checkpoint file was found */ + if (sim_part <= 1) + { + gmx_fatal(FARGS, "Checkpoint file %s not found!", opt2fn("-cpi", NFILE, fnm)); + } + } + + /* Open performance output file and write header info */ + fp = gmx_ffopen(opt2fn("-p", NFILE, fnm), "w"); + + /* Make a quick consistency check of command line parameters */ + check_input(nnodes, repeats, &ntprs, &rmin, rcoulomb, &rmax, + maxPMEfraction, minPMEfraction, npme_fixed, + bench_nsteps, fnm, NFILE, sim_part, presteps, + asize(pa), pa); + + /* Determine the maximum and minimum number of PME nodes to test, + * the actual list of settings is build in do_the_tests(). */ + if ((nnodes > 2) && (npme_fixed < -1)) + { + if (0 == strcmp(npmevalues_opt[0], "auto")) + { + /* Determine the npme range automatically based on the PME:PP load guess */ + if (guessPMEratio > 1.0) + { + /* More PME than PP work, probably we do not need separate PME nodes at all! 
*/ + maxPMEnodes = nnodes/2; + minPMEnodes = nnodes/2; + } + else + { + /* PME : PP load is in the range 0..1, let's test around the guess */ + guessPMEnodes = nnodes/(1.0 + 1.0/guessPMEratio); + minPMEnodes = floor(0.7*guessPMEnodes); + maxPMEnodes = ceil(1.6*guessPMEnodes); + maxPMEnodes = min(maxPMEnodes, nnodes/2); + } + } + else + { + /* Determine the npme range based on user input */ + maxPMEnodes = floor(maxPMEfraction*nnodes); + minPMEnodes = max(floor(minPMEfraction*nnodes), 0); + fprintf(stdout, "Will try runs with %d ", minPMEnodes); + if (maxPMEnodes != minPMEnodes) + { + fprintf(stdout, "- %d ", maxPMEnodes); + } + fprintf(stdout, "PME-only nodes.\n Note that the automatic number of PME-only nodes and no separate PME nodes are always tested.\n"); + } + } + else + { + maxPMEnodes = 0; + minPMEnodes = 0; + } + + /* Get the commands we need to set up the runs from environment variables */ + get_program_paths(bThreads, &cmd_mpirun, &cmd_mdrun); + if (bBenchmark && repeats > 0) + { + check_mdrun_works(bThreads, cmd_mpirun, cmd_np, cmd_mdrun); + } + + /* Print some header info to file */ + sep_line(fp); + fprintf(fp, "\n P E R F O R M A N C E R E S U L T S\n"); + sep_line(fp); + fprintf(fp, "%s for Gromacs %s\n", ShortProgram(), GromacsVersion()); + if (!bThreads) + { + fprintf(fp, "Number of nodes : %d\n", nnodes); + fprintf(fp, "The mpirun command is : %s\n", cmd_mpirun); + if (strcmp(procstring[0], "none") != 0) + { + fprintf(fp, "Passing # of nodes via : %s\n", procstring[0]); + } + else + { + fprintf(fp, "Not setting number of nodes in system call\n"); + } + } + else + { + fprintf(fp, "Number of threads : %d\n", nnodes); + } + + fprintf(fp, "The mdrun command is : %s\n", cmd_mdrun); + fprintf(fp, "mdrun args benchmarks : %s\n", cmd_args_bench); + fprintf(fp, "Benchmark steps : "); + fprintf(fp, "%"GMX_PRId64, bench_nsteps); + fprintf(fp, "\n"); + fprintf(fp, "dlb equilibration steps : %d\n", presteps); + if (sim_part > 1) + { + fprintf(fp, "Checkpoint 
time step : "); + fprintf(fp, "%"GMX_PRId64, cpt_steps); + fprintf(fp, "\n"); + } + fprintf(fp, "mdrun args at launchtime: %s\n", cmd_args_launch); + + if (new_sim_nsteps >= 0) + { + bOverwrite = TRUE; + fprintf(stderr, "Note: Simulation input file %s will have ", opt2fn("-so", NFILE, fnm)); + fprintf(stderr, "%"GMX_PRId64, new_sim_nsteps+cpt_steps); + fprintf(stderr, " steps.\n"); + fprintf(fp, "Simulation steps : "); + fprintf(fp, "%"GMX_PRId64, new_sim_nsteps); + fprintf(fp, "\n"); + } + if (repeats > 1) + { + fprintf(fp, "Repeats for each test : %d\n", repeats); + } + + if (npme_fixed >= -1) + { + fprintf(fp, "Fixing -npme at : %d\n", npme_fixed); + } + + fprintf(fp, "Input file : %s\n", opt2fn("-s", NFILE, fnm)); + fprintf(fp, " PME/PP load estimate : %g\n", guessPMEratio); + + /* Allocate memory for the inputinfo struct: */ + snew(info, 1); + info->nr_inputfiles = ntprs; + for (i = 0; i < ntprs; i++) + { + snew(info->rcoulomb, ntprs); + snew(info->rvdw, ntprs); + snew(info->rlist, ntprs); + snew(info->rlistlong, ntprs); + snew(info->nkx, ntprs); + snew(info->nky, ntprs); + snew(info->nkz, ntprs); + snew(info->fsx, ntprs); + snew(info->fsy, ntprs); + snew(info->fsz, ntprs); + } + /* Make alternative tpr files to test: */ + snew(tpr_names, ntprs); + for (i = 0; i < ntprs; i++) + { + snew(tpr_names[i], STRLEN); + } + + /* It can be that ntprs is reduced by make_benchmark_tprs if not enough + * different grids could be found. 
*/ + make_benchmark_tprs(opt2fn("-s", NFILE, fnm), tpr_names, bench_nsteps+presteps, + cpt_steps, rmin, rmax, bScaleRvdw, &ntprs, info, fp); + + /********************************************************************************/ + /* Main loop over all scenarios we need to test: tpr files, PME nodes, repeats */ + /********************************************************************************/ + snew(perfdata, ntprs); + if (bBenchmark) + { + do_the_tests(fp, tpr_names, maxPMEnodes, minPMEnodes, npme_fixed, npmevalues_opt[0], perfdata, &pmeentries, + repeats, nnodes, ntprs, bThreads, cmd_mpirun, cmd_np, cmd_mdrun, + cmd_args_bench, fnm, NFILE, presteps, cpt_steps); + + fprintf(fp, "\nTuning took%8.1f minutes.\n", (gmx_gettime()-seconds)/60.0); + + /* Analyse the results and give a suggestion for optimal settings: */ + bKeepTPR = analyze_data(fp, opt2fn("-p", NFILE, fnm), perfdata, nnodes, ntprs, pmeentries, + repeats, info, &best_tpr, &best_npme); + + /* Take the best-performing tpr file and enlarge nsteps to original value */ + if (bKeepTPR && !bOverwrite) + { + simulation_tpr = opt2fn("-s", NFILE, fnm); + } + else + { + simulation_tpr = opt2fn("-so", NFILE, fnm); + modify_PMEsettings(bOverwrite ? (new_sim_nsteps+cpt_steps) : info->orig_sim_steps, + info->orig_init_step, tpr_names[best_tpr], simulation_tpr); + } + + /* Let's get rid of the temporary benchmark input files */ + for (i = 0; i < ntprs; i++) + { + fprintf(stdout, "Deleting temporary benchmark input file %s\n", tpr_names[i]); + remove(tpr_names[i]); + } + + /* Now start the real simulation if the user requested it ... */ + launch_simulation(bLaunch, fp, bThreads, cmd_mpirun, cmd_np, cmd_mdrun, + cmd_args_launch, simulation_tpr, best_npme); + } + gmx_ffclose(fp); + + /* ... 
or simply print the performance results to screen: */ + if (!bLaunch) + { + finalize(opt2fn("-p", NFILE, fnm)); + } + + return 0; +} diff --cc src/gromacs/pulling/pull_rotation.c index 0590801eb5,0000000000..6f2d45accb mode 100644,000000..100644 --- a/src/gromacs/pulling/pull_rotation.c +++ b/src/gromacs/pulling/pull_rotation.c @@@ -1,4095 -1,0 +1,4096 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2008, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. 
Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include "domdec.h" +#include "smalloc.h" +#include "network.h" +#include "pbc.h" +#include "mdrun.h" +#include "txtdump.h" +#include "names.h" +#include "mtop_util.h" +#include "names.h" +#include "nrjac.h" +#include "vec.h" +#include "gmx_ga2la.h" +#include "xvgr.h" +#include "copyrite.h" +#include "macros.h" + +#include "gromacs/fileio/futil.h" +#include "gromacs/fileio/gmxfio.h" +#include "gromacs/fileio/trnio.h" +#include "gromacs/timing/cyclecounter.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/qsort_threadsafe.h" +#include "gromacs/pulling/pull_rotation.h" +#include "gromacs/mdlib/groupcoord.h" ++#include "gromacs/math/utilities.h" + +static char *RotStr = {"Enforced rotation:"}; + +/* Set the minimum weight for the determination of the slab centers */ +#define WEIGHT_MIN (10*GMX_FLOAT_MIN) + +/* Helper structure for sorting positions along rotation vector */ +typedef struct { + real xcproj; /* Projection of xc on the rotation vector */ + int ind; /* Index of xc */ + real m; /* Mass */ + rvec x; /* Position */ + rvec x_ref; /* Reference position */ +} sort_along_vec_t; + + +/* Enforced rotation / flexible: determine the angle of each slab */ +typedef struct gmx_slabdata +{ + int nat; /* Number of atoms belonging to this slab */ + rvec *x; /* The positions belonging to this slab. 
In + general, this should be all positions of the + whole rotation group, but we leave those away + that have a small enough weight */ + rvec *ref; /* Same for reference */ + real *weight; /* The weight for each atom */ +} t_gmx_slabdata; + + +/* Helper structure for potential fitting */ +typedef struct gmx_potfit +{ + real *degangle; /* Set of angles for which the potential is + calculated. The optimum fit is determined as + the angle for with the potential is minimal */ + real *V; /* Potential for the different angles */ + matrix *rotmat; /* Rotation matrix corresponding to the angles */ +} t_gmx_potfit; + + +/* Enforced rotation data for all groups */ +typedef struct gmx_enfrot +{ + FILE *out_rot; /* Output file for rotation data */ + FILE *out_torque; /* Output file for torque data */ + FILE *out_angles; /* Output file for slab angles for flexible type */ + FILE *out_slabs; /* Output file for slab centers */ + int bufsize; /* Allocation size of buf */ + rvec *xbuf; /* Coordinate buffer variable for sorting */ + real *mbuf; /* Masses buffer variable for sorting */ + sort_along_vec_t *data; /* Buffer variable needed for position sorting */ + real *mpi_inbuf; /* MPI buffer */ + real *mpi_outbuf; /* MPI buffer */ + int mpi_bufsize; /* Allocation size of in & outbuf */ + unsigned long Flags; /* mdrun flags */ + gmx_bool bOut; /* Used to skip first output when appending to + * avoid duplicate entries in rotation outfiles */ +} t_gmx_enfrot; + + +/* Global enforced rotation data for a single rotation group */ +typedef struct gmx_enfrotgrp +{ + real degangle; /* Rotation angle in degrees */ + matrix rotmat; /* Rotation matrix */ + atom_id *ind_loc; /* Local rotation indices */ + int nat_loc; /* Number of local group atoms */ + int nalloc_loc; /* Allocation size for ind_loc and weight_loc */ + + real V; /* Rotation potential for this rotation group */ + rvec *f_rot_loc; /* Array to store the forces on the local atoms + resulting from enforced rotation potential */ + + 
/* Collective coordinates for the whole rotation group */ + real *xc_ref_length; /* Length of each x_rotref vector after x_rotref + has been put into origin */ + int *xc_ref_ind; /* Position of each local atom in the collective + array */ + rvec xc_center; /* Center of the rotation group positions, may + be mass weighted */ + rvec xc_ref_center; /* dito, for the reference positions */ + rvec *xc; /* Current (collective) positions */ + ivec *xc_shifts; /* Current (collective) shifts */ + ivec *xc_eshifts; /* Extra shifts since last DD step */ + rvec *xc_old; /* Old (collective) positions */ + rvec *xc_norm; /* Normalized form of the current positions */ + rvec *xc_ref_sorted; /* Reference positions (sorted in the same order + as xc when sorted) */ + int *xc_sortind; /* Where is a position found after sorting? */ + real *mc; /* Collective masses */ + real *mc_sorted; + real invmass; /* one over the total mass of the rotation group */ + + real torque_v; /* Torque in the direction of rotation vector */ + real angle_v; /* Actual angle of the whole rotation group */ + /* Fixed rotation only */ + real weight_v; /* Weights for angle determination */ + rvec *xr_loc; /* Local reference coords, correctly rotated */ + rvec *x_loc_pbc; /* Local current coords, correct PBC image */ + real *m_loc; /* Masses of the current local atoms */ + + /* Flexible rotation only */ + int nslabs_alloc; /* For this many slabs memory is allocated */ + int slab_first; /* Lowermost slab for that the calculation needs + to be performed at a given time step */ + int slab_last; /* Uppermost slab ... */ + int slab_first_ref; /* First slab for which ref. center is stored */ + int slab_last_ref; /* Last ... 
*/ + int slab_buffer; /* Slab buffer region around reference slabs */ + int *firstatom; /* First relevant atom for a slab */ + int *lastatom; /* Last relevant atom for a slab */ + rvec *slab_center; /* Gaussian-weighted slab center */ + rvec *slab_center_ref; /* Gaussian-weighted slab center for the + reference positions */ + real *slab_weights; /* Sum of gaussian weights in a slab */ + real *slab_torque_v; /* Torque T = r x f for each slab. */ + /* torque_v = m.v = angular momentum in the + direction of v */ + real max_beta; /* min_gaussian from inputrec->rotgrp is the + minimum value the gaussian must have so that + the force is actually evaluated max_beta is + just another way to put it */ + real *gn_atom; /* Precalculated gaussians for a single atom */ + int *gn_slabind; /* Tells to which slab each precalculated gaussian + belongs */ + rvec *slab_innersumvec; /* Inner sum of the flexible2 potential per slab; + this is precalculated for optimization reasons */ + t_gmx_slabdata *slab_data; /* Holds atom positions and gaussian weights + of atoms belonging to a slab */ + + /* For potential fits with varying angle: */ + t_gmx_potfit *PotAngleFit; /* Used for fit type 'potential' */ +} t_gmx_enfrotgrp; + + +/* Activate output of forces for correctness checks */ +/* #define PRINT_FORCES */ +#ifdef PRINT_FORCES +#define PRINT_FORCE_J fprintf(stderr, "f%d = %15.8f %15.8f %15.8f\n", erg->xc_ref_ind[j], erg->f_rot_loc[j][XX], erg->f_rot_loc[j][YY], erg->f_rot_loc[j][ZZ]); +#define PRINT_POT_TAU if (MASTER(cr)) { \ + fprintf(stderr, "potential = %15.8f\n" "torque = %15.8f\n", erg->V, erg->torque_v); \ +} +#else +#define PRINT_FORCE_J +#define PRINT_POT_TAU +#endif + +/* Shortcuts for often used queries */ +#define ISFLEX(rg) ( (rg->eType == erotgFLEX) || (rg->eType == erotgFLEXT) || (rg->eType == erotgFLEX2) || (rg->eType == erotgFLEX2T) ) +#define ISCOLL(rg) ( (rg->eType == erotgFLEX) || (rg->eType == erotgFLEXT) || (rg->eType == erotgFLEX2) || (rg->eType == erotgFLEX2T) 
|| (rg->eType == erotgRMPF) || (rg->eType == erotgRM2PF) ) + + +/* Does any of the rotation groups use slab decomposition? */ +static gmx_bool HaveFlexibleGroups(t_rot *rot) +{ + int g; + t_rotgrp *rotg; + + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (ISFLEX(rotg)) + { + return TRUE; + } + } + + return FALSE; +} + + +/* Is for any group the fit angle determined by finding the minimum of the + * rotation potential? */ +static gmx_bool HavePotFitGroups(t_rot *rot) +{ + int g; + t_rotgrp *rotg; + + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (erotgFitPOT == rotg->eFittype) + { + return TRUE; + } + } + + return FALSE; +} + + +static double** allocate_square_matrix(int dim) +{ + int i; + double** mat = NULL; + + + snew(mat, dim); + for (i = 0; i < dim; i++) + { + snew(mat[i], dim); + } + + return mat; +} + + +static void free_square_matrix(double** mat, int dim) +{ + int i; + + + for (i = 0; i < dim; i++) + { + sfree(mat[i]); + } + sfree(mat); +} + + +/* Return the angle for which the potential is minimal */ +static real get_fitangle(t_rotgrp *rotg, gmx_enfrotgrp_t erg) +{ + int i; + real fitangle = -999.9; + real pot_min = GMX_FLOAT_MAX; + t_gmx_potfit *fit; + + + fit = erg->PotAngleFit; + + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + if (fit->V[i] < pot_min) + { + pot_min = fit->V[i]; + fitangle = fit->degangle[i]; + } + } + + return fitangle; +} + + +/* Reduce potential angle fit data for this group at this time step? */ +static gmx_inline gmx_bool bPotAngle(t_rot *rot, t_rotgrp *rotg, gmx_int64_t step) +{ + return ( (erotgFitPOT == rotg->eFittype) && (do_per_step(step, rot->nstsout) || do_per_step(step, rot->nstrout)) ); +} + +/* Reduce slab torqe data for this group at this time step? */ +static gmx_inline gmx_bool bSlabTau(t_rot *rot, t_rotgrp *rotg, gmx_int64_t step) +{ + return ( (ISFLEX(rotg)) && do_per_step(step, rot->nstsout) ); +} + +/* Output rotation energy, torques, etc. 
for each rotation group */ +static void reduce_output(t_commrec *cr, t_rot *rot, real t, gmx_int64_t step) +{ + int g, i, islab, nslabs = 0; + int count; /* MPI element counter */ + t_rotgrp *rotg; + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real fitangle; + gmx_bool bFlex; + + + er = rot->enfrot; + + /* Fill the MPI buffer with stuff to reduce. If items are added for reduction + * here, the MPI buffer size has to be enlarged also in calc_mpi_bufsize() */ + if (PAR(cr)) + { + count = 0; + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + nslabs = erg->slab_last - erg->slab_first + 1; + er->mpi_inbuf[count++] = erg->V; + er->mpi_inbuf[count++] = erg->torque_v; + er->mpi_inbuf[count++] = erg->angle_v; + er->mpi_inbuf[count++] = erg->weight_v; /* weights are not needed for flex types, but this is just a single value */ + + if (bPotAngle(rot, rotg, step)) + { + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + er->mpi_inbuf[count++] = erg->PotAngleFit->V[i]; + } + } + if (bSlabTau(rot, rotg, step)) + { + for (i = 0; i < nslabs; i++) + { + er->mpi_inbuf[count++] = erg->slab_torque_v[i]; + } + } + } + if (count > er->mpi_bufsize) + { + gmx_fatal(FARGS, "%s MPI buffer overflow, please report this error.", RotStr); + } + +#ifdef GMX_MPI + MPI_Reduce(er->mpi_inbuf, er->mpi_outbuf, count, GMX_MPI_REAL, MPI_SUM, MASTERRANK(cr), cr->mpi_comm_mygroup); +#endif + + /* Copy back the reduced data from the buffer on the master */ + if (MASTER(cr)) + { + count = 0; + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + nslabs = erg->slab_last - erg->slab_first + 1; + erg->V = er->mpi_outbuf[count++]; + erg->torque_v = er->mpi_outbuf[count++]; + erg->angle_v = er->mpi_outbuf[count++]; + erg->weight_v = er->mpi_outbuf[count++]; + + if (bPotAngle(rot, rotg, step)) + { + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + 
erg->PotAngleFit->V[i] = er->mpi_outbuf[count++]; + } + } + if (bSlabTau(rot, rotg, step)) + { + for (i = 0; i < nslabs; i++) + { + erg->slab_torque_v[i] = er->mpi_outbuf[count++]; + } + } + } + } + } + + /* Output */ + if (MASTER(cr)) + { + /* Angle and torque for each rotation group */ + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + bFlex = ISFLEX(rotg); + + erg = rotg->enfrotgrp; + + /* Output to main rotation output file: */ + if (do_per_step(step, rot->nstrout) ) + { + if (erotgFitPOT == rotg->eFittype) + { + fitangle = get_fitangle(rotg, erg); + } + else + { + if (bFlex) + { + fitangle = erg->angle_v; /* RMSD fit angle */ + } + else + { + fitangle = (erg->angle_v/erg->weight_v)*180.0*M_1_PI; + } + } + fprintf(er->out_rot, "%12.4f", fitangle); + fprintf(er->out_rot, "%12.3e", erg->torque_v); + fprintf(er->out_rot, "%12.3e", erg->V); + } + + if (do_per_step(step, rot->nstsout) ) + { + /* Output to torque log file: */ + if (bFlex) + { + fprintf(er->out_torque, "%12.3e%6d", t, g); + for (i = erg->slab_first; i <= erg->slab_last; i++) + { + islab = i - erg->slab_first; /* slab index */ + /* Only output if enough weight is in slab */ + if (erg->slab_weights[islab] > rotg->min_gaussian) + { + fprintf(er->out_torque, "%6d%12.3e", i, erg->slab_torque_v[islab]); + } + } + fprintf(er->out_torque, "\n"); + } + + /* Output to angles log file: */ + if (erotgFitPOT == rotg->eFittype) + { + fprintf(er->out_angles, "%12.3e%6d%12.4f", t, g, erg->degangle); + /* Output energies at a set of angles around the reference angle */ + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + fprintf(er->out_angles, "%12.3e", erg->PotAngleFit->V[i]); + } + fprintf(er->out_angles, "\n"); + } + } + } + if (do_per_step(step, rot->nstrout) ) + { + fprintf(er->out_rot, "\n"); + } + } +} + + +/* Add the forces from enforced rotation potential to the local forces. 
+ * Should be called after the SR forces have been evaluated */ +extern real add_rot_forces(t_rot *rot, rvec f[], t_commrec *cr, gmx_int64_t step, real t) +{ + int g, l, ii; + t_rotgrp *rotg; + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real Vrot = 0.0; /* If more than one rotation group is present, Vrot + assembles the local parts from all groups */ + + + er = rot->enfrot; + + /* Loop over enforced rotation groups (usually 1, though) + * Apply the forces from rotation potentials */ + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + Vrot += erg->V; /* add the local parts from the nodes */ + for (l = 0; l < erg->nat_loc; l++) + { + /* Get the right index of the local force */ + ii = erg->ind_loc[l]; + /* Add */ + rvec_inc(f[ii], erg->f_rot_loc[l]); + } + } + + /* Reduce energy,torque, angles etc. to get the sum values (per rotation group) + * on the master and output these values to file. */ + if ( (do_per_step(step, rot->nstrout) || do_per_step(step, rot->nstsout)) && er->bOut) + { + reduce_output(cr, rot, t, step); + } + + /* When appending, er->bOut is FALSE the first time to avoid duplicate entries */ + er->bOut = TRUE; + + PRINT_POT_TAU + + return Vrot; +} + + +/* The Gaussian norm is chosen such that the sum of the gaussian functions + * over the slabs is approximately 1.0 everywhere */ +#define GAUSS_NORM 0.569917543430618 + + +/* Calculate the maximum beta that leads to a gaussian larger min_gaussian, + * also does some checks + */ +static double calc_beta_max(real min_gaussian, real slab_dist) +{ + double sigma; + double arg; + + + /* Actually the next two checks are already made in grompp */ + if (slab_dist <= 0) + { + gmx_fatal(FARGS, "Slab distance of flexible rotation groups must be >=0 !"); + } + if (min_gaussian <= 0) + { + gmx_fatal(FARGS, "Cutoff value for Gaussian must be > 0. 
(You requested %f)"); + } + + /* Define the sigma value */ + sigma = 0.7*slab_dist; + + /* Calculate the argument for the logarithm and check that the log() result is negative or 0 */ + arg = min_gaussian/GAUSS_NORM; + if (arg > 1.0) + { + gmx_fatal(FARGS, "min_gaussian of flexible rotation groups must be <%g", GAUSS_NORM); + } + + return sqrt(-2.0*sigma*sigma*log(min_gaussian/GAUSS_NORM)); +} + + +static gmx_inline real calc_beta(rvec curr_x, t_rotgrp *rotg, int n) +{ + return iprod(curr_x, rotg->vec) - rotg->slab_dist * n; +} + + +static gmx_inline real gaussian_weight(rvec curr_x, t_rotgrp *rotg, int n) +{ + const real norm = GAUSS_NORM; + real sigma; + + + /* Define the sigma value */ + sigma = 0.7*rotg->slab_dist; + /* Calculate the Gaussian value of slab n for position curr_x */ + return norm * exp( -0.5 * sqr( calc_beta(curr_x, rotg, n)/sigma ) ); +} + + +/* Returns the weight in a single slab, also calculates the Gaussian- and mass- + * weighted sum of positions for that slab */ +static real get_slab_weight(int j, t_rotgrp *rotg, rvec xc[], real mc[], rvec *x_weighted_sum) +{ + rvec curr_x; /* The position of an atom */ + rvec curr_x_weighted; /* The gaussian-weighted position */ + real gaussian; /* A single gaussian weight */ + real wgauss; /* gaussian times current mass */ + real slabweight = 0.0; /* The sum of weights in the slab */ + int i, islab; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + clear_rvec(*x_weighted_sum); + + /* Slab index */ + islab = j - erg->slab_first; + + /* Loop over all atoms in the rotation group */ + for (i = 0; i < rotg->nat; i++) + { + copy_rvec(xc[i], curr_x); + gaussian = gaussian_weight(curr_x, rotg, j); + wgauss = gaussian * mc[i]; + svmul(wgauss, curr_x, curr_x_weighted); + rvec_add(*x_weighted_sum, curr_x_weighted, *x_weighted_sum); + slabweight += wgauss; + } /* END of loop over rotation group atoms */ + + return slabweight; +} + + +static void get_slab_centers( + 
t_rotgrp *rotg, /* The rotation group information */ + rvec *xc, /* The rotation group positions; will + typically be enfrotgrp->xc, but at first call + it is enfrotgrp->xc_ref */ + real *mc, /* The masses of the rotation group atoms */ + int g, /* The number of the rotation group */ + real time, /* Used for output only */ + FILE *out_slabs, /* For outputting center per slab information */ + gmx_bool bOutStep, /* Is this an output step? */ + gmx_bool bReference) /* If this routine is called from + init_rot_group we need to store + the reference slab centers */ +{ + int j, islab; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Loop over slabs */ + for (j = erg->slab_first; j <= erg->slab_last; j++) + { + islab = j - erg->slab_first; + erg->slab_weights[islab] = get_slab_weight(j, rotg, xc, mc, &erg->slab_center[islab]); + + /* We can do the calculations ONLY if there is weight in the slab! */ + if (erg->slab_weights[islab] > WEIGHT_MIN) + { + svmul(1.0/erg->slab_weights[islab], erg->slab_center[islab], erg->slab_center[islab]); + } + else + { + /* We need to check this here, since we divide through slab_weights + * in the flexible low-level routines! */ + gmx_fatal(FARGS, "Not enough weight in slab %d. 
Slab center cannot be determined!", j); + } + + /* At first time step: save the centers of the reference structure */ + if (bReference) + { + copy_rvec(erg->slab_center[islab], erg->slab_center_ref[islab]); + } + } /* END of loop over slabs */ + + /* Output on the master */ + if ( (NULL != out_slabs) && bOutStep) + { + fprintf(out_slabs, "%12.3e%6d", time, g); + for (j = erg->slab_first; j <= erg->slab_last; j++) + { + islab = j - erg->slab_first; + fprintf(out_slabs, "%6d%12.3e%12.3e%12.3e", + j, erg->slab_center[islab][XX], erg->slab_center[islab][YY], erg->slab_center[islab][ZZ]); + } + fprintf(out_slabs, "\n"); + } +} + + +static void calc_rotmat( + rvec vec, + real degangle, /* Angle alpha of rotation at time t in degrees */ + matrix rotmat) /* Rotation matrix */ +{ + real radangle; /* Rotation angle in radians */ + real cosa; /* cosine alpha */ + real sina; /* sine alpha */ + real OMcosa; /* 1 - cos(alpha) */ + real dumxy, dumxz, dumyz; /* save computations */ + rvec rot_vec; /* Rotate around rot_vec ... 
*/ + + + radangle = degangle * M_PI/180.0; + copy_rvec(vec, rot_vec ); + + /* Precompute some variables: */ + cosa = cos(radangle); + sina = sin(radangle); + OMcosa = 1.0 - cosa; + dumxy = rot_vec[XX]*rot_vec[YY]*OMcosa; + dumxz = rot_vec[XX]*rot_vec[ZZ]*OMcosa; + dumyz = rot_vec[YY]*rot_vec[ZZ]*OMcosa; + + /* Construct the rotation matrix for this rotation group: */ + /* 1st column: */ + rotmat[XX][XX] = cosa + rot_vec[XX]*rot_vec[XX]*OMcosa; + rotmat[YY][XX] = dumxy + rot_vec[ZZ]*sina; + rotmat[ZZ][XX] = dumxz - rot_vec[YY]*sina; + /* 2nd column: */ + rotmat[XX][YY] = dumxy - rot_vec[ZZ]*sina; + rotmat[YY][YY] = cosa + rot_vec[YY]*rot_vec[YY]*OMcosa; + rotmat[ZZ][YY] = dumyz + rot_vec[XX]*sina; + /* 3rd column: */ + rotmat[XX][ZZ] = dumxz + rot_vec[YY]*sina; + rotmat[YY][ZZ] = dumyz - rot_vec[XX]*sina; + rotmat[ZZ][ZZ] = cosa + rot_vec[ZZ]*rot_vec[ZZ]*OMcosa; + +#ifdef PRINTMATRIX + int iii, jjj; + + for (iii = 0; iii < 3; iii++) + { + for (jjj = 0; jjj < 3; jjj++) + { + fprintf(stderr, " %10.8f ", rotmat[iii][jjj]); + } + fprintf(stderr, "\n"); + } +#endif +} + + +/* Calculates torque on the rotation axis tau = position x force */ +static gmx_inline real torque( + rvec rotvec, /* rotation vector; MUST be normalized! 
*/ + rvec force, /* force */ + rvec x, /* position of atom on which the force acts */ + rvec pivot) /* pivot point of rotation axis */ +{ + rvec vectmp, tau; + + + /* Subtract offset */ + rvec_sub(x, pivot, vectmp); + + /* position x force */ + cprod(vectmp, force, tau); + + /* Return the part of the torque which is parallel to the rotation vector */ + return iprod(tau, rotvec); +} + + +/* Right-aligned output of value with standard width */ +static void print_aligned(FILE *fp, char *str) +{ + fprintf(fp, "%12s", str); +} + + +/* Right-aligned output of value with standard short width */ +static void print_aligned_short(FILE *fp, char *str) +{ + fprintf(fp, "%6s", str); +} + + +static FILE *open_output_file(const char *fn, int steps, const char what[]) +{ + FILE *fp; + + + fp = gmx_ffopen(fn, "w"); + + fprintf(fp, "# Output of %s is written in intervals of %d time step%s.\n#\n", + what, steps, steps > 1 ? "s" : ""); + + return fp; +} + + +/* Open output file for slab center data. Call on master only */ +static FILE *open_slab_out(const char *fn, t_rot *rot) +{ + FILE *fp; + int g, i; + t_rotgrp *rotg; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + fp = open_output_file(fn, rot->nstsout, "gaussian weighted slab centers"); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (ISFLEX(rotg)) + { + fprintf(fp, "# Rotation group %d (%s), slab distance %f nm, %s.\n", + g, erotg_names[rotg->eType], rotg->slab_dist, + rotg->bMassW ? "centers of mass" : "geometrical centers"); + } + } + + fprintf(fp, "# Reference centers are listed first (t=-1).\n"); + fprintf(fp, "# The following columns have the syntax:\n"); + fprintf(fp, "# "); + print_aligned_short(fp, "t"); + print_aligned_short(fp, "grp"); + /* Print legend for the first two entries only ... 
*/ + for (i = 0; i < 2; i++) + { + print_aligned_short(fp, "slab"); + print_aligned(fp, "X center"); + print_aligned(fp, "Y center"); + print_aligned(fp, "Z center"); + } + fprintf(fp, " ...\n"); + fflush(fp); + } + + return fp; +} + + +/* Adds 'buf' to 'str' */ +static void add_to_string(char **str, char *buf) +{ + int len; + + + len = strlen(*str) + strlen(buf) + 1; + srenew(*str, len); + strcat(*str, buf); +} + + +static void add_to_string_aligned(char **str, char *buf) +{ + char buf_aligned[STRLEN]; + + sprintf(buf_aligned, "%12s", buf); + add_to_string(str, buf_aligned); +} + + +/* Open output file and print some general information about the rotation groups. + * Call on master only */ +static FILE *open_rot_out(const char *fn, t_rot *rot, const output_env_t oenv) +{ + FILE *fp; + int g, nsets; + t_rotgrp *rotg; + const char **setname; + char buf[50], buf2[75]; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + gmx_bool bFlex; + char *LegendStr = NULL; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + fp = xvgropen(fn, "Rotation angles and energy", "Time (ps)", "angles (degrees) and energies (kJ/mol)", oenv); + fprintf(fp, "# Output of enforced rotation data is written in intervals of %d time step%s.\n#\n", rot->nstrout, rot->nstrout > 1 ? 
"s" : ""); + fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector v.\n"); + fprintf(fp, "# To obtain the vectorial torque, multiply tau with the group's rot_vec.\n"); + fprintf(fp, "# For flexible groups, tau(t,n) from all slabs n have been summed in a single value tau(t) here.\n"); + fprintf(fp, "# The torques tau(t,n) are found in the rottorque.log (-rt) output file\n"); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + bFlex = ISFLEX(rotg); + + fprintf(fp, "#\n"); + fprintf(fp, "# ROTATION GROUP %d, potential type '%s':\n", g, erotg_names[rotg->eType]); + fprintf(fp, "# rot_massw%d %s\n", g, yesno_names[rotg->bMassW]); + fprintf(fp, "# rot_vec%d %12.5e %12.5e %12.5e\n", g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]); + fprintf(fp, "# rot_rate%d %12.5e degrees/ps\n", g, rotg->rate); + fprintf(fp, "# rot_k%d %12.5e kJ/(mol*nm^2)\n", g, rotg->k); + if (rotg->eType == erotgISO || rotg->eType == erotgPM || rotg->eType == erotgRM || rotg->eType == erotgRM2) + { + fprintf(fp, "# rot_pivot%d %12.5e %12.5e %12.5e nm\n", g, rotg->pivot[XX], rotg->pivot[YY], rotg->pivot[ZZ]); + } + + if (bFlex) + { + fprintf(fp, "# rot_slab_distance%d %f nm\n", g, rotg->slab_dist); + fprintf(fp, "# rot_min_gaussian%d %12.5e\n", g, rotg->min_gaussian); + } + + /* Output the centers of the rotation groups for the pivot-free potentials */ + if ((rotg->eType == erotgISOPF) || (rotg->eType == erotgPMPF) || (rotg->eType == erotgRMPF) || (rotg->eType == erotgRM2PF + || (rotg->eType == erotgFLEXT) || (rotg->eType == erotgFLEX2T)) ) + { + fprintf(fp, "# ref. grp. %d center %12.5e %12.5e %12.5e\n", g, + erg->xc_ref_center[XX], erg->xc_ref_center[YY], erg->xc_ref_center[ZZ]); + + fprintf(fp, "# grp. 
%d init.center %12.5e %12.5e %12.5e\n", g, + erg->xc_center[XX], erg->xc_center[YY], erg->xc_center[ZZ]); + } + + if ( (rotg->eType == erotgRM2) || (rotg->eType == erotgFLEX2) || (rotg->eType == erotgFLEX2T) ) + { + fprintf(fp, "# rot_eps%d %12.5e nm^2\n", g, rotg->eps); + } + if (erotgFitPOT == rotg->eFittype) + { + fprintf(fp, "#\n"); + fprintf(fp, "# theta_fit%d is determined by first evaluating the potential for %d angles around theta_ref%d.\n", + g, rotg->PotAngle_nstep, g); + fprintf(fp, "# The fit angle is the one with the smallest potential. It is given as the deviation\n"); + fprintf(fp, "# from the reference angle, i.e. if theta_ref=X and theta_fit=Y, then the angle with\n"); + fprintf(fp, "# minimal value of the potential is X+Y. Angular resolution is %g degrees.\n", rotg->PotAngle_step); + } + } + + /* Print a nice legend */ + snew(LegendStr, 1); + LegendStr[0] = '\0'; + sprintf(buf, "# %6s", "time"); + add_to_string_aligned(&LegendStr, buf); + + nsets = 0; + snew(setname, 4*rot->ngrp); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + sprintf(buf, "theta_ref%d", g); + add_to_string_aligned(&LegendStr, buf); + + sprintf(buf2, "%s (degrees)", buf); + setname[nsets] = strdup(buf2); + nsets++; + } + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + bFlex = ISFLEX(rotg); + + /* For flexible axis rotation we use RMSD fitting to determine the + * actual angle of the rotation group */ + if (bFlex || erotgFitPOT == rotg->eFittype) + { + sprintf(buf, "theta_fit%d", g); + } + else + { + sprintf(buf, "theta_av%d", g); + } + add_to_string_aligned(&LegendStr, buf); + sprintf(buf2, "%s (degrees)", buf); + setname[nsets] = strdup(buf2); + nsets++; + + sprintf(buf, "tau%d", g); + add_to_string_aligned(&LegendStr, buf); + sprintf(buf2, "%s (kJ/mol)", buf); + setname[nsets] = strdup(buf2); + nsets++; + + sprintf(buf, "energy%d", g); + add_to_string_aligned(&LegendStr, buf); + sprintf(buf2, "%s (kJ/mol)", buf); + setname[nsets] = strdup(buf2); + 
nsets++; + } + fprintf(fp, "#\n"); + + if (nsets > 1) + { + xvgr_legend(fp, nsets, setname, oenv); + } + sfree(setname); + + fprintf(fp, "#\n# Legend for the following data columns:\n"); + fprintf(fp, "%s\n", LegendStr); + sfree(LegendStr); + + fflush(fp); + } + + return fp; +} + + +/* Call on master only */ +static FILE *open_angles_out(const char *fn, t_rot *rot) +{ + int g, i; + FILE *fp; + t_rotgrp *rotg; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + char buf[100]; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + /* Open output file and write some information about it's structure: */ + fp = open_output_file(fn, rot->nstsout, "rotation group angles"); + fprintf(fp, "# All angles given in degrees, time in ps.\n"); + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + /* Output for this group happens only if potential type is flexible or + * if fit type is potential! */ + if (ISFLEX(rotg) || (erotgFitPOT == rotg->eFittype) ) + { + if (ISFLEX(rotg)) + { + sprintf(buf, " slab distance %f nm, ", rotg->slab_dist); + } + else + { + buf[0] = '\0'; + } + + fprintf(fp, "#\n# ROTATION GROUP %d '%s',%s fit type '%s'.\n", + g, erotg_names[rotg->eType], buf, erotg_fitnames[rotg->eFittype]); + + /* Special type of fitting using the potential minimum. This is + * done for the whole group only, not for the individual slabs. 
*/ + if (erotgFitPOT == rotg->eFittype) + { + fprintf(fp, "# To obtain theta_fit%d, the potential is evaluated for %d angles around theta_ref%d\n", g, rotg->PotAngle_nstep, g); + fprintf(fp, "# The fit angle in the rotation standard outfile is the one with minimal energy E(theta_fit) [kJ/mol].\n"); + fprintf(fp, "#\n"); + } + + fprintf(fp, "# Legend for the group %d data columns:\n", g); + fprintf(fp, "# "); + print_aligned_short(fp, "time"); + print_aligned_short(fp, "grp"); + print_aligned(fp, "theta_ref"); + + if (erotgFitPOT == rotg->eFittype) + { + /* Output the set of angles around the reference angle */ + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + sprintf(buf, "E(%g)", erg->PotAngleFit->degangle[i]); + print_aligned(fp, buf); + } + } + else + { + /* Output fit angle for each slab */ + print_aligned_short(fp, "slab"); + print_aligned_short(fp, "atoms"); + print_aligned(fp, "theta_fit"); + print_aligned_short(fp, "slab"); + print_aligned_short(fp, "atoms"); + print_aligned(fp, "theta_fit"); + fprintf(fp, " ..."); + } + fprintf(fp, "\n"); + } + } + fflush(fp); + } + + return fp; +} + + +/* Open torque output file and write some information about it's structure. 
+ * Call on master only */ +static FILE *open_torque_out(const char *fn, t_rot *rot) +{ + FILE *fp; + int g; + t_rotgrp *rotg; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + fp = open_output_file(fn, rot->nstsout, "torques"); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (ISFLEX(rotg)) + { + fprintf(fp, "# Rotation group %d (%s), slab distance %f nm.\n", g, erotg_names[rotg->eType], rotg->slab_dist); + fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector.\n"); + fprintf(fp, "# To obtain the vectorial torque, multiply tau with\n"); + fprintf(fp, "# rot_vec%d %10.3e %10.3e %10.3e\n", g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]); + fprintf(fp, "#\n"); + } + } + fprintf(fp, "# Legend for the following data columns: (tau=torque for that slab):\n"); + fprintf(fp, "# "); + print_aligned_short(fp, "t"); + print_aligned_short(fp, "grp"); + print_aligned_short(fp, "slab"); + print_aligned(fp, "tau"); + print_aligned_short(fp, "slab"); + print_aligned(fp, "tau"); + fprintf(fp, " ...\n"); + fflush(fp); + } + + return fp; +} + + +static void swap_val(double* vec, int i, int j) +{ + double tmp = vec[j]; + + + vec[j] = vec[i]; + vec[i] = tmp; +} + + +static void swap_col(double **mat, int i, int j) +{ + double tmp[3] = {mat[0][j], mat[1][j], mat[2][j]}; + + + mat[0][j] = mat[0][i]; + mat[1][j] = mat[1][i]; + mat[2][j] = mat[2][i]; + + mat[0][i] = tmp[0]; + mat[1][i] = tmp[1]; + mat[2][i] = tmp[2]; +} + + +/* Eigenvectors are stored in columns of eigen_vec */ +static void diagonalize_symmetric( + double **matrix, + double **eigen_vec, + double eigenval[3]) +{ + int n_rot; + + + jacobi(matrix, 3, eigenval, eigen_vec, &n_rot); + + /* sort in ascending order */ + if (eigenval[0] > eigenval[1]) + { + swap_val(eigenval, 0, 1); + swap_col(eigen_vec, 0, 1); + } + if (eigenval[1] > eigenval[2]) + { + swap_val(eigenval, 1, 2); + swap_col(eigen_vec, 1, 2); + } + if 
(eigenval[0] > eigenval[1]) + { + swap_val(eigenval, 0, 1); + swap_col(eigen_vec, 0, 1); + } +} + + +static void align_with_z( + rvec* s, /* Structure to align */ + int natoms, + rvec axis) +{ + int i, j, k; + rvec zet = {0.0, 0.0, 1.0}; + rvec rot_axis = {0.0, 0.0, 0.0}; + rvec *rotated_str = NULL; + real ooanorm; + real angle; + matrix rotmat; + + + snew(rotated_str, natoms); + + /* Normalize the axis */ + ooanorm = 1.0/norm(axis); + svmul(ooanorm, axis, axis); + + /* Calculate the angle for the fitting procedure */ + cprod(axis, zet, rot_axis); + angle = acos(axis[2]); + if (angle < 0.0) + { + angle += M_PI; + } + + /* Calculate the rotation matrix */ + calc_rotmat(rot_axis, angle*180.0/M_PI, rotmat); + + /* Apply the rotation matrix to s */ + for (i = 0; i < natoms; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + rotated_str[i][j] += rotmat[j][k]*s[i][k]; + } + } + } + + /* Rewrite the rotated structure to s */ + for (i = 0; i < natoms; i++) + { + for (j = 0; j < 3; j++) + { + s[i][j] = rotated_str[i][j]; + } + } + + sfree(rotated_str); +} + + +static void calc_correl_matrix(rvec* Xstr, rvec* Ystr, double** Rmat, int natoms) +{ + int i, j, k; + + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + Rmat[i][j] = 0.0; + } + } + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < natoms; k++) + { + Rmat[i][j] += Ystr[k][i] * Xstr[k][j]; + } + } + } +} + + +static void weigh_coords(rvec* str, real* weight, int natoms) +{ + int i, j; + + + for (i = 0; i < natoms; i++) + { + for (j = 0; j < 3; j++) + { + str[i][j] *= sqrt(weight[i]); + } + } +} + + +static real opt_angle_analytic( + rvec* ref_s, + rvec* act_s, + real* weight, + int natoms, + rvec ref_com, + rvec act_com, + rvec axis) +{ + int i, j, k; + rvec *ref_s_1 = NULL; + rvec *act_s_1 = NULL; + rvec shift; + double **Rmat, **RtR, **eigvec; + double eigval[3]; + double V[3][3], WS[3][3]; + double rot_matrix[3][3]; + double opt_angle; + + + /* Do not 
change the original coordinates */ + snew(ref_s_1, natoms); + snew(act_s_1, natoms); + for (i = 0; i < natoms; i++) + { + copy_rvec(ref_s[i], ref_s_1[i]); + copy_rvec(act_s[i], act_s_1[i]); + } + + /* Translate the structures to the origin */ + shift[XX] = -ref_com[XX]; + shift[YY] = -ref_com[YY]; + shift[ZZ] = -ref_com[ZZ]; + translate_x(ref_s_1, natoms, shift); + + shift[XX] = -act_com[XX]; + shift[YY] = -act_com[YY]; + shift[ZZ] = -act_com[ZZ]; + translate_x(act_s_1, natoms, shift); + + /* Align rotation axis with z */ + align_with_z(ref_s_1, natoms, axis); + align_with_z(act_s_1, natoms, axis); + + /* Correlation matrix */ + Rmat = allocate_square_matrix(3); + + for (i = 0; i < natoms; i++) + { + ref_s_1[i][2] = 0.0; + act_s_1[i][2] = 0.0; + } + + /* Weight positions with sqrt(weight) */ + if (NULL != weight) + { + weigh_coords(ref_s_1, weight, natoms); + weigh_coords(act_s_1, weight, natoms); + } + + /* Calculate correlation matrices R=YXt (X=ref_s; Y=act_s) */ + calc_correl_matrix(ref_s_1, act_s_1, Rmat, natoms); + + /* Calculate RtR */ + RtR = allocate_square_matrix(3); + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + RtR[i][j] += Rmat[k][i] * Rmat[k][j]; + } + } + } + /* Diagonalize RtR */ + snew(eigvec, 3); + for (i = 0; i < 3; i++) + { + snew(eigvec[i], 3); + } + + diagonalize_symmetric(RtR, eigvec, eigval); + swap_col(eigvec, 0, 1); + swap_col(eigvec, 1, 2); + swap_val(eigval, 0, 1); + swap_val(eigval, 1, 2); + + /* Calculate V */ + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + V[i][j] = 0.0; + WS[i][j] = 0.0; + } + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + WS[i][j] = eigvec[i][j] / sqrt(eigval[j]); + } + } + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + V[i][j] += Rmat[i][k]*WS[k][j]; + } + } + } + free_square_matrix(Rmat, 3); + + /* Calculate optimal rotation matrix */ + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { 
+ rot_matrix[i][j] = 0.0; + } + } + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + rot_matrix[i][j] += eigvec[i][k]*V[j][k]; + } + } + } + rot_matrix[2][2] = 1.0; + + /* In some cases abs(rot_matrix[0][0]) can be slighly larger + * than unity due to numerical inacurracies. To be able to calculate + * the acos function, we put these values back in range. */ + if (rot_matrix[0][0] > 1.0) + { + rot_matrix[0][0] = 1.0; + } + else if (rot_matrix[0][0] < -1.0) + { + rot_matrix[0][0] = -1.0; + } + + /* Determine the optimal rotation angle: */ + opt_angle = (-1.0)*acos(rot_matrix[0][0])*180.0/M_PI; + if (rot_matrix[0][1] < 0.0) + { + opt_angle = (-1.0)*opt_angle; + } + + /* Give back some memory */ + free_square_matrix(RtR, 3); + sfree(ref_s_1); + sfree(act_s_1); + for (i = 0; i < 3; i++) + { + sfree(eigvec[i]); + } + sfree(eigvec); + + return (real) opt_angle; +} + + +/* Determine angle of the group by RMSD fit to the reference */ +/* Not parallelized, call this routine only on the master */ +static real flex_fit_angle(t_rotgrp *rotg) +{ + int i; + rvec *fitcoords = NULL; + rvec center; /* Center of positions passed to the fit routine */ + real fitangle; /* Angle of the rotation group derived by fitting */ + rvec coord; + real scal; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Get the center of the rotation group. 
+ * Note, again, erg->xc has been sorted in do_flexible */ + get_center(erg->xc, erg->mc_sorted, rotg->nat, center); + + /* === Determine the optimal fit angle for the rotation group === */ + if (rotg->eFittype == erotgFitNORM) + { + /* Normalize every position to it's reference length */ + for (i = 0; i < rotg->nat; i++) + { + /* Put the center of the positions into the origin */ + rvec_sub(erg->xc[i], center, coord); + /* Determine the scaling factor for the length: */ + scal = erg->xc_ref_length[erg->xc_sortind[i]] / norm(coord); + /* Get position, multiply with the scaling factor and save */ + svmul(scal, coord, erg->xc_norm[i]); + } + fitcoords = erg->xc_norm; + } + else + { + fitcoords = erg->xc; + } + /* From the point of view of the current positions, the reference has rotated + * backwards. Since we output the angle relative to the fixed reference, + * we need the minus sign. */ + fitangle = -opt_angle_analytic(erg->xc_ref_sorted, fitcoords, erg->mc_sorted, + rotg->nat, erg->xc_ref_center, center, rotg->vec); + + return fitangle; +} + + +/* Determine actual angle of each slab by RMSD fit to the reference */ +/* Not parallelized, call this routine only on the master */ +static void flex_fit_angle_perslab( + int g, + t_rotgrp *rotg, + double t, + real degangle, + FILE *fp) +{ + int i, l, n, islab, ind; + rvec curr_x, ref_x; + rvec act_center; /* Center of actual positions that are passed to the fit routine */ + rvec ref_center; /* Same for the reference positions */ + real fitangle; /* Angle of a slab derived from an RMSD fit to + * the reference structure at t=0 */ + t_gmx_slabdata *sd; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real OOm_av; /* 1/average_mass of a rotation group atom */ + real m_rel; /* Relative mass of a rotation group atom */ + + + erg = rotg->enfrotgrp; + + /* Average mass of a rotation group atom: */ + OOm_av = erg->invmass*rotg->nat; + + /**********************************/ + /* First collect the data we 
need */ + /**********************************/ + + /* Collect the data for the individual slabs */ + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + sd = &(rotg->enfrotgrp->slab_data[islab]); + sd->nat = erg->lastatom[islab]-erg->firstatom[islab]+1; + ind = 0; + + /* Loop over the relevant atoms in the slab */ + for (l = erg->firstatom[islab]; l <= erg->lastatom[islab]; l++) + { + /* Current position of this atom: x[ii][XX/YY/ZZ] */ + copy_rvec(erg->xc[l], curr_x); + + /* The (unrotated) reference position of this atom is copied to ref_x. + * Beware, the xc coords have been sorted in do_flexible */ + copy_rvec(erg->xc_ref_sorted[l], ref_x); + + /* Save data for doing angular RMSD fit later */ + /* Save the current atom position */ + copy_rvec(curr_x, sd->x[ind]); + /* Save the corresponding reference position */ + copy_rvec(ref_x, sd->ref[ind]); + + /* Maybe also mass-weighting was requested. If yes, additionally + * multiply the weights with the relative mass of the atom. If not, + * multiply with unity. */ + m_rel = erg->mc_sorted[l]*OOm_av; + + /* Save the weight for this atom in this slab */ + sd->weight[ind] = gaussian_weight(curr_x, rotg, n) * m_rel; + + /* Next atom in this slab */ + ind++; + } + } + + /******************************/ + /* Now do the fit calculation */ + /******************************/ + + fprintf(fp, "%12.3e%6d%12.3f", t, g, degangle); + + /* === Now do RMSD fitting for each slab === */ + /* We require at least SLAB_MIN_ATOMS in a slab, such that the fit makes sense. 
*/ +#define SLAB_MIN_ATOMS 4 + + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + sd = &(rotg->enfrotgrp->slab_data[islab]); + if (sd->nat >= SLAB_MIN_ATOMS) + { + /* Get the center of the slabs reference and current positions */ + get_center(sd->ref, sd->weight, sd->nat, ref_center); + get_center(sd->x, sd->weight, sd->nat, act_center); + if (rotg->eFittype == erotgFitNORM) + { + /* Normalize every position to it's reference length + * prior to performing the fit */ + for (i = 0; i < sd->nat; i++) /* Center */ + { + rvec_dec(sd->ref[i], ref_center); + rvec_dec(sd->x[i], act_center); + /* Normalize x_i such that it gets the same length as ref_i */ + svmul( norm(sd->ref[i])/norm(sd->x[i]), sd->x[i], sd->x[i] ); + } + /* We already subtracted the centers */ + clear_rvec(ref_center); + clear_rvec(act_center); + } + fitangle = -opt_angle_analytic(sd->ref, sd->x, sd->weight, sd->nat, + ref_center, act_center, rotg->vec); + fprintf(fp, "%6d%6d%12.3f", n, sd->nat, fitangle); + } + } + fprintf(fp, "\n"); + +#undef SLAB_MIN_ATOMS +} + + +/* Shift x with is */ +static gmx_inline void shift_single_coord(matrix box, rvec x, const ivec is) +{ + int tx, ty, tz; + + + tx = is[XX]; + ty = is[YY]; + tz = is[ZZ]; + + if (TRICLINIC(box)) + { + x[XX] += tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX]; + x[YY] += ty*box[YY][YY]+tz*box[ZZ][YY]; + x[ZZ] += tz*box[ZZ][ZZ]; + } + else + { + x[XX] += tx*box[XX][XX]; + x[YY] += ty*box[YY][YY]; + x[ZZ] += tz*box[ZZ][ZZ]; + } +} + + +/* Determine the 'home' slab of this atom which is the + * slab with the highest Gaussian weight of all */ +#define round(a) (int)(a+0.5) +static gmx_inline int get_homeslab( + rvec curr_x, /* The position for which the home slab shall be determined */ + rvec rotvec, /* The rotation vector */ + real slabdist) /* The slab distance */ +{ + real dist; + + + /* The distance of the atom to the coordinate center (where the + * slab with index 0) is */ + dist = 
iprod(rotvec, curr_x); + + return round(dist / slabdist); +} + + +/* For a local atom determine the relevant slabs, i.e. slabs in + * which the gaussian is larger than min_gaussian + */ +static int get_single_atom_gaussians( + rvec curr_x, + t_rotgrp *rotg) +{ + int slab, homeslab; + real g; + int count = 0; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Determine the 'home' slab of this atom: */ + homeslab = get_homeslab(curr_x, rotg->vec, rotg->slab_dist); + + /* First determine the weight in the atoms home slab: */ + g = gaussian_weight(curr_x, rotg, homeslab); + + erg->gn_atom[count] = g; + erg->gn_slabind[count] = homeslab; + count++; + + + /* Determine the max slab */ + slab = homeslab; + while (g > rotg->min_gaussian) + { + slab++; + g = gaussian_weight(curr_x, rotg, slab); + erg->gn_slabind[count] = slab; + erg->gn_atom[count] = g; + count++; + } + count--; + + /* Determine the min slab */ + slab = homeslab; + do + { + slab--; + g = gaussian_weight(curr_x, rotg, slab); + erg->gn_slabind[count] = slab; + erg->gn_atom[count] = g; + count++; + } + while (g > rotg->min_gaussian); + count--; + + return count; +} + + +static void flex2_precalc_inner_sum(t_rotgrp *rotg) +{ + int i, n, islab; + rvec xi; /* positions in the i-sum */ + rvec xcn, ycn; /* the current and the reference slab centers */ + real gaussian_xi; + rvec yi0; + rvec rin; /* Helper variables */ + real fac, fac2; + rvec innersumvec; + real OOpsii, OOpsiistar; + real sin_rin; /* s_ii.r_ii */ + rvec s_in, tmpvec, tmpvec2; + real mi, wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + N_M = rotg->nat * erg->invmass; + + /* Loop over all slabs that contain something */ + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + + /* The current center of this slab is saved in xcn: */ + 
copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + /*** D. Calculate the whole inner sum used for second and third sum */ + /* For slab n, we need to loop over all atoms i again. Since we sorted + * the atoms with respect to the rotation vector, we know that it is sufficient + * to calculate from firstatom to lastatom only. All other contributions will + * be very small. */ + clear_rvec(innersumvec); + for (i = erg->firstatom[islab]; i <= erg->lastatom[islab]; i++) + { + /* Coordinate xi of this atom */ + copy_rvec(erg->xc[i], xi); + + /* The i-weights */ + gaussian_xi = gaussian_weight(xi, rotg, n); + mi = erg->mc_sorted[i]; /* need the sorted mass here */ + wi = N_M*mi; + + /* Calculate rin */ + copy_rvec(erg->xc_ref_sorted[i], yi0); /* Reference position yi0 */ + rvec_sub(yi0, ycn, tmpvec2); /* tmpvec2 = yi0 - ycn */ + mvmul(erg->rotmat, tmpvec2, rin); /* rin = Omega.(yi0 - ycn) */ + + /* Calculate psi_i* and sin */ + rvec_sub(xi, xcn, tmpvec2); /* tmpvec2 = xi - xcn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x (xi - xcn) */ + OOpsiistar = norm2(tmpvec)+rotg->eps; /* OOpsii* = 1/psii* = |v x (xi-xcn)|^2 + eps */ + OOpsii = norm(tmpvec); /* OOpsii = 1 / psii = |v x (xi - xcn)| */ + + /* * v x (xi - xcn) */ + unitv(tmpvec, s_in); /* sin = ---------------- */ + /* |v x (xi - xcn)| */ + + sin_rin = iprod(s_in, rin); /* sin_rin = sin . 
rin */ + + /* Now the whole sum */ + fac = OOpsii/OOpsiistar; + svmul(fac, rin, tmpvec); + fac2 = fac*fac*OOpsii; + svmul(fac2*sin_rin, s_in, tmpvec2); + rvec_dec(tmpvec, tmpvec2); + + svmul(wi*gaussian_xi*sin_rin, tmpvec, tmpvec2); + + rvec_inc(innersumvec, tmpvec2); + } /* now we have the inner sum, used both for sum2 and sum3 */ + + /* Save it to be used in do_flex2_lowlevel */ + copy_rvec(innersumvec, erg->slab_innersumvec[islab]); + } /* END of loop over slabs */ +} + + +static void flex_precalc_inner_sum(t_rotgrp *rotg) +{ + int i, n, islab; + rvec xi; /* position */ + rvec xcn, ycn; /* the current and the reference slab centers */ + rvec qin, rin; /* q_i^n and r_i^n */ + real bin; + rvec tmpvec; + rvec innersumvec; /* Inner part of sum_n2 */ + real gaussian_xi; /* Gaussian weight gn(xi) */ + real mi, wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + N_M = rotg->nat * erg->invmass; + + /* Loop over all slabs that contain something */ + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + + /* The current center of this slab is saved in xcn: */ + copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + /* For slab n, we need to loop over all atoms i again. Since we sorted + * the atoms with respect to the rotation vector, we know that it is sufficient + * to calculate from firstatom to lastatom only. All other contributions will + * be very small. 
*/ + clear_rvec(innersumvec); + for (i = erg->firstatom[islab]; i <= erg->lastatom[islab]; i++) + { + /* Coordinate xi of this atom */ + copy_rvec(erg->xc[i], xi); + + /* The i-weights */ + gaussian_xi = gaussian_weight(xi, rotg, n); + mi = erg->mc_sorted[i]; /* need the sorted mass here */ + wi = N_M*mi; + + /* Calculate rin and qin */ + rvec_sub(erg->xc_ref_sorted[i], ycn, tmpvec); /* tmpvec = yi0-ycn */ + mvmul(erg->rotmat, tmpvec, rin); /* rin = Omega.(yi0 - ycn) */ + cprod(rotg->vec, rin, tmpvec); /* tmpvec = v x Omega*(yi0-ycn) */ + + /* * v x Omega*(yi0-ycn) */ + unitv(tmpvec, qin); /* qin = --------------------- */ + /* |v x Omega*(yi0-ycn)| */ + + /* Calculate bin */ + rvec_sub(xi, xcn, tmpvec); /* tmpvec = xi-xcn */ + bin = iprod(qin, tmpvec); /* bin = qin*(xi-xcn) */ + + svmul(wi*gaussian_xi*bin, qin, tmpvec); + + /* Add this contribution to the inner sum: */ + rvec_add(innersumvec, tmpvec, innersumvec); + } /* now we have the inner sum vector S^n for this slab */ + /* Save it to be used in do_flex_lowlevel */ + copy_rvec(innersumvec, erg->slab_innersumvec[islab]); + } +} + + +static real do_flex2_lowlevel( + t_rotgrp *rotg, + real sigma, /* The Gaussian width sigma */ + rvec x[], + gmx_bool bOutstepRot, + gmx_bool bOutstepSlab, + matrix box) +{ + int count, ic, ii, j, m, n, islab, iigrp, ifit; + rvec xj; /* position in the i-sum */ + rvec yj0; /* the reference position in the j-sum */ + rvec xcn, ycn; /* the current and the reference slab centers */ + real V; /* This node's part of the rotation pot. 
energy */ + real gaussian_xj; /* Gaussian weight */ + real beta; + + real numerator, fit_numerator; + rvec rjn, fit_rjn; /* Helper variables */ + real fac, fac2; + + real OOpsij, OOpsijstar; + real OOsigma2; /* 1/(sigma^2) */ + real sjn_rjn; + real betasigpsi; + rvec sjn, tmpvec, tmpvec2, yj0_ycn; + rvec sum1vec_part, sum1vec, sum2vec_part, sum2vec, sum3vec, sum4vec, innersumvec; + real sum3, sum4; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real mj, wj; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + real Wjn; /* g_n(x_j) m_j / Mjn */ + gmx_bool bCalcPotFit; + + /* To calculate the torque per slab */ + rvec slab_force; /* Single force from slab n on one atom */ + rvec slab_sum1vec_part; + real slab_sum3part, slab_sum4part; + rvec slab_sum1vec, slab_sum2vec, slab_sum3vec, slab_sum4vec; + + + erg = rotg->enfrotgrp; + + /* Pre-calculate the inner sums, so that we do not have to calculate + * them again for every atom */ + flex2_precalc_inner_sum(rotg); + + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + /********************************************************/ + /* Main loop over all local atoms of the rotation group */ + /********************************************************/ + N_M = rotg->nat * erg->invmass; + V = 0.0; + OOsigma2 = 1.0 / (sigma*sigma); + for (j = 0; j < erg->nat_loc; j++) + { + /* Local index of a rotation group atom */ + ii = erg->ind_loc[j]; + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Mass-weighting */ + mj = erg->mc[iigrp]; /* need the unsorted mass here */ + wj = N_M*mj; + + /* Current position of this atom: x[ii][XX/YY/ZZ] + * Note that erg->xc_center contains the center of mass in case the flex2-t + * potential was chosen. For the flex2 potential erg->xc_center must be + * zero. 
*/ + rvec_sub(x[ii], erg->xc_center, xj); + + /* Shift this atom such that it is near its reference */ + shift_single_coord(box, xj, erg->xc_shifts[iigrp]); + + /* Determine the slabs to loop over, i.e. the ones with contributions + * larger than min_gaussian */ + count = get_single_atom_gaussians(xj, rotg); + + clear_rvec(sum1vec_part); + clear_rvec(sum2vec_part); + sum3 = 0.0; + sum4 = 0.0; + /* Loop over the relevant slabs for this atom */ + for (ic = 0; ic < count; ic++) + { + n = erg->gn_slabind[ic]; + + /* Get the precomputed Gaussian value of curr_slab for curr_x */ + gaussian_xj = erg->gn_atom[ic]; + + islab = n - erg->slab_first; /* slab index */ + + /* The (unrotated) reference position of this atom is copied to yj0: */ + copy_rvec(rotg->x_ref[iigrp], yj0); + + beta = calc_beta(xj, rotg, n); + + /* The current center of this slab is saved in xcn: */ + copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + rvec_sub(yj0, ycn, yj0_ycn); /* yj0_ycn = yj0 - ycn */ + + /* Rotate: */ + mvmul(erg->rotmat, yj0_ycn, rjn); /* rjn = Omega.(yj0 - ycn) */ + + /* Subtract the slab center from xj */ + rvec_sub(xj, xcn, tmpvec2); /* tmpvec2 = xj - xcn */ + + /* In rare cases, when an atom position coincides with a slab center + * (tmpvec2 == 0) we cannot compute the vector product for sjn. + * However, since the atom is located directly on the pivot, this + * slab's contribution to the force on that atom will be zero + * anyway. Therefore, we directly move on to the next slab. 
*/ - if (0 == norm(tmpvec2) ) ++ if (gmx_numzero(norm(tmpvec2))) /* 0 == norm(xj - xcn) */ + { + continue; + } + + /* Calculate sjn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x (xj - xcn) */ + + OOpsijstar = norm2(tmpvec)+rotg->eps; /* OOpsij* = 1/psij* = |v x (xj-xcn)|^2 + eps */ + + numerator = sqr(iprod(tmpvec, rjn)); + + /*********************************/ + /* Add to the rotation potential */ + /*********************************/ + V += 0.5*rotg->k*wj*gaussian_xj*numerator/OOpsijstar; + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, fit_rjn); + fit_numerator = sqr(iprod(tmpvec, fit_rjn)); + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*fit_numerator/OOpsijstar; + } + } + + /*************************************/ + /* Now calculate the force on atom j */ + /*************************************/ + + OOpsij = norm(tmpvec); /* OOpsij = 1 / psij = |v x (xj - xcn)| */ + + /* * v x (xj - xcn) */ + unitv(tmpvec, sjn); /* sjn = ---------------- */ + /* |v x (xj - xcn)| */ + + sjn_rjn = iprod(sjn, rjn); /* sjn_rjn = sjn . rjn */ + + + /*** A. Calculate the first of the four sum terms: ****************/ + fac = OOpsij/OOpsijstar; + svmul(fac, rjn, tmpvec); + fac2 = fac*fac*OOpsij; + svmul(fac2*sjn_rjn, sjn, tmpvec2); + rvec_dec(tmpvec, tmpvec2); + fac2 = wj*gaussian_xj; /* also needed for sum4 */ + svmul(fac2*sjn_rjn, tmpvec, slab_sum1vec_part); + /********************/ + /*** Add to sum1: ***/ + /********************/ + rvec_inc(sum1vec_part, slab_sum1vec_part); /* sum1 still needs to vector multiplied with v */ + + /*** B. 
Calculate the forth of the four sum terms: ****************/ + betasigpsi = beta*OOsigma2*OOpsij; /* this is also needed for sum3 */ + /********************/ + /*** Add to sum4: ***/ + /********************/ + slab_sum4part = fac2*betasigpsi*fac*sjn_rjn*sjn_rjn; /* Note that fac is still valid from above */ + sum4 += slab_sum4part; + + /*** C. Calculate Wjn for second and third sum */ + /* Note that we can safely divide by slab_weights since we check in + * get_slab_centers that it is non-zero. */ + Wjn = gaussian_xj*mj/erg->slab_weights[islab]; + + /* We already have precalculated the inner sum for slab n */ + copy_rvec(erg->slab_innersumvec[islab], innersumvec); + + /* Weigh the inner sum vector with Wjn */ + svmul(Wjn, innersumvec, innersumvec); + + /*** E. Calculate the second of the four sum terms: */ + /********************/ + /*** Add to sum2: ***/ + /********************/ + rvec_inc(sum2vec_part, innersumvec); /* sum2 still needs to be vector crossproduct'ed with v */ + + /*** F. Calculate the third of the four sum terms: */ + slab_sum3part = betasigpsi * iprod(sjn, innersumvec); + sum3 += slab_sum3part; /* still needs to be multiplied with v */ + + /*** G. 
Calculate the torque on the local slab's axis: */ + if (bOutstepRot) + { + /* Sum1 */ + cprod(slab_sum1vec_part, rotg->vec, slab_sum1vec); + /* Sum2 */ + cprod(innersumvec, rotg->vec, slab_sum2vec); + /* Sum3 */ + svmul(slab_sum3part, rotg->vec, slab_sum3vec); + /* Sum4 */ + svmul(slab_sum4part, rotg->vec, slab_sum4vec); + + /* The force on atom ii from slab n only: */ + for (m = 0; m < DIM; m++) + { + slab_force[m] = rotg->k * (-slab_sum1vec[m] + slab_sum2vec[m] - slab_sum3vec[m] + 0.5*slab_sum4vec[m]); + } + + erg->slab_torque_v[islab] += torque(rotg->vec, slab_force, xj, xcn); + } + } /* END of loop over slabs */ + + /* Construct the four individual parts of the vector sum: */ + cprod(sum1vec_part, rotg->vec, sum1vec); /* sum1vec = { } x v */ + cprod(sum2vec_part, rotg->vec, sum2vec); /* sum2vec = { } x v */ + svmul(sum3, rotg->vec, sum3vec); /* sum3vec = { } . v */ + svmul(sum4, rotg->vec, sum4vec); /* sum4vec = { } . v */ + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + for (m = 0; m < DIM; m++) + { + erg->f_rot_loc[j][m] = rotg->k * (-sum1vec[m] + sum2vec[m] - sum3vec[m] + 0.5*sum4vec[m]); + } + +#ifdef SUM_PARTS + fprintf(stderr, "sum1: %15.8f %15.8f %15.8f\n", -rotg->k*sum1vec[XX], -rotg->k*sum1vec[YY], -rotg->k*sum1vec[ZZ]); + fprintf(stderr, "sum2: %15.8f %15.8f %15.8f\n", rotg->k*sum2vec[XX], rotg->k*sum2vec[YY], rotg->k*sum2vec[ZZ]); + fprintf(stderr, "sum3: %15.8f %15.8f %15.8f\n", -rotg->k*sum3vec[XX], -rotg->k*sum3vec[YY], -rotg->k*sum3vec[ZZ]); + fprintf(stderr, "sum4: %15.8f %15.8f %15.8f\n", 0.5*rotg->k*sum4vec[XX], 0.5*rotg->k*sum4vec[YY], 0.5*rotg->k*sum4vec[ZZ]); +#endif + + PRINT_FORCE_J + + } /* END of loop over local atoms */ + + return V; +} + + +static real do_flex_lowlevel( + t_rotgrp *rotg, + real sigma, /* The Gaussian width sigma */ + rvec x[], + gmx_bool bOutstepRot, + gmx_bool bOutstepSlab, + matrix box) +{ + int count, ic, ifit, ii, j, m, n, islab, 
iigrp; + rvec xj, yj0; /* current and reference position */ + rvec xcn, ycn; /* the current and the reference slab centers */ + rvec yj0_ycn; /* yj0 - ycn */ + rvec xj_xcn; /* xj - xcn */ + rvec qjn, fit_qjn; /* q_i^n */ + rvec sum_n1, sum_n2; /* Two contributions to the rotation force */ + rvec innersumvec; /* Inner part of sum_n2 */ + rvec s_n; + rvec force_n; /* Single force from slab n on one atom */ + rvec force_n1, force_n2; /* First and second part of force_n */ + rvec tmpvec, tmpvec2, tmp_f; /* Helper variables */ + real V; /* The rotation potential energy */ + real OOsigma2; /* 1/(sigma^2) */ + real beta; /* beta_n(xj) */ + real bjn, fit_bjn; /* b_j^n */ + real gaussian_xj; /* Gaussian weight gn(xj) */ + real betan_xj_sigma2; + real mj, wj; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + gmx_bool bCalcPotFit; + + + erg = rotg->enfrotgrp; + + /* Pre-calculate the inner sums, so that we do not have to calculate + * them again for every atom */ + flex_precalc_inner_sum(rotg); + + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + /********************************************************/ + /* Main loop over all local atoms of the rotation group */ + /********************************************************/ + OOsigma2 = 1.0/(sigma*sigma); + N_M = rotg->nat * erg->invmass; + V = 0.0; + for (j = 0; j < erg->nat_loc; j++) + { + /* Local index of a rotation group atom */ + ii = erg->ind_loc[j]; + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Mass-weighting */ + mj = erg->mc[iigrp]; /* need the unsorted mass here */ + wj = N_M*mj; + + /* Current position of this atom: x[ii][XX/YY/ZZ] + * Note that erg->xc_center contains the center of mass in case the flex-t + * potential was chosen. For the flex potential erg->xc_center must be + * zero. 
*/ + rvec_sub(x[ii], erg->xc_center, xj); + + /* Shift this atom such that it is near its reference */ + shift_single_coord(box, xj, erg->xc_shifts[iigrp]); + + /* Determine the slabs to loop over, i.e. the ones with contributions + * larger than min_gaussian */ + count = get_single_atom_gaussians(xj, rotg); + + clear_rvec(sum_n1); + clear_rvec(sum_n2); + + /* Loop over the relevant slabs for this atom */ + for (ic = 0; ic < count; ic++) + { + n = erg->gn_slabind[ic]; + + /* Get the precomputed Gaussian for xj in slab n */ + gaussian_xj = erg->gn_atom[ic]; + + islab = n - erg->slab_first; /* slab index */ + + /* The (unrotated) reference position of this atom is saved in yj0: */ + copy_rvec(rotg->x_ref[iigrp], yj0); + + beta = calc_beta(xj, rotg, n); + + /* The current center of this slab is saved in xcn: */ + copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + rvec_sub(yj0, ycn, yj0_ycn); /* yj0_ycn = yj0 - ycn */ + - /* Rotate: */ - mvmul(erg->rotmat, yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */ - - /* Subtract the slab center from xj */ - rvec_sub(xj, xcn, xj_xcn); /* xj_xcn = xj - xcn */ - - /* In rare cases, when an atom position coincides with a slab center - * (xj_xcn == 0) we cannot compute the vector product for qjn. ++ /* In rare cases, when an atom position coincides with a reference slab ++ * center (yj0_ycn == 0) we cannot compute the normal vector qjn. + * However, since the atom is located directly on the pivot, this + * slab's contribution to the force on that atom will be zero + * anyway. Therefore, we directly move on to the next slab. 
*/ - if (0 == norm(xj_xcn) ) ++ if (gmx_numzero(norm(yj0_ycn))) /* 0 == norm(yj0 - ycn) */ + { + continue; + } + ++ /* Rotate: */ ++ mvmul(erg->rotmat, yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */ ++ ++ /* Subtract the slab center from xj */ ++ rvec_sub(xj, xcn, xj_xcn); /* xj_xcn = xj - xcn */ ++ + /* Calculate qjn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec= v x Omega.(yj0-ycn) */ + + /* * v x Omega.(yj0-ycn) */ + unitv(tmpvec, qjn); /* qjn = --------------------- */ + /* |v x Omega.(yj0-ycn)| */ + + bjn = iprod(qjn, xj_xcn); /* bjn = qjn * (xj - xcn) */ + + /*********************************/ + /* Add to the rotation potential */ + /*********************************/ + V += 0.5*rotg->k*wj*gaussian_xj*sqr(bjn); + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* As above calculate Omega.(yj0-ycn), now for the other angles */ + mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */ + /* As above calculate qjn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec= v x Omega.(yj0-ycn) */ + /* * v x Omega.(yj0-ycn) */ + unitv(tmpvec, fit_qjn); /* fit_qjn = --------------------- */ + /* |v x Omega.(yj0-ycn)| */ + fit_bjn = iprod(fit_qjn, xj_xcn); /* fit_bjn = fit_qjn * (xj - xcn) */ + /* Add to the rotation potential for this angle */ + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*sqr(fit_bjn); + } + } + + /****************************************************************/ + /* sum_n1 will typically be the main contribution to the force: */ + /****************************************************************/ + betan_xj_sigma2 = beta*OOsigma2; /* beta_n(xj)/sigma^2 */ + + /* The next lines calculate + * qjn - (bjn*beta(xj)/(2sigma^2))v */ + svmul(bjn*0.5*betan_xj_sigma2, rotg->vec, tmpvec2); + rvec_sub(qjn, tmpvec2, tmpvec); + + /* Multiply with gn(xj)*bjn: */ + 
svmul(gaussian_xj*bjn, tmpvec, tmpvec2); + + /* Sum over n: */ + rvec_inc(sum_n1, tmpvec2); + + /* We already have precalculated the Sn term for slab n */ + copy_rvec(erg->slab_innersumvec[islab], s_n); + /* * beta_n(xj) */ + svmul(betan_xj_sigma2*iprod(s_n, xj_xcn), rotg->vec, tmpvec); /* tmpvec = ---------- s_n (xj-xcn) */ + /* sigma^2 */ + + rvec_sub(s_n, tmpvec, innersumvec); + + /* We can safely divide by slab_weights since we check in get_slab_centers + * that it is non-zero. */ + svmul(gaussian_xj/erg->slab_weights[islab], innersumvec, innersumvec); + + rvec_add(sum_n2, innersumvec, sum_n2); + + /* Calculate the torque: */ + if (bOutstepRot) + { + /* The force on atom ii from slab n only: */ + svmul(-rotg->k*wj, tmpvec2, force_n1); /* part 1 */ + svmul( rotg->k*mj, innersumvec, force_n2); /* part 2 */ + rvec_add(force_n1, force_n2, force_n); + erg->slab_torque_v[islab] += torque(rotg->vec, force_n, xj, xcn); + } + } /* END of loop over slabs */ + + /* Put both contributions together: */ + svmul(wj, sum_n1, sum_n1); + svmul(mj, sum_n2, sum_n2); + rvec_sub(sum_n2, sum_n1, tmp_f); /* F = -grad V */ + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + for (m = 0; m < DIM; m++) + { + erg->f_rot_loc[j][m] = rotg->k*tmp_f[m]; + } + + PRINT_FORCE_J + + } /* END of loop over local atoms */ + + return V; +} + +#ifdef PRINT_COORDS +static void print_coordinates(t_rotgrp *rotg, rvec x[], matrix box, int step) +{ + int i; + static FILE *fp; + static char buf[STRLEN]; + static gmx_bool bFirst = 1; + + + if (bFirst) + { + sprintf(buf, "coords%d.txt", cr->nodeid); + fp = fopen(buf, "w"); + bFirst = 0; + } + + fprintf(fp, "\nStep %d\n", step); + fprintf(fp, "box: %f %f %f %f %f %f %f %f %f\n", + box[XX][XX], box[XX][YY], box[XX][ZZ], + box[YY][XX], box[YY][YY], box[YY][ZZ], + box[ZZ][XX], box[ZZ][ZZ], box[ZZ][ZZ]); + for (i = 0; i < rotg->nat; i++) + { + fprintf(fp, "%4d %f %f %f\n", i, + 
erg->xc[i][XX], erg->xc[i][YY], erg->xc[i][ZZ]); + } + fflush(fp); + +} +#endif + + +static int projection_compare(const void *a, const void *b) +{ + sort_along_vec_t *xca, *xcb; + + + xca = (sort_along_vec_t *)a; + xcb = (sort_along_vec_t *)b; + + if (xca->xcproj < xcb->xcproj) + { + return -1; + } + else if (xca->xcproj > xcb->xcproj) + { + return 1; + } + else + { + return 0; + } +} + + +static void sort_collective_coordinates( + t_rotgrp *rotg, /* Rotation group */ + sort_along_vec_t *data) /* Buffer for sorting the positions */ +{ + int i; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* The projection of the position vector on the rotation vector is + * the relevant value for sorting. Fill the 'data' structure */ + for (i = 0; i < rotg->nat; i++) + { + data[i].xcproj = iprod(erg->xc[i], rotg->vec); /* sort criterium */ + data[i].m = erg->mc[i]; + data[i].ind = i; + copy_rvec(erg->xc[i], data[i].x ); + copy_rvec(rotg->x_ref[i], data[i].x_ref); + } + /* Sort the 'data' structure */ + gmx_qsort(data, rotg->nat, sizeof(sort_along_vec_t), projection_compare); + + /* Copy back the sorted values */ + for (i = 0; i < rotg->nat; i++) + { + copy_rvec(data[i].x, erg->xc[i] ); + copy_rvec(data[i].x_ref, erg->xc_ref_sorted[i]); + erg->mc_sorted[i] = data[i].m; + erg->xc_sortind[i] = data[i].ind; + } +} + + +/* For each slab, get the first and the last index of the sorted atom + * indices */ +static void get_firstlast_atom_per_slab(t_rotgrp *rotg) +{ + int i, islab, n; + real beta; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Find the first atom that needs to enter the calculation for each slab */ + n = erg->slab_first; /* slab */ + i = 0; /* start with the first atom */ + do + { + /* Find the first atom that significantly contributes to this slab */ + do /* move forward in position until a large enough beta is found */ + { + beta = calc_beta(erg->xc[i], 
rotg, n); + i++; + } + while ((beta < -erg->max_beta) && (i < rotg->nat)); + i--; + islab = n - erg->slab_first; /* slab index */ + erg->firstatom[islab] = i; + /* Proceed to the next slab */ + n++; + } + while (n <= erg->slab_last); + + /* Find the last atom for each slab */ + n = erg->slab_last; /* start with last slab */ + i = rotg->nat-1; /* start with the last atom */ + do + { + do /* move backward in position until a large enough beta is found */ + { + beta = calc_beta(erg->xc[i], rotg, n); + i--; + } + while ((beta > erg->max_beta) && (i > -1)); + i++; + islab = n - erg->slab_first; /* slab index */ + erg->lastatom[islab] = i; + /* Proceed to the next slab */ + n--; + } + while (n >= erg->slab_first); +} + + +/* Determine the very first and very last slab that needs to be considered + * For the first slab that needs to be considered, we have to find the smallest + * n that obeys: + * + * x_first * v - n*Delta_x <= beta_max + * + * slab index n, slab distance Delta_x, rotation vector v. 
For the last slab we + * have to find the largest n that obeys + * + * x_last * v - n*Delta_x >= -beta_max + * + */ +static gmx_inline int get_first_slab( + t_rotgrp *rotg, /* The rotation group (inputrec data) */ + real max_beta, /* The max_beta value, instead of min_gaussian */ + rvec firstatom) /* First atom after sorting along the rotation vector v */ +{ + /* Find the first slab for the first atom */ + return ceil((iprod(firstatom, rotg->vec) - max_beta)/rotg->slab_dist); +} + + +static gmx_inline int get_last_slab( + t_rotgrp *rotg, /* The rotation group (inputrec data) */ + real max_beta, /* The max_beta value, instead of min_gaussian */ + rvec lastatom) /* Last atom along v */ +{ + /* Find the last slab for the last atom */ + return floor((iprod(lastatom, rotg->vec) + max_beta)/rotg->slab_dist); +} + + +static void get_firstlast_slab_check( + t_rotgrp *rotg, /* The rotation group (inputrec data) */ + t_gmx_enfrotgrp *erg, /* The rotation group (data only accessible in this file) */ + rvec firstatom, /* First atom after sorting along the rotation vector v */ + rvec lastatom) /* Last atom along v */ +{ + erg->slab_first = get_first_slab(rotg, erg->max_beta, firstatom); + erg->slab_last = get_last_slab(rotg, erg->max_beta, lastatom); + + /* Calculate the slab buffer size, which changes when slab_first changes */ + erg->slab_buffer = erg->slab_first - erg->slab_first_ref; + + /* Check whether we have reference data to compare against */ + if (erg->slab_first < erg->slab_first_ref) + { + gmx_fatal(FARGS, "%s No reference data for first slab (n=%d), unable to proceed.", + RotStr, erg->slab_first); + } + + /* Check whether we have reference data to compare against */ + if (erg->slab_last > erg->slab_last_ref) + { + gmx_fatal(FARGS, "%s No reference data for last slab (n=%d), unable to proceed.", + RotStr, erg->slab_last); + } +} + + +/* Enforced rotation with a flexible axis */ +static void do_flexible( + gmx_bool bMaster, + gmx_enfrot_t enfrot, /* Other rotation 
data */ + t_rotgrp *rotg, /* The rotation group */ + int g, /* Group number */ + rvec x[], /* The local positions */ + matrix box, + double t, /* Time in picoseconds */ + gmx_bool bOutstepRot, /* Output to main rotation output file */ + gmx_bool bOutstepSlab) /* Output per-slab data */ +{ + int l, nslabs; + real sigma; /* The Gaussian width sigma */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Define the sigma value */ + sigma = 0.7*rotg->slab_dist; + + /* Sort the collective coordinates erg->xc along the rotation vector. This is + * an optimization for the inner loop. */ + sort_collective_coordinates(rotg, enfrot->data); + + /* Determine the first relevant slab for the first atom and the last + * relevant slab for the last atom */ + get_firstlast_slab_check(rotg, erg, erg->xc[0], erg->xc[rotg->nat-1]); + + /* Determine for each slab depending on the min_gaussian cutoff criterium, + * a first and a last atom index inbetween stuff needs to be calculated */ + get_firstlast_atom_per_slab(rotg); + + /* Determine the gaussian-weighted center of positions for all slabs */ + get_slab_centers(rotg, erg->xc, erg->mc_sorted, g, t, enfrot->out_slabs, bOutstepSlab, FALSE); + + /* Clear the torque per slab from last time step: */ + nslabs = erg->slab_last - erg->slab_first + 1; + for (l = 0; l < nslabs; l++) + { + erg->slab_torque_v[l] = 0.0; + } + + /* Call the rotational forces kernel */ + if (rotg->eType == erotgFLEX || rotg->eType == erotgFLEXT) + { + erg->V = do_flex_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box); + } + else if (rotg->eType == erotgFLEX2 || rotg->eType == erotgFLEX2T) + { + erg->V = do_flex2_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box); + } + else + { + gmx_fatal(FARGS, "Unknown flexible rotation type"); + } + + /* Determine angle by RMSD fit to the reference - Let's hope this */ + /* only happens once in a while, since this is not parallelized! 
*/ + if (bMaster && (erotgFitPOT != rotg->eFittype) ) + { + if (bOutstepRot) + { + /* Fit angle of the whole rotation group */ + erg->angle_v = flex_fit_angle(rotg); + } + if (bOutstepSlab) + { + /* Fit angle of each slab */ + flex_fit_angle_perslab(g, rotg, t, erg->degangle, enfrot->out_angles); + } + } + + /* Lump together the torques from all slabs: */ + erg->torque_v = 0.0; + for (l = 0; l < nslabs; l++) + { + erg->torque_v += erg->slab_torque_v[l]; + } +} + + +/* Calculate the angle between reference and actual rotation group atom, + * both projected into a plane perpendicular to the rotation vector: */ +static void angle(t_rotgrp *rotg, + rvec x_act, + rvec x_ref, + real *alpha, + real *weight) /* atoms near the rotation axis should count less than atoms far away */ +{ + rvec xp, xrp; /* current and reference positions projected on a plane perpendicular to pg->vec */ + rvec dum; + + + /* Project x_ref and x into a plane through the origin perpendicular to rot_vec: */ + /* Project x_ref: xrp = x_ref - (vec * x_ref) * vec */ + svmul(iprod(rotg->vec, x_ref), rotg->vec, dum); + rvec_sub(x_ref, dum, xrp); + /* Project x_act: */ + svmul(iprod(rotg->vec, x_act), rotg->vec, dum); + rvec_sub(x_act, dum, xp); + + /* Retrieve information about which vector precedes. gmx_angle always + * returns a positive angle. */ + cprod(xp, xrp, dum); /* if reference precedes, this is pointing into the same direction as vec */ + + if (iprod(rotg->vec, dum) >= 0) + { + *alpha = -gmx_angle(xrp, xp); + } + else + { + *alpha = +gmx_angle(xrp, xp); + } + + /* Also return the weight */ + *weight = norm(xp); +} + + +/* Project first vector onto a plane perpendicular to the second vector + * dr = dr - (dr.v)v + * Note that v must be of unit length. 
+ */ +static gmx_inline void project_onto_plane(rvec dr, const rvec v) +{ + rvec tmp; + + + svmul(iprod(dr, v), v, tmp); /* tmp = (dr.v)v */ + rvec_dec(dr, tmp); /* dr = dr - (dr.v)v */ +} + + +/* Fixed rotation: The rotation reference group rotates around the v axis. */ +/* The atoms of the actual rotation group are attached with imaginary */ +/* springs to the reference atoms. */ +static void do_fixed( + t_rotgrp *rotg, /* The rotation group */ + gmx_bool bOutstepRot, /* Output to main rotation output file */ + gmx_bool bOutstepSlab) /* Output per-slab data */ +{ + int ifit, j, jj, m; + rvec dr; + rvec tmp_f; /* Force */ + real alpha; /* a single angle between an actual and a reference position */ + real weight; /* single weight for a single angle */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec xi_xc; /* xi - xc */ + gmx_bool bCalcPotFit; + rvec fit_xr_loc; + + /* for mass weighting: */ + real wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + real k_wi; /* k times wi */ + + gmx_bool bProject; + + + erg = rotg->enfrotgrp; + bProject = (rotg->eType == erotgPM) || (rotg->eType == erotgPMPF); + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + N_M = rotg->nat * erg->invmass; + + /* Each process calculates the forces on its local atoms */ + for (j = 0; j < erg->nat_loc; j++) + { + /* Calculate (x_i-x_c) resp. 
(x_i-u) */ + rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xi_xc); + + /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */ + rvec_sub(erg->xr_loc[j], xi_xc, dr); + + if (bProject) + { + project_onto_plane(dr, rotg->vec); + } + + /* Mass-weighting */ + wi = N_M*erg->m_loc[j]; + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + k_wi = rotg->k*wi; + for (m = 0; m < DIM; m++) + { + tmp_f[m] = k_wi*dr[m]; + erg->f_rot_loc[j][m] = tmp_f[m]; + erg->V += 0.5*k_wi*sqr(dr[m]); + } + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* Index of this rotation group atom with respect to the whole rotation group */ + jj = erg->xc_ref_ind[j]; + + /* Rotate with the alternative angle. Like rotate_local_reference(), + * just for a single local atom */ + mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_xr_loc); /* fit_xr_loc = Omega*(y_i-y_c) */ + + /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */ + rvec_sub(fit_xr_loc, xi_xc, dr); + + if (bProject) + { + project_onto_plane(dr, rotg->vec); + } + + /* Add to the rotation potential for this angle: */ + erg->PotAngleFit->V[ifit] += 0.5*k_wi*norm2(dr); + } + } + + if (bOutstepRot) + { + /* Add to the torque of this rotation group */ + erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center); + + /* Calculate the angle between reference and actual rotation group atom. 
*/ + angle(rotg, xi_xc, erg->xr_loc[j], &alpha, &weight); /* angle in rad, weighted */ + erg->angle_v += alpha * weight; + erg->weight_v += weight; + } + /* If you want enforced rotation to contribute to the virial, + * activate the following lines: + if (MASTER(cr)) + { + Add the rotation contribution to the virial + for(j=0; jenfrotgrp; + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + N_M = rotg->nat * erg->invmass; + + /* Each process calculates the forces on its local atoms */ + for (j = 0; j < erg->nat_loc; j++) + { + /* Calculate (xj-u) */ + rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xj_u); /* xj_u = xj-u */ + + /* Calculate Omega.(yj0-u) */ + cprod(rotg->vec, erg->xr_loc[j], tmpvec); /* tmpvec = v x Omega.(yj0-u) */ + + /* * v x Omega.(yj0-u) */ + unitv(tmpvec, pj); /* pj = --------------------- */ + /* | v x Omega.(yj0-u) | */ + + fac = iprod(pj, xj_u); /* fac = pj.(xj-u) */ + fac2 = fac*fac; + + /* Mass-weighting */ + wj = N_M*erg->m_loc[j]; + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + svmul(-rotg->k*wj*fac, pj, tmp_f); + copy_rvec(tmp_f, erg->f_rot_loc[j]); + sum += wj*fac2; + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* Index of this rotation group atom with respect to the whole rotation group */ + jj = erg->xc_ref_ind[j]; + + /* Rotate with the alternative angle. 
Like rotate_local_reference(), + * just for a single local atom */ + mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_tmpvec); /* fit_tmpvec = Omega*(yj0-u) */ + + /* Calculate Omega.(yj0-u) */ + cprod(rotg->vec, fit_tmpvec, tmpvec); /* tmpvec = v x Omega.(yj0-u) */ + /* * v x Omega.(yj0-u) */ + unitv(tmpvec, pj); /* pj = --------------------- */ + /* | v x Omega.(yj0-u) | */ + + fac = iprod(pj, xj_u); /* fac = pj.(xj-u) */ + fac2 = fac*fac; + + /* Add to the rotation potential for this angle: */ + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2; + } + } + + if (bOutstepRot) + { + /* Add to the torque of this rotation group */ + erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center); + + /* Calculate the angle between reference and actual rotation group atom. */ + angle(rotg, xj_u, erg->xr_loc[j], &alpha, &weight); /* angle in rad, weighted */ + erg->angle_v += alpha * weight; + erg->weight_v += weight; + } + + PRINT_FORCE_J + + } /* end of loop over local rotation group atoms */ + erg->V = 0.5*rotg->k*sum; +} + + +/* Calculate the radial motion pivot-free potential and forces */ +static void do_radial_motion_pf( + t_rotgrp *rotg, /* The rotation group */ + rvec x[], /* The positions */ + matrix box, /* The simulation box */ + gmx_bool bOutstepRot, /* Output to main rotation output file */ + gmx_bool bOutstepSlab) /* Output per-slab data */ +{ + int i, ii, iigrp, ifit, j; + rvec xj; /* Current position */ + rvec xj_xc; /* xj - xc */ + rvec yj0_yc0; /* yj0 - yc0 */ + rvec tmp_f; /* Force */ + real alpha; /* a single angle between an actual and a reference position */ + real weight; /* single weight for a single angle */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec tmpvec, tmpvec2; + rvec innersumvec; /* Precalculation of the inner sum */ + rvec innersumveckM; + real fac, fac2, V = 0.0; + rvec qi, qj; + gmx_bool bCalcPotFit; + + /* For mass weighting: */ + real mj, wi, wj; /* Mass-weighting of the 
positions */ + real N_M; /* N/M */ + + + erg = rotg->enfrotgrp; + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + N_M = rotg->nat * erg->invmass; + + /* Get the current center of the rotation group: */ + get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center); + + /* Precalculate Sum_i [ wi qi.(xi-xc) qi ] which is needed for every single j */ + clear_rvec(innersumvec); + for (i = 0; i < rotg->nat; i++) + { + /* Mass-weighting */ + wi = N_M*erg->mc[i]; + + /* Calculate qi. Note that xc_ref_center has already been subtracted from + * x_ref in init_rot_group.*/ + mvmul(erg->rotmat, rotg->x_ref[i], tmpvec); /* tmpvec = Omega.(yi0-yc0) */ + + cprod(rotg->vec, tmpvec, tmpvec2); /* tmpvec2 = v x Omega.(yi0-yc0) */ + + /* * v x Omega.(yi0-yc0) */ + unitv(tmpvec2, qi); /* qi = ----------------------- */ + /* | v x Omega.(yi0-yc0) | */ + + rvec_sub(erg->xc[i], erg->xc_center, tmpvec); /* tmpvec = xi-xc */ + + svmul(wi*iprod(qi, tmpvec), qi, tmpvec2); + + rvec_inc(innersumvec, tmpvec2); + } + svmul(rotg->k*erg->invmass, innersumvec, innersumveckM); + + /* Each process calculates the forces on its local atoms */ + for (j = 0; j < erg->nat_loc; j++) + { + /* Local index of a rotation group atom */ + ii = erg->ind_loc[j]; + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Mass-weighting */ + mj = erg->mc[iigrp]; /* need the unsorted mass here */ + wj = N_M*mj; + + /* Current position of this atom: x[ii][XX/YY/ZZ] */ + copy_rvec(x[ii], xj); + + /* Shift this atom such that it is near its reference */ + shift_single_coord(box, xj, erg->xc_shifts[iigrp]); + + /* The (unrotated) reference position is yj0. 
yc0 has already + * been subtracted in init_rot_group */ + copy_rvec(rotg->x_ref[iigrp], yj0_yc0); /* yj0_yc0 = yj0 - yc0 */ + + /* Calculate Omega.(yj0-yc0) */ + mvmul(erg->rotmat, yj0_yc0, tmpvec2); /* tmpvec2 = Omega.(yj0 - yc0) */ + + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x Omega.(yj0-yc0) */ + + /* * v x Omega.(yj0-yc0) */ + unitv(tmpvec, qj); /* qj = ----------------------- */ + /* | v x Omega.(yj0-yc0) | */ + + /* Calculate (xj-xc) */ + rvec_sub(xj, erg->xc_center, xj_xc); /* xj_xc = xj-xc */ + + fac = iprod(qj, xj_xc); /* fac = qj.(xj-xc) */ + fac2 = fac*fac; + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + svmul(-rotg->k*wj*fac, qj, tmp_f); /* part 1 of force */ + svmul(mj, innersumveckM, tmpvec); /* part 2 of force */ + rvec_inc(tmp_f, tmpvec); + copy_rvec(tmp_f, erg->f_rot_loc[j]); + V += wj*fac2; + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* Rotate with the alternative angle. Like rotate_local_reference(), + * just for a single local atom */ + mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, tmpvec2); /* tmpvec2 = Omega*(yj0-yc0) */ + + /* Calculate Omega.(yj0-u) */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x Omega.(yj0-yc0) */ + /* * v x Omega.(yj0-yc0) */ + unitv(tmpvec, qj); /* qj = ----------------------- */ + /* | v x Omega.(yj0-yc0) | */ + + fac = iprod(qj, xj_xc); /* fac = qj.(xj-xc) */ + fac2 = fac*fac; + + /* Add to the rotation potential for this angle: */ + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2; + } + } + + if (bOutstepRot) + { + /* Add to the torque of this rotation group */ + erg->torque_v += torque(rotg->vec, tmp_f, xj, erg->xc_center); + + /* Calculate the angle between reference and actual rotation group atom. 
*/ + angle(rotg, xj_xc, yj0_yc0, &alpha, &weight); /* angle in rad, weighted */ + erg->angle_v += alpha * weight; + erg->weight_v += weight; + } + + PRINT_FORCE_J + + } /* end of loop over local rotation group atoms */ + erg->V = 0.5*rotg->k*V; +} + + +/* Precalculate the inner sum for the radial motion 2 forces */ +static void radial_motion2_precalc_inner_sum(t_rotgrp *rotg, rvec innersumvec) +{ + int i; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec xi_xc; /* xj - xc */ + rvec tmpvec, tmpvec2; + real fac, fac2; + rvec ri, si; + real siri; + rvec v_xi_xc; /* v x (xj - u) */ + real psii, psiistar; + real wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + rvec sumvec; + + erg = rotg->enfrotgrp; + N_M = rotg->nat * erg->invmass; + + /* Loop over the collective set of positions */ + clear_rvec(sumvec); + for (i = 0; i < rotg->nat; i++) + { + /* Mass-weighting */ + wi = N_M*erg->mc[i]; + + rvec_sub(erg->xc[i], erg->xc_center, xi_xc); /* xi_xc = xi-xc */ + + /* Calculate ri. 
   Note that xc_ref_center has already been subtracted from
         * x_ref in init_rot_group. */
        mvmul(erg->rotmat, rotg->x_ref[i], ri); /* ri  = Omega.(yi0-yc0) */

        cprod(rotg->vec, xi_xc, v_xi_xc); /* v_xi_xc = v x (xi-u) */

        fac = norm2(v_xi_xc);
        /*                                 *                      1           */
        psiistar = 1.0/(fac + rotg->eps); /* psiistar = --------------------- */
        /*                                      |v x (xi-xc)|^2 + eps         */

        psii = gmx_invsqrt(fac); /*                 1                */
                                 /*  psii = -------------            */
                                 /*         |v x (xi-xc)|            */

        svmul(psii, v_xi_xc, si); /* si = psii * (v x (xi-xc) ) */

        fac  = iprod(v_xi_xc, ri); /* fac = (v x (xi-xc)).ri */
        fac2 = fac*fac;

        siri = iprod(si, ri); /* siri = si.ri */

        svmul(psiistar/psii, ri, tmpvec);
        svmul(psiistar*psiistar/(psii*psii*psii) * siri, si, tmpvec2);
        rvec_dec(tmpvec, tmpvec2);
        cprod(tmpvec, rotg->vec, tmpvec2);

        /* Weight this atom's contribution and accumulate it into the inner sum */
        svmul(wi*siri, tmpvec2, tmpvec);

        rvec_inc(sumvec, tmpvec);
    }
    /* Scale by k/M; this term is identical (up to the per-atom mass weight)
     * for all local atoms and is therefore precalculated only once */
    svmul(rotg->k*erg->invmass, sumvec, innersumvec);
}


/* Calculate the radial motion 2 potential and forces.
 * Handles both the fixed-pivot (erotgRM2) and the pivot-free (erotgRM2PF)
 * variants; for the pivot-free case the current center of mass replaces the
 * pivot u, and a precalculated inner sum enters each force. */
static void do_radial_motion2(
        t_rotgrp *rotg,          /* The rotation group                       */
        rvec      x[],           /* The positions                            */
        matrix    box,           /* The simulation box                       */
        gmx_bool  bOutstepRot,   /* Output to main rotation output file      */
        gmx_bool  bOutstepSlab)  /* Output per-slab data                     */
{
    int             ii, iigrp, ifit, j;
    rvec            xj;          /* Position */
    real            alpha;       /* a single angle between an actual and a reference position */
    real            weight;      /* single weight for a single angle */
    gmx_enfrotgrp_t erg;         /* Pointer to enforced rotation group data */
    rvec            xj_u;        /* xj - u */
    rvec            yj0_yc0;     /* yj0 - yc0 */
    rvec            tmpvec, tmpvec2;
    real            fac, fit_fac, fac2, Vpart = 0.0;
    rvec            rj, fit_rj, sj;
    real            sjrj;
    rvec            v_xj_u;      /* v x (xj - u) */
    real            psij, psijstar;
    real            mj, wj;      /* For mass-weighting of the positions */
    real            N_M;         /* N/M */
    gmx_bool        bPF;         /* Pivot-free variant? */
    rvec            innersumvec;
    gmx_bool        bCalcPotFit; /* Also evaluate the potential at a set of fit angles? */


    erg = rotg->enfrotgrp;

    bPF         = rotg->eType == erotgRM2PF;
    bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype);


    clear_rvec(yj0_yc0); /* Make the compiler happy */

    clear_rvec(innersumvec);
    if (bPF)
    {
        /* For the pivot-free variant we have to use the current center of
         * mass of the rotation group instead of the pivot u */
        get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center);

        /* Also, we precalculate the second term of the forces that is identical
         * (up to the weight factor mj) for all forces */
        radial_motion2_precalc_inner_sum(rotg, innersumvec);
    }

    N_M = rotg->nat * erg->invmass;

    /* Each process calculates the forces on its local atoms */
    for (j = 0; j < erg->nat_loc; j++)
    {
        if (bPF)
        {
            /* Local index of a rotation group atom  */
            ii = erg->ind_loc[j];
            /* Position of this atom in the collective array */
            iigrp = erg->xc_ref_ind[j];
            /* Mass-weighting */
            mj = erg->mc[iigrp];

            /* Current position of this atom: x[ii] */
            copy_rvec(x[ii], xj);

            /* Shift this atom such that it is near its reference */
            shift_single_coord(box, xj, erg->xc_shifts[iigrp]);

            /* The (unrotated) reference position is yj0. yc0 has already
             * been subtracted in init_rot_group */
            copy_rvec(rotg->x_ref[iigrp], yj0_yc0); /* yj0_yc0 = yj0 - yc0 */

            /* Calculate Omega.(yj0-yc0) */
            mvmul(erg->rotmat, yj0_yc0, rj); /* rj = Omega.(yj0-yc0) */
        }
        else
        {
            mj = erg->m_loc[j];
            copy_rvec(erg->x_loc_pbc[j], xj);
            copy_rvec(erg->xr_loc[j], rj); /* rj = Omega.(yj0-u) */
        }
        /* Mass-weighting */
        wj = N_M*mj;

        /* Calculate (xj-u) resp. (xj-xc) */
        rvec_sub(xj, erg->xc_center, xj_u); /* xj_u = xj-u */

        cprod(rotg->vec, xj_u, v_xj_u); /* v_xj_u = v x (xj-u) */

        fac = norm2(v_xj_u);
        /*                                  *                     1           */
        psijstar = 1.0/(fac + rotg->eps); /* psistar = -------------------- */
        /*                                      |v x (xj-u)|^2 + eps         */

        psij = gmx_invsqrt(fac); /*                 1                */
                                 /*  psij = ------------             */
                                 /*         |v x (xj-u)|             */

        svmul(psij, v_xj_u, sj); /* sj = psij * (v x (xj-u) ) */

        fac  = iprod(v_xj_u, rj); /* fac = (v x (xj-u)).rj */
        fac2 = fac*fac;

        sjrj = iprod(sj, rj); /* sjrj = sj.rj */

        svmul(psijstar/psij, rj, tmpvec);
        svmul(psijstar*psijstar/(psij*psij*psij) * sjrj, sj, tmpvec2);
        rvec_dec(tmpvec, tmpvec2);
        cprod(tmpvec, rotg->vec, tmpvec2);

        /* Store the additional force so that it can be added to the force
         * array after the normal forces have been evaluated */
        svmul(-rotg->k*wj*sjrj, tmpvec2, tmpvec);
        svmul(mj, innersumvec, tmpvec2); /* This is != 0 only for the pivot-free variant */

        rvec_add(tmpvec2, tmpvec, erg->f_rot_loc[j]);
        Vpart += wj*psijstar*fac2;

        /* If requested, also calculate the potential for a set of angles
         * near the current reference angle */
        if (bCalcPotFit)
        {
            for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++)
            {
                if (bPF)
                {
                    mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, fit_rj); /* fit_rj = Omega.(yj0-yc0) */
                }
                else
                {
                    /* Position of this atom in the collective array */
                    iigrp = erg->xc_ref_ind[j];
                    /* Rotate with the alternative angle. Like rotate_local_reference(),
                     * just for a single local atom */
                    mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[iigrp], fit_rj); /* fit_rj = Omega*(yj0-u) */
                }
                fit_fac = iprod(v_xj_u, fit_rj); /* fac = (v x (xj-u)).fit_rj */
                /* Add to the rotation potential for this angle: */
                erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*psijstar*fit_fac*fit_fac;
            }
        }

        if (bOutstepRot)
        {
            /* Add to the torque of this rotation group */
            erg->torque_v += torque(rotg->vec, erg->f_rot_loc[j], xj, erg->xc_center);

            /* Calculate the angle between reference and actual rotation group atom. */
            angle(rotg, xj_u, rj, &alpha, &weight); /* angle in rad, weighted */
            erg->angle_v  += alpha * weight;
            erg->weight_v += weight;
        }

        PRINT_FORCE_J

    } /* end of loop over local rotation group atoms */
    erg->V = 0.5*rotg->k*Vpart;
}


/* Determine the smallest and largest position vector (with respect to the
 * rotation vector) for the reference group, returned as indices into the
 * reference position array. */
static void get_firstlast_atom_ref(
        t_rotgrp  *rotg,
        int       *firstindex,
        int       *lastindex)
{
    gmx_enfrotgrp_t erg;              /* Pointer to enforced rotation group data */
    int             i;
    real            xcproj;           /* The projection of a reference position on the
                                         rotation vector */
    real            minproj, maxproj; /* Smallest and largest projection on v */



    /* NOTE(review): erg is assigned but not otherwise used in this function */
    erg = rotg->enfrotgrp;

    /* Start with some value */
    minproj = iprod(rotg->x_ref[0], rotg->vec);
    maxproj = minproj;

    /* This is just to ensure that it still works if all the atoms of the
     * reference structure are situated in a plane perpendicular to the rotation
     * vector */
    *firstindex = 0;
    *lastindex  = rotg->nat-1;

    /* Loop over all atoms of the reference group,
     * project them on the rotation vector to find the extremes */
    for (i = 0; i < rotg->nat; i++)
    {
        xcproj = iprod(rotg->x_ref[i], rotg->vec);
        if (xcproj < minproj)
        {
            minproj     = xcproj;
            *firstindex = i;
        }
        if (xcproj > maxproj)
        {
            maxproj    = xcproj;
            *lastindex = i;
        }
    }
}


/*
Allocate memory for the slabs */
static void allocate_slabs(
        t_rotgrp  *rotg,
        FILE      *fplog,
        int        g,          /* Rotation group index (for log output only) */
        gmx_bool   bVerbose)
{
    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
    int             i, nslabs;


    erg = rotg->enfrotgrp;

    /* More slabs than are defined for the reference are never needed */
    nslabs = erg->slab_last_ref - erg->slab_first_ref + 1;

    /* Remember how many we allocated */
    erg->nslabs_alloc = nslabs;

    if ( (NULL != fplog) && bVerbose)
    {
        fprintf(fplog, "%s allocating memory to store data for %d slabs (rotation group %d).\n",
                RotStr, nslabs, g);
    }
    snew(erg->slab_center, nslabs);
    snew(erg->slab_center_ref, nslabs);
    snew(erg->slab_weights, nslabs);
    snew(erg->slab_torque_v, nslabs);
    snew(erg->slab_data, nslabs);
    snew(erg->gn_atom, nslabs);
    snew(erg->gn_slabind, nslabs);
    snew(erg->slab_innersumvec, nslabs);
    for (i = 0; i < nslabs; i++)
    {
        /* Per-slab arrays are sized for the worst case that all group atoms
         * contribute to a single slab */
        snew(erg->slab_data[i].x, rotg->nat);
        snew(erg->slab_data[i].ref, rotg->nat);
        snew(erg->slab_data[i].weight, rotg->nat);
    }
    snew(erg->xc_ref_sorted, rotg->nat);
    snew(erg->xc_sortind, rotg->nat);
    snew(erg->firstatom, nslabs);
    snew(erg->lastatom, nslabs);
}


/* From the extreme positions of the reference group, determine the first
 * and last slab of the reference. We can never have more slabs in the real
 * simulation than calculated here for the reference.
 */
static void get_firstlast_slab_ref(t_rotgrp *rotg, real mc[], int ref_firstindex, int ref_lastindex)
{
    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */
    int             first, last;
    rvec            dummy;


    erg   = rotg->enfrotgrp;
    first = get_first_slab(rotg, erg->max_beta, rotg->x_ref[ref_firstindex]);
    last  = get_last_slab( rotg, erg->max_beta, rotg->x_ref[ref_lastindex ]);

    /* Extend the slab range outward as long as the slab weight stays above
     * the threshold; the last slab that still exceeded WEIGHT_MIN defines
     * the reference range */
    while (get_slab_weight(first, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN)
    {
        first--;
    }
    erg->slab_first_ref = first+1;
    while (get_slab_weight(last, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN)
    {
        last++;
    }
    erg->slab_last_ref = last-1;
}


/* Special version of copy_rvec:
 * During the copy procedure of xcurr to b, the correct PBC image is chosen
 * such that the copied vector ends up near its reference position xref */
static gmx_inline void copy_correct_pbc_image(
        const rvec  xcurr,  /* copy vector xcurr ...                */
        rvec        b,      /* ... to b ...                         */
        const rvec  xref,   /* choosing the PBC image such that b ends up near xref */
        matrix      box,
        int         npbcdim)
{
    rvec  dx;
    int   d, m;
    ivec  shift;


    /* Shortest PBC distance between the atom and its reference */
    rvec_sub(xcurr, xref, dx);

    /* Determine the shift for this atom */
    clear_ivec(shift);
    for (m = npbcdim-1; m >= 0; m--)
    {
        while (dx[m] < -0.5*box[m][m])
        {
            for (d = 0; d < DIM; d++)
            {
                dx[d] += box[m][d];
            }
            shift[m]++;
        }
        while (dx[m] >= 0.5*box[m][m])
        {
            for (d = 0; d < DIM; d++)
            {
                dx[d] -= box[m][d];
            }
            shift[m]--;
        }
    }

    /* Apply the shift to the position */
    copy_rvec(xcurr, b);
    shift_single_coord(box, b, shift);
}


/* Initialize the per-group data (masses, centers, collective buffers, slabs)
 * for one enforced rotation group. Called once per group from init_rot(). */
static void init_rot_group(FILE *fplog, t_commrec *cr, int g, t_rotgrp *rotg,
                           rvec *x, gmx_mtop_t *mtop, gmx_bool bVerbose, FILE *out_slabs, matrix box,
                           t_inputrec *ir, gmx_bool bOutputCenters)
{
    int                   i, ii;
    rvec                  coord, xref, *xdum;
    gmx_bool              bFlex, bColl;
    t_atom               *atom;
    gmx_enfrotgrp_t       erg;      /* Pointer to enforced rotation group data */
    int                   ref_firstindex, ref_lastindex;
    gmx_mtop_atomlookup_t alook = NULL;
    real                  mass, totalmass;
    real                  start = 0.0;
    double                t_start;


    /* Do we have a flexible axis? */
    bFlex = ISFLEX(rotg);
    /* Do we use a global set of coordinates? */
    bColl = ISCOLL(rotg);

    erg = rotg->enfrotgrp;

    /* Allocate space for collective coordinates if needed */
    if (bColl)
    {
        snew(erg->xc, rotg->nat);
        snew(erg->xc_shifts, rotg->nat);
        snew(erg->xc_eshifts, rotg->nat);
        snew(erg->xc_old, rotg->nat);

        if (rotg->eFittype == erotgFitNORM)
        {
            snew(erg->xc_ref_length, rotg->nat); /* in case fit type NORM is chosen */
            snew(erg->xc_norm, rotg->nat);
        }
    }
    else
    {
        snew(erg->xr_loc, rotg->nat);
        snew(erg->x_loc_pbc, rotg->nat);
    }

    snew(erg->f_rot_loc, rotg->nat);
    snew(erg->xc_ref_ind, rotg->nat);

    /* Make space for the calculation of the potential at other angles (used
     * for fitting only) */
    if (erotgFitPOT == rotg->eFittype)
    {
        snew(erg->PotAngleFit, 1);
        snew(erg->PotAngleFit->degangle, rotg->PotAngle_nstep);
        snew(erg->PotAngleFit->V, rotg->PotAngle_nstep);
        snew(erg->PotAngleFit->rotmat, rotg->PotAngle_nstep);

        /* Get the set of angles around the reference angle */
        start = -0.5 * (rotg->PotAngle_nstep - 1)*rotg->PotAngle_step;
        for (i = 0; i < rotg->PotAngle_nstep; i++)
        {
            erg->PotAngleFit->degangle[i] = start + i*rotg->PotAngle_step;
        }
    }
    else
    {
        erg->PotAngleFit = NULL;
    }

    /* xc_ref_ind needs to be set to identity in the serial case */
    if (!PAR(cr))
    {
        for (i = 0; i < rotg->nat; i++)
        {
            erg->xc_ref_ind[i] = i;
        }
    }

    /* Copy the masses so that the center can be determined. For all types of
     * enforced rotation, we store the masses in the erg->mc array. */
    if (rotg->bMassW)
    {
        alook = gmx_mtop_atomlookup_init(mtop);
    }
    snew(erg->mc, rotg->nat);
    if (bFlex)
    {
        snew(erg->mc_sorted, rotg->nat);
    }
    if (!bColl)
    {
        snew(erg->m_loc, rotg->nat);
    }
    totalmass = 0.0;
    for (i = 0; i < rotg->nat; i++)
    {
        if (rotg->bMassW)
        {
            gmx_mtop_atomnr_to_atom(alook, rotg->ind[i], &atom);
            mass = atom->m;
        }
        else
        {
            /* Unweighted: every atom counts the same */
            mass = 1.0;
        }
        erg->mc[i] = mass;
        totalmass += mass;
    }
    erg->invmass = 1.0/totalmass;

    if (rotg->bMassW)
    {
        gmx_mtop_atomlookup_destroy(alook);
    }

    /* Set xc_ref_center for any rotation potential */
    if ((rotg->eType == erotgISO) || (rotg->eType == erotgPM) || (rotg->eType == erotgRM) || (rotg->eType == erotgRM2))
    {
        /* Set the pivot point for the fixed, stationary-axis potentials. This
         * won't change during the simulation */
        copy_rvec(rotg->pivot, erg->xc_ref_center);
        copy_rvec(rotg->pivot, erg->xc_center    );
    }
    else
    {
        /* Center of the reference positions */
        get_center(rotg->x_ref, erg->mc, rotg->nat, erg->xc_ref_center);

        /* Center of the actual positions */
        if (MASTER(cr))
        {
            snew(xdum, rotg->nat);
            for (i = 0; i < rotg->nat; i++)
            {
                ii = rotg->ind[i];
                copy_rvec(x[ii], xdum[i]);
            }
            get_center(xdum, erg->mc, rotg->nat, erg->xc_center);
            sfree(xdum);
        }
#ifdef GMX_MPI
        if (PAR(cr))
        {
            gmx_bcast(sizeof(erg->xc_center), erg->xc_center, cr);
        }
#endif
    }

    if (bColl)
    {
        /* Save the original (whole) set of positions in xc_old such that at later
         * steps the rotation group can always be made whole again. If the simulation is
         * restarted, we compute the starting reference positions (given the time)
         * and assume that the correct PBC image of each position is the one nearest
         * to the current reference */
        if (MASTER(cr))
        {
            /* Calculate the rotation matrix for this angle: */
            t_start       = ir->init_t + ir->init_step*ir->delta_t;
            erg->degangle = rotg->rate * t_start;
            calc_rotmat(rotg->vec, erg->degangle, erg->rotmat);

            for (i = 0; i < rotg->nat; i++)
            {
                ii = rotg->ind[i];

                /* Subtract pivot, rotate, and add pivot again. This will yield the
                 * reference position for time t */
                rvec_sub(rotg->x_ref[i], erg->xc_ref_center, coord);
                mvmul(erg->rotmat, coord, xref);
                rvec_inc(xref, erg->xc_ref_center);

                copy_correct_pbc_image(x[ii], erg->xc_old[i], xref, box, 3);
            }
        }
#ifdef GMX_MPI
        if (PAR(cr))
        {
            gmx_bcast(rotg->nat*sizeof(erg->xc_old[0]), erg->xc_old, cr);
        }
#endif
    }

    if ( (rotg->eType != erotgFLEX) && (rotg->eType != erotgFLEX2) )
    {
        /* Put the reference positions into origin: */
        for (i = 0; i < rotg->nat; i++)
        {
            rvec_dec(rotg->x_ref[i], erg->xc_ref_center);
        }
    }

    /* Enforced rotation with flexible axis */
    if (bFlex)
    {
        /* Calculate maximum beta value from minimum gaussian (performance opt.) */
        erg->max_beta = calc_beta_max(rotg->min_gaussian, rotg->slab_dist);

        /* Determine the smallest and largest coordinate with respect to the rotation vector */
        get_firstlast_atom_ref(rotg, &ref_firstindex, &ref_lastindex);

        /* From the extreme positions of the reference group, determine the first
         * and last slab of the reference. */
        get_firstlast_slab_ref(rotg, erg->mc, ref_firstindex, ref_lastindex);

        /* Allocate memory for the slabs */
        allocate_slabs(rotg, fplog, g, bVerbose);

        /* Flexible rotation: determine the reference centers for the rest of the simulation */
        erg->slab_first = erg->slab_first_ref;
        erg->slab_last  = erg->slab_last_ref;
        get_slab_centers(rotg, rotg->x_ref, erg->mc, g, -1, out_slabs, bOutputCenters, TRUE);

        /* Length of each x_rotref vector from center (needed if fit routine NORM is chosen): */
        if (rotg->eFittype == erotgFitNORM)
        {
            for (i = 0; i < rotg->nat; i++)
            {
                rvec_sub(rotg->x_ref[i], erg->xc_ref_center, coord);
                erg->xc_ref_length[i] = norm(coord);
            }
        }
    }
}


/* Build the local (this-rank) index lists for each rotation group after a
 * domain decomposition repartitioning */
extern void dd_make_local_rotation_groups(gmx_domdec_t *dd, t_rot *rot)
{
    gmx_ga2la_t     ga2la;
    int             g;
    t_rotgrp       *rotg;
    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */

    ga2la = dd->ga2la;

    for (g = 0; g < rot->ngrp; g++)
    {
        rotg = &rot->grp[g];
        erg  = rotg->enfrotgrp;


        dd_make_local_group_indices(ga2la, rotg->nat, rotg->ind,
                                    &erg->nat_loc, &erg->ind_loc, &erg->nalloc_loc, erg->xc_ref_ind);
    }
}


/* Calculate the size of the MPI buffer needed in reduce_output() */
static int calc_mpi_bufsize(t_rot *rot)
{
    int             g;
    int             count_group, count_total;
    t_rotgrp       *rotg;
    gmx_enfrotgrp_t erg;      /* Pointer to enforced rotation group data */


    count_total = 0;
    for (g = 0; g < rot->ngrp; g++)
    {
        rotg = &rot->grp[g];
        erg  = rotg->enfrotgrp;

        /* Count the items that are transferred for this group: */
        count_group = 4; /* V, torque, angle, weight */

        /* Add the maximum number of slabs for flexible groups */
        if (ISFLEX(rotg))
        {
            count_group += erg->slab_last_ref - erg->slab_first_ref + 1;
        }

        /* Add space for the potentials at different angles: */
        if (erotgFitPOT == rotg->eFittype)
        {
            count_group += rotg->PotAngle_nstep;
        }

        /* Add to the total number: */
        count_total += count_group;
    }

    return
        count_total;
}


/* Top-level initialization of the enforced rotation module: allocates the
 * module buffers, initializes every rotation group, and (on the master rank)
 * opens the output files. */
extern void init_rot(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[],
                     t_commrec *cr, rvec *x, matrix box, gmx_mtop_t *mtop, const output_env_t oenv,
                     gmx_bool bVerbose, unsigned long Flags)
{
    t_rot          *rot;
    t_rotgrp       *rotg;
    int             g;
    int             nat_max = 0;  /* Size of biggest rotation group */
    gmx_enfrot_t    er;           /* Pointer to the enforced rotation buffer variables */
    gmx_enfrotgrp_t erg;          /* Pointer to enforced rotation group data */
    rvec           *x_pbc = NULL; /* Space for the pbc-correct atom positions */


    if (MASTER(cr) && bVerbose)
    {
        fprintf(stdout, "%s Initializing ...\n", RotStr);
    }

    rot = ir->rot;
    snew(rot->enfrot, 1);
    er        = rot->enfrot;
    er->Flags = Flags;

    /* When appending, skip first output to avoid duplicate entries in the data files */
    if (er->Flags & MD_APPENDFILES)
    {
        er->bOut = FALSE;
    }
    else
    {
        er->bOut = TRUE;
    }

    if (MASTER(cr) && er->bOut)
    {
        please_cite(fplog, "Kutzner2011");
    }

    /* Output every step for reruns */
    if (er->Flags & MD_RERUN)
    {
        if (NULL != fplog)
        {
            fprintf(fplog, "%s rerun - will write rotation output every available step.\n", RotStr);
        }
        rot->nstrout = 1;
        rot->nstsout = 1;
    }

    er->out_slabs = NULL;
    if (MASTER(cr) && HaveFlexibleGroups(rot) )
    {
        er->out_slabs = open_slab_out(opt2fn("-rs", nfile, fnm), rot);
    }

    if (MASTER(cr))
    {
        /* Remove pbc, make molecule whole.
         * When ir->bContinuation=TRUE this has already been done, but ok. */
        snew(x_pbc, mtop->natoms);
        m_rveccopy(mtop->natoms, x, x_pbc);
        do_pbc_first_mtop(NULL, ir->ePBC, box, mtop, x_pbc);
        /* All molecules will be whole now, but not necessarily in the home box.
         * Additionally, if a rotation group consists of more than one molecule
         * (e.g. two strands of DNA), each one of them can end up in a different
         * periodic box. This is taken care of in init_rot_group.  */
    }

    for (g = 0; g < rot->ngrp; g++)
    {
        rotg = &rot->grp[g];

        if (NULL != fplog)
        {
            fprintf(fplog, "%s group %d type '%s'\n", RotStr, g, erotg_names[rotg->eType]);
        }

        if (rotg->nat > 0)
        {
            /* Allocate space for the rotation group's data: */
            snew(rotg->enfrotgrp, 1);
            erg = rotg->enfrotgrp;

            nat_max = max(nat_max, rotg->nat);

            if (PAR(cr))
            {
                erg->nat_loc    = 0;
                erg->nalloc_loc = 0;
                erg->ind_loc    = NULL;
            }
            else
            {
                erg->nat_loc = rotg->nat;
                erg->ind_loc = rotg->ind;
            }
            init_rot_group(fplog, cr, g, rotg, x_pbc, mtop, bVerbose, er->out_slabs, box, ir,
                           !(er->Flags & MD_APPENDFILES) ); /* Do not output the reference centers
                                                             * again if we are appending */
        }
    }

    /* Allocate space for enforced rotation buffer variables */
    er->bufsize = nat_max;
    snew(er->data, nat_max);
    snew(er->xbuf, nat_max);
    snew(er->mbuf, nat_max);

    /* Buffers for MPI reducing torques, angles, weights (for each group), and V */
    if (PAR(cr))
    {
        er->mpi_bufsize = calc_mpi_bufsize(rot) + 100; /* larger to catch errors */
        snew(er->mpi_inbuf, er->mpi_bufsize);
        snew(er->mpi_outbuf, er->mpi_bufsize);
    }
    else
    {
        er->mpi_bufsize = 0;
        er->mpi_inbuf   = NULL;
        er->mpi_outbuf  = NULL;
    }

    /* Only do I/O on the MASTER */
    er->out_angles = NULL;
    er->out_rot    = NULL;
    er->out_torque = NULL;
    if (MASTER(cr))
    {
        er->out_rot = open_rot_out(opt2fn("-ro", nfile, fnm), rot, oenv);

        if (rot->nstsout > 0)
        {
            if (HaveFlexibleGroups(rot) || HavePotFitGroups(rot) )
            {
                er->out_angles = open_angles_out(opt2fn("-ra", nfile, fnm), rot);
            }
            if (HaveFlexibleGroups(rot) )
            {
                er->out_torque = open_torque_out(opt2fn("-rt", nfile, fnm), rot);
            }
        }

        sfree(x_pbc);
    }
}


/* Close all enforced rotation output files (only the ones that were opened,
 * i.e. only on the master rank) */
extern void finish_rot(t_rot *rot)
{
    gmx_enfrot_t er;      /* Pointer to the enforced rotation buffer variables */


    er = rot->enfrot;
    if (er->out_rot)
    {
        gmx_fio_fclose(er->out_rot);
    }
    if (er->out_slabs)
    {
        gmx_fio_fclose(er->out_slabs);
    }
    if (er->out_angles)
    {
        gmx_fio_fclose(er->out_angles);
    }
    if (er->out_torque)
    {
        gmx_fio_fclose(er->out_torque);
    }
}


/* Rotate the local reference positions and store them in
 * erg->xr_loc[0...(nat_loc-1)]
 *
 * Note that we already subtracted u or y_c from the reference positions
 * in init_rot_group().
 */
static void rotate_local_reference(t_rotgrp *rotg)
{
    gmx_enfrotgrp_t erg;
    int             i, ii;


    erg = rotg->enfrotgrp;

    for (i = 0; i < erg->nat_loc; i++)
    {
        /* Index of this rotation group atom with respect to the whole rotation group */
        ii = erg->xc_ref_ind[i];
        /* Rotate */
        mvmul(erg->rotmat, rotg->x_ref[ii], erg->xr_loc[i]);
    }
}


/* Select the PBC representation for each local x position and store that
 * for later usage. We assume the right PBC image of an x is the one nearest to
 * its rotated reference */
static void choose_pbc_image(rvec x[], t_rotgrp *rotg, matrix box, int npbcdim)
{
    int             i, ii;
    gmx_enfrotgrp_t erg;       /* Pointer to enforced rotation group data */
    rvec            xref;


    erg = rotg->enfrotgrp;

    for (i = 0; i < erg->nat_loc; i++)
    {
        /* Index of a rotation group atom  */
        ii = erg->ind_loc[i];

        /* Get the correctly rotated reference position. The pivot was already
         * subtracted in init_rot_group() from the reference positions. Also,
         * the reference positions have already been rotated in
         * rotate_local_reference(). For the current reference position we thus
         * only need to add the pivot again.
*/ + copy_rvec(erg->xr_loc[i], xref); + rvec_inc(xref, erg->xc_ref_center); + + copy_correct_pbc_image(x[ii], erg->x_loc_pbc[i], xref, box, npbcdim); + } +} + + +extern void do_rotation( + t_commrec *cr, + t_inputrec *ir, + matrix box, + rvec x[], + real t, + gmx_int64_t step, + gmx_wallcycle_t wcycle, + gmx_bool bNS) +{ + int g, i, ii; + t_rot *rot; + t_rotgrp *rotg; + gmx_bool outstep_slab, outstep_rot; + gmx_bool bFlex, bColl; + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec transvec; + t_gmx_potfit *fit = NULL; /* For fit type 'potential' determine the fit + angle via the potential minimum */ + + /* Enforced rotation cycle counting: */ + gmx_cycles_t cycles_comp; /* Cycles for the enf. rotation computation + only, does not count communication. This + counter is used for load-balancing */ + +#ifdef TAKETIME + double t0; +#endif + + rot = ir->rot; + er = rot->enfrot; + + /* When to output in main rotation output file */ + outstep_rot = do_per_step(step, rot->nstrout) && er->bOut; + /* When to output per-slab data */ + outstep_slab = do_per_step(step, rot->nstsout) && er->bOut; + + /* Output time into rotation output file */ + if (outstep_rot && MASTER(cr)) + { + fprintf(er->out_rot, "%12.3e", t); + } + + /**************************************************************************/ + /* First do ALL the communication! */ + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + /* Do we have a flexible axis? */ + bFlex = ISFLEX(rotg); + /* Do we use a collective (global) set of coordinates? */ + bColl = ISCOLL(rotg); + + /* Calculate the rotation matrix for this angle: */ + erg->degangle = rotg->rate * t; + calc_rotmat(rotg->vec, erg->degangle, erg->rotmat); + + if (bColl) + { + /* Transfer the rotation group's positions such that every node has + * all of them. 
Every node contributes its local positions x and stores + * it in the collective erg->xc array. */ + communicate_group_positions(cr, erg->xc, erg->xc_shifts, erg->xc_eshifts, bNS, + x, rotg->nat, erg->nat_loc, erg->ind_loc, erg->xc_ref_ind, erg->xc_old, box); + } + else + { + /* Fill the local masses array; + * this array changes in DD/neighborsearching steps */ + if (bNS) + { + for (i = 0; i < erg->nat_loc; i++) + { + /* Index of local atom w.r.t. the collective rotation group */ + ii = erg->xc_ref_ind[i]; + erg->m_loc[i] = erg->mc[ii]; + } + } + + /* Calculate Omega*(y_i-y_c) for the local positions */ + rotate_local_reference(rotg); + + /* Choose the nearest PBC images of the group atoms with respect + * to the rotated reference positions */ + choose_pbc_image(x, rotg, box, 3); + + /* Get the center of the rotation group */ + if ( (rotg->eType == erotgISOPF) || (rotg->eType == erotgPMPF) ) + { + get_center_comm(cr, erg->x_loc_pbc, erg->m_loc, erg->nat_loc, rotg->nat, erg->xc_center); + } + } + + } /* End of loop over rotation groups */ + + /**************************************************************************/ + /* Done communicating, we can start to count cycles for the load balancing now ... 
*/ + cycles_comp = gmx_cycles_read(); + + +#ifdef TAKETIME + t0 = MPI_Wtime(); +#endif + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + bFlex = ISFLEX(rotg); + bColl = ISCOLL(rotg); + + if (outstep_rot && MASTER(cr)) + { + fprintf(er->out_rot, "%12.4f", erg->degangle); + } + + /* Calculate angles and rotation matrices for potential fitting: */ + if ( (outstep_rot || outstep_slab) && (erotgFitPOT == rotg->eFittype) ) + { + fit = erg->PotAngleFit; + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + calc_rotmat(rotg->vec, erg->degangle + fit->degangle[i], fit->rotmat[i]); + + /* Clear value from last step */ + erg->PotAngleFit->V[i] = 0.0; + } + } + + /* Clear values from last time step */ + erg->V = 0.0; + erg->torque_v = 0.0; + erg->angle_v = 0.0; + erg->weight_v = 0.0; + + switch (rotg->eType) + { + case erotgISO: + case erotgISOPF: + case erotgPM: + case erotgPMPF: + do_fixed(rotg, outstep_rot, outstep_slab); + break; + case erotgRM: + do_radial_motion(rotg, outstep_rot, outstep_slab); + break; + case erotgRMPF: + do_radial_motion_pf(rotg, x, box, outstep_rot, outstep_slab); + break; + case erotgRM2: + case erotgRM2PF: + do_radial_motion2(rotg, x, box, outstep_rot, outstep_slab); + break; + case erotgFLEXT: + case erotgFLEX2T: + /* Subtract the center of the rotation group from the collective positions array + * Also store the center in erg->xc_center since it needs to be subtracted + * in the low level routines from the local coordinates as well */ + get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center); + svmul(-1.0, erg->xc_center, transvec); + translate_x(erg->xc, rotg->nat, transvec); + do_flexible(MASTER(cr), er, rotg, g, x, box, t, outstep_rot, outstep_slab); + break; + case erotgFLEX: + case erotgFLEX2: + /* Do NOT subtract the center of mass in the low level routines! 
*/ + clear_rvec(erg->xc_center); + do_flexible(MASTER(cr), er, rotg, g, x, box, t, outstep_rot, outstep_slab); + break; + default: + gmx_fatal(FARGS, "No such rotation potential."); + break; + } + } + +#ifdef TAKETIME + if (MASTER(cr)) + { + fprintf(stderr, "%s calculation (step %d) took %g seconds.\n", RotStr, step, MPI_Wtime()-t0); + } +#endif + + /* Stop the enforced rotation cycle counter and add the computation-only + * cycles to the force cycles for load balancing */ + cycles_comp = gmx_cycles_read() - cycles_comp; + + if (DOMAINDECOMP(cr) && wcycle) + { + dd_cycles_add(cr->dd, cycles_comp, ddCyclF); + } +}