From: Mark Abraham Date: Wed, 1 Oct 2014 20:02:02 +0000 (+0200) Subject: Merge release-5-0 into master X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?p=alexxy%2Fgromacs.git;a=commitdiff_plain;h=19d3c2e5d0c401eb59010960d11a18b6ba2c54c6 Merge release-5-0 into master Conflicts: CMakeLists.txt Version numbers not bumped; fixed to use the right name for RelWithDebInfo. cmake/gmxCFlags.cmake Fixed to use the right name for RelWithDebInfo. src/gromacs/listed-forces/bonded.cpp New RB SIMD function in bonded.cpp had unused variables, now eliminated src/gromacs/mdlib/domdec.cpp Bug fixes from release-5-0 incorporated. std::max now used in code newly arrived from release-5-0. md.cpp had no conflict, but fr->nbv->bUseGPU had to be replaced by use_GPU(fr->nbv) to work in master branch. Change-Id: I65326b691745111fbdaa9435be6c92fa1acf6e7d --- 19d3c2e5d0c401eb59010960d11a18b6ba2c54c6 diff --cc CMakeLists.txt index 70185949c4,53498532af..7fbe947522 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@@ -81,10 -128,34 +81,10 @@@ if(CMAKE_CONFIGURATION_TYPES "List of configuration types" FORCE) endif() - set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE) -set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBINFO RELWITHASSERT MINSIZEREL) ++set(build_types_with_explicit_flags RELEASE DEBUG RELWITHDEBINFO RELWITHASSERT MINSIZEREL PROFILE) -enable_language(C) -enable_language(CXX) set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON) -set(CPACK_PACKAGE_NAME "gromacs") -set(CPACK_PACKAGE_VERSION ${PROJECT_VERSION}) -set(CPACK_SOURCE_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}") -set(CPACK_PACKAGE_VENDOR "gromacs.org") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Gromacs - a toolkit for high-performance molecular simulation") -set(CPACK_RESOURCE_FILE_WELCOME "${CMAKE_SOURCE_DIR}/admin/InstallWelcome.txt") -# Its GPL/LGPL, so they do not have to agree to a license for mere usage, but some installers require this... 
-set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING") -set(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/admin/InstallInfo.txt") -set(CPACK_SOURCE_IGNORE_FILES "\\\\.isreposource$;\\\\.git/;\\\\.gitignore$;\\\\.gitattributes;") -set(CPACK_PROJECT_CONFIG_FILE "${CMAKE_SOURCE_DIR}/CPackInit.cmake") -# CPack source archives include only the directories we list here. -# This variable is a list of pairs of names of source and destination -# directories. Most of these are used for content GROMACS generates as -# part of the configuration or build. -set(CPACK_SOURCE_INSTALLED_DIRECTORIES "${CMAKE_SOURCE_DIR};/;${CMAKE_BINARY_DIR}/src/programs/completion;src/programs/completion;${CMAKE_BINARY_DIR}/docs/man/man1;docs/man/man1;${CMAKE_BINARY_DIR}/docs/man/man7;docs/man/man7;${CMAKE_BINARY_DIR}/docs/old-html/final;docs/old-html/final;${CMAKE_BINARY_DIR}/docs/install-guide/final;/") -set(CPACK_PACKAGE_CONTACT "gmx-users@gromacs.org") -set(CPACK_GMX_BUILD_HELP "${GMX_BUILD_HELP}") #Works even though GMX_BUILD_HELP is defined later because it is off by default. - -#must come after all cpack settings! -include(CPack) - # Set a default valgrind suppression file. # This unfortunately needs to duplicate information from CTest to work as # expected... diff --cc cmake/gmxCFlags.cmake index 011cada36e,124107b65c..3a0e3bbe15 --- a/cmake/gmxCFlags.cmake +++ b/cmake/gmxCFlags.cmake @@@ -64,7 -64,7 +64,7 @@@ function(gmx_set_cmake_compiler_flags # be set up elsewhere and passed to this function, but it is # inconvenient in CMake to pass more than one list, and such a # list is only used here. 
- foreach(build_type RELWITHDEBUGINFO RELWITHASSERT MINSIZEREL PROFILE) - foreach(build_type RELWITHDEBINFO RELWITHASSERT MINSIZEREL) ++ foreach(build_type RELWITHDEBINFO RELWITHASSERT MINSIZEREL PROFILE) set(GMXC_${language}FLAGS_${build_type} "${GMXC_${language}FLAGS_RELEASE}") endforeach() # Copy the flags that are only used by the real Release build diff --cc src/gromacs/listed-forces/bonded.cpp index b1ded380a4,0eb8d55791..dfb846d9e7 --- a/src/gromacs/listed-forces/bonded.cpp +++ b/src/gromacs/listed-forces/bonded.cpp @@@ -2117,6 -2103,155 +2117,152 @@@ pdihs_noener_simd(int nbonds } } + /* This is mostly a copy of pdihs_noener_simd above, but with using + * the RB potential instead of a harmonic potential. + * This function can replace rbdihs() when no energy and virial are needed. + */ + static void + rbdihs_noener_simd(int nbonds, + const t_iatom forceatoms[], const t_iparams forceparams[], + const rvec x[], rvec f[], + const t_pbc *pbc, const t_graph gmx_unused *g, + real gmx_unused lambda, + const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd, + int gmx_unused *global_atom_index) + { + const int nfa1 = 5; + int i, iu, s, j; + int type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH]; - real ddphi; + real dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr; + real buf_array[(NR_RBDIHS + 4)*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf; - real *parm, *phi, *p, *q, *sf_i, *msf_l; ++ real *parm, *p, *q; + + gmx_simd_real_t phi_S; + gmx_simd_real_t ddphi_S, cosfac_S; + gmx_simd_real_t mx_S, my_S, mz_S; + gmx_simd_real_t nx_S, ny_S, nz_S; + gmx_simd_real_t nrkj_m2_S, nrkj_n2_S; + gmx_simd_real_t parm_S, c_S; + gmx_simd_real_t sin_S, cos_S; + gmx_simd_real_t sf_i_S, msf_l_S; + pbc_simd_t pbc_simd; + + gmx_simd_real_t pi_S = gmx_simd_set1_r(M_PI); + gmx_simd_real_t one_S = gmx_simd_set1_r(1.0); + + /* Ensure SIMD register alignment */ + dr = gmx_simd_align_r(dr_array); + buf = 
gmx_simd_align_r(buf_array); + + /* Extract aligned pointer for parameters and variables */ + parm = buf; + p = buf + (NR_RBDIHS + 0)*GMX_SIMD_REAL_WIDTH; + q = buf + (NR_RBDIHS + 1)*GMX_SIMD_REAL_WIDTH; - sf_i = buf + (NR_RBDIHS + 2)*GMX_SIMD_REAL_WIDTH; - msf_l = buf + (NR_RBDIHS + 3)*GMX_SIMD_REAL_WIDTH; + + set_pbc_simd(pbc, &pbc_simd); + + /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */ + for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1) + { + /* Collect atom quadruplets for GMX_SIMD_REAL_WIDTH dihedrals. + * iu indexes into forceatoms, we should not let iu go beyond nbonds. + */ + iu = i; + for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++) + { + type = forceatoms[iu]; + ai[s] = forceatoms[iu+1]; + aj[s] = forceatoms[iu+2]; + ak[s] = forceatoms[iu+3]; + al[s] = forceatoms[iu+4]; + + /* We don't need the first parameter, since that's a constant + * which only affects the energies, not the forces. + */ + for (j = 1; j < NR_RBDIHS; j++) + { + parm[j*GMX_SIMD_REAL_WIDTH + s] = + forceparams[type].rbdihs.rbcA[j]; + } + + /* At the end fill the arrays with identical entries */ + if (iu + nfa1 < nbonds) + { + iu += nfa1; + } + } + + /* Calculate GMX_SIMD_REAL_WIDTH dihedral angles at once */ + dih_angle_simd(x, ai, aj, ak, al, &pbc_simd, + dr, + &phi_S, + &mx_S, &my_S, &mz_S, + &nx_S, &ny_S, &nz_S, + &nrkj_m2_S, + &nrkj_n2_S, + p, q); + + /* Change to polymer convention */ + phi_S = gmx_simd_sub_r(phi_S, pi_S); + + gmx_simd_sincos_r(phi_S, &sin_S, &cos_S); + + ddphi_S = gmx_simd_setzero_r(); + c_S = one_S; + cosfac_S = one_S; + for (j = 1; j < NR_RBDIHS; j++) + { + parm_S = gmx_simd_load_r(parm + j*GMX_SIMD_REAL_WIDTH); + ddphi_S = gmx_simd_fmadd_r(gmx_simd_mul_r(c_S, parm_S), cosfac_S, ddphi_S); + cosfac_S = gmx_simd_mul_r(cosfac_S, cos_S); + c_S = gmx_simd_add_r(c_S, one_S); + } + + /* Note that here we do not use the minus sign which is present + * in the normal RB code. This is corrected for through (m)sf below. 
+ */ + ddphi_S = gmx_simd_mul_r(ddphi_S, sin_S); + + sf_i_S = gmx_simd_mul_r(ddphi_S, nrkj_m2_S); + msf_l_S = gmx_simd_mul_r(ddphi_S, nrkj_n2_S); + + /* After this m?_S will contain f[i] */ + mx_S = gmx_simd_mul_r(sf_i_S, mx_S); + my_S = gmx_simd_mul_r(sf_i_S, my_S); + mz_S = gmx_simd_mul_r(sf_i_S, mz_S); + + /* After this n?_S will contain -f[l] */ + nx_S = gmx_simd_mul_r(msf_l_S, nx_S); + ny_S = gmx_simd_mul_r(msf_l_S, ny_S); + nz_S = gmx_simd_mul_r(msf_l_S, nz_S); + + gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S); + gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S); + gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S); + gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S); + gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S); + gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S); + + iu = i; + s = 0; + do + { + do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s], + p[s], q[s], + dr[ XX *GMX_SIMD_REAL_WIDTH+s], + dr[ YY *GMX_SIMD_REAL_WIDTH+s], + dr[ ZZ *GMX_SIMD_REAL_WIDTH+s], + dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s], + dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s], + dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s], + f); + s++; + iu += nfa1; + } + while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds); + } + } + #endif /* GMX_SIMD_HAVE_REAL */ diff --cc src/gromacs/mdlib/domdec.cpp index 13934d5fa6,6cc6b73895..06f2602916 --- a/src/gromacs/mdlib/domdec.cpp +++ b/src/gromacs/mdlib/domdec.cpp @@@ -6739,6 -6754,13 +6748,13 @@@ gmx_domdec_t *init_domain_decomposition comm->cellsize_limit = 0; comm->bBondComm = FALSE; + /* Atoms should be able to move by up to half the list buffer size (if > 0) + * within nstlist steps. Since boundaries are allowed to displace by half + * a cell size, DD cells should be at least the size of the list buffer. 
+ */ - comm->cellsize_limit = max(comm->cellsize_limit, - ir->rlistlong - max(ir->rvdw, ir->rcoulomb)); ++ comm->cellsize_limit = std::max(comm->cellsize_limit, ++ ir->rlistlong - std::max(ir->rvdw, ir->rcoulomb)); + if (comm->bInterCGBondeds) { if (comm_distance_min > 0) @@@ -9332,9 -9374,10 +9362,9 @@@ void dd_partition_system(FIL * and every 100 partitionings, * so the extra communication cost is negligible. */ - n = std::max(100, nstglobalcomm); + const int nddp_chk_dlb = 100; - bCheckDLB = (comm->n_load_collect == 0 || - comm->n_load_have % n == n-1); + comm->n_load_have % nddp_chk_dlb == nddp_chk_dlb - 1); } else { diff --cc src/programs/mdrun/md.cpp index 9864f8789f,3d98d597c7..0504150c00 --- a/src/programs/mdrun/md.cpp +++ b/src/programs/mdrun/md.cpp @@@ -1881,6 -1909,21 +1881,21 @@@ double do_md(FILE *fplog, t_commrec *cr } dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); + if (bPMETuneRunning && - fr->nbv->bUseGPU && DOMAINDECOMP(cr) && ++ use_GPU(fr->nbv) && DOMAINDECOMP(cr) && + !(cr->duty & DUTY_PME)) + { + /* Lock DLB=auto to off (does nothing when DLB=yes/no). + * With GPUs + separate PME ranks, we don't want DLB. + * This could happen when we scan coarse grids and + * it would then never be turned off again. + * This would hurt performance at the final, optimal + * grid spacing, where DLB almost never helps. + * Also, DLB can limit the cut-off for PME tuning. + */ + dd_dlb_set_lock(cr->dd, TRUE); + } + if (bPMETuneRunning || step_rel > ir->nstlist*50) { bPMETuneTry = FALSE;