endif()
# detect GPUs in the build host machine
-if (GMX_GPU OR GMX_GPU_AUTO AND NOT GMX_GPU_DETECTION_DONE)
+if ((GMX_GPU OR GMX_GPU_AUTO) AND NOT GMX_GPU_DETECTION_DONE)
include(gmxDetectGpu)
gmx_detect_gpu()
endif()
# We need to call find_package even when we've already done the detection/setup
if(GMX_GPU OR GMX_GPU_AUTO)
- if(NOT GMX_GPU AND GMX_GPU_AUTO AND GMX_GPU_DETECTION_DONE)
+ if(NOT GMX_GPU AND NOT GMX_DETECT_GPU_AVAILABLE)
# Stay quiet when detection has occurred and found no GPU.
# Noise is acceptable when there is a GPU or the user required one.
set(FIND_CUDA_QUIETLY QUIET)
# - ON , FALSE: The user requested GPU builds, will require CUDA and will fail
# if it is not available.
# - ON , TRUE : Can't happen (GMX_GPU=ON can only be user-set at this point)
-if(GMX_GPU OR GMX_GPU_AUTO AND NOT GMX_GPU_DETECTION_DONE)
+if((GMX_GPU OR GMX_GPU_AUTO) AND NOT GMX_GPU_DETECTION_DONE)
if (EXISTS ${CUDA_TOOLKIT_ROOT_DIR})
set(CUDA_FOUND TRUE CACHE INTERNAL "Whether the CUDA toolkit was found" FORCE)
else()
# user turns GMX_GPU=OFF after a failed cmake pass, these variables will be
# left behind in the cache.
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_SDK_ROOT_DIR CUDA_VERBOSE_BUILD)
+if(NOT GMX_GPU)
+ mark_as_advanced(CUDA_TOOLKIT_ROOT_DIR)
+endif()
macro(gmx_gpu_setup)
# set up nvcc options
}
else
{
- sprintf(sbuf, "%d GPU%s %sselected to be used for this run: ",
+ sprintf(sbuf, "%d GPU%s %sselected for this run: ",
ngpu, (ngpu > 1) ? "s" : "",
gpu_info->bUserSet ? "user-" : "auto-");
for (i = 0; i < ngpu; i++)
#endif
/* inform the user about the settings */
- if (SIMMASTER(cr) && bOMP)
+ if (bOMP)
{
#ifdef GMX_THREAD_MPI
const char *mpi_str="per tMPI thread";
/* for group scheme we print PME threads info only */
if (bFullOmpSupport)
{
- fprintf(stderr, "Using %d OpenMP thread%s %s\n",
- modth.gnth,modth.gnth > 1 ? "s" : "",
- cr->nnodes > 1 ? mpi_str : "");
+ md_print_info(cr, fplog, "Using %d OpenMP thread%s %s\n",
+ modth.gnth,modth.gnth > 1 ? "s" : "",
+ cr->nnodes > 1 ? mpi_str : "");
}
if (bSepPME && modth.gnth_pme != modth.gnth)
{
- fprintf(stderr, "Using %d OpenMP thread%s %s for PME\n",
- modth.gnth_pme,modth.gnth_pme > 1 ? "s" : "",
- cr->nnodes > 1 ? mpi_str : "");
+ md_print_info(cr, fplog, "Using %d OpenMP thread%s %s for PME\n",
+ modth.gnth_pme,modth.gnth_pme > 1 ? "s" : "",
+ cr->nnodes > 1 ? mpi_str : "");
}
}
t_complex ***t;
int i,j;
- snew(t,x);
t = (t_complex ***)calloc(x,sizeof(t_complex**));
if(!t) exit(fprintf(stderr,"\nallocation error"));
t[0] = (t_complex **)calloc(x*y,sizeof(t_complex*));
/* A histidine residue exists that requires automated assignment, so
* doing the analysis of donors and acceptors is worthwhile. */
fprintf(stderr,
- "Analysing hydrogen-bonding network for automated assigment of histidine\n"
+ "Analysing hydrogen-bonding network for automated assignment of histidine\n"
" protonation.");
snew(donor,natom);
EETYPE("DispCorr", ir->eDispCorr, edispc_names);
CTYPE ("Extension of the potential lookup tables beyond the cut-off");
RTYPE ("table-extension", ir->tabext, 1.0);
- CTYPE ("Seperate tables between energy group pairs");
+ CTYPE ("Separate tables between energy group pairs");
STYPE ("energygrp-table", egptable, NULL);
CTYPE ("Spacing for the PME/PPPM FFT grid");
RTYPE ("fourierspacing", ir->fourier_spacing,0.12);
mda->deviceOptions=deviceOptions;
mda->Flags=Flags;
- fprintf(stderr, "Starting %d tMPI threads\n",hw_opt->nthreads_tmpi);
- fflush(stderr);
/* now spawn new threads that start mdrunner_start_fn(), while
the main thread returns */
ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi,
cr->nnodes==1 ? "process" : "processes"
#endif
);
+ fflush(stderr);
#endif
gmx_omp_nthreads_init(fplog, cr,
if (lossf >= DD_PERF_LOSS)
{
sprintf(buf,
- "NOTE: %.1f %% performance was lost due to load imbalance\n"
+ "NOTE: %.1f %% of the available CPU time was lost due to load imbalance\n"
" in the domain decomposition.\n",lossf*100);
if (!comm->bDynLoadBal)
{
fprintf(fplog, "%s\n", hline);
gpu_cpu_ratio = tot_gpu/tot_cpu_overlap;
- fprintf(fplog, "\n Force evaluation time GPU/CPU: %.3f ms/%.3f ms = %.3f\n",
+ fprintf(fplog, "\nForce evaluation time GPU/CPU: %.3f ms/%.3f ms = %.3f\n",
tot_gpu/gpu_t->nb_c, tot_cpu_overlap/wc->wcc[ewcFORCE].n,
gpu_cpu_ratio);
* but we currently can't check that here.
*/
md_print_warn(NULL,fplog,
- "NOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
+ "\nNOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
" performance loss. Maybe the domain decomposition limits the PME tuning.\n"
- " In that case, try setting the DD grid manually (-dd) or lowering -dds.\n");
+ " In that case, try setting the DD grid manually (-dd) or lowering -dds.");
}
else
{
* too small for increasing the cut-off for PME tuning.
*/
md_print_warn(NULL,fplog,
- "NOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
- " performance loss.\n");
+ "\nNOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
+ " performance loss.");
}
}
if (gpu_cpu_ratio > 1.2)
{
md_print_warn(NULL,fplog,
- "NOTE: The GPU has >20%% more load than the CPU. This imbalance causes\n"
- " performance loss, consider using a shorter cut-off and a finer PME grid.\n");
+ "\nNOTE: The GPU has >20%% more load than the CPU. This imbalance causes\n"
+ " performance loss, consider using a shorter cut-off and a finer PME grid.");
}
}
}
wallcycle_stop(wcycle,ewcWAIT_GPU_NB_L);
/* now clear the GPU outputs while we finish the step on the CPU */
+
+ wallcycle_start_nocount(wcycle,ewcLAUNCH_GPU_NB);
nbnxn_cuda_clear_outputs(nbv->cu_nbv, flags);
+ wallcycle_stop(wcycle,ewcLAUNCH_GPU_NB);
}
else
{
nfn = opt2fn_null("-n",NFILE,fnm);
if (( nfn == NULL ) && ( xfn == NULL))
- gmx_fatal(FARGS,"no index file and no structure file suplied");
+ gmx_fatal(FARGS,"no index file and no structure file supplied");
if ((disre_frac < 0) || (disre_frac >= 1))
gmx_fatal(FARGS,"disre_frac should be between 0 and 1");
*(top.atoms.atomname[index[i]]));
fprintf(fp,"%5d %10.5f %10.5f\n",
- bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : i+1,rmsf[i]*bfac,
+ bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i]+1,rmsf[i]*bfac,
pdb_bfac);
}
}
if (!bRes || i+1==isize ||
top.atoms.atom[index[i]].resind!=top.atoms.atom[index[i+1]].resind)
fprintf(fp,"%5d %8.4f\n",
- bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : i+1,sqrt(rmsf[i]));
+ bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i]+1,sqrt(rmsf[i]));
ffclose(fp);
}
if (!bRes || i+1==isize ||
top.atoms.atom[index[i]].resind!=top.atoms.atom[index[i+1]].resind)
fprintf(fp,"%5d %8.4f\n",
- bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : i+1,sqrt(rmsf[i]));
+ bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i]+1,sqrt(rmsf[i]));
ffclose(fp);
}