/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2009,2010,2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2009,2010,2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
-#include <algorithm>
+#include "fft5d.h"
+#include "config.h"
+
+#include <assert.h>
+#include <float.h>
+#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <algorithm>
+
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/gmxmpi.h"
+#include "gromacs/utility/smalloc.h"
+
#ifdef NOGMX
#define GMX_PARALLEL_ENV_INITIALIZED 1
#else
#endif
#endif
-#include "gromacs/utility/gmxmpi.h"
-
#ifdef GMX_OPENMP
/* TODO: Do we still need this? Are we still planning to use fftw + OpenMP? */
#define FFT5D_THREADS
/* #define FFT5D_FFTW_THREADS (now set by cmake) */
#endif
-#include "fft5d.h"
-#include <float.h>
-#include <math.h>
-#include <assert.h>
-#include "smalloc.h"
-
#ifndef __FLT_EPSILON__
#define __FLT_EPSILON__ FLT_EPSILON
#define __DBL_EPSILON__ DBL_EPSILON
FILE* debug = 0;
#endif
-#include "gmx_fatal.h"
-
-
#ifdef GMX_FFT_FFTW3
-#include "thread_mpi/mutex.h"
#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/mutex.h"
/* none of the fftw3 calls, except execute(), are thread-safe, so
we need to serialize them with this mutex. */
-static tMPI::mutex big_fftw_mutex;
+static gmx::Mutex big_fftw_mutex;
#define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
#define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
#endif /* GMX_FFT_FFTW3 */
+#ifdef GMX_MPI
/* largest factor smaller than sqrt */
/* Return the largest factor of z that is <= sqrt(z).
 *
 * Precondition: z >= 1 (z == 0 would divide by zero below).
 * Starts at floor(sqrt(z)) and counts down until a divisor is found;
 * the loop always terminates because 1 divides every z.
 * Used below to pick a near-square 2D rank grid when the rank count
 * is not evenly divisible by the requested P0.
 */
static int lfactor(int z)
{
    int i = (int)sqrt((double)z);
    /* Count down to the nearest divisor; reaches i == 1 at the latest. */
    while (z % i != 0)
    {
        i--;
    }
    return i;
}
+#endif
/* Return the largest proper divisor of z (largest factor < z),
 * with the special case l2factor(1) == 1.
 *
 * Precondition: z >= 1.
 * The largest proper divisor of z > 1 is at most z/2, so we count
 * down from there; the loop terminates because 1 divides every z.
 */
static int l2factor(int z)
{
    int i;
    if (z == 1)
    {
        i = 1;
    }
    else
    {
        i = z/2;
        while (z % i != 0)
        {
            i--;
        }
    }
    return i;
}
/* largest prime factor: WARNING: slow recursion, only use for small numbers */
if (debug)
{
- fprintf(debug, "FFT5D: Using %dx%d processor grid, rank %d,%d\n",
+ fprintf(debug, "FFT5D: Using %dx%d rank grid, rank %d,%d\n",
P[0], P[1], prank[0], prank[1]);
}
fprintf(debug, "Running on %d threads\n", nthreads);
}
-#ifdef GMX_FFT_FFTW3 /*if not FFTW - then we don't do a 3d plan but instead use only 1D plans */
+#ifdef GMX_FFT_FFTW3
+ /* Don't add more stuff here! We have already had at least one bug because we are reimplementing
+ * the low-level FFT interface instead of using the GROMACS FFT module. If we need more
+ * generic functionality it is far better to extend the interface so we can use it for
+ * all FFT libraries instead of writing FFTW-specific code here.
+ */
+
+ /*if not FFTW - then we don't do a 3d plan but instead use only 1D plans */
/* It is possible to use the 3d plan with OMP threads - but in that case it is not allowed to be called from
 * within a parallel region. For now deactivated. If it should be supported it has to be made sure
 * that the execution of the 3d plan is in a master/serial block (since it contains its own parallel region)
int inNG = NG, outMG = MG, outKG = KG;
FFTW_LOCK;
- if (!(flags&FFT5D_NOMEASURE))
- {
- fftwflags |= FFTW_MEASURE;
- }
+
+ fftwflags |= (flags & FFT5D_NOMEASURE) ? FFTW_ESTIMATE : FFTW_MEASURE;
+
if (flags&FFT5D_REALCOMPLEX)
{
if (!(flags&FFT5D_BACKWARD)) /*input pointer is not complex*/
s = 0;
/*lin: x,y,z*/
- if (plan->flags&FFT5D_DEBUG && thread == 0)
+ if ((plan->flags&FFT5D_DEBUG) && thread == 0)
{
print_localdata(lin, "%d %d: copy in lin\n", s, plan);
}
time_fft += MPI_Wtime()-time;
}
#endif
- if (plan->flags&FFT5D_DEBUG && thread == 0)
+ if ((plan->flags&FFT5D_DEBUG) && thread == 0)
{
print_localdata(lout, "%d %d: FFT %d\n", s, plan);
}
time_local += MPI_Wtime()-time;
}
#endif
- if (plan->flags&FFT5D_DEBUG && thread == 0)
+ if ((plan->flags&FFT5D_DEBUG) && thread == 0)
{
print_localdata(lin, "%d %d: tranposed %d\n", s+1, plan);
}
}
#endif
- if (plan->flags&FFT5D_DEBUG && thread == 0)
+ if ((plan->flags&FFT5D_DEBUG) && thread == 0)
{
print_localdata(lout, "%d %d: FFT %d\n", s, plan);
}
{
if (prank == 0)
{
- printf("FFT5D: WARNING: Number of processors %d not evenly dividable by %d\n", size, P0);
+ printf("FFT5D: WARNING: Number of ranks %d not evenly divisible by %d\n", size, P0);
}
P0 = lfactor(size);
}