src/gromacs/tables/quadraticsplinetable.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2019,2020,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \libinternal
  37  * \defgroup module_tables  Classes for table interpolation
  38  * \ingroup group_utilitymodules
  39  *
  40  * \brief Table interpolation from analytical or numerical input
  41  *
  42  * This module provides quadratic spline interpolation tables used
  43  * both for the nonbonded kernels and listed interactions.
  44  *
  45  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  46  */
  47
  48
  49 /*! \libinternal \file
  50  * \brief
  51  * Declares classes for quadratic spline table
  52  *
  53  * \inlibraryapi
  54  * \author Erik Lindahl <erik.lindahl@gmail.com>
  55  * \ingroup module_tables
  56  */
  57 #ifndef GMX_TABLES_QUADRATICSPLINETABLE_H
  58 #define GMX_TABLES_QUADRATICSPLINETABLE_H
  59
  60 #include <functional>
  61 #include <initializer_list>
  62 #include <memory>
  63 #include <vector>
  64
  65 #include "gromacs/simd/simd.h"
  66 #include "gromacs/tables/tableinput.h"
  67 #include "gromacs/utility/alignedallocator.h"
  68 #include "gromacs/utility/classhelpers.h"
  69 #include "gromacs/utility/exceptions.h"
  70 #include "gromacs/utility/gmxassert.h"
  71 #include "gromacs/utility/real.h"
  72
  73 namespace gmx
  74 {
  75
  76 /*! \libinternal \brief Quadratic spline interpolation table.
  77  *
  78  * This class interpolates a function specified either as an analytical
  79  * expression or from user-provided table data.
  80  *
  81  * At initialization, you provide the reference function of vectors
  82  * as a list of tuples that contain a brief name, the function, and
  83  * derivative for each function to tabulate. To create a table with
  84  * two functions this initializer list can for instance look like
  85  *
  86  *     { {"LJ6", lj6Func, lj6Der}, {"LJ12", lj12Func, lj12Der} }
  87  *
  88  * The names are only used so exceptions during initialization can
  89  * be traced to a specific table.
  90  *
  91  * When interpolating, there are methods to interpolate either 1, 2, or 3
  92  * functions in one go. By default these interpolation routines will
  93  * operate on tables with the same number of functions as specified in
  94  * the interpolation method (debug builds check that this is consistent with
  95  * the table). However, it is also possible to use optional template
  96  * parameters that specify the total number of functions in a table, and
  97  * what function index to interpolate. For instance, to interpolate the
  98  * derivative of the second function (i.e., index 1) in a
  99  * multi-function-table with three functions in total, you can write
 100  *
 101  *     table.evaluateDerivative<3,1>(x,&der);
 102  *
 103  * Here too, debug builds will check that the template parameters are
 104  * consistent with the table.
 105  *
 106  * The table data is internally adjusted to guarantee that the interpolated
 107  * derivative is the true derivative of the interpolated potential, which is
 108  * important to avoid systematic errors for the common case when the derivative
 109  * is concave/convex in the entire interval.
 110  * We do this by expressing the difference in the function value
 111  * at a small offset h relative to a reference value in position 0 with a forward
 112  * Taylor series expanded around 0, and then doing the opposite of expressing
 113  * difference in the function at position 0 relative to a reference value in
 114  * position h when using a backward Taylor expansion:
 115  *
 116  * \f{eqnarray*}{
 117  *  \Delta V & = & hV'(0) + \frac{1}{2} h^2 V''(0) + \frac{1}{6} h^3 V'''(0) + O(h^4) \\
 118  *  \Delta V & = & hV'(h) - \frac{1}{2} h^2 V''(h) + \frac{1}{6} h^3 V'''(h) + O(h^4)
 119  * \f}
 120  *
 121  * Summing the equations leads to
 122  *
 123  * \f[
 124  *  2 \Delta V = h(V'(0) + V'(h)) + \frac{1}{2} h^2 (V''(0)-V''(h)) +
 125  * \frac{1}{6}h^3(V'''(0)+V'''(h)) + O(h^4) \f]
 126  *
 127  * To make the second term symmetric too, we can replace it with the average of
 128  * the Taylor expansion at 0 and h (i.e., using the third derivative). This gives
 129  *
 130  * \f[
 131  *  2 \Delta V = h(V'(0) + V'(h)) - \frac{1}{12} h^3 (V'''(0)+V'''(h)) + O(h^4)
 132  * \f]
 133  *
 134  * Thus, if we replace the derivative in the internal quadratic table data with
 135  *
 136  * \f[
 137  *  V' - \frac{1}{12}h^2 V'''
 138  * \f]
 139  *
 140  * we will cancel the h^3 term in the error. This will make the integral of the
 141  * forces match the potential much better (The h^4 term actually disappears, so
 142  * when summing over 1/h points the remaining error will be O(h^4).
 143  *
 144  * While it is possible to create tables only from function values
 145  * (i.e., no derivatives), it is recommended to provide derivatives for higher
 146  * accuracy and to avoid issues with numerical differentiation. Note that the
 147  * table input should be smooth, i.e. it should not contain noise e.g. from an
 148  * (iterative) Boltzmann inversion procedure - you have been warned.
 149  *
 150  * \note This class is responsible for fundamental interpolation of any function,
 151  *       which might or might not correspond to a potential. For this reason
 152  *       both input and output derivatives are proper function derivatives, and
 153  *       we do not swap any signs to get forces directly from the table.
 154  *
 155  * \note There will be a small additional accuracy loss from the internal
 156  *       operation where we calculate the epsilon offset from the nearest table
 157  *       point, since the integer part we subtract can get large in those cases.
 158  *       The absolute such error both in the function and derivative value will
 159  *       be roughly f''*x*GMX_REAL_EPS, where x is the argument and f'' the
 160  *       second derivative.
 161  *       While this is technically possible to solve with extended precision
 162  *       arithmetics, that would introduce extra instructions in some highly
 163  *       performance-sensitive code parts. For typical GROMACS interaction
 164  *       functions the derivatives will decay faster than the potential, which
 165  *       means it will never play any role. For other functions it will only
 166  *       cause a small increase in the relative error for arguments where the
 167  *       magnitude of the function or derivative is very small.
 168  *       Since we typically sum several results in GROMACS, this should never
 169  *       show up in any real cases, and for this reason we choose not to do
 170  *       the extended precision arithmetics.
 171  *
 172  * \note These routines are not suitable for table ranges starting far away
 173  *       from zero, since we allocate memory and calculate indices starting from
 174  *       range zero for efficiency reasons.
 175  */
 176 class QuadraticSplineTable
 177 {
 178 private:
 179     /*! \brief Change that function value falls inside range when debugging
 180      *
 181      *  \tparam T   Lookup argument floating-point type, typically SimdReal or real.
 182      *  \param  r   Lookup argument to test
 183      *
 184      *  \throws Debug builds will throw gmx::RangeError for values that are
 185      *          larger than the upper limit of the range, or smaller than 0.
 186      *          We allow the table to be called with arguments between 0 and
 187      *          the lower limit of the range, since this might in theory occur
 188      *          once-in-a-blue-moon with some algorithms.
 189      */
 190     template<typename T>
 191     void rangeCheck(T gmx_unused r) const
 192     {
 193 #ifndef NDEBUG
 194         // Check that all values fall in range when debugging
 195         if (anyTrue(r < T(0.0) || T(range_.second) <= r))
 196         {
 197             GMX_THROW(RangeError("Interpolation input value falls outside table definition range"));
 198         }
 199 #endif
 200     }
 201
 202 public:
 203     /*! \brief Default tolerance for tables is 10*GMX_FLOAT_EPS
 204      *
 205      *  \note Even for double precision builds we set the tolerance to
 206      *        one order of magnitude above the single precision epsilon.
 207      */
 208     static const real defaultTolerance;
 209
 210     /*! \brief Initialize table data from function
 211      *
 212      * \param analyticalInputList Initializer list with one or more functions to tabulate,
 213      *                            specified as pairs containing analytical
 214      *                            functions and their derivatives. The function will also
 215      *                            be called for values smaller than the lower limit of the
 216      *                            range, but we avoid calling it for 0.0 if that value
 217      *                            is not included in the range.
 218      * \param range               Range over which the function will be tabulated.
 219      *                            Constructor will throw gmx::APIError for negative values.
 220      *                            Due to the way the numerical derivative evaluation depends
 221      *                            on machine precision internally, this range must be
 222      *                            at least 0.001, or the constructor throws gmx::APIError.
 223      * \param tolerance           Requested accuracy of the table. This will be used to
 224      *                            calculate the required internal spacing. If this cannot
 225      *                            be achieved (for instance because the table would require
 226      *                            too much memory) the constructor will throw gmx::ToleranceError.
 227      *
 228      * \note The functions are always defined in double precision to avoid
 229      *       losing accuracy when constructing tables.
 230      *
 231      * \note Since we fill the table for values below range.first, you can achieve
 232      *       a smaller table by using a smaller range where the tolerance has to be
 233      *       met, and accept that a few function calls below range.first do not
 234      *       quite reach the tolerance.
 235      *
 236      * \warning For efficiency reasons (since this code is used in some inner
 237      *       (kernels), we always allocate memory and calculate table indices
 238      *       for the complete interval [0,range.second], although the data will
 239      *       not be valid outside the definition range to avoid calling the
 240      *       function there. This means you should \a not use this class
 241      *       to tabulate functions for small ranges very far away from zero,
 242      *       since you would both waste a huge amount of memory and incur
 243      *       truncation errors when calculating the index.
 244      *
 245      * \throws gmx::ToleranceError if the requested tolerance cannot be achieved,
 246      *         and gmx::APIError for other incorrect input.
 247      */
 248     QuadraticSplineTable(std::initializer_list<AnalyticalSplineTableInput> analyticalInputList,
 249                          const std::pair<real, real>&                      range,
 250                          real tolerance = defaultTolerance);
 251
 252     /*! \brief Initialize table data from tabulated values and derivatives
 253      *
 254      * \param numericalInputList  Initializer list with one or more functions to tabulate,
 255      *                            specified as pairs containing containing vectors for the
 256      *                            function values and their derivatives. Data points are
 257      *                            separated by the spacing parameter, starting from 0.
 258      *                            Values below the lower limit of the range will be used to
 259      *                            attempt defining the table, but we avoid using index 0
 260      *                            unless 0.0 is included in the range. Some extra points beyond
 261      *                            range.second are required to re-interpolate values, so add
 262      *                            some margin. The constructor will throw gmx::APIError if the
 263      *                            input vectors are too short to cover the requested range
 264      *                            (and they must always be at least five points).
 265      * \param range               Range over which the function will be tabulated.
 266      *                            Constructor will throw gmx::APIError for negative values,
 267      *                            or if the value/derivative vector does not cover the
 268      *                            range.
 269      * \param tolerance           Requested accuracy of the table in the range. This will be
 270      *                            used to calculate the required internal spacing and possibly
 271      *                            re-interpolate. The constructor will throw
 272      *                            gmx::ToleranceError if the input spacing is too coarse
 273      *                            to achieve this accuracy.
 274      *
 275      * \note The input data vectors are always double precision to avoid
 276      *       losing accuracy when constructing tables.
 277      *
 278      * \note Since we fill the table for values below range.first, you can achieve
 279      *       a smaller table by using a smaller range where the tolerance has to be
 280      *       met, and accept that a few function calls below range.first do not
 281      *       quite reach the tolerance.
 282      *
 283      * \warning For efficiency reasons (since this code is used in some inner
 284      *       (kernels), we always allocate memory and calculate table indices
 285      *       for the complete interval [0,range.second], although the data will
 286      *       not be valid outside the definition range to avoid calling the
 287      *       function there. This means you should \a not use this class
 288      *       to tabulate functions for small ranges very far away from zero,
 289      *       since you would both waste a huge amount of memory and incur
 290      *       truncation errors when calculating the index.
 291      */
 292     QuadraticSplineTable(std::initializer_list<NumericalSplineTableInput> numericalInputList,
 293                          const std::pair<real, real>&                     range,
 294                          real tolerance = defaultTolerance);
 295
 296
 297     /************************************************************
 298      *           Evaluation methods for single functions        *
 299      ************************************************************/
 300
 301     /*! \brief Evaluate both function and derivative, single table function
 302      *
 303      *  This is a templated method where the template can be either real or SimdReal.
 304      *
 305      *  \tparam     numFuncInTable  Number of separate functions in table, default is 1
 306      *  \tparam     funcIndex       Index of function to evaluate in table, default is 0
 307      *  \tparam     T               Type (SimdReal or real) of lookup and result
 308      *  \param      r               Points for which to evaluate function and derivative
 309      *  \param[out] functionValue   Function value
 310      *  \param[out] derivativeValue Function derivative
 311      *
 312      *  For debug builds we assert that the input values fall in the range
 313      *  specified when constructing the table.
 314      */
 315     template<int numFuncInTable = 1, int funcIndex = 0, typename T>
 316     void evaluateFunctionAndDerivative(T r, T* functionValue, T* derivativeValue) const
 317     {
 318         rangeCheck(r);
 319         GMX_ASSERT(numFuncInTable == numFuncInTable_,
 320                    "Evaluation method not matching number of functions in table");
 321         GMX_ASSERT(funcIndex < numFuncInTable,
 322                    "Function index not in range of the number of tables");
 323
 324         T    rTable   = r * T(tableScale_);
 325         auto tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 326         T    eps      = rTable - trunc(rTable);
 327         T    t0;
 328         T    t1;
 329         T    t2;
 330         T t3 gmx_unused;
 331
 332         // Load Derivative, Delta, Function, and Zero values for each table point.
 333         // The 4 refers to these four values - not any SIMD width.
 334         gatherLoadBySimdIntTranspose<4 * numFuncInTable>(
 335                 ddfzMultiTableData_.data() + 4 * funcIndex, tabIndex, &t0, &t1, &t2, &t3);
 336
 337         t1               = t0 + eps * t1;
 338         *functionValue   = fma(eps * T(halfSpacing_), t0 + t1, t2);
 339         *derivativeValue = t1;
 340     }
 341
 342     /*! \brief Evaluate function value only, single table function
 343      *
 344      *  This is a templated method where the template can be either real or SimdReal.
 345      *
 346      *  \tparam     numFuncInTable  Number of separate functions in table, default is 1
 347      *  \tparam     funcIndex       Index of function to evaluate in table, default is 0
 348      *  \tparam     T               Type (SimdReal or real) of lookup and result
 349      *  \param      r               Points for which to evaluate function value
 350      *  \param[out] functionValue   Function value
 351      *
 352      *  For debug builds we assert that the input values fall in the range
 353      *  specified when constructing the table.
 354      */
 355     template<int numFuncInTable = 1, int funcIndex = 0, typename T>
 356     void evaluateFunction(T r, T* functionValue) const
 357     {
 358         T der gmx_unused;
 359
 360         evaluateFunctionAndDerivative<numFuncInTable, funcIndex>(r, functionValue, &der);
 361     }
 362
 363     /*! \brief Evaluate function derivative only, single table function
 364      *
 365      *  This is a templated method where the template can be either real or SimdReal.
 366      *
 367      *  \tparam     numFuncInTable  Number of separate functions in table, default is 1
 368      *  \tparam     funcIndex       Index of function to evaluate in table, default is 0
 369      *  \tparam     T               Type (SimdReal or real) of lookup and result
 370      *  \param      r               Points for which to evaluate function derivative
 371      *  \param[out] derivativeValue Function derivative
 372      *
 373      *  For debug builds we assert that the input values fall in the range
 374      *  specified when constructing the table.
 375      */
 376     template<int numFuncInTable = 1, int funcIndex = 0, typename T>
 377     void evaluateDerivative(T r, T* derivativeValue) const
 378     {
 379         rangeCheck(r);
 380         GMX_ASSERT(numFuncInTable == numFuncInTable_,
 381                    "Evaluation method not matching number of functions in table");
 382         GMX_ASSERT(funcIndex < numFuncInTable,
 383                    "Function index not in range of the number of tables");
 384
 385         T    rTable   = r * T(tableScale_);
 386         auto tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 387         T    eps      = rTable - trunc(rTable);
 388         T    t0;
 389         T    t1;
 390         T t2 gmx_unused;
 391
 392         if (numFuncInTable == 1)
 393         {
 394             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 395                     derivativeMultiTableData_.data() + funcIndex, tabIndex, &t0, &t1); // works for scalar T too
 396         }
 397         else
 398         {
 399             // This is not ideal, but we need a version of gatherLoadUBySimdIntTranspose that
 400             // only loads a single value from memory to implement it better (will be written)
 401             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 402                     derivativeMultiTableData_.data() + funcIndex, tabIndex, &t0, &t2); // works for scalar T too
 403             gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex,
 404                                                           tabIndex + T(1),
 405                                                           &t1,
 406                                                           &t2); // works for scalar T too
 407         }
 408
 409         // (1-eps)*t0 + eps*t1
 410         *derivativeValue = fma(t1 - t0, eps, t0);
 411     }
 412
 413     /************************************************************
 414      *             Evaluation methods for two functions         *
 415      ************************************************************/
 416
 417     /*! \brief Evaluate both function and derivative, two table functions
 418      *
 419      *  This is a templated method where the template can be either real or SimdReal.
 420      *
 421      *  \tparam     numFuncInTable  Number of separate functions in table, default is 2
 422      *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 423      *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 424      *  \tparam     T                Type (SimdReal or real) of lookup and result
 425      *  \param      r                Points for which to evaluate function and derivative
 426      *  \param[out] functionValue1   Interpolated value for first function
 427      *  \param[out] derivativeValue1 Interpolated derivative for first function
 428      *  \param[out] functionValue2   Interpolated value for second function
 429      *  \param[out] derivativeValue2 Interpolated derivative for second function
 430      *
 431      *  For debug builds we assert that the input values fall in the range
 432      *  specified when constructing the table.
 433      */
 434     template<int numFuncInTable = 2, int funcIndex0 = 0, int funcIndex1 = 1, typename T>
 435     void evaluateFunctionAndDerivative(T r, T* functionValue1, T* derivativeValue1, T* functionValue2, T* derivativeValue2) const
 436     {
 437         rangeCheck(r);
 438         GMX_ASSERT(numFuncInTable == numFuncInTable_,
 439                    "Evaluation method not matching number of functions in table");
 440         GMX_ASSERT(funcIndex0 < numFuncInTable && funcIndex1 < numFuncInTable,
 441                    "Function index not in range of the number of tables");
 442
 443         T    rTable   = r * T(tableScale_);
 444         auto tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 445         T    eps      = rTable - trunc(rTable);
 446         T    t0;
 447         T    t1;
 448         T    t2;
 449         T t3 gmx_unused;
 450
 451         // Load Derivative, Delta, Function, and Zero values for each table point.
 452         // The 4 refers to these four values - not any SIMD width.
 453         gatherLoadBySimdIntTranspose<4 * numFuncInTable>(
 454                 ddfzMultiTableData_.data() + 4 * funcIndex0, tabIndex, &t0, &t1, &t2, &t3);
 455         t1                = t0 + eps * t1;
 456         *functionValue1   = fma(eps * T(halfSpacing_), t0 + t1, t2);
 457         *derivativeValue1 = t1;
 458
 459         gatherLoadBySimdIntTranspose<4 * numFuncInTable>(
 460                 ddfzMultiTableData_.data() + 4 * funcIndex1, tabIndex, &t0, &t1, &t2, &t3);
 461         t1                = t0 + eps * t1;
 462         *functionValue2   = fma(eps * T(halfSpacing_), t0 + t1, t2);
 463         *derivativeValue2 = t1;
 464     }
 465
 466     /*! \brief Evaluate function value only, two table functions
 467      *
 468      *  This is a templated method where the template can be either real or SimdReal.
 469      *
 470      *  \tparam     numFuncInTable   Number of separate functions in table, default is 2
 471      *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 472      *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 473      *  \tparam     T                Type (SimdReal or real) of lookup and result
 474      *  \param      r                Points for which to evaluate function value
 475      *  \param[out] functionValue1   Interpolated value for first function
 476      *  \param[out] functionValue2   Interpolated value for second function
 477      *
 478      *  For debug builds we assert that the input values fall in the range
 479      *  specified when constructing the table.
 480      */
 481     template<int numFuncInTable = 2, int funcIndex0 = 0, int funcIndex1 = 1, typename T>
 482     void evaluateFunction(T r, T* functionValue1, T* functionValue2) const
 483     {
 484         T der1 gmx_unused;
 485         T der2 gmx_unused;
 486
 487         evaluateFunctionAndDerivative<numFuncInTable, funcIndex0, funcIndex1>(
 488                 r, functionValue1, &der1, functionValue2, &der2);
 489     }
 490
 491     /*! \brief Evaluate function derivative only, two table functions
 492      *
 493      *  This is a templated method where the template can be either real or SimdReal.
 494      *
 495      *  \tparam     numFuncInTable   Number of separate functions in table, default is 2
 496      *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 497      *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 498      *  \tparam     T                Type (SimdReal or real) of lookup and result
 499      *  \param      r                Points for which to evaluate function derivative
 500      *  \param[out] derivativeValue1 Interpolated derivative for first function
 501      *  \param[out] derivativeValue2 Interpolated derivative for second function
 502      *
 503      *  For debug builds we assert that the input values fall in the range
 504      *  specified when constructing the table.
 505      */
 506     template<int numFuncInTable = 2, int funcIndex0 = 0, int funcIndex1 = 1, typename T>
 507     void evaluateDerivative(T r, T* derivativeValue1, T* derivativeValue2) const
 508     {
 509         rangeCheck(r);
 510         GMX_ASSERT(numFuncInTable == numFuncInTable_,
 511                    "Evaluation method not matching number of functions in table");
 512         GMX_ASSERT(funcIndex0 < numFuncInTable && funcIndex1 < numFuncInTable,
 513                    "Function index not in range of the number of tables");
 514
 515         T    rTable   = r * T(tableScale_);
 516         auto tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 517         T    eps      = rTable - trunc(rTable);
 518
 519         if (numFuncInTable == 2 && funcIndex0 == 0 && funcIndex1 == 1)
 520         {
 521             T t0A, t0B, t1A, t1B;
 522             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 523                     derivativeMultiTableData_.data(), tabIndex, &t0A, &t0B); // works for scalar T too
 524             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 525                     derivativeMultiTableData_.data() + 2, tabIndex, &t1A, &t1B); // works for scalar T too
 526             *derivativeValue1 = fma(t1A - t0A, eps, t0A);
 527             *derivativeValue2 = fma(t1B - t0B, eps, t0B);
 528         }
 529         else
 530         {
 531             T t0, t1, t2;
 532             // This is not ideal, but we need a version of gatherLoadUBySimdIntTranspose that
 533             // only loads a single value from memory to implement it better (will be written)
 534             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 535                     derivativeMultiTableData_.data() + funcIndex0, tabIndex, &t0, &t2); // works for scalar T too
 536             gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex0,
 537                                                           tabIndex + T(1),
 538                                                           &t1,
 539                                                           &t2); // works for scalar T too
 540             *derivativeValue1 = fma(t1 - t0, eps, t0);
 541
 542             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 543                     derivativeMultiTableData_.data() + funcIndex1, tabIndex, &t0, &t2); // works for scalar T too
 544             gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex1,
 545                                                           tabIndex + T(1),
 546                                                           &t1,
 547                                                           &t2); // works for scalar T too
 548             *derivativeValue2 = fma(t1 - t0, eps, t0);
 549         }
 550     }
 551
 552     /************************************************************
 553      *            Evaluation methods for three functions        *
 554      ************************************************************/
 555
 556
 557     /*! \brief Evaluate both function and derivative, three table functions
 558      *
 559      *  This is a templated method where the template can be either real or SimdReal.
 560      *
 561      *  \tparam     numFuncInTable   Number of separate functions in table, default is 3
 562      *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 563      *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 564      *  \tparam     funcIndex2       Index of 3rd function to evaluate in table, default is 2
 565      *  \tparam     T                Type (SimdReal or real) of lookup and result
 566      *  \param      r                Points for which to evaluate function and derivative
 567      *  \param[out] functionValue1   Interpolated value for first function
 568      *  \param[out] derivativeValue1 Interpolated derivative for first function
 569      *  \param[out] functionValue2   Interpolated value for second function
 570      *  \param[out] derivativeValue2 Interpolated derivative for second function
 571      *  \param[out] functionValue3   Interpolated value for third function
 572      *  \param[out] derivativeValue3 Interpolated derivative for third function
 573      *
 574      *  For debug builds we assert that the input values fall in the range
 575      *  specified when constructing the table.
 576      */
 577     template<int numFuncInTable = 3, int funcIndex0 = 0, int funcIndex1 = 1, int funcIndex2 = 2, typename T>
 578     void evaluateFunctionAndDerivative(T  r,
 579                                        T* functionValue1,
 580                                        T* derivativeValue1,
 581                                        T* functionValue2,
 582                                        T* derivativeValue2,
 583                                        T* functionValue3,
 584                                        T* derivativeValue3) const
 585     {
 586         rangeCheck(r);
 587         GMX_ASSERT(numFuncInTable == numFuncInTable,
 588                    "Evaluation method not matching number of functions in table");
 589         GMX_ASSERT(funcIndex0 < numFuncInTable && funcIndex1 < numFuncInTable && funcIndex2 < numFuncInTable,
 590                    "Function index not in range of the number of tables");
 591
 592         T    rTable   = r * T(tableScale_);
 593         auto tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 594         T    eps      = rTable - trunc(rTable);
 595         T    t0;
 596         T    t1;
 597         T    t2;
 598         T t3 gmx_unused;
 599
 600         gatherLoadBySimdIntTranspose<4 * numFuncInTable>(
 601                 ddfzMultiTableData_.data() + 4 * funcIndex0, tabIndex, &t0, &t1, &t2, &t3);
 602         t1                = t0 + eps * t1;
 603         *functionValue1   = fma(eps * T(halfSpacing_), t0 + t1, t2);
 604         *derivativeValue1 = t1;
 605
 606         gatherLoadBySimdIntTranspose<4 * numFuncInTable>(
 607                 ddfzMultiTableData_.data() + 4 * funcIndex1, tabIndex, &t0, &t1, &t2, &t3);
 608         t1                = t0 + eps * t1;
 609         *functionValue2   = fma(eps * T(halfSpacing_), t0 + t1, t2);
 610         *derivativeValue2 = t1;
 611
 612         gatherLoadBySimdIntTranspose<4 * numFuncInTable>(
 613                 ddfzMultiTableData_.data() + 4 * funcIndex2, tabIndex, &t0, &t1, &t2, &t3);
 614         t1                = t0 + eps * t1;
 615         *functionValue3   = fma(eps * T(halfSpacing_), t0 + t1, t2);
 616         *derivativeValue3 = t1;
 617     }
 618
 619     /*! \brief Evaluate function value only, three table functions
 620      *
 621      *  This is a templated method where the template can be either real or SimdReal.
 622      *
 623      *  \tparam     numFuncInTable   Number of separate functions in table, default is 3
 624      *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 625      *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 626      *  \tparam     funcIndex2       Index of 3rd function to evaluate in table, default is 2
 627      *  \tparam     T                Type (SimdReal or real) of lookup and result
 628      *  \param      r                Points for which to evaluate function value
 629      *  \param[out] functionValue1   Interpolated value for first function
 630      *  \param[out] functionValue2   Interpolated value for second function
 631      *  \param[out] functionValue3   Interpolated value for third function
 632      *
 633      *  For debug builds we assert that the input values fall in the range
 634      *  specified when constructing the table.
 635      */
 636     template<int numFuncInTable = 3, int funcIndex0 = 0, int funcIndex1 = 1, int funcIndex2 = 2, typename T>
 637     void evaluateFunction(T r, T* functionValue1, T* functionValue2, T* functionValue3) const
 638     {
 639         T der1 gmx_unused;
 640         T der2 gmx_unused;
 641         T der3 gmx_unused;
 642
 643         evaluateFunctionAndDerivative<numFuncInTable, funcIndex0, funcIndex1, funcIndex2>(
 644                 r, functionValue1, &der1, functionValue2, &der2, functionValue3, &der3);
 645     }
 646
 647     /*! \brief Evaluate function derivative only, three table functions
 648      *
 649      *  This is a templated method where the template can be either real or SimdReal.
 650      *
 651      *  \tparam     numFuncInTable   Number of separate functions in table, default is 3
 652      *  \tparam     funcIndex0       Index of 1st function to evaluate in table, default is 0
 653      *  \tparam     funcIndex1       Index of 2nd function to evaluate in table, default is 1
 654      *  \tparam     funcIndex2       Index of 3rd function to evaluate in table, default is 2
 655      *  \tparam     T                Type (SimdReal or real) of lookup and result
 656      *  \param      r                Points for which to evaluate function derivative
 657      *  \param[out] derivativeValue1 Interpolated derivative for first function
 658      *  \param[out] derivativeValue2 Interpolated derivative for second function
 659      *  \param[out] derivativeValue3 Interpolated derivative for third function
 660      *
 661      *  For debug builds we assert that the input values fall in the range
 662      *  specified when constructing the table.
 663      */
 664     template<int numFuncInTable = 3, int funcIndex0 = 0, int funcIndex1 = 1, int funcIndex2 = 2, typename T>
 665     void evaluateDerivative(T r, T* derivativeValue1, T* derivativeValue2, T* derivativeValue3) const
 666     {
 667         rangeCheck(r);
 668         GMX_ASSERT(numFuncInTable == numFuncInTable_,
 669                    "Evaluation method not matching number of functions in table");
 670         GMX_ASSERT(funcIndex0 < numFuncInTable && funcIndex1 < numFuncInTable && funcIndex2 < numFuncInTable,
 671                    "Function index not in range of the number of tables");
 672
 673         T    rTable   = r * T(tableScale_);
 674         auto tabIndex = cvttR2I(rTable); // type is either std::int32_t or SimdInt32
 675         T    eps      = rTable - trunc(rTable);
 676
 677         if (numFuncInTable == 3 && funcIndex0 == 0 && funcIndex1 == 1 && funcIndex2 == 2)
 678         {
 679             T t0A, t0B, t0C, t1A, t1B, t1C;
 680             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 681                     derivativeMultiTableData_.data(), tabIndex, &t0A, &t0B);
 682             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 683                     derivativeMultiTableData_.data() + 2, tabIndex, &t0C, &t1A);
 684             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 685                     derivativeMultiTableData_.data() + 4, tabIndex, &t1B, &t1C);
 686             *derivativeValue1 = fma(t1A - t0A, eps, t0A);
 687             *derivativeValue2 = fma(t1B - t0B, eps, t0B);
 688             *derivativeValue3 = fma(t1C - t0C, eps, t0C);
 689         }
 690         else
 691         {
 692             T t0, t1, t2;
 693             // This is not ideal, but we need a version of gatherLoadUBySimdIntTranspose that
 694             // only loads a single value from memory to implement it better (will be written)
 695             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 696                     derivativeMultiTableData_.data() + funcIndex0, tabIndex, &t0, &t2); // works for scalar T too
 697             gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex0,
 698                                                           tabIndex + T(1),
 699                                                           &t1,
 700                                                           &t2); // works for scalar T too
 701             *derivativeValue1 = fma(t1 - t0, eps, t0);
 702
 703             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 704                     derivativeMultiTableData_.data() + funcIndex1, tabIndex, &t0, &t2); // works for scalar T too
 705             gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex1,
 706                                                           tabIndex + T(1),
 707                                                           &t1,
 708                                                           &t2); // works for scalar T too
 709             *derivativeValue2 = fma(t1 - t0, eps, t0);
 710
 711             gatherLoadUBySimdIntTranspose<numFuncInTable>(
 712                     derivativeMultiTableData_.data() + funcIndex2, tabIndex, &t0, &t2); // works for scalar T too
 713             gatherLoadUBySimdIntTranspose<numFuncInTable>(derivativeMultiTableData_.data() + funcIndex2,
 714                                                           tabIndex + T(1),
 715                                                           &t1,
 716                                                           &t2); // works for scalar T too
 717             *derivativeValue3 = fma(t1 - t0, eps, t0);
 718         }
 719     }
 720
 721     /*! \brief Return the table spacing (distance between points)
 722      *
 723      *  You should never have to use this for normal code, but due to the
 724      *  way tables are constructed internally we need this in the unit tests
 725      *  to check relative tolerances over each interval.
 726      *
 727      *  \return table spacing.
 728      */
 729     real tableSpacing() const { return 1.0 / tableScale_; }
 730
 731 private:
 732     std::size_t           numFuncInTable_; //!< Number of separate tabluated functions
 733     std::pair<real, real> range_;          //!< Range for which table evaluation is allowed
 734     real                  tableScale_;     //!< Table scale (inverse of spacing between points)
 735     real                  halfSpacing_;    //!< 0.5*spacing (used for DDFZ table data)
 736
 737     //!< Derivative values only, with the third-derivative subtraction described in the class documentation.
 738     std::vector<real> derivativeMultiTableData_;
 739
 740     /*! \brief Combined derivative, difference to next derivative, value, and zero.
 741      *
 742      *  For table point i, this vector contains the four values:
 743      *  - derivative[i]
 744      *  - (derivative[i+1]-derivative[i])
 745      *  - value[i]
 746      *  - 0.0
 747      *
 748      *  For the derivative terms we have subtracted the third-derivative term described
 749      *  in the main class documentation.
 750      *
 751      *  This is typically more efficient than the individual tables, in particular
 752      *  when using SIMD. The result should be identical outside the class, so this
 753      *  is merely an internal implementation optimization. However, to allow
 754      *  aligned SIMD loads we need to use an aligned allocator for this container.
 755      *  We occasionally abbreviate this data as DDFZ.
 756      */
 757     std::vector<real, AlignedAllocator<real>> ddfzMultiTableData_;
 758
 759     // There should never be any reason to copy the table since it is read-only
 760     GMX_DISALLOW_COPY_AND_ASSIGN(QuadraticSplineTable);
 761 };
 762
 763
 764 } // namespace gmx
 765
 766 #endif // GMX_TABLES_QUADRATICSPLINETABLE_H