#include "gromacs/utility/real.h"
#include "gromacs/utility/smalloc.h"
-static int gmx_dnint(double x)
-{
- return gmx::roundToInt(x);
-}
typedef struct gmx_stats
{
return static_cast<gmx_stats_t>(stats);
}
-StatisticsStatus gmx_stats_get_npoints(gmx_stats_t gstats, int* N)
-{
- gmx_stats* stats = static_cast<gmx_stats*>(gstats);
-
- *N = stats->np;
-
- return StatisticsStatus::Ok;
-}
-
void gmx_stats_free(gmx_stats_t gstats)
{
gmx_stats* stats = static_cast<gmx_stats*>(gstats);
return StatisticsStatus::Ok;
}
-StatisticsStatus gmx_stats_get_point(gmx_stats_t gstats, real* x, real* y, real* dx, real* dy, real level)
-{
- gmx_stats* stats = gstats;
- StatisticsStatus ok;
- int outlier;
- real rmsd, r;
-
- if ((ok = gmx_stats_get_rmsd(gstats, &rmsd)) != StatisticsStatus::Ok)
- {
- return ok;
- }
- outlier = 0;
- while ((outlier == 0) && (stats->np_c < stats->np))
- {
- r = std::abs(stats->x[stats->np_c] - stats->y[stats->np_c]);
- outlier = static_cast<int>(r > rmsd * level);
- if (outlier)
- {
- if (nullptr != x)
- {
- *x = stats->x[stats->np_c];
- }
- if (nullptr != y)
- {
- *y = stats->y[stats->np_c];
- }
- if (nullptr != dx)
- {
- *dx = stats->dx[stats->np_c];
- }
- if (nullptr != dy)
- {
- *dy = stats->dy[stats->np_c];
- }
- }
- stats->np_c++;
-
- if (outlier)
- {
- return StatisticsStatus::Ok;
- }
- }
-
- stats->np_c = 0;
-
- return StatisticsStatus::NoPoints;
-}
-
-StatisticsStatus gmx_stats_add_points(gmx_stats_t gstats, int n, real* x, real* y, real* dx, real* dy)
-{
- for (int i = 0; (i < n); i++)
- {
- StatisticsStatus ok;
- if ((ok = gmx_stats_add_point(
- gstats, x[i], y[i], (nullptr != dx) ? dx[i] : 0, (nullptr != dy) ? dy[i] : 0))
- != StatisticsStatus::Ok)
- {
- return ok;
- }
- }
- return StatisticsStatus::Ok;
-}
-
static StatisticsStatus gmx_stats_compute(gmx_stats* stats, int weight)
{
double yy, yx, xx, sx, sy, dy, chi2, chi2aa, d2;
return StatisticsStatus::Ok;
}
-StatisticsStatus gmx_stats_get_a(gmx_stats_t gstats, int weight, real* a, real* da, real* chi2, real* Rfit)
-{
- gmx_stats* stats = gstats;
- StatisticsStatus ok;
-
- if ((ok = gmx_stats_compute(stats, weight)) != StatisticsStatus::Ok)
- {
- return ok;
- }
- if (nullptr != a)
- {
- *a = stats->aa;
- }
- if (nullptr != da)
- {
- *da = stats->sigma_aa;
- }
- if (nullptr != chi2)
- {
- *chi2 = stats->chi2aa;
- }
- if (nullptr != Rfit)
- {
- *Rfit = stats->Rfitaa;
- }
-
- return StatisticsStatus::Ok;
-}
-
StatisticsStatus gmx_stats_get_average(gmx_stats_t gstats, real* aver)
{
gmx_stats* stats = gstats;
return StatisticsStatus::Ok;
}
-StatisticsStatus gmx_stats_get_sigma(gmx_stats_t gstats, real* sigma)
-{
- gmx_stats* stats = gstats;
- StatisticsStatus ok;
-
- if ((ok = gmx_stats_compute(stats, elsqWEIGHT_NONE)) != StatisticsStatus::Ok)
- {
- return ok;
- }
-
- *sigma = stats->sigma_aver;
-
- return StatisticsStatus::Ok;
-}
-
-StatisticsStatus gmx_stats_get_error(gmx_stats_t gstats, real* error)
-{
- gmx_stats* stats = gstats;
- StatisticsStatus ok;
-
- if ((ok = gmx_stats_compute(stats, elsqWEIGHT_NONE)) != StatisticsStatus::Ok)
- {
- return ok;
- }
-
- *error = stats->error;
-
- return StatisticsStatus::Ok;
-}
-
-StatisticsStatus gmx_stats_get_corr_coeff(gmx_stats_t gstats, real* R)
-{
- gmx_stats* stats = gstats;
- StatisticsStatus ok;
-
- if ((ok = gmx_stats_compute(stats, elsqWEIGHT_NONE)) != StatisticsStatus::Ok)
- {
- return ok;
- }
-
- *R = stats->Rdata;
-
- return StatisticsStatus::Ok;
-}
-
-StatisticsStatus gmx_stats_get_rmsd(gmx_stats_t gstats, real* rmsd)
-{
- gmx_stats* stats = gstats;
- StatisticsStatus ok;
-
- if ((ok = gmx_stats_compute(stats, elsqWEIGHT_NONE)) != StatisticsStatus::Ok)
- {
- return ok;
- }
-
- *rmsd = stats->rmsd;
-
- return StatisticsStatus::Ok;
-}
-
-StatisticsStatus gmx_stats_dump_xy(gmx_stats_t gstats, FILE* fp)
-{
- gmx_stats* stats = gstats;
-
- for (int i = 0; (i < stats->np); i++)
- {
- fprintf(fp, "%12g %12g %12g %12g\n", stats->x[i], stats->y[i], stats->dx[i], stats->dy[i]);
- }
-
- return StatisticsStatus::Ok;
-}
-
-StatisticsStatus gmx_stats_remove_outliers(gmx_stats_t gstats, double level)
-{
- gmx_stats* stats = gstats;
- int iter = 1, done = 0;
- StatisticsStatus ok;
- real rmsd, r;
-
- while ((stats->np >= 10) && !done)
- {
- if ((ok = gmx_stats_get_rmsd(gstats, &rmsd)) != StatisticsStatus::Ok)
- {
- return ok;
- }
- done = 1;
- for (int i = 0; (i < stats->np);)
- {
- r = std::abs(stats->x[i] - stats->y[i]);
- if (r > level * rmsd)
- {
- fprintf(stderr,
- "Removing outlier, iter = %d, rmsd = %g, x = %g, y = %g\n",
- iter,
- rmsd,
- stats->x[i],
- stats->y[i]);
- if (i < stats->np - 1)
- {
- stats->x[i] = stats->x[stats->np - 1];
- stats->y[i] = stats->y[stats->np - 1];
- stats->dx[i] = stats->dx[stats->np - 1];
- stats->dy[i] = stats->dy[stats->np - 1];
- }
- stats->np--;
- done = 0;
- }
- else
- {
- i++;
- }
- }
- iter++;
- }
-
- return StatisticsStatus::Ok;
-}
-
-StatisticsStatus gmx_stats_make_histogram(gmx_stats_t gstats,
- real binwidth,
- int* nb,
- int ehisto,
- int normalized,
- real** x,
- real** y)
-{
- gmx_stats* stats = gstats;
- int index = 0, nbins = *nb, *nindex;
- double minx, maxx, maxy, miny, delta, dd, minh;
-
- if (((binwidth <= 0) && (nbins <= 0)) || ((binwidth > 0) && (nbins > 0)))
- {
- return StatisticsStatus::InvalidInput;
- }
- if (stats->np <= 2)
- {
- return StatisticsStatus::NoPoints;
- }
- minx = maxx = stats->x[0];
- miny = maxy = stats->y[0];
- for (int i = 1; (i < stats->np); i++)
- {
- miny = (stats->y[i] < miny) ? stats->y[i] : miny;
- maxy = (stats->y[i] > maxy) ? stats->y[i] : maxy;
- minx = (stats->x[i] < minx) ? stats->x[i] : minx;
- maxx = (stats->x[i] > maxx) ? stats->x[i] : maxx;
- }
- if (ehisto == ehistoX)
- {
- delta = maxx - minx;
- minh = minx;
- }
- else if (ehisto == ehistoY)
- {
- delta = maxy - miny;
- minh = miny;
- }
- else
- {
- return StatisticsStatus::InvalidInput;
- }
-
- if (binwidth == 0)
- {
- binwidth = (delta) / nbins;
- }
- else
- {
- nbins = gmx_dnint((delta) / binwidth + 0.5);
- }
- snew(*x, nbins);
- snew(nindex, nbins);
- for (int i = 0; (i < nbins); i++)
- {
- (*x)[i] = minh + binwidth * (i + 0.5);
- }
- if (normalized == 0)
- {
- dd = 1;
- }
- else
- {
- dd = 1.0 / (binwidth * stats->np);
- }
-
- snew(*y, nbins);
- for (int i = 0; (i < stats->np); i++)
- {
- if (ehisto == ehistoY)
- {
- index = static_cast<int>((stats->y[i] - miny) / binwidth);
- }
- else if (ehisto == ehistoX)
- {
- index = static_cast<int>((stats->x[i] - minx) / binwidth);
- }
- if (index < 0)
- {
- index = 0;
- }
- if (index > nbins - 1)
- {
- index = nbins - 1;
- }
- (*y)[index] += dd;
- nindex[index]++;
- }
- if (*nb == 0)
- {
- *nb = nbins;
- }
- for (int i = 0; (i < nbins); i++)
- {
- if (nindex[i] > 0)
- {
- (*y)[i] /= nindex[i];
- }
- }
-
- sfree(nindex);
-
- return StatisticsStatus::Ok;
-}
-
static const char* enumValueToString(StatisticsStatus enumValue)
{
constexpr gmx::EnumerationArray<StatisticsStatus, const char*> statisticsStatusNames = {
GMX_ASSERT(estats == StatisticsStatus::Ok, enumValueToString(estats));
}
-/* Old convenience functions, should be merged with the core
- statistics above. */
-StatisticsStatus lsq_y_ax(int n, real x[], real y[], real* a)
-{
- gmx_stats_t lsq = gmx_stats_init();
- real da, chi2, Rfit;
-
- gmx_stats_add_points(lsq, n, x, y, nullptr, nullptr);
- StatisticsStatus ok = gmx_stats_get_a(lsq, elsqWEIGHT_NONE, a, &da, &chi2, &Rfit);
- gmx_stats_free(lsq);
-
- return ok;
-}
-
static StatisticsStatus
low_lsq_y_ax_b(int n, const real* xr, const double* xd, real yr[], real* a, real* b, real* r, real* chi2)
{
elsqWEIGHT_NR
};
-//! Enum determining which coordinate to histogram
-enum
-{
- ehistoX,
- ehistoY,
- ehistoNR
-};
-
/*! \brief
* Initiate a data structure
* \return the data structure
*/
void gmx_stats_free(gmx_stats_t stats);
-/*! \brief
- * Remove outliers from a straight line, where level in units of
- * sigma. Level needs to be larger than one obviously.
- * \param[in] stats The data structure
- * \param[in] level The sigma level
- * \return error code
- */
-StatisticsStatus gmx_stats_remove_outliers(gmx_stats_t stats, double level);
-
/*! \brief
* Add a point to the data set
* \param[in] stats The data structure
*/
StatisticsStatus gmx_stats_add_point(gmx_stats_t stats, double x, double y, double dx, double dy);
-/*! \brief
- * Add a series of datapoints at once. The arrays dx and dy may
- * be NULL in that case zero uncertainties will be assumed.
- *
- * \param[in] stats The data structure
- * \param[in] n Number of points
- * \param[in] x The array of x values
- * \param[in] y The array of y values
- * \param[in] dx The error in the x value
- * \param[in] dy The error in the y value
- * \return error code
- */
-StatisticsStatus gmx_stats_add_points(gmx_stats_t stats, int n, real* x, real* y, real* dx, real* dy);
-
-/*! \brief
- * Delivers data points from the statistics.
- *
- * Should be used in a while loop. Variables for either
- * pointer may be NULL, in which case the routine can be used as an
- * expensive point counter.
- * Return the data points one by one. Return estatsOK while there are
- * more points, and returns estatsNOPOINTS when the last point has
- * been returned.
- * If level > 0 then the outliers outside level*sigma are reported
- * only.
- * \param[in] stats The data structure
- * \param[out] x The array of x values
- * \param[out] y The array of y values
- * \param[out] dx The error in the x value
- * \param[out] dy The error in the y value
- * \param[in] level sigma level (see above)
- * \return error code
- */
-StatisticsStatus gmx_stats_get_point(gmx_stats_t stats, real* x, real* y, real* dx, real* dy, real level);
-
/*! \brief
* Fit the data to y = ax + b, possibly weighted, if uncertainties
* have been input. da and db may be NULL.
StatisticsStatus
gmx_stats_get_ab(gmx_stats_t stats, int weight, real* a, real* b, real* da, real* db, real* chi2, real* Rfit);
-/*! \brief
- * Fit the data to y = ax, possibly weighted, if uncertainties have
- * have been input. da and db may be NULL.
- * \param[in] stats The data structure
- * \param[in] weight type of weighting
- * \param[out] a slope
- * \param[out] da sigma in a
- * \param[out] chi2 normalized quality of fit
- * \param[out] Rfit correlation coefficient
- * \return error code
- */
-StatisticsStatus gmx_stats_get_a(gmx_stats_t stats, int weight, real* a, real* da, real* chi2, real* Rfit);
-
-/*! \brief
- * Get the correlation coefficient.
- * \param[in] stats The data structure
- * \param[out] R the correlation coefficient between the data (x and y) as input to the structure.
- * \return error code
- */
-StatisticsStatus gmx_stats_get_corr_coeff(gmx_stats_t stats, real* R);
-
-/*! \brief
- * Get the root mean square deviation.
- * \param[in] stats The data structure
- * \param[out] rmsd the root mean square deviation between x and y values.
- * \return error code
- */
-StatisticsStatus gmx_stats_get_rmsd(gmx_stats_t stats, real* rmsd);
-
-/*! \brief
- * Get the number of points.
- * \param[in] stats The data structure
- * \param[out] N number of data points
- * \return error code
- */
-StatisticsStatus gmx_stats_get_npoints(gmx_stats_t stats, int* N);
-
/*! \brief
* Computes and returns the average value.
* \param[in] stats The data structure
*/
StatisticsStatus gmx_stats_get_average(gmx_stats_t stats, real* aver);
-/*! \brief
- * Computes and returns the standard deviation.
- * \param[in] stats The data structure
- * \param[out] sigma Standard deviation
- * \return error code
- */
-StatisticsStatus gmx_stats_get_sigma(gmx_stats_t stats, real* sigma);
-
-/*! \brief
- * Computes and returns the standard error.
- * \param[in] stats The data structure
- * \param[out] error Standard error
- * \return error code
- */
-StatisticsStatus gmx_stats_get_error(gmx_stats_t stats, real* error);
-
/*! \brief
* Pointers may be null, in which case no assignment will be done.
* \param[in] stats The data structure
*/
StatisticsStatus gmx_stats_get_ase(gmx_stats_t stats, real* aver, real* sigma, real* error);
-/*! \brief
- * Dump the x, y, dx, dy data to a text file
- * \param[in] stats The data structure
- * \param[in] fp File pointer
- * \return error code
- */
-StatisticsStatus gmx_stats_dump_xy(gmx_stats_t stats, FILE* fp);
-
-/*! \brief
- * Make a histogram of the data present.
- *
- * Uses either binwidth to
- * determine the number of bins, or nbins to determine the binwidth,
- * therefore one of these should be zero, but not the other. If *nbins = 0
- * the number of bins will be returned in this variable. ehisto should be one of
- * ehistoX or ehistoY. If
- * normalized not equal to zero, the integral of the histogram will be
- * normalized to one. The output is in two arrays, *x and *y, to which
- * you should pass a pointer. Memory for the arrays will be allocated
- * as needed. Function returns one of the estats codes.
- * \param[in] stats The data structure
- * \param[in] binwidth For the histogram
- * \param[in] nbins Number of bins
- * \param[in] ehisto Type (see enum above)
- * \param[in] normalized see above
- * \param[out] x see above
- * \param[out] y see above
- * \return error code
- */
-StatisticsStatus gmx_stats_make_histogram(gmx_stats_t stats,
- real binwidth,
- int* nbins,
- int ehisto,
- int normalized,
- real** x,
- real** y);
-
/*! \brief Assert that statistics are OK
*
* \param[in] estats error code
* Some statistics utilities for convenience: useful when a complete data
* set is available already from another source, e.g. an xvg file.
****************************************************/
-/*! \brief
- * Fit a straight line y=ax thru the n data points x, y, return the
- * slope in *a.
- * \param[in] n number of points
- * \param[in] x data points x
- * \param[in] y data point y
- * \param[out] a slope
- * \return error code
- */
-StatisticsStatus lsq_y_ax(int n, real x[], real y[], real* a);
/*! \brief
* Fit a straight line y=ax+b thru the n data points x, y.