#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2015,2016,2017, by the GROMACS development team, led by
+# Copyright (c) 2015,2016,2017,2019, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
context.build_target(target='man')
context.build_target(target='completion')
context.build_target(target='install-guide')
+ context.build_target(target='checksum-files')
context.build_target(target='package_source')
--- /dev/null
+#! /usr/bin/env python
+# This file is part of the GROMACS molecular simulation package.
+#
+# Copyright (c) 2019, by the GROMACS development team, led by
+# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+# and including many others, as listed in the AUTHORS file in the
+# top-level source directory and at http://www.gromacs.org.
+#
+# GROMACS is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1
+# of the License, or (at your option) any later version.
+#
+# GROMACS is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with GROMACS; if not, see
+# http://www.gnu.org/licenses, or write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# If you want to redistribute modifications to GROMACS, please
+# consider that scientific software is very special. Version
+# control is crucial - bugs must be traceable. We will be happy to
+# consider code for inclusion in the official distribution, but
+# derived work must not be called official GROMACS. Details are found
+# in the README & COPYING files - if they are missing, get the
+# official version at http://www.gromacs.org.
+#
+# To help us fund GROMACS development, we humbly ask that you cite
+# the research papers on the package. Check out http://www.gromacs.org.
+import hashlib, hmac, os, stat, sys, re
+from re import search
+
+"""
+Calculate hash of files in build tree to allow checking against
+stored hashes in case of the tree not being in git (e.g. if the
+program is built from a release tarball).
+
+Based on example script found here:
+ https://unix.stackexchange.com/a/35847
+"""
+
def is_in_whitelist(name):
    """Return True if the file is white listed for the hash calculation.

    A file is white listed when its name ends in one of the suffixes (or
    the file name ``CMakeLists.txt``) listed below.

    Args:
        name: File name or path to check.

    Returns:
        True if ``name`` matches any white-list pattern, False otherwise.
    """
    # Raw strings keep the regular expression escapes intact, and every dot
    # is escaped so that it only matches a literal '.'.
    whitelist = (
        r"\.cpp$",
        r"\.h$",
        r"\.cuh$",
        r"\.cu$",
        r"\.clh$",
        r"CMakeLists\.txt$",
        r"\.cmake$",
        r"\.in$",
        r"\.cmakein$",
        r"\.py$",
    )
    return any(search(pattern, name) for pattern in whitelist)
+
def is_blacklisted(name):
    """Return True if the file has been explicitly blacklisted.

    Args:
        name: File name or path to check.

    Returns:
        True if any black-list pattern is found anywhere in ``name``,
        False otherwise.
    """
    blacklist = ("gmx-completion",)
    return any(search(pattern, name) for pattern in blacklist)
+
def file_hash(name):
    """Return the hash of the contents of the specified file, as a hex string.

    Reads the file in chunks of 16384 bytes and feeds every chunk to the
    hash object, so the whole file never has to be held in memory.
    The hashing algorithm used is sha256, to avoid accidental clashes when
    using a more simple algorithm such as md5.

    Args:
        name: Path of the file to hash.

    Returns:
        The sha256 digest of the file contents as a hexadecimal string.
    """
    h = hashlib.sha256()
    # The context manager closes the file even if reading raises.
    with open(name, 'rb') as f:
        # iter() with a sentinel stops at the first empty read (end of file).
        for buf in iter(lambda: f.read(16384), b''):
            h.update(buf)
    return h.hexdigest()
+
def traverse(h, path, original_path):
    """Recursively feed the white listed regular files below a path to a hash.

    Directories are descended into, visiting their entries in sorted order.
    For every regular file that is white listed and not black listed, one
    record made of the file name relative to ``original_path``, the file
    size and the hash of the file contents is added to ``h``.

    Anything that is neither a directory nor a regular file (symlinks and
    all kinds of special files) is ignored, because the file type is taken
    from ``os.lstat``, which does not follow symlinks.

    Args:
        h: Hash object that is updated in place.
        path: File or directory to process.
        original_path: Root of the traversal; file names are recorded
            relative to it.
    """
    info = os.lstat(path)
    relative_name = repr(os.path.relpath(path, original_path))

    if stat.S_ISDIR(info.st_mode):
        # sorted() makes the order in which entries are visited deterministic.
        for child in sorted(os.listdir(path)):
            traverse(h, os.path.join(path, child), original_path)
        return

    if not stat.S_ISREG(info.st_mode):
        # Silently skip symlinks and other special files.
        return

    # Only hash files that actually take part in building GROMACS.
    if not is_in_whitelist(path) or is_blacklisted(path):
        return

    record = 'reg {} {} {}\n'.format(relative_name, info.st_size, file_hash(path))
    h.update(record.encode('utf-8'))
+
def main():
    """Run the hashing script.

    Parses the command line, feeds all white listed files found below the
    given source directories into a single sha256 hash and writes the
    resulting hex digest to the output file.

    Command line options:
        -s/--source-root: directories to hash (the option itself is required).
        -o/--output-file: file the digest is written to, 'hashresult' if
            not given.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Hash all white listed files in a single directory')
    parser.add_argument('-s',
                        '--source-root',
                        help='Source tree directory, several directories can be listed after the option to get all of them hashed',
                        nargs='*',
                        required=True)
    parser.add_argument('-o',
                        '--output-file',
                        help='File to write hash to.',
                        default='hashresult')

    args = parser.parse_args()

    h = hashlib.sha256()
    for source_root in args.source_root:
        traverse(h, source_root, source_root)

    # A final marker is hashed after the last directory.
    h.update('end\n'.encode('utf-8'))

    # The context manager makes sure the digest is flushed and the file closed.
    with open(args.output_file, 'w') as outputfile:
        outputfile.write(h.hexdigest())


if __name__ == '__main__':
    main()
+
#
# This file is part of the GROMACS molecular simulation package.
#
-# Copyright (c) 2014,2015, by the GROMACS development team, led by
+# Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
set(GMX_VERSION_STRING_FULL "@GMX_VERSION_STRING_FULL@")
set(GMX_VERSION_FULL_HASH "@GMX_VERSION_FULL_HASH@")
set(GMX_VERSION_CENTRAL_BASE_HASH "@GMX_VERSION_CENTRAL_BASE_HASH@")
+set(GMX_RELEASE_SOURCE_FILE_CHECKSUM "@GMX_RELEASE_SOURCE_FILE_CHECKSUM@")
+set(GMX_CURRENT_SOURCE_FILE_CHECKSUM "@GMX_CURRENT_SOURCE_FILE_CHECKSUM@")
# the function below.
set(VERSION_INFO_CMAKEIN_FILE ${CMAKE_CURRENT_LIST_DIR}/VersionInfo.cmake.cmakein)
set(VERSION_INFO_CONFIGURE_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/gmxConfigureVersionInfo.cmake)
+# A set of directories to scan for calculating the hash of source files.
+set(SET_OF_DIRECTORIES_TO_CHECKSUM "${PROJECT_SOURCE_DIR}/src")
+list(APPEND SET_OF_DIRECTORIES_TO_CHECKSUM "${PROJECT_SOURCE_DIR}/python_packaging")
+# Try to find python for the checksumming script
+set(PythonInterp_FIND_QUIETLY ON)
+find_package(PythonInterp 3.5)
# Rules to create the VersionInfo.cmake file.
# For git info, the sequence is:
set(GMX_VERSION_STRING_FULL ${GMX_VERSION_STRING})
set(GMX_VERSION_FULL_HASH "")
set(GMX_VERSION_CENTRAL_BASE_HASH "")
+ # To notify the user during compilation and at runtime that the build source
+ # has not been modified after unpacking the source tarball, the contents are hashed
+ # to be compared to a hash computed during the release process. If the hash matches
+ # all is fine and the user gets a message in the log file indicating that.
+ # If either the release hash file is missing, or if the hash does not match
+ # a different message is printed to indicate that the source has been changed
+ # compared to the version actually released. This is not needed in case a build
+ # is done in git, as we have the information there already.
+ # This is not done if the user has explicitly set an additional custom version string with
+ # -DGMX_VERSION_STRING_OF_FORK, as this indicates that they know that a custom
+ # version of GROMACS is in use.
+ set(RELEASE_CHECKSUM_FILE "${PROJECT_SOURCE_DIR}/src/reference_checksum")
+ if(NOT GMX_VERSION_STRING_OF_FORK OR "${GMX_VERSION_STRING_OF_FORK}" STREQUAL "")
+ if(EXISTS ${RELEASE_CHECKSUM_FILE} AND PythonInterp_FOUND)
+ file(READ ${RELEASE_CHECKSUM_FILE} GMX_RELEASE_SOURCE_FILE_CHECKSUM)
+ string(STRIP ${GMX_RELEASE_SOURCE_FILE_CHECKSUM} GMX_RELEASE_SOURCE_FILE_CHECKSUM)
+ set(CHECKSUM_RESULT_FILE "${CMAKE_CURRENT_BINARY_DIR}/computed_checksum")
+ execute_process(COMMAND ${PYTHON_EXECUTABLE}
+ ${PROJECT_SOURCE_DIR}/admin/createFileHash.py
+ -s ${SET_OF_DIRECTORIES_TO_CHECKSUM}
+ -o ${CHECKSUM_RESULT_FILE}
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ OUTPUT_QUIET)
+ file(READ ${CHECKSUM_RESULT_FILE} GMX_CURRENT_SOURCE_FILE_CHECKSUM)
+ string(STRIP ${GMX_CURRENT_SOURCE_FILE_CHECKSUM} GMX_CURRENT_SOURCE_FILE_CHECKSUM)
+ if(NOT ${GMX_RELEASE_SOURCE_FILE_CHECKSUM} STREQUAL ${GMX_CURRENT_SOURCE_FILE_CHECKSUM})
+ set(GMX_VERSION_STRING_FULL "${GMX_VERSION_STRING_FULL}_MODIFIED")
+ message(STATUS "The source code for this GROMACS installation is different from the officially released version.")
+ endif()
+ elseif(PythonInterp_FOUND)
+ set(GMX_VERSION_STRING_FULL "${GMX_VERSION_STRING_FULL}_UNCHECKED")
+ set(GMX_RELEASE_SOURCE_FILE_CHECKSUM "NoChecksumFile")
+ set(GMX_CURRENT_SOURCE_FILE_CHECKSUM "NoChecksumFile")
+ message(WARNING "Could not valdiate the GROMACS source due to missing reference checksum file.")
+ else()
+ set(GMX_VERSION_STRING_FULL "${GMX_VERSION_STRING_FULL}_UNCHECKED")
+ set(GMX_RELEASE_SOURCE_FILE_CHECKSUM "NoPythonAvailable")
+ set(GMX_CURRENT_SOURCE_FILE_CHECKSUM "NoPythonAvailable")
+ message(STATUS "Could not calculate checksum of source files without Python")
+ endif()
+ endif()
configure_file(${VERSION_INFO_CMAKEIN_FILE} ${VERSION_INFO_CMAKE_FILE})
endif()
unset(GMX_VERSION_STRING_FULL)
unset(GMX_VERSION_FULL_HASH)
unset(GMX_VERSION_CENTRAL_BASE_HASH)
+unset(GMX_RELEASE_SOURCE_FILE_CHECKSUM)
+unset(GMX_CURRENT_SOURCE_FILE_CHECKSUM)
+
+
+# What file the checksum should be written to
+set(CHECKSUM_FILE "${PROJECT_SOURCE_DIR}/src/reference_checksum")
+
+# Target that allows checksumming a source tree when producing a tarball.
+# Allows verification of builds from the tarball to make sure the source has
+# not been tampered with.
+# Note: The RUN_ALWAYS here is to regenerate the hash file only, it does not
+# mean that the target is run in all builds
+if (PYTHONINTERP_FOUND)
+ gmx_add_custom_output_target(checksum-files RUN_ALWAYS
+ OUTPUT ${CHECKSUM_FILE}
+ COMMAND ${PYTHON_EXECUTABLE}
+ ${PROJECT_SOURCE_DIR}/admin/createFileHash.py
+ -s ${SET_OF_DIRECTORIES_TO_CHECKSUM}
+ -o ${CHECKSUM_FILE}
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+ COMMENT "Generating checksum of source files")
+else()
+ add_custom_target(checksum-files
+ COMMAND ${CMAKE_COMMAND} -E echo
+ "Can not checksum files without python being available"
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+ COMMENT "Generating checksum of source files")
+endif()
# The main user-visible interface to the machinery.
# See documentation at the top of the script.
compiler. Both clang and gcc will work, but they produce lower
performance and each have some shortcomings. clang 3.8 now offers
support for OpenMP, and so may provide decent performance.
-The CMake variable ``CMAKE_OSX_DEPLOYMENT_TARGET`` influences CMake's
-choice of C++ stdlib implementation. Setting to ``10.9`` (default) or
-higher is the simplest way to find a compatible compiler and stdlib
-implementation.
For all non-x86 platforms, your best option is typically to use gcc or
the vendor's default or recommended compiler, and check for
you try a few different parallelization options, and experiment with
tools such as ``gmx tune_pme``.
+Validating |Gromacs| for source code modifications
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When building |Gromacs| from a release tarball, the build process automatically
+checks whether any of the files contributing to the build process have been modified since they
+were packed in the archive. This results in the marking of the version as either ``MODIFIED``
+(if the source files have been modified) or ``UNCHECKED`` (if no validation was possible, e.g.
+if no Python installation was found). The actual checking is performed by comparing a checksum
+stored in the release tarball against one generated by the ``createFileHash.py`` Python script
+during the build configuration. When running a |Gromacs| binary, the checksum is also printed
+in the log file, together with a message if there is a mismatch or no validation has been possible.
+
+This allows users to check whether the binary they are using was built from source code that is
+identical to the source code released by the |Gromacs| team. Thus unintentional modifications
+to the source code for building binaries that are used for running production simulations
+are easily detectable. Additionally, by manually setting a version tag using the
+GMX_VERSION_STRING_OF_FORK cmake option, users can mark a modified |Gromacs| release
+code with their custom version string suffix.
+
Having difficulty?
^^^^^^^^^^^^^^^^^^
GMX_VERSION_STRING_OF_FORK in the source code (or if necessary when
running CMake). It will then appear in the log file and users will know
which version and fork of the code produced the result.
+
+Provide checksum to validate release tarballs
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Released versions of |Gromacs| will now provide a checksum calculated
+from the files participating in building the binaries. When building
+|Gromacs| from the tarball, the files will be checksummed again and
+compared against the checksum generated during the release build. If the
+checksums don't match, the version string is modified to indicate that
+the source tree has been modified, and the information is printed in the
+log files for the users. If checksumming has not been possible (either due
+to missing Python during installation, or because the original checksum file
+is missing), this is indicated through a different version string.
+
+:issue:`2128`
+
REMOTE_HASH
EXTRA_VARS
GMX_SOURCE_DOI
+ GMX_RELEASE_HASH
+ GMX_SOURCE_HASH
)
list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE})
const char _gmx_full_git_hash[] = "@GMX_VERSION_FULL_HASH@";
const char _gmx_central_base_hash[] = "@GMX_VERSION_CENTRAL_BASE_HASH@";
const char gmxSourceDoiString[] = "@GMX_SOURCE_DOI@";
+const char gmxReleaseSourceFileChecksum[] = "@GMX_RELEASE_SOURCE_FILE_CHECKSUM@";
+const char gmxCurrentSourceFileChecksum[] = "@GMX_CURRENT_SOURCE_FILE_CHECKSUM@";
return gmxSourceDoiString;
}
+const char *gmxReleaseSourceChecksum()
+{
+ return gmxReleaseSourceFileChecksum;
+}
+
+const char *gmxCurrentSourceChecksum()
+{
+ return gmxCurrentSourceFileChecksum;
+}
+
#if GMX_DOUBLE
void gmx_is_double_precision()
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015,2018, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2018,2019, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
*/
const char *gmxDOI();
+/*! \brief
+ * Hash of the complete source released in the tarball.
+ *
+ * Empty when not a release tarball build.
+ */
+const char *gmxReleaseSourceChecksum();
+
+/*! \brief
+ * Hash of the complete source actually used when building.
+ *
+ * Always computed when building from tarball.
+ */
+const char *gmxCurrentSourceChecksum();
+
#endif
* referencing of different \Gromacs releases.
*/
extern const char gmxSourceDoiString[];
+//! Sha256 checksum of source and header files, populated for release builds.
+extern const char gmxReleaseSourceFileChecksum[];
+//! Sha256 checksum of source and header files, populated for builds from tarball.
+extern const char gmxCurrentSourceFileChecksum[];
//! \}
//! \endcond
{
writer->writeLine(formatString("Branched from: %s", base_hash));
}
+ const char *const releaseSourceChecksum = gmxReleaseSourceChecksum();
+ const char *const currentSourceChecksum = gmxCurrentSourceChecksum();
+ if (releaseSourceChecksum[0] != '\0')
+ {
+ if (std::strcmp(releaseSourceChecksum, "NoChecksumFile") == 0)
+ {
+ writer->writeLine(formatString("The source code this program was compiled from has not been verified because the reference checksum was missing during compilation. This means you have an incomplete GROMACS distribution, please make sure to download an intact source distribution and compile that before proceeding."));
+ writer->writeLine(formatString("Computed checksum: %s", currentSourceChecksum));
+ }
+ else if (std::strcmp(releaseSourceChecksum, "NoPythonAvailable") == 0)
+ {
+ writer->writeLine(formatString("Build source could not be verified, because the checksum could not be computed."));
+ }
+ else if (std::strcmp(releaseSourceChecksum, currentSourceChecksum) != 0)
+ {
+ writer->writeLine(formatString("This program has been built from source code that has been altered and does not match the code released as part of the official GROMACS version %s. If you did not intend to use an altered GROMACS version, make sure to download an intact source distribution and compile that before proceeding.", gmx_version()));
+ writer->writeLine(formatString("If you have modified the source code, you are strongly encouraged to set your custom version suffix (using -DGMX_VERSION_STRING_OF_FORK) which will can help later with scientific reproducibility but also when reporting bugs."));
+ writer->writeLine(formatString("Release checksum: %s", releaseSourceChecksum));
+ writer->writeLine(formatString("Computed checksum: %s", currentSourceChecksum));
+ }
+ else
+ {
+ writer->writeLine(formatString("Verified release checksum is %s", releaseSourceChecksum));
+ }
+ }
+
#if GMX_DOUBLE
writer->writeLine("Precision: double");