python_packaging/src/gmxapi/simulation/workflow.py

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2019, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34 #
  35 # This file is based on the Kasson Lab gmxapi project release 0.0.7.4.
  36 # https://github.com/kassonlab/gmxapi/blob/v0.0.7.4/src/gmx/workflow.py
  37 # # https://github.com/kassonlab/gmxapi/blob/v0.0.7.4/LICENSE
  38 """
  39 Provide workflow-level utilities and classes
  40 ============================================
  41
  42 Supports the implementation of operations in the gmxapi.simulation module.
  43 """
  44
  45 __all__ = ['from_tpr', 'WorkSpec', 'WorkElement']
  46
  47 import collections
  48 import warnings
  49 import weakref
  50 from typing import Text, Iterable, Set
  51
  52 import gmxapi as gmx
  53 from gmxapi import exceptions
  54
# Module-level logger, obtained with getChild() from the logger exposed by the
# gmxapi package. An informational record is emitted when this module is imported.
logger = gmx.logger.getChild('simulation.workflow')
logger.info('Importing gmx.workflow')

# Work specification version string. WorkSpec instances record this value as
# their ``version`` attribute, and WorkSpec.deserialize() compares it against
# the ``version`` entry of the document being loaded.
workspec_version = "gmxapi_workspec_0_1"
logger.info("Using schema version {}.".format(workspec_version))
  62
  63
  64 def to_utf8(input) -> bytes:
  65     """Return a utf8 encoded byte sequence of the Unicode ``input`` or its string representation.
  66
  67     Returns:
  68          :py:bytes byte sequence.
  69     """
  70     if isinstance(input, str):
  71         value = input.encode('utf-8')
  72     elif isinstance(input, bytes):
  73         value = input
  74     else:
  75         try:
  76             string = str(input)
  77             value = string.encode('utf-8')
  78         except Exception as e:
  79             raise exceptions.ValueError("Input cannot be interpreted as a UTF-8 compatible string.") from e
  80     return value
  81
  82
  83 def to_string(input) -> str:
  84     """Return a Unicode string representation of ``input``.
  85
  86     If ``input`` or its string representation is not already a Unicode object, attempt to decode as utf-8.
  87
  88     Returns a native string, decoding utf-8 encoded byte sequences if necessary.
  89     """
  90     if isinstance(input, str):
  91         value = input
  92     else:
  93         try:
  94             value = input.decode('utf-8')
  95         except Exception:
  96             try:
  97                 value = str(input)
  98             except Exception as e:
  99                 raise exceptions.ValueError("Cannot find a string representation of input.") from e
 100     return value
 101
 102
 103 class GmxMap(dict):
 104     """Utility/compatibility class to ensure consistent keys.
 105
 106     Internally, converts all keys to native str for the current interpreter.
 107     """
 108     def keys(self):
 109         for key in dict.keys(self):
 110             if not isinstance(key, str):
 111                 raise exceptions.ApiError('Invalid key type found: {} {}'.format(key, type(key)))
 112             yield key
 113
 114     def __getitem__(self, key):
 115         return super(GmxMap, self).__getitem__(str(key))
 116
 117     def __setitem__(self, key, item):
 118         super(GmxMap, self).__setitem__(str(key), item)
 119
 120     def __delitem__(self, key):
 121         super(GmxMap, self).__delitem__(str(key))
 122
 123
 124 class WorkSpec(object):
 125     """
 126     Container of workflow elements with data dependency
 127     information and requirements for execution.
 128
 129     An element cannot be added to a WorkSpec if it has dependencies that are not
 130     in the WorkSpec.
 131
 132     Work is added to the specification by passing a WorkElement object to
 133     :py:func:`WorkSpec.add_element()`.
 134     Any dependencies in the WorkElement must already be specified in the target WorkSpec.
 135
 136     When iterated over, a WorkSpec object returns WorkElement objects.
 137     WorkElement objects are yielded in a valid order to keep dependencies
 138     satisfied, but not necessarily the same order in which add_element()
 139     calls were originally made. In other words, the WorkSpec is a directed
 140     acyclic dependency graph, and its iterator returns nodes in an arbitrary
 141     but topologically correct order.
 142
 143     The string representation of a WorkSpec object is a valid JSON serialized data object.
 144
 145     The schema for version 0.1 of the specification allows data structures like
 146     the following.
 147     ::
 148
 149         {
 150             'version': 'gmxapi_workspec_0_1',
 151             'elements':
 152             {
 153                 'myinput':
 154                 {
 155                     'namespace': 'gromacs',
 156                     'operation': 'load_tpr',
 157                     'params': {'input': ['tpr_filename1', 'tpr_filename2']}
 158                 },
 159                 'mydata':
 160                 {
 161                     'namespace': 'gmxapi',
 162                     'operation': 'open_global_data_with_barrier',
 163                     'params': ['data_filename']
 164                 },
 165                 'mypotential':
 166                 {
 167                     'namespace': 'myplugin',
 168                     'operation': 'create_mdmodule',
 169                     'params': {...},
 170                     'depends': ['mydata']
 171                 },
 172                 'mysim':
 173                 {
 174                     'namespace': 'gmxapi',
 175                     'operation': 'md',
 176                     'depends': ['myinput', 'mypotential']
 177                 }
 178             }
 179         }
 180
 181     The first mapping (``version``) is required as shown. The ``elements`` map
 182     contains uniquely named elements specifying an operation, the operation's
 183     namespace, and parameters and dependencies of the operation for this element.
 184     ``depends`` is a sequence of string names of elements that are also in the
 185     work spec. ``params`` is a key-value map with string keys and values that
 186     are valid JSON data. ``namespace`` and ``operation`` are strings that the
 187     :py:class:`Context <gmx.context.Context>` can map to directors it uses to
 188     construct the session. Namespace ``gmxapi`` is reserved for operations
 189     specified by the API. Namespace ``gromacs`` is reserved for operations
 190     implemented as GROMACS adapters (versioned separately from gmxapi). The
 191     period character (".") has special meaning and should not be used in naming
 192     elements, namespaces, or operations.
 193
 194     """
 195     def __init__(self):
 196         self.version = workspec_version
 197         self.elements = GmxMap()
 198         self.__context_weak_ref = None
 199
 200     @property
 201     def _context(self):
 202         referent = None
 203         if self.__context_weak_ref is not None:
 204             referent = self.__context_weak_ref()
 205         return referent
 206
 207     @_context.setter
 208     def _context(self, context):
 209         # We're moving towards having the context own the work, so the work should
 210         # not own the context.
 211         self.__context_weak_ref = weakref.ref(context)
 212
 213     def __chase_deps(self, source_set: Set[str], name_list: Iterable[Text]):
 214         """Helper to recursively generate dependencies before dependents.
 215
 216         Given a set of WorkElement objects and a list of element names, generate WorkElements for
 217         the members of name_list plus their dependencies in an order such that dependencies are
 218         guaranteed to occur before their dependent elements.
 219
 220         For example, to sequence an entire work specification into a reasonable order for instantiation, use
 221
 222             >>> workspec.__chase_deps(set(workspec.elements.keys()), list(workspec.elements.keys()))
 223
 224         Note: as a member function of WorkSpec, we have access to the full WorkSpec elements data at all
 225         times, giving us extra flexibility in implementation and arguments.
 226
 227         Args:
 228             source_set: a (super)set of element names from the current work spec (will be consumed)
 229             name_list: subset of *sources* to be sequenced
 230
 231         Returns:
 232             Sequence of WorkElement objects drawn from the names in *source_set*
 233
 234         Requires that WorkElements named in *name_list* and any elements on which
 235         they depend are all named in *source_list* and available in the current
 236         work spec.
 237
 238         Warning: *source_set* is a reference to an object that is modified arbitrarily.
 239         The caller should not re-use the object after calling _chase_deps().
 240         (Make a copy first, if needed.)
 241
 242         TODO: Separate out DAG topology operations from here and Context.__enter__()
 243         Our needs are simple enough that we probably don't need an external dependency
 244         like networkx...
 245         """
 246         # Recursively (depth-first) generate a topologically valid serialized DAG from source_set.
 247         assert isinstance(source_set, set)
 248         if isinstance(name_list, (str, bytes)):
 249             warnings.warn('name_list appears to be a single name. Disambiguate a string by passing a list or tuple.')
 250         assert isinstance(name_list, collections.abc.Iterable)
 251
 252         # Make a copy of name_list in case the input reference is being used elsewhere during
 253         # iteration, such as for source_set, which is modified during the loop.
 254         for name in tuple(name_list):
 255             assert isinstance(name, str)
 256             if name in source_set:
 257                 source_set.remove(name)
 258                 element = WorkElement.deserialize(self.elements[name], name=name, workspec=self)
 259                 dependencies = element.depends
 260                 # items in element.depends are either element names or ensembles of element names.
 261                 for item in dependencies:
 262                     if isinstance(item, (list, tuple, set)):
 263                         dependency_list = item
 264                     else:
 265                         if not isinstance(item, str):
 266                             raise exceptions.ValueError(
 267                                 'Dependencies should be a string or sequence of strings. Got {}'.format(type(item)))
 268                         dependency_list = [item]
 269                     for dependency in dependency_list:
 270                         for recursive_dep in self.__chase_deps(source_set, (dependency,)):
 271                             yield recursive_dep
 272                 yield element
 273             else:
 274                 # Note: The user is responsible for ensuring that source_set is complete.
 275                 # Otherwise, we would need to maintain a list of elements previously yielded.
 276                 pass
 277
 278
 279     def __iter__(self):
 280         source_set = set(self.elements.keys())
 281         for element in self.__chase_deps(source_set, source_set):
 282             yield element
 283
 284     def __hash__(self):
 285         """Uniquely identify this work specification.
 286
 287         Allows the spec to be used as a dictionary key in Python. Note that this hash is possibly dependent on the
 288         Python implementation. It is not part of the gmxapi specification and should not be used outside of a single
 289         invocation of a script.
 290         """
 291         # Hash the serialized elements, concatenated as a single string. Note that the order of elements and their
 292         # contents is not guaranteed, but should be consistent within a script invocation.
 293         return hash(to_string(self.serialize()))
 294
 295     def add_element(self, element):
 296         """Add an element to a work specification if possible.
 297
 298         Adding an element to a WorkSpec must preserve the validity of the workspec, which involves several checks.
 299         We do not yet check for element uniqueness beyond a string name.
 300
 301         If an element is added that was previously in another WorkSpec, it must first be removed from the
 302         other WorkSpec.
 303         """
 304         if hasattr(element, "namespace") and hasattr(element, "operation") and hasattr(element, "serialize"):
 305             if not hasattr(element, "name") or element.name is None or len(str(element.name)) < 1:
 306                 raise exceptions.UsageError("Only named elements may be added to a WorkSpec.")
 307             if element.name in self.elements:
 308                 raise exceptions.UsageError("Elements in WorkSpec must be uniquely identifiable.")
 309             if hasattr(element, "depends"):
 310                 for dependency in element.depends:
 311                     if not dependency in self.elements:
 312                         raise exceptions.UsageError(
 313                             "Element dependencies must already be specified before an Element may be added.")
 314             # Okay, it looks like we have an element we can add
 315             if hasattr(element, "workspec") and element.workspec is not None and element.workspec is not self:
 316                 raise exceptions.Error(
 317                     "Element must be removed from its current WorkSpec to be added to this WorkSpec, but element "
 318                     "removal is not yet implemented.")
 319             self.elements[element.name] = element.serialize()
 320             element.workspec = self
 321         else:
 322             raise exceptions.ValueError(
 323                 "Provided object does not appear to be compatible with gmx.workflow.WorkElement.")
 324         logger.info("Added element {} to workspec.".format(element.name))
 325
 326     def serialize(self):
 327         """Serialize the work specification in a form suitable to pass to any Context implementation.
 328
 329         Serialization is performed with the JSON data serialization module.
 330
 331         To simplify unique identification of work specifications, this function will also impose rules for reproducibility.
 332
 333         1. All key-value maps are sorted alphanumerically by their string keys.
 334         2. Strings must consist of valid ASCII characters.
 335         3. Output is a byte sequence of the utf-8 encoded densely formatted JSON document.
 336
 337         Returns:
 338             ``unicode`` object in Python 2, ``bytes`` object in Python 3
 339
 340         Output of serialize() should be explicitly converted to a string before passing to a JSON deserializer.
 341
 342             >>> my_object = my_workspec.serialize()
 343             >>> my_data_structure = json.loads(my_object.decode('utf-8'))
 344             >>> # or...
 345             >>> my_data_structure = json.loads(my_object, encoding='utf-8')
 346
 347         """
 348         import json
 349         # Build the normalized dictionary
 350         dict_representation = {'version': self.version,
 351                                'elements': {}
 352                                }
 353         for name, element in [(e, json.loads(to_string(self.elements[e]))) for e in sorted(self.elements.keys())]:
 354             dict_representation['elements'][str(name)] = element
 355         serialization = json.dumps(dict_representation, ensure_ascii=True, sort_keys=True, separators=(',', ':'))
 356         return serialization.encode('utf-8')
 357
 358     @classmethod
 359     def deserialize(serialized):
 360         import json
 361         workspec = WorkSpec()
 362         dict_representation = json.loads(to_string(serialized))
 363         ver_in = dict_representation['version']
 364         ver_out = workspec.version
 365         if ver_in != ver_out:
 366             message = "Expected work spec version {}. Got work spec version {}.".format(ver_out, ver_in)
 367             raise exceptions.ValueError(message)
 368         for element in dict_representation['elements']:
 369             workspec.elements[element] = dict_representation['elements'][element]
 370         return workspec
 371
 372     def uid(self):
 373         """Get a unique identifier for this work specification.
 374
 375         Returns:
 376             hash value
 377
 378         Generate a cryptographic hash of this work specification that is guaranteed to match that of another equivalent
 379         work specification. The returned string is a 64-character hexadecimal encoded SHA-256 hash digest of the
 380         serialized WorkSpec.
 381
 382         The definition of equivalence is likely to evolve, but currently means a work spec of the
 383         same version with the same named elements containing the same operations, dependencies, and parameters, as
 384         represented in the serialized version of the work specification. Note that this does not include checks on the
 385         actual contents of input files or anything that does not appear in the work specification directly. Also, the
 386         hash is lossy, so it is remotely conceivable that two specs could have the same hash. The work specs
 387         should be compared before making any expensive decisions based on work spec equivalence, such as with hash(workspec).
 388
 389         Element names probably shouldn't be included in the unique identifying information (so that we can optimize out
 390         duplicated artifacts), but they are. A future API specification may add unique identification to the elements...
 391         """
 392         # Get an alphanumeric string of the checksum of the serialized work spec. SHA-256 should require about 43 characters
 393         # of base64 to represent, which seems reasonable. We need to replace some of the base64 characters to make them
 394         # filesystem friendly, though. Hexadecimal may be more friendly, but would require 64 characters.
 395         import hashlib
 396         data = to_utf8(self.serialize())
 397         result = hashlib.sha256(data)
 398         return result.hexdigest()
 399
 400     def __str__(self):
 401         """Generate string representation for str() or print().
 402
 403         The string output should look like the abstract schema for gmxapi_workspec_1_0, but the exact
 404         format is unspecified and may change in future versions.
 405
 406         For consistent JSON output, use WorkSpec.serialize().
 407         """
 408         import json
 409         string = to_string(self.serialize())
 410         data = json.loads(string)
 411         reserialized = json.dumps(data, indent=4, sort_keys=True)
 412         return str(reserialized)
 413
 414     def __repr__(self):
 415         """Generate Pythonic representation for repr(workspec)."""
 416         return 'gmx.workflow.WorkSpec()'
 417
 418 # A possible alternative name for WorkElement would be Operator, since there is a one-to-one
 419 # mapping between WorkElements and applications of "operation"s. We need to keep in mind the
 420 # sensible distinction between the WorkElement abstraction and the API objects and DAG nodes.
 421 class WorkElement(object):
 422     """Encapsulate an element of a work specification."""
 423     def __init__(self, namespace="gmxapi", operation=None, params=None, depends=()):
 424         self._namespace = str(to_string(namespace))
 425         # We can add an operations submodule to validate these. E.g. self.operation = gmx.workflow.operations.normalize(operation)
 426         if operation is not None:
 427             self._operation = str(to_string(operation))
 428         else:
 429             raise exceptions.UsageError("Invalid argument type for operation.")
 430
 431         # Note: Nothing currently prevents attribute updates by assignment after adding the element to a workspec,
 432         # but this protocol will be clarified with https://github.com/kassonlab/gmxapi/issues/92
 433         if params is None:
 434             self.params = GmxMap()
 435         elif isinstance(params, dict):
 436             self.params = GmxMap({to_string(name): params[name] for name in params})
 437         else:
 438             raise exceptions.UsageError("If provided, params must be a dictionary of keyword arguments")
 439         self.depends = []
 440         for d in depends:
 441             if isinstance(d, (list, tuple)):
 442                 self.depends.append([str(name) for name in d])
 443             else:
 444                 self.depends.append(str(d))
 445
 446         # The Python class for work elements keeps a strong reference to a WorkSpec object containing its description
 447         self._name = None
 448         self._workspec = None
 449
 450     @property
 451     def namespace(self):
 452         assert isinstance(self._namespace, str)
 453         return self._namespace
 454
 455     @property
 456     def operation(self):
 457         assert isinstance(self._operation, str)
 458         return self._operation
 459
 460     @property
 461     def name(self):
 462         assert isinstance(self._name, (str, type(None)))
 463         return self._name
 464
 465     @name.setter
 466     def name(self, new_name):
 467         self._name = str(to_string(new_name))
 468
 469     @property
 470     def workspec(self):
 471         return self._workspec
 472
 473     @workspec.setter
 474     def workspec(self, input):
 475         self._workspec = input
 476
 477     def add_dependency(self, element):
 478         """Add another element as a dependency.
 479
 480         First move the provided element to the same WorkSpec, if not already here.
 481         Then, add to ``depends`` and update the WorkSpec.
 482         """
 483         def check_element(element):
 484             if element.workspec is None:
 485                 self.workspec.add_element(element)
 486                 assert element.workspec is self.workspec
 487                 assert element.name in self.workspec.elements
 488             elif element.workspec is not self.workspec:
 489                 raise exceptions.ApiError("Element will need to be moved to the same workspec.")
 490             return True
 491
 492         if hasattr(element, 'workspec') and hasattr(element, 'name'):
 493             check_element(element)
 494             self.depends.append(element.name)
 495         else:
 496             assert isinstance(element, (list, tuple))
 497             self.depends.append(tuple([item.name for item in element if check_element(item)]))
 498
 499         self.workspec.elements[self.name] = self.serialize()
 500
 501     def serialize(self):
 502         """Create a byte sequence representation of the work element.
 503
 504         The WorkElement class exists just to provide convenient handles in Python. The WorkSpec is not actually a
 505         container of WorkElement objects.
 506
 507         Returns:
 508             Byte sequence of utf-8 encoded JSON document. May need to be decoded if needed as a (Unicode) string.
 509
 510         """
 511         import json
 512         output_dict = {'namespace': self.namespace,
 513                        'operation': self.operation,
 514                        'params': self.params,
 515                        'depends': self.depends
 516                        }
 517         serialization = json.dumps(output_dict)
 518         return to_utf8(serialization)
 519
 520     @classmethod
 521     def deserialize(cls, input, name=None, workspec=None):
 522         """Create a new WorkElement object from a serialized representation.
 523
 524         Arguments:
 525             input: a serialized WorkElement
 526             name: new element name (optional) (deprecated)
 527             workspec: an existing workspec to attach this element to (optional)
 528
 529         When subclasses become distinct, this factory function will need to do additional dispatching to create an object of the correct type.
 530         Alternatively, instead of subclassing, a slightly heavier single class may suffice, or more flexible duck typing might be better.
 531         """
 532         import json
 533         input_string = to_string(input)
 534         args = json.loads(input_string)
 535         element = cls(namespace=args['namespace'], operation=args['operation'], params=args['params'], depends=args['depends'])
 536         if name is not None:
 537             element.name = name
 538             # This conditional is nested because we can only add named elements to a WorkSpec.
 539             if workspec is not None:
 540                 element.workspec = workspec
 541                 if element.name not in workspec.elements:
 542                     workspec.add_element(element)
 543         return element
 544
 545 class SharedDataElement(WorkElement):
 546     """Work element with MD-specific extensions.
 547
 548     The schema may not need to be changed, but the API object may be expected to provide additional functionality.
 549     """
 550     def __init__(self, params, name=None):
 551         """Create a blank SharedDataElement representation.
 552
 553         It may be appropriate to insist on creating objects of this type via helpers or factories, particularly if
 554         creation requires additional parameters.
 555         """
 556         self.args = params['args']
 557         self.kwargs = params['kwargs']
 558         super(SharedDataElement, self).__init__(namespace="gmxapi",
 559                                                 operation="global_data",
 560                                                 params={'args': self.args, 'kwargs': self.kwargs})
 561         self.name = name
 562
 563
 564 def get_source_elements(workspec):
 565     """Get an iterator of the starting nodes in the work spec.
 566
 567     Source elements have no dependencies and can be processed immediately. Elements with dependencies
 568     cannot be processed, instantiated, or added to a work spec until after their dependencies have been.
 569
 570     Args:
 571         workspec : an existing work specification to analyze, such as by a Context implementation preparing to schedule work.
 572
 573     Returns:
 574         iterator of gmx.workflow.WorkElement objects that may be processed without dependencies.
 575
 576     This function is provided in the API to allow flexibility in how source elements are determined.
 577     """
 578     for name in workspec.elements:
 579         element_data = workspec.elements[name]
 580         element = WorkElement.deserialize(element_data)
 581         if len(element.depends) == 0:
 582             element.name = name
 583             element.workspec = workspec
 584             yield(element)
 585
 586
 587 def from_tpr(input=None, **kwargs):
 588     """Create a WorkSpec from a (list of) tpr file(s).
 589
 590     Generates a work specification based on the provided simulation input and returns a handle to the
 591     MD simulation element of the workflow. Key word arguments can override simulation behavior from
 592     ``input``.
 593
 594     If the MD operation discovers artifacts from a previous simulation that was launched from the same input,
 595     the simulation resumes from the last checkpointed step. If ``append_output`` is set ``False``, existing
 596     artifacts are kept separate from new output with the standard file naming convention,
 597     and new output begins from the last checkpointed step, if any.
 598
 599     Setting ``end_time`` redefines the end point of the simulation trajectory from what was provided in
 600     ``input``. It is equivalent to changing the number of steps requested in the MDP (or TPR) input, but
 601     the time is provided as picoseconds instead of a number of time steps.
 602
 603     .. deprecated:: 0.0.7
 604         If ``steps=N`` is provided and N is an integer
 605         greater than or equal to 1, the MD operation advances the trajectory by ``N`` steps, regardless of the number
 606         of simulation steps specified in ``input`` or ``end_time``. For convenience, setting ``steps=None`` does not override
 607         ``input``.
 608         Note that when it is not ``None``, ``steps`` takes precedence over ``end_time`` and ``input``, but can still be
 609         superceded by a signal, such as if an MD plugin or other code has a simulation completion condition that occurs
 610         before ``N`` additional steps have run.
 611
 612     Where key word arguments correspond to ``gmx mdrun`` command line options, the corresponding flags are noted below.
 613
 614     Keyword Arguments:
 615         input (str): *Required* string or list of strings giving the filename(s) of simulation input
 616         append_output (bool): Append output for continuous trajectories if True, truncate existing output data if False. (default True)
 617         end_time (float): Specify the final time in the simulation trajectory, overriding input read from TPR.
 618         grid (tuple): Domain decomposition grid divisions (nx, ny, nz). (-dd)
 619         max_hours (float): Terminate after 0.99 times this many hours if simulation is still running. (-maxh)
 620         pme_ranks (int): number of separate ranks to be used for PME electrostatics. (-npme)
 621         threads_per_pme_rank (int): Number of OpenMP threads per PME rank. (-ntomp_pme)
 622         steps (int): Override input files and run for this many steps. (-nsteps; deprecated)
 623         threads (int): Total number of threads to start. (-nt)
 624         threads_per_rank (int): number of OpenMP threads to start per MPI rank. (-ntomp)
 625         tmpi (int): number of thread-MPI ranks to start. (-ntmpi)
 626
 627     ..  versionchanged:: 0.1
 628         *pme_threads_per_rank* renamed to *threads_per_pme_rank*.
 629
 630     Returns:
 631         simulation member of a gmx.workflow.WorkSpec object
 632
 633     Produces a WorkSpec with the following data::
 634
 635         version: gmxapi_workspec_0_1
 636         elements:
 637             tpr_input:
 638                 namespace: gromacs
 639                 operation: load_tpr
 640                 params: {'input': ['tpr_filename1', 'tpr_filename2', ...]}
 641             md_sim:
 642                 namespace: gmxapi
 643                 operation: md
 644                 depends: ['tpr_input']
 645                 params: {'kw1': arg1, 'kw2': arg2, ...}
 646
 647     Bugs: version 0.0.6
 648         * There is not a way to programmatically check the current step number on disk.
 649           See https://github.com/kassonlab/gmxapi/issues/56 and https://github.com/kassonlab/gmxapi/issues/85
 650     """
 651     import os
 652
 653     usage = "argument to from_tpr() should be a valid filename or list of filenames, followed by optional key word arguments."
 654
 655     # Normalize to tuple input type.
 656     if isinstance(input, list) or isinstance(input, tuple):
 657         tpr_list = tuple([to_string(element) for element in input])
 658     else:
 659         try:
 660             tpr_list = (to_string(input),)
 661         except:
 662             raise exceptions.UsageError(usage)
 663
 664     # Check for valid filenames
 665     for arg in tpr_list:
 666         if not (os.path.exists(arg) and os.path.isfile(arg)):
 667             arg_path = os.path.abspath(arg)
 668             raise exceptions.UsageError(usage + " Got {}".format(arg_path))
 669
 670     # Note: These are runner parameters, not MD parameters, and should be in the call to gmx.run() instead of here.
 671     # Reference https://github.com/kassonlab/gmxapi/issues/95
 672     params = {}
 673     for arg_key in kwargs:
 674         if arg_key == 'grid' or arg_key == 'dd':
 675             params['grid'] = tuple(kwargs[arg_key])
 676         elif arg_key == 'pme_ranks' or arg_key == 'npme':
 677             params['pme_ranks'] = int(kwargs[arg_key])
 678         elif arg_key == 'threads' or arg_key == 'nt':
 679             params['threads'] = int(kwargs[arg_key])
 680         elif arg_key == 'tmpi' or arg_key == 'ntmpi':
 681             params['tmpi'] = int(kwargs[arg_key])
 682         elif arg_key == 'threads_per_rank' or arg_key == 'ntomp':
 683             params['threads_per_rank'] = int(kwargs[arg_key])
 684         elif arg_key == 'pme_threads_per_rank' or arg_key == 'threads_per_pme_rank' or arg_key == 'ntomp_pme':
 685             # TODO: Remove this temporary accommodation.
 686             assert not gmx.version.api_is_at_least(0, 2)
 687             if arg_key == 'pme_threads_per_rank':
 688                 warnings.warn("Key word pme_threads_per_rank has been renamed to threads_per_pme_rank.",
 689                               DeprecationWarning)
 690             params['threads_per_pme_rank'] = int(kwargs[arg_key])
 691         elif arg_key == 'steps' or arg_key == 'nsteps':
 692             if kwargs[arg_key] is None:
 693                 # None means "don't override the input" which is indicated by a parameter value of -2 in GROMACS 2019
 694                 steps = -2
 695             else:
 696                 # Otherwise we require steps to be a positive integer
 697                 try:
 698                     steps = int(kwargs[arg_key])
 699                     if steps < 1:
 700                         raise exceptions.ValueError('steps to run must be at least 1')
 701                 except (TypeError, ValueError) as e:
 702                     # steps is not an integer.
 703                     raise exceptions.TypeError('"steps" could not be interpreted as an integer.')
 704                 # The "nsteps" command line flag will be removed in GROMACS 2020
 705                 # and so "steps" is deprecated in gmxapi 0.0.7
 706                 warnings.warn("`steps` keyword argument is deprecated. Consider `end_time` instead.",
 707                               DeprecationWarning)
 708             params['steps'] = steps
 709         elif arg_key == 'max_hours' or arg_key == 'maxh':
 710             params['max_hours'] = float(kwargs[arg_key])
 711         elif arg_key == 'append_output':
 712             # Try not to encourage confusion with the `mdrun` `-noappend` flag, which would be a confusing double negative if represented as a bool.
 713             params['append_output'] = bool(kwargs[arg_key])
 714         elif arg_key == 'end_time':
 715             params[arg_key] = float(kwargs[arg_key])
 716         else:
 717             raise exceptions.UsageError("Invalid key word argument: {}. {}".format(arg_key, usage))
 718
 719     # Create an empty WorkSpec
 720     workspec = WorkSpec()
 721
 722     # Create and add the Element for the tpr file(s)
 723     inputelement = WorkElement(namespace='gromacs', operation='load_tpr', params={'input': tpr_list})
 724     inputelement.name = 'tpr_input'
 725     if inputelement.name not in workspec.elements:
 726         # Operations such as this need to be replaced with accessors or properties that can check the validity of the WorkSpec
 727         workspec.elements[inputelement.name] = inputelement.serialize()
 728         inputelement.workspec = workspec
 729
 730     # Create and add the simulation element
 731     # We can add smarter handling of the `depends` argument, but it is only critical to check when adding the element
 732     # to a WorkSpec.
 733     mdelement = WorkElement(operation='md', depends=[inputelement.name], params=params)
 734     mdelement.name = 'md_sim'
 735     # Check that the element has not already been added, but that its dependency has.
 736     workspec.add_element(mdelement)
 737
 738     return mdelement