python_packaging/src/gmxapi/commandline.py

   1 #
   2 # This file is part of the GROMACS molecular simulation package.
   3 #
   4 # Copyright (c) 2019, by the GROMACS development team, led by
   5 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6 # and including many others, as listed in the AUTHORS file in the
   7 # top-level source directory and at http://www.gromacs.org.
   8 #
   9 # GROMACS is free software; you can redistribute it and/or
  10 # modify it under the terms of the GNU Lesser General Public License
  11 # as published by the Free Software Foundation; either version 2.1
  12 # of the License, or (at your option) any later version.
  13 #
  14 # GROMACS is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 # Lesser General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU Lesser General Public
  20 # License along with GROMACS; if not, see
  21 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23 #
  24 # If you want to redistribute modifications to GROMACS, please
  25 # consider that scientific software is very special. Version
  26 # control is crucial - bugs must be traceable. We will be happy to
  27 # consider code for inclusion in the official distribution, but
  28 # derived work must not be called official GROMACS. Details are found
  29 # in the README & COPYING files - if they are missing, get the
  30 # official version at http://www.gromacs.org.
  31 #
  32 # To help us fund GROMACS development, we humbly ask that you cite
  33 # the research papers on the package. Check out http://www.gromacs.org.
  34 """
  35 Provide command line operation.
  36 """
  37
  38 __all__ = ['commandline_operation']
  39
  40 import shutil
  41 import subprocess
  42
  43 import gmxapi as gmx
  44 from gmxapi import exceptions
  45 from gmxapi import logger as root_logger
  46 from gmxapi.datamodel import NDArray
  47 from gmxapi.operation import OutputCollectionDescription
  48
  49 # Module-level logger
  50 logger = root_logger.getChild('commandline')
  51 logger.info('Importing {}'.format(__name__))
  52
  53
  54 # Create an Operation that consumes a list and a boolean to produce a string and an integer.
  55 #
  56 # Wrap the defined function using a decorator that
  57 #    * strips the `output` parameter from the signature
  58 #    * provides `output` publishing proxy to the inner function and
#    * produces a result with attributes for
  60 #       * file: mapping of output flags to output filenames
  61 #       * erroroutput: text results in case of error
  62 #       * returncode: integer return code of wrapped command
  63 #
  64 # Note that the existence of the 'file' output map is expressed here, but
  65 # the keys of the map are not implicit or set by the wrapped function.
  66 # For the map to be non-empty, it must be defined before the resulting helper
  67 # function is called.
  68 #
  69 # TODO: Operation returns the output object when called with the shorter signature.
  70 #
  71 @gmx.function_wrapper(output={'erroroutput': str, 'returncode': int})
  72 def cli(command: NDArray, shell: bool, output: OutputCollectionDescription):
  73     """Execute a command line program in a subprocess.
  74
  75     Configure an executable in a subprocess. Executes when run in an execution
  76     Context, as part of a work graph or via gmx.run(). Runs in the current
  77     working directory.
  78
  79     Shell processing is not enabled, but can be considered for a future version.
  80     This means that shell expansions such as environment variables, globbing (`*`),
  81     and other special symbols (like `~` for home directory) are not available.
  82     This allows a simpler and more robust implementation, as well as a better
  83     ability to uniquely identify the effects of a command line operation. If you
  84     think this disallows important use cases, please let us know.
  85
  86     Arguments:
  87          command : a tuple (or list) to be the subprocess arguments, including `executable`
  88          output : mapping of command line flags to output filename arguments
  89          shell : unused (provides forward-compatibility)
  90
  91     Arguments are iteratively added to the command line with standard Python
  92     iteration, so you should use a tuple or list even if you have only one parameter.
  93     I.e. If you provide a string with `arguments="asdf"` then it will be passed as
  94     `... "a" "s" "d" "f"`. To pass a single string argument, `arguments=("asdf")`
  95     or `arguments=["asdf"]`.
  96
  97     `input` and `output` should be a dictionary with string keys, where the keys
  98     name command line "flags" or options.
  99
 100     Example:
 101         Execute a command named `exe` that takes a flagged option for file name
 102         (stored in a local Python variable `my_filename`) and an `origin` flag
 103         that uses the next three arguments to define a vector.
 104
 105             >>> my_filename = "somefilename"
 106             >>> result = cli(('exe', '--origin', 1.0, 2.0, 3.0, '-f', my_filename), shell=False)
 107             >>> assert hasattr(result, 'file')
 108             >>> assert hasattr(result, 'erroroutput')
 109             >>> assert hasattr(result, 'returncode')
 110
 111     Returns:
 112         A data structure with attributes for each of the results `file`, `erroroutput`, and `returncode`
 113
 114     Result object attributes:
 115         * `file`: the mapping of CLI flags to filename strings resulting from the `output` kwarg
 116         * `erroroutput`: A string of error output (if any) if the process failed.
 117         * `returncode`: return code of the subprocess.
 118
 119     """
 120     # Note: we could make provisions for stdio filehandles in a future version. E.g.
 121     # * STDOUT is available if a consuming operation is bound to `output.stdout`.
 122     # * STDERR is available if a consuming operation is bound to `output.stderr`.
 123     # * Otherwise, STDOUT and/or STDERR is(are) closed when command is called.
 124     #
 125     # Warning:
 126     #     Commands relying on STDIN cannot be used and is closed when command is called.
 127
 128     # In the operation implementation, we expect the `shell` parameter to be intercepted by the
 129     # wrapper and set to False.
 130     if shell:
 131         raise exceptions.UsageError("Operation does not support shell processing.")
 132
 133     if isinstance(command, (str, bytes)):
 134         command = [command]
 135     command = list([arg for arg in command])
 136     try:
 137         command[0] = shutil.which(command[0])
 138     except Exception:
 139         raise exceptions.ValueError('command argument could not be resolved to an executable file path.')
 140
 141     # TODO: (FR9) Can OS input/output filehandles be a responsibility of
 142     #  the code providing 'resources'?
 143
 144     erroroutput = ''
 145     logger.debug('executing subprocess')
 146     try:
 147         # TODO: If Python >=3.5 is required, switch to subprocess.run()
 148         command_output = subprocess.check_output(command,
 149                                                  shell=shell,
 150                                                  stdin=subprocess.DEVNULL,
 151                                                  stderr=subprocess.STDOUT,
 152                                                  )
 153         returncode = 0
 154         # TODO: Resource management code should manage a safe data object for `output`.
 155         # WARNING: We have no reason to assume the output is utf-8 encoded text!!!
 156         for line in command_output.decode('utf-8').split('\n'):
 157             logger.debug(line)
 158     except subprocess.CalledProcessError as e:
 159         logger.info("commandline operation had non-zero return status when calling {}".format(e.cmd))
 160         erroroutput = e.output.decode('utf-8')
 161         returncode = e.returncode
 162     # resources.output.erroroutput.publish(erroroutput)
 163     # resources.output.returncode.publish(returncode)
 164     # `publish` is descriptive, but redundant. Access to the output data handler is
 165     # assumed to coincide with publishing, and we assume data is published when the
 166     # handler is released. A class with a single `publish` method is overly complex
 167     # since we can just use the assignment operator.
 168     output.erroroutput = erroroutput
 169     output.returncode = returncode
 170     # TODO: Handle the file output at the higher level wrapper.
 171     # output.file = None
 172
 173
 174 # TODO: (FR4) Make this a formal operation to properly handle gmxapi data dependencies.
 175 #  The consumer of this operation has an NDArray input. filemap may contain gmxapi data flow
 176 #  aspects that we want the framework to handle for us.
 177 def filemap_to_flag_list(filemap: dict = None):
 178     """Convert a map of command line flags and filenames to a list of command line arguments.
 179
 180     Used to map inputs and outputs of command line tools to and from gmxapi data handles.
 181     User provides mappings of flags and filenames so that gmxapi can construct an
 182     executable command line.
 183
 184     Primary use case is implicit. commandline_operation() instantiates this operation based on
 185     user input, and sends the output to cli()
 186
 187     Arguments:
 188         filemap : key-value map of command line flags and filename arguments
 189
 190     Returns:
 191         list of strings and/or gmxapi data references
 192     """
 193     result = []
 194     if filemap is not None:
 195         for key, value in filemap.items():
 196             # Note that the value may be a string, a list, an ndarray, or a future
 197             if not isinstance(value, (list, tuple, NDArray)):
 198                 if hasattr(value, 'result') and value.dtype == NDArray:
 199                     pass
 200                 elif hasattr(value, 'result') and value.dtype != NDArray:
 201                     # TODO: Fix this ugly hack when we have proper Future slicing and can make NDArray futures.
 202                     result_function = value.result
 203                     value.result = lambda function=result_function: [function()]
 204                 else:
 205                     value = [value]
 206             result = gmx.join_arrays(front=result, back=gmx.join_arrays(front=[key], back=value))
 207     return result
 208
 209
 210 # TODO: (FR4) Use generating function or decorator that can validate kwargs?
 211 # TODO: (FR4) Outputs need to be fully formed and typed in the object returned
 212 #  from the helper (decorated function).
 213 def commandline_operation(executable=None,
 214                           arguments=(),
 215                           input_files: dict = None,
 216                           output_files: dict = None,
 217                           **kwargs):
 218     """Helper function to define a new operation that executes a subprocess in gmxapi data flow.
 219
 220     Define a new Operation for a particular executable and input/output parameter set.
 221     Generate a chain of operations to process the named key word arguments and handle
 222     input/output data dependencies.
 223
 224     Arguments:
 225         executable : name of an executable on the path
 226         arguments : list of positional arguments to insert at argv[1]
 227         input_files : mapping of command-line flags to input file names
 228         output_files : mapping of command-line flags to output file names
 229
 230     Output:
 231         The output node of the resulting operation handle contains
 232         * `file`: the mapping of CLI flags to filename strings resulting from the `output` kwarg
 233         * `erroroutput`: A string of error output (if any) if the process failed.
 234         * `returncode`: return code of the subprocess.
 235
 236     """
 237
 238     # Implementation details: When used in a script, this function returns an
 239     # instance of an operation. However, because of the dynamic specification of
 240     # inputs and outputs, each invocation may have the overhead of defining new
 241     # types to express the data flow topology, regardless of the executable.
 242     # If this overhead is problematic, consider exposing the intermediate step
 243     # at which the Operation is fully specified to facilitate reuse.
 244
 245     ##
 246     # 1. Define a new operation with outputs from `cli()` plus `file` from `output_files`
 247
 248     # output_files is essentially passed through, but we need assurance that results
 249     # will not be published until the rest of the operation has run (i.e. the cli() executable.)
 250
 251     # Warning: decorating a local function like this is counter to the notion of Operations
 252     # as portable (importable, serializable/deserializable). The big picture here needs
 253     # some more consideration.
 254     # TODO: (NOW) Distinguish portable Operations from relocatable Futures.
 255     # There is nothing antithetical about objects implementing gmxapi data interfaces
 256     # that are only resolvable by a certain Context as long as that Context can convey
 257     # the results to another Context upon request. Re-instantiating Operations is
 258     # only one way of relocating Futures. In this case, though, the dynamic creation of
 259     # merged_ops doesn't seem right, and commandline_operation should probably be
 260     # a proper Operation.
 261     #
 262     # TODO: (FR4+) Characterize the `file` dictionary key type:
 263     #  explicitly sequences rather than maybe-string/maybe-sequence-of-strings
 264     @gmx.function_wrapper(output={'erroroutput': str, 'returncode': int, 'file': dict})
 265     def merged_ops(erroroutput: str = None, returncode: int = None, file: dict = None,
 266                    output: OutputCollectionDescription = None):
 267         assert erroroutput is not None
 268         assert returncode is not None
 269         assert file is not None
 270         assert output is not None
 271         output.file = file
 272         output.returncode = returncode
 273         output.erroroutput = erroroutput
 274
 275     ##
 276     # 2. Prepare data flow.
 277
 278     if input_files is None:
 279         input_files = {}
 280     if output_files is None:
 281         output_files = {}
 282     if isinstance(arguments, (str, bytes)):
 283         arguments = [arguments]
 284     command = gmx.concatenate_lists([[executable],
 285                                      arguments,
 286                                      filemap_to_flag_list(input_files),
 287                                      filemap_to_flag_list(output_files)])
 288     shell = gmx.make_constant(False)
 289     cli_args = {'command': command,
 290                 'shell': shell}
 291     cli_args.update(**kwargs)
 292
 293     ##
 294     # 3. Merge operations
 295     #
 296     # Note: Without a `label` argument, repeated calls to cli(**cli_args) should
 297     # produce references to the same unique resource. Creating this handle
 298     # separately should not be necessary, but we've got a way to go until we have the
 299     # fingerprinting and Context resource management we need for that.
 300     # TODO: ``label`` kwarg
 301     # TODO: input fingerprinting
 302     cli_result = cli(**cli_args)
 303     merged_result = merged_ops(erroroutput=cli_result.output.erroroutput,
 304                                returncode=cli_result.output.returncode,
 305                                file=output_files,
 306                                **kwargs)
 307
 308     # Return an object with an OutputCollection granting access to outputs of
 309     # cli() and of output_files (as "file")
 310     return merged_result