Include directive sorter
[alexxy/gromacs.git] / docs / doxygen / includesorter.py
1 #!/usr/bin/python
2 #
3 # This file is part of the GROMACS molecular simulation package.
4 #
5 # Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 # and including many others, as listed in the AUTHORS file in the
8 # top-level source directory and at http://www.gromacs.org.
9 #
10 # GROMACS is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU Lesser General Public License
12 # as published by the Free Software Foundation; either version 2.1
13 # of the License, or (at your option) any later version.
14 #
15 # GROMACS is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 # Lesser General Public License for more details.
19 #
20 # You should have received a copy of the GNU Lesser General Public
21 # License along with GROMACS; if not, see
22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24 #
25 # If you want to redistribute modifications to GROMACS, please
26 # consider that scientific software is very special. Version
27 # control is crucial - bugs must be traceable. We will be happy to
28 # consider code for inclusion in the official distribution, but
29 # derived work must not be called official GROMACS. Details are found
30 # in the README & COPYING files - if they are missing, get the
31 # official version at http://www.gromacs.org.
32 #
33 # To help us fund GROMACS development, we humbly ask that you cite
34 # the research papers on the package. Check out http://www.gromacs.org.
35
36 """Include directive sorter for GROMACS.
37
38 This module implements an #include directive sorter for GROMACS C/C++ files.
39 It allows (in most cases) automatically sorting includes and formatting
40 the paths to use either relative paths or paths relative to src/.
41 It groups includes in groups of related headers, sorts the headers
42 alphabetically within each block, and inserts empty lines in between.
43 The script requires an up-to-date list of installed headers and Doxygen XML
44 documentation to be present in the build tree.
45
46 The sorting assumes some conventions (e.g., that system headers are included
47 with angle brackets instead of quotes).  Generally, these conventions are
48 checked by the doxygen-check.py script.
49 """
50
51 import os.path
52 import re
53 import sys
54
class IncludeGroup(object):

    """Enumeration type for grouping includes.

    Each instance wraps an integer rank.  Instances compare, sort and hash
    by that rank, so sorting a list of groups (or of tuples that start with
    a group) produces the order in which the include blocks are written.
    """

    def __init__(self, value):
        """Initialize an IncludeGroup instance.

        IncludeGroup.{main,system_c,...} should be used outside the
        class instead of calling the constructor.
        """
        self._value = value

    def __repr__(self):
        """Return a debugging representation showing the rank."""
        return 'IncludeGroup({0!r})'.format(self._value)

    # Rich comparison methods are used instead of __cmp__()/cmp(): the latter
    # exist only in Python 2, while these work on both Python 2 and 3.
    # Comparisons against foreign types return NotImplemented so that the
    # interpreter can fall back to its default handling.

    def __eq__(self, other):
        """Two groups are equal when their ranks are equal."""
        if not isinstance(other, IncludeGroup):
            return NotImplemented
        return self._value == other._value

    def __ne__(self, other):
        """Inverse of __eq__ (Python 2 does not derive it automatically)."""
        if not isinstance(other, IncludeGroup):
            return NotImplemented
        return self._value != other._value

    def __lt__(self, other):
        """Order include groups in the desired order."""
        if not isinstance(other, IncludeGroup):
            return NotImplemented
        return self._value < other._value

    def __le__(self, other):
        """Order include groups in the desired order."""
        if not isinstance(other, IncludeGroup):
            return NotImplemented
        return self._value <= other._value

    def __gt__(self, other):
        """Order include groups in the desired order."""
        if not isinstance(other, IncludeGroup):
            return NotImplemented
        return self._value > other._value

    def __ge__(self, other):
        """Order include groups in the desired order."""
        if not isinstance(other, IncludeGroup):
            return NotImplemented
        return self._value >= other._value

    def __hash__(self):
        """Hash consistently with __eq__ (defining __eq__ disables the default)."""
        return hash(self._value)

# gmxpre.h is always first
IncludeGroup.pre = IncludeGroup(0)
# "main" include file for the source file is next
IncludeGroup.main = IncludeGroup(1)
# config.h is next, if present, to keep its location consistent
IncludeGroup.config = IncludeGroup(2)
# Followed by system headers, with C first and C++ following
IncludeGroup.system_c = IncludeGroup(3)
IncludeGroup.system_c_cpp = IncludeGroup(4)
IncludeGroup.system_cpp = IncludeGroup(5)
# System headers not in standard C/C++ are in a separate block
IncludeGroup.system_other = IncludeGroup(6)
# src/external/ contents that are included with quotes go here
IncludeGroup.nonsystem_other = IncludeGroup(7)
# Other GROMACS headers
IncludeGroup.gmx_general = IncludeGroup(8)
# This group is for shared (unit) testing utilities
IncludeGroup.gmx_test = IncludeGroup(9)
# This group is for headers local to the including file/module
IncludeGroup.gmx_local = IncludeGroup(10)
91
class GroupedSorter(object):

    """Grouping and formatting rules for #include directives.

    Given an #include object, this class decides which IncludeGroup it
    belongs to and which path should be written for it, and later turns the
    sorted objects back into source lines.
    """

    # Names of system headers recognised for the individual system blocks.
    _std_c_headers = (
        'assert.h', 'ctype.h', 'errno.h', 'float.h', 'inttypes.h',
        'limits.h', 'math.h', 'signal.h', 'stdarg.h', 'stddef.h',
        'stdint.h', 'stdio.h', 'stdlib.h', 'string.h', 'time.h')
    # C++ counterparts of the C headers: 'foo.h' -> 'cfoo'.
    _std_c_cpp_headers = tuple('c' + name[:-2] for name in _std_c_headers)
    _std_cpp_headers = (
        'algorithm', 'deque', 'exception', 'fstream', 'iomanip', 'ios',
        'iosfwd', 'iostream', 'istream', 'iterator', 'limits', 'list',
        'map', 'memory', 'new', 'numeric', 'ostream', 'regex', 'set',
        'sstream', 'stdexcept', 'streambuf', 'string', 'strstream',
        'typeinfo', 'vector', 'utility')

    # Splits an #include line into the part before the path ("head") and
    # everything after the closing delimiter ("tail").
    _include_re = re.compile(
        r'^(?P<head>\s*#\s*include\s+)["<][^">]*[">](?P<tail>.*)$')

    def __init__(self, style, absolute):
        """Initialize a sorter with the given style.

        style selects how headers from the including file's own directory
        are treated: 'single-group' and 'pub-priv' map to the 'none' and
        'private' modes, anything else to 'local'.
        absolute, if true, makes main and local includes use paths
        relative to src/.
        """
        style_to_mode = {'single-group': 'none', 'pub-priv': 'private'}
        self._local_group = style_to_mode.get(style, 'local')
        force_absolute = bool(absolute)
        self._abspath_main = force_absolute
        self._abspath_local = force_absolute

    def _get_path(self, included_file, group, including_file):
        """Compute include path to use for an #include.

        The result is either relative to the directory of the including
        file or relative to src/.  The src/-relative form is used when no
        including file or group is given, for the general and test groups,
        for the main/local groups if so configured, and whenever the
        relative path would have to start with '..'.
        """
        if including_file is not None and group is not None:
            wants_abspath = (
                group in (IncludeGroup.gmx_general, IncludeGroup.gmx_test)
                or (group == IncludeGroup.main and self._abspath_main)
                or (group == IncludeGroup.gmx_local and self._abspath_local))
            if not wants_abspath:
                base_dir = os.path.dirname(including_file.get_abspath())
                candidate = os.path.relpath(included_file.get_abspath(),
                                            base_dir)
                if not candidate.startswith('..'):
                    return candidate
        src_relative = included_file.get_relpath()
        assert src_relative.startswith('src/')
        # Strip the leading 'src/'.
        return src_relative[4:]

    def _counts_as_local(self, including_file, included_file):
        """Tell whether a same-directory header goes into the local group.

        Only called for headers contained in the directory of the including
        file; the answer depends on the style chosen at construction.
        """
        mode = self._local_group
        if mode == 'local':
            return True
        if mode != 'private':
            return False
        if included_file.api_type_is_reliable():
            return included_file.is_module_internal()
        return including_file.get_relpath().startswith('src/programs')

    def _get_gmx_group(self, including_file, included_file):
        """Determine group for GROMACS headers.

        Helper for #include directives that end up in the main group or in
        one of the gmx_* groups.
        """
        main_header = including_file.get_main_header()
        if main_header and main_header == included_file:
            return IncludeGroup.main
        if included_file.get_directory().get_name() == 'testutils':
            return IncludeGroup.gmx_test
        if including_file.get_directory().contains(included_file) \
                and self._counts_as_local(including_file, included_file):
            return IncludeGroup.gmx_local
        if included_file.is_test_file():
            return IncludeGroup.gmx_test
        return IncludeGroup.gmx_general

    def get_sortable_object(self, include):
        """Produce a sortable, opaque object for an include.

        The caller sorts the objects returned for all #include directives
        of a block with the default comparison operators and then passes
        each of them to format_include(), which knows how to unpack them.
        """
        target = include.get_file()
        if not target:
            # No file object: classify by the literal path as a system header.
            path = include.get_included_path()
            known_system_headers = (
                (self._std_c_headers, IncludeGroup.system_c),
                (self._std_c_cpp_headers, IncludeGroup.system_c_cpp),
                (self._std_cpp_headers, IncludeGroup.system_cpp),
            )
            for names, candidate in known_system_headers:
                if path in names:
                    group = candidate
                    break
            else:
                group = IncludeGroup.system_other
            return (group, os.path.split(path), include)

        if target.is_external():
            group = IncludeGroup.nonsystem_other
            path = include.get_included_path()
            if 'external/' in path:
                path = self._get_path(target, group, None)
            return (group, os.path.split(path), include)

        name = target.get_name()
        if name == 'gmxpre.h':
            group = IncludeGroup.pre
            including_file = None
        elif name in ('config.h', 'gmx_header_config.h'):
            group = IncludeGroup.config
            including_file = None
        else:
            including_file = include.get_including_file()
            group = self._get_gmx_group(including_file, target)
        path = self._get_path(target, group, including_file)
        return (group, os.path.split(path), include)

    def format_include(self, obj, prev, lines):
        """Format an #include directive after sorting.

        obj is the object to format and prev the one formatted just before
        it (or None for the first one); lines holds the contents of the
        file.  Returns the list of output lines: empty for a duplicate of
        prev, otherwise the directive, preceded by an empty line if the
        group changed.
        """
        group, path_parts, include = obj[0], obj[1], obj[2]
        output = []
        if prev:
            if prev[0] != group:
                # Groups are separated by an empty line.
                output.append('\n')
            elif prev[1] == path_parts:
                # Same group and same path: drop the duplicate.
                return output
        original_line = lines[include.get_line_number()-1]
        match = self._include_re.match(original_line)
        assert match
        template = '<{0}>' if include.is_system() else '"{0}"'
        quoted_path = template.format(os.path.join(path_parts[0], path_parts[1]))
        output.append('{0}{1}{2}\n'.format(
            match.group('head'), quoted_path, match.group('tail')))
        return output
234
class IncludeSorter(object):

    """High-level logic for sorting includes.

    This class contains the high-level logic for sorting include statements.
    The actual ordering and formatting of the includes is delegated to a sort
    method (see GroupedSorter) to keep things separated.
    """

    def __init__(self, sortmethod, quiet):
        """Initialize the include sorter with the given sorter and options.

        sortmethod must provide get_sortable_object() and format_include();
        quiet suppresses the per-file status message.
        """
        self._sortmethod = sortmethod
        self._quiet = quiet
        self._changed = False

    def _sort_include_block(self, block, lines):
        """Sort a single include block.

        Returns a new list of lines for the block.
        If the new lines differ from the lines the block currently occupies
        in the file, self._changed is set to True, and the caller can check
        that.
        """
        # sorted() is used instead of map(...).sort(): on Python 3, map()
        # returns an iterator that has no sort() method.
        includes = sorted(self._sortmethod.get_sortable_object(include)
                          for include in block.get_includes())
        result = []
        prev = None
        for include in includes:
            result.extend(self._sortmethod.format_include(include, prev, lines))
            prev = include
        if not self._changed:
            # Compare the complete block, not just a common prefix: a result
            # shorter than the original (e.g., a dropped trailing duplicate)
            # or longer than it (e.g., added separator lines) is a change as
            # well.  Slicing also cannot run past the end of the file.
            # Line numbers are 1-based and inclusive.
            original = lines[block.get_first_line()-1:block.get_last_line()]
            if result != original:
                self._changed = True
        return result

    def sort_includes(self, fileobj):
        """Sort all includes in a file.

        The file is rewritten in place, but only if sorting changed at
        least one of its include blocks.
        """
        lines = fileobj.get_contents()
        # Format into a list first:
        #  - avoid bugs or issues in the script truncating the file
        #  - can check whether anything was changed before touching the file
        newlines = []
        prev = 0
        self._changed = False
        for block in fileobj.get_include_blocks():
            # Copy the lines between the previous block and this one as-is.
            newlines.extend(lines[prev:block.get_first_line()-1])
            newlines.extend(self._sort_include_block(block, lines))
            # The returned values are 1-based, but indexing here is 0-based,
            # so an explicit +1 is not needed.
            prev = block.get_last_line()
        if not self._changed:
            return
        if not self._quiet:
            sys.stderr.write('{0}: includes reformatted\n'.format(fileobj.get_relpath()))
        # Everything after the last include block.
        newlines.extend(lines[prev:])
        with open(fileobj.get_abspath(), 'w') as fp:
            fp.write(''.join(newlines))
294
def main():
    """Run the include sorter script.

    Parses the command line, loads the source tree information for the
    requested files (and the files they include), and sorts the includes
    of each requested file in place.
    """
    import os
    import sys

    from optparse import OptionParser

    from gmxtree import GromacsTree
    from reporter import Reporter

    parser = OptionParser()
    parser.add_option('-S', '--source-root',
                      help='Source tree root directory')
    parser.add_option('-B', '--build-root',
                      help='Build tree root directory')
    parser.add_option('-F', '--files',
                      help='Specify files to sort')
    parser.add_option('-q', '--quiet', action='store_true',
                      help='Do not write status messages')
    # This is for evaluating different options; can be removed from the final
    # version.
    parser.add_option('-s', '--style', type='choice', default='pub-priv',
                      choices=('single-group', 'pub-priv', 'pub-local'),
                      help='Style for Gromacs includes')
    parser.add_option('--absolute', action='store_true',
                      help='Write all include paths relative to src/')
    options, args = parser.parse_args()

    def status(message):
        """Write a progress message unless --quiet was given."""
        if not options.quiet:
            sys.stderr.write(message)

    # Files come from the positional arguments plus, optionally, a list
    # read from a file (or from standard input for '-').
    filelist = args
    list_source = options.files
    if list_source:
        if list_source == '-':
            entries = sys.stdin.readlines()
        else:
            with open(list_source, 'r') as fp:
                entries = fp.readlines()
        filelist.extend(entry.strip() for entry in entries)

    reporter = Reporter(quiet=True)

    status('Scanning source tree...\n')
    tree = GromacsTree(options.source_root, options.build_root, reporter)
    tree.load_installed_file_list()
    files = []
    for filename in filelist:
        fileobj = tree.get_file(os.path.abspath(filename))
        if fileobj:
            files.append(fileobj)
        else:
            sys.stderr.write('warning: ignoring unknown file {0}\n'.format(filename))

    status('Reading source files...\n')
    tree.scan_files(only_files=files, keep_contents=True)
    # Doxygen information is loaded for the sorted files and for every
    # file they include that has a file object.
    extfiles = set(files)
    for fileobj in files:
        targets = (include.get_file() for include in fileobj.get_includes())
        extfiles.update(target for target in targets if target)

    status('Reading Doxygen XML files...\n')
    tree.load_xml(only_files=extfiles)

    status('Sorting includes...\n')

    grouping = GroupedSorter(options.style, options.absolute)
    sorter = IncludeSorter(grouping, options.quiet)

    for fileobj in files:
        sorter.sort_includes(fileobj)

if __name__ == '__main__':
    main()