admin/copyright.py

   1 #!/usr/bin/python
   2 #
   3 # This file is part of the GROMACS molecular simulation package.
   4 #
   5 # Copyright (c) 2013, by the GROMACS development team, led by
   6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7 # and including many others, as listed in the AUTHORS file in the
   8 # top-level source directory and at http://www.gromacs.org.
   9 #
  10 # GROMACS is free software; you can redistribute it and/or
  11 # modify it under the terms of the GNU Lesser General Public License
  12 # as published by the Free Software Foundation; either version 2.1
  13 # of the License, or (at your option) any later version.
  14 #
  15 # GROMACS is distributed in the hope that it will be useful,
  16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 # Lesser General Public License for more details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public
  21 # License along with GROMACS; if not, see
  22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24 #
  25 # If you want to redistribute modifications to GROMACS, please
  26 # consider that scientific software is very special. Version
  27 # control is crucial - bugs must be traceable. We will be happy to
  28 # consider code for inclusion in the official distribution, but
  29 # derived work must not be called official GROMACS. Details are found
  30 # in the README & COPYING files - if they are missing, get the
  31 # official version at http://www.gromacs.org.
  32 #
  33 # To help us fund GROMACS development, we humbly ask that you cite
  34 # the research papers on the package. Check out http://www.gromacs.org.
  35
  36 import datetime
  37 import os.path
  38 import re
  39 import sys
  40
  41 from optparse import OptionParser
  42
  43 class CopyrightState(object):
  44
  45     """Information about an existing (or non-existing) copyright header."""
  46
  47     def __init__(self, has_copyright, is_correct, is_newstyle, years, other_copyrights):
  48         self.has_copyright = has_copyright
  49         self.is_correct = is_correct
  50         self.is_newstyle = is_newstyle
  51         self.years = years
  52         self.other_copyrights = other_copyrights
  53
  54 class CopyrightChecker(object):
  55
  56     """Logic for analyzing existing copyright headers and generating new ones."""
  57
  58     _header = ["", "This file is part of the GROMACS molecular simulation package.", ""]
  59     _copyright = "Copyright (c) {0}, by the GROMACS development team, led by"
  60     _footer = """
  61 Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  62 and including many others, as listed in the AUTHORS file in the
  63 top-level source directory and at http://www.gromacs.org.
  64
  65 GROMACS is free software; you can redistribute it and/or
  66 modify it under the terms of the GNU Lesser General Public License
  67 as published by the Free Software Foundation; either version 2.1
  68 of the License, or (at your option) any later version.
  69
  70 GROMACS is distributed in the hope that it will be useful,
  71 but WITHOUT ANY WARRANTY; without even the implied warranty of
  72 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  73 Lesser General Public License for more details.
  74
  75 You should have received a copy of the GNU Lesser General Public
  76 License along with GROMACS; if not, see
  77 http://www.gnu.org/licenses, or write to the Free Software Foundation,
  78 Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  79
  80 If you want to redistribute modifications to GROMACS, please
  81 consider that scientific software is very special. Version
  82 control is crucial - bugs must be traceable. We will be happy to
  83 consider code for inclusion in the official distribution, but
  84 derived work must not be called official GROMACS. Details are found
  85 in the README & COPYING files - if they are missing, get the
  86 official version at http://www.gromacs.org.
  87
  88 To help us fund GROMACS development, we humbly ask that you cite
  89 the research papers on the package. Check out http://www.gromacs.org.
  90 """.strip().splitlines()
  91
  92     def check_copyright(self, comment_block):
  93         """Analyze existing copyright header for correctness and extract information."""
  94         copyright_re = r'Copyright \(c\) (([0-9]{4}[,-])*[0-9]{4}),? by the GROMACS development team,'
  95         has_copyright = False
  96         is_newstyle = True
  97         is_correct = True
  98         next_header_line = 0
  99         next_footer_line = 0
 100         append_next_line_to_other_copyrights = False
 101         existing_years = ''
 102         other_copyrights = []
 103         for line in comment_block:
 104             if append_next_line_to_other_copyrights:
 105                 other_copyrights[-1] += ' ' + line
 106                 append_next_line_to_other_copyrights = False
 107                 continue
 108             if 'Copyright' in line:
 109                 has_copyright = True
 110                 match = re.match(copyright_re, line)
 111                 if match:
 112                     existing_years = match.group(1)
 113                     new_line = self._copyright.format(existing_years)
 114                     if line != new_line:
 115                         is_correct = False
 116                 else:
 117                     other_copyrights.append(line[line.find('Copyright'):])
 118                     if not line.startswith('Copyright'):
 119                         append_next_line_to_other_copyrights = True
 120                 if next_header_line != -1 or next_footer_line != 0:
 121                     is_correct = False
 122                 continue
 123             if line.startswith('Written by the Gromacs development team'):
 124                 has_copyright = True
 125             if next_header_line >= 0:
 126                 if line == self._header[next_header_line]:
 127                     next_header_line += 1
 128                     if next_header_line >= len(self._header):
 129                         next_header_line = -1
 130                 else:
 131                     is_correct = False
 132                     is_newstyle = False
 133             elif next_footer_line >= 0:
 134                 if line == self._footer[next_footer_line]:
 135                     next_footer_line += 1
 136                     if next_footer_line >= len(self._footer):
 137                         next_footer_line = -1
 138                 else:
 139                     is_correct = False
 140             else:
 141                 is_correct = False
 142         if next_header_line != -1 or next_footer_line != -1:
 143             is_correct = False
 144
 145         return CopyrightState(has_copyright, is_correct, is_newstyle, existing_years, other_copyrights)
 146
 147     def process_copyright(self, state, options, current_years, reporter):
 148         """Determine whether a copyrigth header needs to be updated and report issues."""
 149         need_update = False
 150
 151         if state.years:
 152             if options.replace_years:
 153                 if state.years != current_years:
 154                     need_update = True
 155                     reporter.report('copyright years replaced')
 156                 new_years = current_years
 157             else:
 158                 new_years = state.years
 159                 if not new_years.endswith(current_years):
 160                     if options.update_year:
 161                         need_update = True
 162                         new_years += ',' + current_years
 163                     if options.check or not need_update:
 164                         reporter.report('copyright year outdated')
 165                     else:
 166                         reporter.report('copyright year added')
 167         else:
 168             new_years = current_years
 169
 170         if not state.has_copyright:
 171             if options.add_missing:
 172                 need_update = True
 173             if options.check or not need_update:
 174                 reporter.report('copyright header missing')
 175             elif options.add_missing:
 176                 reporter.report('copyright header added')
 177         else:
 178             if not state.is_newstyle:
 179                 if options.replace_header:
 180                     need_update = True
 181                 if options.check or not need_update:
 182                     reporter.report('copyright header incorrect')
 183                 else:
 184                     reporter.report('copyright header replaced')
 185             elif not state.is_correct:
 186                 if options.update_header:
 187                     need_update = True
 188                 if options.check or not need_update:
 189                     reporter.report('copyright header outdated')
 190                 else:
 191                     reporter.report('copyright header updated')
 192
 193         return need_update, new_years
 194
 195     def get_copyright_text(self, years, other_copyrights):
 196         """Construct a new copyright header."""
 197         output = []
 198         output.extend(self._header)
 199         if other_copyrights:
 200             for line in other_copyrights:
 201                 outline = line.rstrip()
 202                 if outline.endswith(','):
 203                     outline = outline[:-1]
 204                 if not outline.endswith('.'):
 205                     outline += '.'
 206                 output.append(outline)
 207         output.append(self._copyright.format(years))
 208         output.extend(self._footer)
 209         return output
 210
 211 class Reporter(object):
 212
 213     """Wrapper for reporting issues in a file."""
 214
 215     def __init__(self, reportfile, filename):
 216         self._reportfile = reportfile
 217         self._filename = filename
 218
 219     def report(self, text):
 220         self._reportfile.write(self._filename + ': ' + text + '\n');
 221
 222 class CommentHandlerC(object):
 223
 224     """Handler for extracting and creating C-style comments."""
 225
 226     def extract_first_comment_block(self, content_lines):
 227         if not content_lines or not content_lines[0].startswith('/*'):
 228             return ([], 0)
 229         comment_block = [content_lines[0][2:].strip()]
 230         line_index = 1
 231         while line_index < len(content_lines):
 232             line = content_lines[line_index]
 233             if '*/' in content_lines[line_index]:
 234                 break
 235             comment_block.append(line.lstrip('* ').rstrip())
 236             line_index += 1
 237         return (comment_block, line_index + 1)
 238
 239     def create_comment_block(self, lines):
 240         output = []
 241         output.append(('/* ' + lines[0]).rstrip())
 242         output.extend([(' * ' + x).rstrip() for x in lines[1:]])
 243         output.append(' */')
 244         return output
 245
 246 class CommentHandlerSh(object):
 247
 248     """Handler for extracting and creating sh-style comments."""
 249
 250     def extract_first_comment_block(self, content_lines):
 251         if not content_lines or not content_lines[0].startswith('#'):
 252             return ([], 0)
 253         comment_block = []
 254         line_index = 0
 255         while line_index < len(content_lines):
 256             line = content_lines[line_index]
 257             if not line.startswith('#'):
 258                 break
 259             comment_block.append(line.lstrip('# ').rstrip())
 260             line_index += 1
 261             if line == '# the research papers on the package. Check out http://www.gromacs.org.':
 262                 break
 263         while line_index < len(content_lines):
 264             line = content_lines[line_index].rstrip()
 265             if len(line) > 0 and line != '#':
 266                 break
 267             line_index += 1
 268         return (comment_block, line_index)
 269
 270     def create_comment_block(self, lines):
 271         output = []
 272         output.extend([('# ' + x).rstrip() for x in lines])
 273         output.append('')
 274         return output
 275
 276 comment_handlers = {'c': CommentHandlerC(), 'sh': CommentHandlerSh()}
 277
 278 def select_comment_handler(override, filename):
 279     """Select comment handler for a file based on file name and input options."""
 280     filetype = override
 281     if not filetype and filename != '-':
 282         basename = os.path.basename(filename)
 283         root, ext = os.path.splitext(basename)
 284         if ext == '.cmakein':
 285             dummy, ext2 = os.path.splitext(root)
 286             if ext2:
 287                 ext = ext2
 288         if ext in ('.c', '.cpp', '.h', '.y', '.l', '.pre'):
 289             filetype = 'c'
 290         elif basename in ('CMakeLists.txt', 'GMXRC', 'git-pre-commit') or \
 291                 ext in ('.cmake', '.cmakein', '.py', '.sh', '.bash', '.csh', '.zsh'):
 292             filetype = 'sh'
 293     if filetype in comment_handlers:
 294         return comment_handlers[filetype]
 295     if filetype:
 296         sys.stderr.write("Unsupported input format: {0}\n".format(filetype))
 297     elif filename != '-':
 298         sys.stderr.write("Unsupported input format: {0}\n".format(filename))
 299     else:
 300         sys.stderr.write("No file name or file type provided.\n")
 301     sys.exit(1)
 302
 303 def create_copyright_header(years, other_copyrights=None, language='c'):
 304     if language not in comment_handlers:
 305         sys.strerr.write("Unsupported language: {0}\n".format(language))
 306         sys.exit(1)
 307     copyright_checker = CopyrightChecker()
 308     comment_handler = comment_handlers[language]
 309     copyright_lines = copyright_checker.get_copyright_text(years, other_copyrights)
 310     comment_lines = comment_handler.create_comment_block(copyright_lines)
 311     return '\n'.join(comment_lines) + '\n'
 312
 313 def process_options():
 314     """Process input options."""
 315     parser = OptionParser()
 316     parser.add_option('-l', '--lang',
 317                       help='Comment type to use (c or sh)')
 318     parser.add_option('-y', '--years',
 319                       help='Comma-separated list of years')
 320     parser.add_option('-F', '--files',
 321                       help='File to read list of files from')
 322     parser.add_option('--check', action='store_true',
 323                       help='Do not modify the files, only check the copyright (default action). ' +
 324                            'If specified together with --update, do the modifications ' +
 325                            'but produce output as if only --check was provided.')
 326     parser.add_option('--update-year', action='store_true',
 327                       help='Update the copyright year if outdated')
 328     parser.add_option('--replace-years', action='store_true',
 329                       help='Replace the copyright years with those given with --years')
 330     parser.add_option('--update-header', action='store_true',
 331                       help='Update the copyright header if outdated')
 332     parser.add_option('--replace-header', action='store_true',
 333                       help='Replace any copyright header with the current one')
 334     parser.add_option('--add-missing', action='store_true',
 335                       help='Add missing copyright headers')
 336     options, args = parser.parse_args()
 337
 338     filenames = args
 339     if options.files:
 340         with open(options.files, 'r') as filelist:
 341             filenames = [x.strip() for x in filelist.read().splitlines()]
 342     elif not filenames:
 343         filenames = ['-']
 344
 345     # Default is --check if nothing provided.
 346     if not options.check and not options.update_year and \
 347             not options.update_header and not options.replace_header and \
 348             not options.add_missing:
 349         options.check = True
 350
 351     return options, filenames
 352
 353 def main():
 354     """Do processing as a stand-alone script."""
 355     options, filenames = process_options()
 356     years = options.years
 357     if not years:
 358         years = str(datetime.date.today().year)
 359     if years.endswith(','):
 360         years = years[:-1]
 361
 362     checker = CopyrightChecker()
 363
 364     # Process each input file in turn.
 365     for filename in filenames:
 366         comment_handler = select_comment_handler(options.lang, filename)
 367
 368         # Read the input file.  We are doing an in-place operation, so can't
 369         # operate in pass-through mode.
 370         if filename == '-':
 371             contents = sys.stdin.read().splitlines()
 372             reporter = Reporter(sys.stderr, '<stdin>')
 373         else:
 374             with open(filename, 'r') as inputfile:
 375                 contents = inputfile.read().splitlines()
 376             reporter = Reporter(sys.stdout, filename)
 377
 378         output = []
 379         # Keep lines that must be at the beginning of the file and skip them in
 380         # the check.
 381         if contents and (contents[0].startswith('#!/') or \
 382                 contents[0].startswith('%code requires') or \
 383                 contents[0].startswith('/* #if')):
 384             output.append(contents[0])
 385             contents = contents[1:]
 386         # Remove and skip empty lines at the beginning.
 387         while contents and len(contents[0]) == 0:
 388             contents = contents[1:]
 389
 390         # Analyze the first comment block in the file.
 391         comment_block, line_count = comment_handler.extract_first_comment_block(contents)
 392         state = checker.check_copyright(comment_block)
 393         need_update, file_years = checker.process_copyright(state, options, years, reporter)
 394
 395         if need_update:
 396             # Remove the original comment if it was a copyright comment.
 397             if state.has_copyright:
 398                 contents = contents[line_count:]
 399             new_block = checker.get_copyright_text(file_years, state.other_copyrights)
 400             output.extend(comment_handler.create_comment_block(new_block))
 401
 402         # Write the output file if required.
 403         if need_update or filename == '-':
 404             # Append the rest of the input file as it was.
 405             output.extend(contents)
 406             output = '\n'.join(output) + '\n'
 407             if filename == '-':
 408                 sys.stdout.write(output)
 409             else:
 410                 with open(filename, 'w') as outputfile:
 411                     outputfile.write(output)
 412
# Run the command-line processing only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()