diff --git a/ci/travis/git-clang-format b/ci/travis/git-clang-format index 116635ab7..727bea8f9 100755 --- a/ci/travis/git-clang-format +++ b/ci/travis/git-clang-format @@ -1,11 +1,12 @@ #!/usr/bin/env python +# This file is updated from +# https://github.com/llvm-mirror/clang/blob/master/tools/clang-format/git-clang-format. # #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===# # -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # #===------------------------------------------------------------------------===# @@ -20,9 +21,10 @@ clang-format on the changes in current files or a specific commit. For further details, run: git clang-format -h -Requires Python 2.7 +Requires Python 2.7 or Python 3 """ +from __future__ import absolute_import, division, print_function import argparse import collections import contextlib @@ -32,12 +34,15 @@ import re import subprocess import sys -usage = 'git clang-format [OPTIONS] [] [--] [...]' +usage = 'git clang-format [OPTIONS] [] [] [--] [...]' desc = ''' -Run clang-format on all lines that differ between the working directory -and , which defaults to HEAD. Changes are only applied to the working -directory. +If zero or one commits are given, run clang-format on all lines that differ +between the working directory and , which defaults to HEAD. Changes are +only applied to the working directory. + +If two commits are given (requires --diff), run clang-format on all lines in the +second that differ from the first . The following git-config settings set the default of the corresponding option: clangFormat.binary @@ -75,8 +80,10 @@ def main(): 'm', # ObjC 'mm', # ObjC++ 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp', # C++ + 'cu', # CUDA # Other languages that clang-format supports 'proto', 'protodevel', # Protocol Buffers + 'java', # Java 'js', # JavaScript 'ts', # TypeScript ]) @@ -97,9 +104,10 @@ def main(): default_extensions), help=('comma-separated list of file extensions to format, ' 'excluding the period and case-insensitive')), - p.add_argument('--exclude', help='Exclude files matching this regex.') p.add_argument('-f', '--force', action='store_true', help='allow changes to unstaged files') + # This option is added by ray. + p.add_argument('--exclude', help='Exclude files matching this regex.') p.add_argument('-p', '--patch', action='store_true', help='select hunks interactively') p.add_argument('-q', '--quiet', action='count', default=0, @@ -121,50 +129,68 @@ def main(): opts.verbose -= opts.quiet del opts.quiet - commit, files = interpret_args(opts.args, dash_dash, opts.commit) - changed_lines = compute_diff_and_extract_lines(commit, files) - if opts.verbose >= 1: - ignored_files = set(changed_lines) - filter_by_extension(changed_lines, opts.extensions.lower().split(',')) + commits, files = interpret_args(opts.args, dash_dash, opts.commit) + if len(commits) > 1: + if not opts.diff: + die('--diff is required when two commits are given') + else: + if len(commits) > 2: + die('at most two commits allowed; %d given' % len(commits)) + changed_lines = compute_diff_and_extract_lines(commits, files) + # The following code block is added by ray to exclude some files. + removed_files = [] if opts.exclude: for filename in changed_lines.keys(): if re.match(opts.exclude, filename): - del changed_lines[filename] + removed_files.append(filename) + for filename in removed_files: + del changed_lines[filename] + + if opts.verbose >= 1: + ignored_files = set(changed_lines) + filter_by_extension(changed_lines, opts.extensions.lower().split(',')) if opts.verbose >= 1: ignored_files.difference_update(changed_lines) if ignored_files: - print 'Ignoring changes in the following files:' + print('Ignoring changes in the following files (wrong extension):') for filename in ignored_files: - print ' ', filename + print(' %s' % filename) if changed_lines: - print 'Running clang-format on the following files:' + print('Running clang-format on the following files:') for filename in changed_lines: - print ' ', filename + print(' %s' % filename) if not changed_lines: - print 'no modified files to format' + print('no modified files to format') return # The computed diff outputs absolute paths, so we must cd before accessing # those files. cd_to_toplevel() - old_tree = create_tree_from_workdir(changed_lines) - new_tree = run_clang_format_and_save_to_tree(changed_lines, - binary=opts.binary, - style=opts.style) + if len(commits) > 1: + old_tree = commits[1] + new_tree = run_clang_format_and_save_to_tree(changed_lines, + revision=commits[1], + binary=opts.binary, + style=opts.style) + else: + old_tree = create_tree_from_workdir(changed_lines) + new_tree = run_clang_format_and_save_to_tree(changed_lines, + binary=opts.binary, + style=opts.style) if opts.verbose >= 1: - print 'old tree:', old_tree - print 'new tree:', new_tree + print('old tree: %s' % old_tree) + print('new tree: %s' % new_tree) if old_tree == new_tree: if opts.verbose >= 0: - print 'clang-format did not modify any files' + print('clang-format did not modify any files') elif opts.diff: print_diff(old_tree, new_tree) else: changed_files = apply_changes(old_tree, new_tree, force=opts.force, patch_mode=opts.patch) if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1: - print 'changed files:' + print('changed files:') for filename in changed_files: - print ' ', filename + print(' %s' % filename) def load_git_config(non_string_options=None): @@ -186,40 +212,41 @@ def load_git_config(non_string_options=None): def interpret_args(args, dash_dash, default_commit): - """Interpret `args` as "[commit] [--] [files...]" and return (commit, files). + """Interpret `args` as "[commits] [--] [files]" and return (commits, files). It is assumed that "--" and everything that follows has been removed from args and placed in `dash_dash`. - If "--" is present (i.e., `dash_dash` is non-empty), the argument to its - left (if present) is taken as commit. Otherwise, the first argument is - checked if it is a commit or a file. If commit is not given, - `default_commit` is used.""" + If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its + left (if present) are taken as commits. Otherwise, the arguments are checked + from left to right if they are commits or files. If commits are not given, + a list with `default_commit` is used.""" if dash_dash: if len(args) == 0: - commit = default_commit - elif len(args) > 1: - die('at most one commit allowed; %d given' % len(args)) + commits = [default_commit] else: - commit = args[0] - object_type = get_object_type(commit) - if object_type not in ('commit', 'tag'): - if object_type is None: - die("'%s' is not a commit" % commit) - else: - die("'%s' is a %s, but a commit was expected" % (commit, object_type)) + commits = args + for commit in commits: + object_type = get_object_type(commit) + if object_type not in ('commit', 'tag'): + if object_type is None: + die("'%s' is not a commit" % commit) + else: + die("'%s' is a %s, but a commit was expected" % (commit, object_type)) files = dash_dash[1:] elif args: - if disambiguate_revision(args[0]): - commit = args[0] - files = args[1:] - else: - commit = default_commit - files = args + commits = [] + while args: + if not disambiguate_revision(args[0]): + break + commits.append(args.pop(0)) + if not commits: + commits = [default_commit] + files = args else: - commit = default_commit + commits = [default_commit] files = [] - return commit, files + return commits, files def disambiguate_revision(value): @@ -244,12 +271,12 @@ def get_object_type(value): stdout, stderr = p.communicate() if p.returncode != 0: return None - return stdout.strip() + return convert_string(stdout.strip()) -def compute_diff_and_extract_lines(commit, files): +def compute_diff_and_extract_lines(commits, files): """Calls compute_diff() followed by extract_lines().""" - diff_process = compute_diff(commit, files) + diff_process = compute_diff(commits, files) changed_lines = extract_lines(diff_process.stdout) diff_process.stdout.close() diff_process.wait() @@ -259,13 +286,17 @@ def compute_diff_and_extract_lines(commit, files): return changed_lines -def compute_diff(commit, files): - """Return a subprocess object producing the diff from `commit`. +def compute_diff(commits, files): + """Return a subprocess object producing the diff from `commits`. The return value's `stdin` file object will produce a patch with the - differences between the working directory and `commit`, filtered on `files` - (if non-empty). Zero context lines are used in the patch.""" - cmd = ['git', 'diff-index', '-p', '-U0', commit, '--'] + differences between the working directory and the first commit if a single + one was specified, or the difference between both specified commits, filtered + on `files` (if non-empty). Zero context lines are used in the patch.""" + git_tool = 'diff-index' + if len(commits) > 1: + git_tool = 'diff-tree' + cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--'] cmd.extend(files) p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) p.stdin.close() @@ -283,6 +314,7 @@ def extract_lines(patch_file): list of line `Range`s.""" matches = {} for line in patch_file: + line = convert_string(line) match = re.search(r'^\+\+\+\ [^/]+/(.*)', line) if match: filename = match.group(1).rstrip('\r\n') @@ -303,8 +335,10 @@ def filter_by_extension(dictionary, allowed_extensions): `allowed_extensions` must be a collection of lowercase file extensions, excluding the period.""" allowed_extensions = frozenset(allowed_extensions) - for filename in dictionary.keys(): + for filename in list(dictionary.keys()): base_ext = filename.rsplit('.', 1) + if len(base_ext) == 1 and '' in allowed_extensions: + continue if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions: del dictionary[filename] @@ -322,15 +356,34 @@ def create_tree_from_workdir(filenames): return create_tree(filenames, '--stdin') -def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format', - style=None): +def run_clang_format_and_save_to_tree(changed_lines, revision=None, + binary='clang-format', style=None): """Run clang-format on each file and save the result to a git tree. Returns the object ID (SHA-1) of the created tree.""" + def iteritems(container): + try: + return container.iteritems() # Python 2 + except AttributeError: + return container.items() # Python 3 def index_info_generator(): - for filename, line_ranges in changed_lines.iteritems(): - mode = oct(os.stat(filename).st_mode) - blob_id = clang_format_to_blob(filename, line_ranges, binary=binary, + for filename, line_ranges in iteritems(changed_lines): + if revision: + git_metadata_cmd = ['git', 'ls-tree', + '%s:%s' % (revision, os.path.dirname(filename)), + os.path.basename(filename)] + git_metadata = subprocess.Popen(git_metadata_cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + stdout = git_metadata.communicate()[0] + mode = oct(int(stdout.split()[0], 8)) + else: + mode = oct(os.stat(filename).st_mode) + # Adjust python3 octal format so that it matches what git expects + if mode.startswith('0o'): + mode = '0' + mode[2:] + blob_id = clang_format_to_blob(filename, line_ranges, + revision=revision, + binary=binary, style=style) yield '%s %s\t%s' % (mode, blob_id, filename) return create_tree(index_info_generator(), '--index-info') @@ -348,7 +401,7 @@ def create_tree(input_lines, mode): with temporary_index_file(): p = subprocess.Popen(cmd, stdin=subprocess.PIPE) for line in input_lines: - p.stdin.write('%s\0' % line) + p.stdin.write(to_bytes('%s\0' % line)) p.stdin.close() if p.wait() != 0: die('`%s` failed' % ' '.join(cmd)) @@ -356,26 +409,42 @@ def create_tree(input_lines, mode): return tree_id -def clang_format_to_blob(filename, line_ranges, binary='clang-format', - style=None): +def clang_format_to_blob(filename, line_ranges, revision=None, + binary='clang-format', style=None): """Run clang-format on the given file and save the result to a git blob. + Runs on the file in `revision` if not None, or on the file in the working + directory if `revision` is None. + Returns the object ID (SHA-1) of the created blob.""" - clang_format_cmd = [binary, filename] + clang_format_cmd = [binary] if style: clang_format_cmd.extend(['-style='+style]) clang_format_cmd.extend([ '-lines=%s:%s' % (start_line, start_line+line_count-1) for start_line, line_count in line_ranges]) + if revision: + clang_format_cmd.extend(['-assume-filename='+filename]) + git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)] + git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + git_show.stdin.close() + clang_format_stdin = git_show.stdout + else: + clang_format_cmd.extend([filename]) + git_show = None + clang_format_stdin = subprocess.PIPE try: - clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE, + clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin, stdout=subprocess.PIPE) + if clang_format_stdin == subprocess.PIPE: + clang_format_stdin = clang_format.stdin except OSError as e: if e.errno == errno.ENOENT: die('cannot find executable "%s"' % binary) else: raise - clang_format.stdin.close() + clang_format_stdin.close() hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin'] hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout, stdout=subprocess.PIPE) @@ -385,7 +454,9 @@ def clang_format_to_blob(filename, line_ranges, binary='clang-format', die('`%s` failed' % ' '.join(hash_object_cmd)) if clang_format.wait() != 0: die('`%s` failed' % ' '.join(clang_format_cmd)) - return stdout.rstrip('\r\n') + if git_show and git_show.wait() != 0: + die('`%s` failed' % ' '.join(git_show_cmd)) + return convert_string(stdout).rstrip('\r\n') @contextlib.contextmanager @@ -423,7 +494,12 @@ def print_diff(old_tree, new_tree): # We use the porcelain 'diff' and not plumbing 'diff-tree' because the output # is expected to be viewed by the user, and only the former does nice things # like color and pagination. - subprocess.check_call(['git', 'diff', old_tree, new_tree, '--']) + # + # We also only print modified files since `new_tree` only contains the files + # that were modified, so unmodified files would show as deleted without the + # filter. + subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree, + '--']) def apply_changes(old_tree, new_tree, force=False, patch_mode=False): @@ -431,15 +507,16 @@ def apply_changes(old_tree, new_tree, force=False, patch_mode=False): Bails if there are local changes in those files and not `force`. If `patch_mode`, runs `git checkout --patch` to select hunks interactively.""" - changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree, + changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z', + '--name-only', old_tree, new_tree).rstrip('\0').split('\0') if not force: unstaged_files = run('git', 'diff-files', '--name-status', *changed_files) if unstaged_files: - print >>sys.stderr, ('The following files would be modified but ' - 'have unstaged changes:') - print >>sys.stderr, unstaged_files - print >>sys.stderr, 'Please commit, stage, or stash them first.' + print('The following files would be modified but ' + 'have unstaged changes:', file=sys.stderr) + print(unstaged_files, file=sys.stderr) + print('Please commit, stage, or stash them first.', file=sys.stderr) sys.exit(2) if patch_mode: # In patch mode, we could just as well create an index from the new tree @@ -466,25 +543,51 @@ def run(*args, **kwargs): p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) stdout, stderr = p.communicate(input=stdin) + + stdout = convert_string(stdout) + stderr = convert_string(stderr) + if p.returncode == 0: if stderr: if verbose: - print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args) - print >>sys.stderr, stderr.rstrip() + print('`%s` printed to stderr:' % ' '.join(args), file=sys.stderr) + print(stderr.rstrip(), file=sys.stderr) if strip: stdout = stdout.rstrip('\r\n') return stdout if verbose: - print >>sys.stderr, '`%s` returned %s' % (' '.join(args), p.returncode) + print('`%s` returned %s' % (' '.join(args), p.returncode), file=sys.stderr) if stderr: - print >>sys.stderr, stderr.rstrip() + print(stderr.rstrip(), file=sys.stderr) sys.exit(2) def die(message): - print >>sys.stderr, 'error:', message + print('error:', message, file=sys.stderr) sys.exit(2) +def to_bytes(str_input): + # Encode to UTF-8 to get binary data. + if isinstance(str_input, bytes): + return str_input + return str_input.encode('utf-8') + + +def to_string(bytes_input): + if isinstance(bytes_input, str): + return bytes_input + return bytes_input.encode('utf-8') + + +def convert_string(bytes_input): + try: + return to_string(bytes_input.decode('utf-8')) + except AttributeError: # 'str' object has no attribute 'decode'. + return str(bytes_input) + except UnicodeError: + return str(bytes_input) + if __name__ == '__main__': main() +