Răsfoiți Sursa

Rewrite lint.sh as check_lint.py (#3161)

* Rewrite lint.sh as check_lint.py

This makes several beneficial changes:

  * Runs in parallel: lint all now takes 5.5 seconds, down from 25.
  * Supports linting specific files, passed on the command-line.
  * Defaults to linting files changed since master, pass --all to lint
    everything.
  * Infers header language based on related files.
  * Lays the groundwork for linting more than just C++.
  * Adds support for linting python.

* Fix lint errors

* Use check_lint.py in check.sh

* Remove lint.sh

* Python lint configuration

* Enable python linting in travis
Gil 6 ani în urmă
părinte
comite
3e3bcb41f7

+ 3 - 0
.gitignore

@@ -79,6 +79,9 @@ Ninja
 /cmake-build-debug
 /cmake-build-release
 
+# Python
+*.pyc
+
 # Visual Studio
 /.vs
 

+ 1 - 0
.travis.yml

@@ -16,6 +16,7 @@ jobs:
       before_install:
         - brew install clang-format
         - brew install swiftformat
+        - pip install flake8
       script:
         - ./scripts/check.sh --test-only
 

+ 8 - 8
Firestore/Example/Benchmarks/FSTLevelDBBenchmarkTests.mm

@@ -16,9 +16,8 @@
 
 #import <Foundation/Foundation.h>
 #import <XCTest/XCTest.h>
-#include <cstdint>
 
-#include "benchmark/benchmark.h"
+#include <cstdint>
 
 #import "Firestore/Source/Local/FSTLevelDB.h"
 #import "Firestore/Source/Local/FSTLocalSerializer.h"
@@ -29,6 +28,7 @@
 #include "Firestore/core/src/firebase/firestore/model/document_key.h"
 #include "Firestore/core/src/firebase/firestore/model/types.h"
 #include "Firestore/core/src/firebase/firestore/util/string_format.h"
+#include "benchmark/benchmark.h"
 
 NS_ASSUME_NONNULL_BEGIN
 
@@ -104,7 +104,7 @@ class LevelDBFixture : public benchmark::Fixture {
       auto docKey = DocumentKey::FromPathString(StringFormat("docs/doc_%i", i));
       std::string docKeyString = LevelDbRemoteDocumentKey::Key(docKey);
       txn.Put(docKeyString, DocumentData());
-      WriteIndex(txn, docKey);
+      WriteIndex(&txn, docKey);
     }
     txn.Commit();
     // Force a write to disk to simulate startup situation
@@ -112,11 +112,11 @@ class LevelDBFixture : public benchmark::Fixture {
   }
 
  protected:
-  void WriteIndex(LevelDbTransaction &txn, const DocumentKey &docKey) {
+  void WriteIndex(LevelDbTransaction *txn, const DocumentKey &docKey) {
     // Arbitrary target ID
     TargetId targetID = 1;
-    txn.Put(LevelDbDocumentTargetKey::Key(docKey, targetID), emptyBuffer_);
-    txn.Put(LevelDbTargetDocumentKey::Key(targetID, docKey), emptyBuffer_);
+    txn->Put(LevelDbDocumentTargetKey::Key(docKey, targetID), emptyBuffer_);
+    txn->Put(LevelDbTargetDocumentKey::Key(targetID, docKey), emptyBuffer_);
   }
 
   FSTLevelDB *db_;
@@ -128,7 +128,7 @@ class LevelDBFixture : public benchmark::Fixture {
 // Write a couple large values (documents)
 // In each test, either overwrite index entries and documents, or just documents
 
-BENCHMARK_DEFINE_F(LevelDBFixture, RemoteEvent)(benchmark::State &state) {
+BENCHMARK_DEFINE_F(LevelDBFixture, RemoteEvent)(benchmark::State &state) {  // NOLINT
   bool writeIndexes = static_cast<bool>(state.range(0));
   int64_t documentSize = state.range(1);
   int64_t docsToUpdate = state.range(2);
@@ -137,7 +137,7 @@ BENCHMARK_DEFINE_F(LevelDBFixture, RemoteEvent)(benchmark::State &state) {
     LevelDbTransaction txn(db_.ptr, "benchmark");
     for (int i = 0; i < docsToUpdate; i++) {
       auto docKey = DocumentKey::FromPathString(StringFormat("docs/doc_%i", i));
-      if (writeIndexes) WriteIndex(txn, docKey);
+      if (writeIndexes) WriteIndex(&txn, docKey);
       std::string docKeyString = LevelDbRemoteDocumentKey::Key(docKey);
       txn.Put(docKeyString, documentUpdate);
     }

+ 10 - 6
scripts/binary_to_array.py

@@ -50,19 +50,22 @@ import os
 
 arg_parser = argparse.ArgumentParser()
 
-arg_parser.add_argument("input", help="Input file containing binary data to embed")
+arg_parser.add_argument("input",
+                        help="Input file containing binary data to embed")
 arg_parser.add_argument("--output_source",
-                    help="Output source file, defining the array data.")
+                        help="Output source file, defining the array data.")
 arg_parser.add_argument("--output_header",
-                    help="Output header file, declaring the array data.")
+                        help="Output header file, declaring the array data.")
 arg_parser.add_argument("--array", help="Identifier for the array.")
 arg_parser.add_argument("--array_size", help="Identifier for the array size.")
 arg_parser.add_argument("--filename", help="Override file name in code.")
-arg_parser.add_argument("--filename_identifier", help="Where to put the filename.")
+arg_parser.add_argument("--filename_identifier",
+                        help="Where to put the filename.")
 arg_parser.add_argument("--header_guard",
-                    help="Header guard to #define in the output header.")
+                        help="Header guard to #define in the output header.")
 arg_parser.add_argument("--cpp_namespace",
-                    help="C++ namespace to use. If blank, will generate a C array.")
+                        help="C++ namespace to use. "
+                             "If blank, will generate a C array.")
 
 # How many hex bytes to display in a line. Each "0x00, " takes 6 characters, so
 # a width of 12 lets us fit within 80 characters.
@@ -278,5 +281,6 @@ def main():
     src.write(source_text)
     logging.debug("Wrote source file %s", output_source)
 
+
 if __name__ == "__main__":
   main()

+ 1 - 1
scripts/check.sh

@@ -261,4 +261,4 @@ fi
 "${top_dir}/scripts/check_test_inclusion.py"
 
 # Google C++ style
-"${top_dir}/scripts/lint.sh" "${START_SHA}"
+"${top_dir}/scripts/check_lint.py" "${START_SHA}"

+ 252 - 0
scripts/check_lint.py

@@ -0,0 +1,252 @@
+#!/usr/bin/env python
+
+# Copyright 2019 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Lints source files for conformance with the style guide that applies.
+
+Currently supports linting Objective-C, Objective-C++, C++, and Python source.
+"""
+
+import argparse
+import logging
+import os
+import re
+import subprocess
+import sys
+import textwrap
+
+from lib import checker
+from lib import command_trace
+from lib import git
+from lib import source
+
+_logger = logging.getLogger('lint')
+
+
+_dry_run = False
+
+
+_CPPLINT_OBJC_FILTERS = [
+    # Objective-C uses #import and does not use header guards
+    '-build/header_guard',
+
+    # Inline definitions of Objective-C blocks confuse
+    '-readability/braces',
+
+    # C-style casts are acceptable in Objective-C++
+    '-readability/casting',
+
+    # Objective-C needs to use type 'long' for interop between types like
+    # and printf-style functions.
+    '-runtime/int',
+
+    # cpplint is generally confused by Objective-C mixing with C++.
+    #   * Objective-C method invocations in a for loop make it think it's a
+    #     range-for
+    #   * Objective-C dictionary literals confuse brace spacing
+    #   * Empty category declarations ("@interface Foo ()") look like function
+    #     invocations
+    '-whitespace',
+]
+
+_CPPLINT_OBJC_OPTIONS = [
+    # cpplint normally excludes Objective-C++
+    '--extensions=h,m,mm',
+
+    # Objective-C style allows longer lines
+    '--linelength=100',
+
+    '--filter=' + ','.join(_CPPLINT_OBJC_FILTERS),
+]
+
+
+def main():
+  global _dry_run
+
+  parser = argparse.ArgumentParser(description='Lint source files.')
+  parser.add_argument('--dry-run', '-n', action='store_true',
+                      help='Show what the linter would do without doing it')
+  parser.add_argument('--all', action='store_true',
+                      help='run the linter over all known sources')
+  parser.add_argument('rev_or_files', nargs='*',
+                      help='A single revision that specifies a point in time '
+                           'from which to look for changes. Defaults to '
+                           'origin/master. Alternatively, a list of specific '
+                           'files or git pathspecs to lint.')
+  args = command_trace.parse_args(parser)
+
+  if args.dry_run:
+    _dry_run = True
+    command_trace.enable_tracing()
+
+  pool = checker.Pool()
+
+  sources = _unique(source.CC_DIRS + source.OBJC_DIRS + source.PYTHON_DIRS)
+  patterns = git.make_patterns(sources)
+
+  files = git.find_changed_or_files(args.all, args.rev_or_files, patterns)
+  check(pool, files)
+
+  pool.exit()
+
+
+def check(pool, files):
+  group = source.categorize_files(files)
+
+  for kind, files in group.kinds.items():
+    for chunk in checker.shard(files):
+      if not chunk:
+        continue
+
+      linter = _linters[kind]
+      pool.submit(linter, chunk)
+
+
+def lint_cc(files):
+  return _run_cpplint([], files)
+
+
+def lint_objc(files):
+  return _run_cpplint(_CPPLINT_OBJC_OPTIONS, files)
+
+
+def _run_cpplint(options, files):
+  scripts_dir = os.path.dirname(os.path.abspath(__file__))
+  cpplint = os.path.join(scripts_dir, 'cpplint.py')
+
+  command = [sys.executable, cpplint, '--quiet']
+  command.extend(options)
+  command.extend(files)
+
+  return _read_output(command)
+
+
+_flake8_warned = False
+
+
+def lint_py(files):
+  flake8 = which('flake8')
+  if flake8 is None:
+    global _flake8_warned
+    if not _flake8_warned:
+      _flake8_warned = True
+      _logger.warn(textwrap.dedent(
+          """
+          Could not find flake8 on the path; skipping python lint.
+          Install with:
+
+            pip install --user flake8
+          """))
+    return
+
+  command = [flake8]
+  command.extend(files)
+
+  return _read_output(command)
+
+
+def _read_output(command):
+  command_trace.log(command)
+
+  if _dry_run:
+    return checker.Result(0, '')
+
+  proc = subprocess.Popen(
+      command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+  output = proc.communicate('')[0]
+  sc = proc.wait()
+
+  return checker.Result(sc, output)
+
+
+_linters = {
+    'cc': lint_cc,
+    'objc': lint_objc,
+    'py': lint_py,
+}
+
+
+def _unique(items):
+  return list(set(items))
+
+
+def make_path():
+  """Makes a list of paths to search for binaries.
+
+  Returns:
+    A list of directories that can be sources of binaries to run. This includes
+    both the PATH environment variable and any bin directories associated with
+    python install locations.
+  """
+  # Start with the system-supplied PATH.
+  path = os.environ['PATH'].split(os.pathsep)
+
+  # In addition, add any bin directories near the lib directories in the python
+  # path. This makes it possible to find flake8 in ~/Library/Python/2.7/bin
+  # after pip install --user flake8. Also handle installations on Windows which
+  # go in %APPDATA%/Python/Scripts.
+  lib_pattern = re.compile(r'(.*)/[^/]*/site-packages')
+  for entry in sys.path:
+    entry = entry.replace(os.sep, '/')
+    m = lib_pattern.match(entry)
+    if not m:
+      continue
+
+    python_root = m.group(1).replace('/', os.sep)
+
+    for bin_basename in ('bin', 'Scripts'):
+      bin_dir = os.path.join(python_root, bin_basename)
+      if bin_dir not in path and os.path.exists(bin_dir):
+        path.append(bin_dir)
+
+  return path
+
+
+_PATH = make_path()
+
+
+def which(executable):
+  """Finds the executable with the given name.
+
+  Returns:
+    The fully qualified path to the executable or None if the executable isn't
+    found.
+  """
+  if executable.startswith('/'):
+    return executable
+
+  for executable_with_ext in _executable_names(executable):
+    for entry in _PATH:
+      joined = os.path.join(entry, executable_with_ext)
+      if os.path.isfile(joined) and os.access(joined, os.X_OK):
+        return joined
+
+  return None
+
+
+def _executable_names(executable):
+  """Yields a sequence of all possible executable names."""
+
+  if os.name == 'nt':
+    pathext = os.environ.get('PATHEXT', '').split(os.pathsep)
+    for ext in pathext:
+      yield executable + ext
+
+  else:
+    yield executable
+
+
+if __name__ == '__main__':
+  main()

+ 1 - 1
scripts/check_test_inclusion.py

@@ -80,7 +80,7 @@ def CheckProject(project_file, test_files):
   """
 
  # A dict of basename to filename
-  basenames = {os.path.basename(f) : f for f in test_files}
+  basenames = {os.path.basename(f): f for f in test_files}
 
   file_list_pattern = re.compile(r"/\* (\S+) in Sources \*/")
   with open(project_file, "r") as fd:

+ 13 - 0
scripts/lib/__init__.py

@@ -0,0 +1,13 @@
+# Copyright 2019 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+ 120 - 0
scripts/lib/checker.py

@@ -0,0 +1,120 @@
+# Copyright 2019 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import math
+import multiprocessing.pool
+import sys
+import threading
+
+# Python 3 renamed Queue to queue
+try:
+  import queue
+except ImportError:
+  import Queue as queue
+
+
+_TASKS = multiprocessing.cpu_count()
+
+
+_output_lock = threading.Lock()
+
+
+def shard(items):
+  """Breaks down the given items into roughly equal sized lists.
+
+  The number of lists will be equal to the number of available processor cores.
+  """
+  if not items:
+    return []
+
+  n = int(math.ceil(len(items) / _TASKS))
+  return _chunks(items, n)
+
+
+def _chunks(items, n):
+  """Yield successive n-sized chunks from items."""
+  for i in range(0, len(items), n):
+    yield items[i:i + n]
+
+
+class Result(object):
+
+  def __init__(self, num_errors, output):
+    self.errors = num_errors
+    self.output = output
+
+  @staticmethod
+  def from_list(errors):
+    return Result(len(errors), '\n'.join(errors))
+
+
+class Pool(object):
+
+  def __init__(self):
+    # Checkers submit tasks to be run and these are dropped in the _pending
+    # queue. Workers process that queue and results are put in the _results
+    # queue. _results is drained by the thread that calls join().
+    self._pending = queue.Queue()
+    self._results = queue.Queue()
+
+    def worker():
+      while True:
+        task, args = self._pending.get()
+        result = task(*args)
+        if result is not None:
+          self._results.put(result)
+        self._pending.task_done()
+
+    for i in range(_TASKS):
+      t = threading.Thread(target=worker)
+      t.daemon = True
+      t.start()
+
+  def submit(self, task, *args):
+    """Submits a task for execution by the pool.
+
+    Args:
+      task: A callable routine that will perform the work.
+      *args: A list of arguments to pass that routine.
+    """
+    self._pending.put((task, args))
+
+  def join(self):
+    """Waits for the completion of all submitted tasks.
+
+    Returns:
+      The number of errors encountered.
+    """
+    self._pending.join()
+
+    num_errors = 0
+    while not self._results.empty():
+      result = self._results.get()
+      num_errors += result.errors
+      sys.stdout.write(result.output)
+      self._results.task_done()
+
+    self._results.join()
+    return num_errors
+
+  def exit(self):
+    """Waits for the completion of the submitted tasks and exits.
+
+    This calls join() and then exits with a 0 status code if there were no
+    errors, or 1 if there were.
+    """
+    errors = self.join()
+    sys.exit(errors > 0)

+ 79 - 0
scripts/lib/command_trace.py

@@ -0,0 +1,79 @@
+# Copyright 2019 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+
+from lib import terminal
+
+_commands = logging.getLogger('commands')
+
+
+def log(command_args):
+  """Logs that a command has run.
+
+  Args:
+    command_args: A list of the command and its arguments.
+  """
+  if _commands.isEnabledFor(logging.DEBUG):
+    columns = terminal.columns()
+
+    text = ' '.join(command_args)
+
+    # When just passing --trace, shorten output to the width of the current
+    # window. When running extra verbose don't shorten.
+    if not logging.root.isEnabledFor(logging.INFO):
+      if len(text) >= columns:
+        text = text[0:columns - 5] + ' ...'
+
+    _commands.debug('%s', text)
+
+
+def add_arguments(parser):
+  """Adds standard arguments to the given ArgumentParser."""
+  parser.add_argument('--trace', action='store_true',
+                      help='show commands')
+  parser.add_argument('--verbose', '-v', action='count', default=0,
+                      help='run verbosely')
+
+
+def enable_tracing():
+  """Enables tracing of command execution."""
+  _commands.setLevel(logging.DEBUG)
+
+
+def setup(args):
+  """Prepares for tracing/verbosity based on the given parsed arguments."""
+  level = logging.WARN
+
+  if args.trace:
+    enable_tracing()
+
+  if args.verbose >= 2:
+    level = logging.DEBUG
+  elif args.verbose >= 1:
+    level = logging.INFO
+
+  logging.basicConfig(format='%(message)s', level=level)
+
+
+def parse_args(parser):
+  """Shortcut that adds arguments, parses, and runs setup.
+
+  Returns:
+    The args result from parser.parse_args().
+  """
+  add_arguments(parser)
+  args = parser.parse_args()
+  setup(args)
+  return args

+ 127 - 0
scripts/lib/git.py

@@ -0,0 +1,127 @@
+# Copyright 2019 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from lib import source
+import command_trace
+import os
+import subprocess
+
+
+def find_changed_or_files(all, rev_or_files, patterns):
+  """Finds files.
+
+  Args:
+    all: Force finding all files.
+    rev_or_files: A single revision, a list of files, or empty.
+    patterns: A list of git matching patterns
+
+  Returns:
+    Files that match.
+
+    If rev_or_files is a single revision, the result is all files that match
+    the patterns that have changed since the revision.
+
+    If rev_or_files is a list of files, the result is all the files that match
+    that list of files. The files can be patterns.
+
+    If rev_or_files is empty, the result is all the files that match patterns.
+  """
+  if all:
+    return find_files(patterns)
+
+  if not rev_or_files:
+    return find_changed('origin/master', patterns)
+
+  if len(rev_or_files) == 1 and is_revision(rev_or_files[0]):
+    return find_changed(rev_or_files[0], patterns)
+  else:
+    return find_files(rev_or_files)
+
+
+def is_revision(word):
+  """Returns true if the given word is a revision name according to git."""
+  command = ['git', 'rev-parse', word, '--']
+  with open(os.devnull, 'w') as dev_null:
+    command_trace.log(command)
+    rc = subprocess.call(command, stdout=dev_null, stderr=dev_null)
+    return rc == 0
+
+
+def find_changed(revision, patterns):
+  """Finds files changed since a revision."""
+
+  # Always include -- indicate that revision is known to be a revision, even
+  # if no patterns follow.
+  command = ['git', 'diff', '-z', '--name-only', '--diff-filter=ACMR',
+             revision, '--']
+  command.extend(patterns)
+  command.extend(standard_exclusions())
+  return _null_split_output(command)
+
+
+def find_files(patterns=None):
+  """Finds files matching the given patterns using git ls-files."""
+  command = ['git', 'ls-files', '-z', '--']
+  if patterns:
+    command.extend(patterns)
+  command.extend(standard_exclusions())
+  return _null_split_output(command)
+
+
+def find_lines_matching(pattern, sources=None):
+  command = [
+      'git', 'grep',
+      '-n',  # show line numbers
+      '-I',  # exclude binary files
+      pattern,
+      '--'
+  ]
+  if sources:
+    command.extend(sources)
+  command.extend(standard_exclusions())
+
+  command_trace.log(command)
+
+  bufsize = 4096
+  proc = subprocess.Popen(command, bufsize=bufsize, stdout=subprocess.PIPE)
+  result = []
+  try:
+    while proc.poll() is None:
+      result.append(proc.stdout.read(bufsize))
+  except KeyboardInterrupt:
+    proc.terminate()
+    proc.wait()
+
+  return ''.join(result)
+
+
+def make_patterns(dirs):
+  """Returns a list of git match patterns for the given directories."""
+  return ['%s/**' % d for d in dirs]
+
+
+def make_exclusions(dirs):
+  return [':(exclude)' + d for d in dirs]
+
+
+def standard_exclusions():
+  result = make_exclusions(source.IGNORE)
+  result.append(':(exclude)**/third_party/**')
+  return result
+
+
+def _null_split_output(command):
+  """Runs the given command and splits its output on the null byte."""
+  command_trace.log(command)
+  result = subprocess.check_output(command)
+  return [name for name in result.rstrip().split('\0') if name]

+ 248 - 0
scripts/lib/source.py

@@ -0,0 +1,248 @@
+# Copyright 2019 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fnmatch
+import logging
+import os
+import re
+import textwrap
+
+from lib import command_trace
+
+
+# Paths under which all files should be ignored
+IGNORE = frozenset([
+    'Firestore/Protos/nanopb',
+    'Firestore/Protos/cpp',
+    'Firestore/Protos/objc',
+    'Firestore/third_party/abseil-cpp',
+    'GoogleDataTransportCCTSupport/GDTCCTLibrary/Protogen/nanopb',
+    'GoogleDataTransportCCTSupport/ProtoSupport',
+])
+
+FIRESTORE_CORE = ['Firestore/core']
+FIRESTORE_OBJC = ['Firestore/Source', 'Firestore/Example/Tests']
+FIRESTORE_SWIFT = ['Firestore/Swift']
+
+FIRESTORE_TESTS = ['Firestore/core/test', 'Firestore/Example/Tests']
+
+CC_DIRS = FIRESTORE_CORE
+CC_EXTENSIONS = ['.h', '.cc']
+
+OBJC_DIRS = FIRESTORE_CORE + FIRESTORE_OBJC
+OBJC_EXTENSIONS = ['.h', '.m', '.mm']
+
+PYTHON_DIRS = ['scripts']
+PYTHON_EXTENSIONS = ['.py']
+
+SOURCE_EXTENSIONS = [
+    '.c',
+    '.cc',
+    '.cmake',
+    '.h',
+    '.js',
+    '.m',
+    '.mm',
+    '.py',
+    '.rb',
+    '.sh',
+    '.swift'
+]
+
+_DEFINITE_EXTENSIONS = {
+    '.cc': 'cc',
+    '.m': 'objc',
+    '.mm': 'objc',
+    '.py': 'py',
+}
+
+
+_classify_logger = logging.getLogger('lint.classify')
+
+
+class LanguageBreakdown:
+  """Files broken down by source language."""
+
+  def __init__(self):
+    self.cc = []
+    self.objc = []
+    self.py = []
+    self.all = []
+
+    self.kinds = {
+        'cc': self.cc,
+        'objc': self.objc,
+        'py': self.py,
+    }
+
+  def classify(self, kind, reason, filename):
+    _classify_logger.debug('classify %s: %s (%s)' % (kind, filename, reason))
+    self.kinds[kind].append(filename)
+    self.all.append(filename)
+
+  @staticmethod
+  def ignore(filename):
+    _classify_logger.debug('classify ignored: %s' % filename)
+
+
+def categorize_files(files):
+  """Breaks down the given list of files by language.
+
+  Args:
+    files: a list of files
+
+  Returns:
+    A LanguageBreakdown instance containing all the files that match a
+    recognized source language.
+  """
+  result = LanguageBreakdown()
+
+  for filename in files:
+    if _in_directories(filename, IGNORE):
+      continue
+
+    ext = os.path.splitext(filename)[1]
+    definite = _DEFINITE_EXTENSIONS.get(ext)
+    if definite:
+      result.classify(definite, 'extension', filename)
+      continue
+
+    if ext == '.h':
+      if _in_directories(filename, CC_DIRS):
+        # If a header exists in the C++ core, ignore related files. Some classes
+        # may transiently have an implementation in a .mm file, but hold the
+        # header to the higher standard: the implementation should eventually
+        # be in a .cc, otherwise the file doesn't belong in the core.
+        result.classify('cc', 'directory', filename)
+        continue
+
+      related_ext = _related_file_ext(filename)
+      if related_ext == '.cc':
+        result.classify('cc', 'related file', filename)
+        continue
+
+      if related_ext in ('.m', '.mm'):
+        result.classify('objc', 'related file', filename)
+        continue
+
+      if _in_directories(filename, OBJC_DIRS):
+        result.classify('objc', 'directory', filename)
+        continue
+
+      raise Exception(textwrap.dedent(
+          """
+          Don't know how to handle the header %s.
+
+          If C++ add a parent directory to CC_DIRS in lib/source.py.
+
+          If Objective-C add to OBJC_DIRS or consider changing the default here
+          and removing this exception.""" % filename))
+
+    result.ignore(filename)
+
+  return result
+
+
+def shard(group, num_shards):
+  """Breaks the group apart into num_shards shards.
+
+  Args:
+    group: a breakdown, perhaps returned from categorize_files.
+    num_shards: The number of shards into which to break down the group.
+
+  Returns:
+    A list of shards.
+  """
+  shards = []
+  for i in range(num_shards):
+    shards.append(LanguageBreakdown())
+
+  pos = 0
+  for kind, files in group.kinds.items():
+    for filename in files:
+      shards[pos].kinds[kind].append(filename)
+      pos = (pos + 1) % num_shards
+
+  return shards
+
+
+_PLUS = re.compile(r'\+.*')
+
+
+def _related_file_ext(header):
+  """Returns the dominant extension among related files.
+
+  A file is related if it starts with the same prefix. Prefix is the basename
+  without extension, and stripping off any + category names that are common in
+  Objective-C.
+
+  For example: executor.h has related files executor_std.cc and
+  executor_libdispatch.mm.
+
+  If there are multiple related files, the implementation chooses one based
+  on which language is most restrictive. That is, if a header serves both C++
+  and Objective-C++ implementations, lint the header as C++ to prevent issues
+  that might arise in that mode.
+
+  Returns:
+    The file extension (e.g. '.cc')
+  """
+  parent = os.path.dirname(header)
+  basename = os.path.basename(header)
+
+  root = os.path.splitext(basename)[0]
+  root = _PLUS.sub('', root)
+  root = os.path.join(parent, root)
+
+  files = _related_files(root)
+  exts = {os.path.splitext(f)[1] for f in files}
+
+  for ext in ('.cc', '.m', '.mm'):
+    if ext in exts:
+      return ext
+  return None
+
+
+def _related_files(root):
+  """Returns a list of files related to the given root.
+  """
+  parent = os.path.dirname(root)
+  if not parent:
+    # dirname returns empty for filenames that are already a basename.
+    parent = '.'
+
+  pattern = os.path.basename(root) + '*'
+  return fnmatch.filter(_list_files(parent), pattern)
+
+
+def _list_files(parent):
+  """Lists files contained directly in the parent directory."""
+  result = _list_files.cache.get(parent)
+  if result is None:
+    command_trace.log(['ls', parent])
+    result = os.listdir(parent)
+    _list_files.cache[parent] = result
+  return result
+
+
+_list_files.cache = {}
+
+
+def _in_directories(filename, dirs):
+  """Tests whether `filename` is anywhere in any of the given dirs."""
+  for dirname in dirs:
+    if (filename.startswith(dirname)
+        and (len(filename) == len(dirname) or filename[len(dirname)] == '/')):
+      return True
+  return False

+ 38 - 0
scripts/lib/terminal.py

@@ -0,0 +1,38 @@
+# Copyright 2019 Google
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import subprocess
+import threading
+
+
+_lock = threading.Lock()
+_columns = None
+
+
+def columns():
+  """Returns the number of columns in the terminal's display."""
+
+  global _columns
+  with _lock:
+    if _columns is None:
+      _columns = _find_terminal_columns()
+    return _columns
+
+
+def _find_terminal_columns():
+  try:
+    result = subprocess.check_output(['tput', 'cols'])
+    return int(result.rstrip())
+  except subprocess.CalledProcessError:
+    return 80

+ 0 - 101
scripts/lint.sh

@@ -1,101 +0,0 @@
-# Copyright 2018 Google
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Lints C++ files for conformance with the Google C++ style guide
-
-# Joins the given arguments with the separator given as the first argument.
-function join() {
-  local IFS="$1"
-  shift
-  echo "$*"
-}
-
-git_options=(
-    -z    # \0 terminate output
-)
-
-objc_lint_filters=(
-  # Objective-C uses #import and does not use header guards
-  -build/header_guard
-
-  # Inline definitions of Objective-C blocks confuse
-  -readability/braces
-
-  # C-style casts are acceptable in Objective-C++
-  -readability/casting
-
-  # Objective-C needs use type 'long' for interop between types like NSInteger
-  # and printf-style functions.
-  -runtime/int
-
-  # cpplint is generally confused by Objective-C mixing with C++.
-  #   * Objective-C method invocations in a for loop make it think its a
-  #     range-for
-  #   * Objective-C dictionary literals confuse brace spacing
-  #   * Empty category declarations ("@interface Foo ()") look like function
-  #     invocations
-  -whitespace
-)
-
-objc_lint_options=(
-  # cpplint normally excludes Objective-C++
-  --extensions=h,m,mm
-
-  # Objective-C style allows longer lines
-  --linelength=100
-
-  --filter=$(join , "${objc_lint_filters[@]}")
-)
-
-if [[ $# -gt 0 ]]; then
-  # Interpret any command-line argument as a revision range
-  command=(git diff --name-only --diff-filter=ACMR)
-  git_options+=("$@")
-
-else
-  # Default to operating on all files that match the pattern
-  command=(git ls-files)
-fi
-
-# POSIX xargs is required to run commands at least once, but cpplint.py fails
-# (with huge help text) if no files are supplied. Apple xargs avoids invocation
-# if there are no arguments. Use a temporary file to avoid depending on/testing
-# for this feature.
-TEMP=$(mktemp -t lint-files-$$.XXXXXXXXXX)
-trap "rm '$TEMP'" EXIT
-
-# Straight C++ files get regular cpplint
-"${command[@]}" "${git_options[@]}" \
-    -- 'Firestore/core/**/*.'{h,cc} \
-  > "$TEMP"
-
-if [[ -s "$TEMP" ]]; then
-  xargs -0 python scripts/cpplint.py --quiet 2>&1 < "$TEMP"
-fi
-CPP_STATUS=$?
-
-# Objective-C++ files get a looser cpplint
-"${command[@]}" "${git_options[@]}" \
-    -- 'Firestore/Source/**/*.'{h,m,mm} \
-      'Firestore/Example/Tests/**/*.'{h,m,mm} \
-      'Firestore/core/**/*.mm' \
-  > "$TEMP"
-if [[ -s "$TEMP" ]]; then
-  xargs -0 python scripts/cpplint.py "${objc_lint_options[@]}" --quiet 2>&1 < "$TEMP"
-fi
-OBJC_STATUS=$?
-
-if [[ $CPP_STATUS != 0 || $OBJC_STATUS != 0 ]]; then
-  exit 1
-fi

+ 22 - 0
tox.ini

@@ -0,0 +1,22 @@
+[pycodestyle]
+# Several checks are disabled because they're incompatible with Google python
+# style.
+#
+# See https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes for
+# more details.
+ignore =
+    # Google python style indents by two, not four. This trips a number of
+    # rules:
+    E111,E114,E129,
+
+    # W504 enforces that line breaks are after binary operators.
+    # W503 enforces the opposite. Disable it.
+    W503
+
+max-line-length=80
+
+[flake8]
+# Same list as above
+ignore = E111,E114,E129,W503
+max-line-length=80
+exclude = .git,.idea,__pycache__,scripts/cpplint.py