source.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. # Copyright 2019 Google
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import fnmatch
  15. import logging
  16. import os
  17. import re
  18. import textwrap
  19. from lib import command_trace
  20. # Paths under which all files should be ignored
  21. IGNORE = frozenset([
  22. 'Firestore/Protos/nanopb',
  23. 'Firestore/Protos/cpp',
  24. 'Firestore/Protos/objc',
  25. 'Firestore/third_party/abseil-cpp',
  26. ])
  27. FIRESTORE_CORE = ['Firestore/core']
  28. FIRESTORE_OBJC = ['Firestore/Source', 'Firestore/Example/Tests']
  29. FIRESTORE_SWIFT = ['Firestore/Swift']
  30. FIRESTORE_TESTS = ['Firestore/core/test', 'Firestore/Example/Tests']
  31. CC_DIRS = FIRESTORE_CORE
  32. CC_EXTENSIONS = ['.h', '.cc']
  33. OBJC_DIRS = FIRESTORE_CORE + FIRESTORE_OBJC
  34. OBJC_EXTENSIONS = ['.h', '.m', '.mm']
  35. PYTHON_DIRS = ['scripts']
  36. PYTHON_EXTENSIONS = ['.py']
  37. SOURCE_EXTENSIONS = [
  38. '.c',
  39. '.cc',
  40. '.cmake',
  41. '.h',
  42. '.js',
  43. '.m',
  44. '.mm',
  45. '.py',
  46. '.rb',
  47. '.sh',
  48. '.swift'
  49. ]
  50. _DEFINITE_EXTENSIONS = {
  51. '.cc': 'cc',
  52. '.m': 'objc',
  53. '.mm': 'objc',
  54. '.py': 'py',
  55. }
  56. _classify_logger = logging.getLogger('lint.classify')
  57. class LanguageBreakdown:
  58. """Files broken down by source language."""
  59. def __init__(self):
  60. self.cc = []
  61. self.objc = []
  62. self.py = []
  63. self.all = []
  64. self.kinds = {
  65. 'cc': self.cc,
  66. 'objc': self.objc,
  67. 'py': self.py,
  68. }
  69. def classify(self, kind, reason, filename):
  70. _classify_logger.debug('classify %s: %s (%s)' % (kind, filename, reason))
  71. self.kinds[kind].append(filename)
  72. self.all.append(filename)
  73. @staticmethod
  74. def ignore(filename):
  75. _classify_logger.debug('classify ignored: %s' % filename)
  76. def categorize_files(files):
  77. """Breaks down the given list of files by language.
  78. Args:
  79. files: a list of files
  80. Returns:
  81. A LanguageBreakdown instance containing all the files that match a
  82. recognized source language.
  83. """
  84. result = LanguageBreakdown()
  85. for filename in files:
  86. if _in_directories(filename, IGNORE):
  87. continue
  88. ext = os.path.splitext(filename)[1]
  89. definite = _DEFINITE_EXTENSIONS.get(ext)
  90. if definite:
  91. result.classify(definite, 'extension', filename)
  92. continue
  93. if ext == '.h':
  94. if _in_directories(filename, CC_DIRS):
  95. # If a header exists in the C++ core, ignore related files. Some classes
  96. # may transiently have an implementation in a .mm file, but hold the
  97. # header to the higher standard: the implementation should eventually
  98. # be in a .cc, otherwise the file doesn't belong in the core.
  99. result.classify('cc', 'directory', filename)
  100. continue
  101. related_ext = _related_file_ext(filename)
  102. if related_ext == '.cc':
  103. result.classify('cc', 'related file', filename)
  104. continue
  105. if related_ext in ('.m', '.mm'):
  106. result.classify('objc', 'related file', filename)
  107. continue
  108. if _in_directories(filename, OBJC_DIRS):
  109. result.classify('objc', 'directory', filename)
  110. continue
  111. raise NotImplementedError(textwrap.dedent(
  112. """
  113. Don't know how to handle the header %s.
  114. If C++ add a parent directory to CC_DIRS in lib/source.py.
  115. If Objective-C add to OBJC_DIRS or consider changing the default here
  116. and removing this exception.""" % filename))
  117. result.ignore(filename)
  118. return result
  119. def shard(group, num_shards):
  120. """Breaks the group apart into num_shards shards.
  121. Args:
  122. group: a breakdown, perhaps returned from categorize_files.
  123. num_shards: The number of shards into which to break down the group.
  124. Returns:
  125. A list of shards.
  126. """
  127. shards = []
  128. for i in range(num_shards):
  129. shards.append(LanguageBreakdown())
  130. pos = 0
  131. for kind, files in group.kinds.items():
  132. for filename in files:
  133. shards[pos].kinds[kind].append(filename)
  134. pos = (pos + 1) % num_shards
  135. return shards
  136. _PLUS = re.compile(r'\+.*')
  137. def _related_file_ext(header):
  138. """Returns the dominant extension among related files.
  139. A file is related if it starts with the same prefix. Prefix is the basename
  140. without extension, and stripping off any + category names that are common in
  141. Objective-C.
  142. For example: executor.h has related files executor_std.cc and
  143. executor_libdispatch.mm.
  144. If there are multiple related files, the implementation chooses one based
  145. on which language is most restrictive. That is, if a header serves both C++
  146. and Objective-C++ implementations, lint the header as C++ to prevent issues
  147. that might arise in that mode.
  148. Returns:
  149. The file extension (e.g. '.cc')
  150. """
  151. parent = os.path.dirname(header)
  152. basename = os.path.basename(header)
  153. root = os.path.splitext(basename)[0]
  154. root = _PLUS.sub('', root)
  155. root = os.path.join(parent, root)
  156. files = _related_files(root)
  157. exts = {os.path.splitext(f)[1] for f in files}
  158. for ext in ('.cc', '.m', '.mm'):
  159. if ext in exts:
  160. return ext
  161. return None
  162. def _related_files(root):
  163. """Returns a list of files related to the given root.
  164. """
  165. parent = os.path.dirname(root)
  166. if not parent:
  167. # dirname returns empty for filenames that are already a basename.
  168. parent = '.'
  169. pattern = os.path.basename(root) + '*'
  170. return fnmatch.filter(_list_files(parent), pattern)
  171. def _list_files(parent):
  172. """Lists files contained directly in the parent directory."""
  173. result = _list_files.cache.get(parent)
  174. if result is None:
  175. command_trace.log(['ls', parent])
  176. result = os.listdir(parent)
  177. _list_files.cache[parent] = result
  178. return result
  179. _list_files.cache = {}
  180. def _in_directories(filename, dirs):
  181. """Tests whether `filename` is anywhere in any of the given dirs."""
  182. for dirname in dirs:
  183. if (filename.startswith(dirname)
  184. and (len(filename) == len(dirname) or filename[len(dirname)] == '/')):
  185. return True
  186. return False