source.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # Copyright 2019 Google
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import fnmatch
  15. import logging
  16. import os
  17. import re
  18. import textwrap
  19. from lib import command_trace
  20. # Paths under which all files should be ignored
  21. IGNORE = frozenset([
  22. 'Firestore/Protos/nanopb',
  23. 'Firestore/Protos/cpp',
  24. 'Firestore/Protos/objc',
  25. 'Firestore/third_party/abseil-cpp',
  26. 'GoogleDataTransport/GDTCCTLibrary/Protogen/nanopb',
  27. 'GoogleDataTransport/ProtoSupport',
  28. ])
  29. FIRESTORE_CORE = ['Firestore/core']
  30. FIRESTORE_OBJC = ['Firestore/Source', 'Firestore/Example/Tests']
  31. FIRESTORE_SWIFT = ['Firestore/Swift']
  32. FIRESTORE_TESTS = ['Firestore/core/test', 'Firestore/Example/Tests']
  33. CC_DIRS = FIRESTORE_CORE
  34. CC_EXTENSIONS = ['.h', '.cc']
  35. OBJC_DIRS = FIRESTORE_CORE + FIRESTORE_OBJC
  36. OBJC_EXTENSIONS = ['.h', '.m', '.mm']
  37. PYTHON_DIRS = ['scripts']
  38. PYTHON_EXTENSIONS = ['.py']
  39. SOURCE_EXTENSIONS = [
  40. '.c',
  41. '.cc',
  42. '.cmake',
  43. '.h',
  44. '.js',
  45. '.m',
  46. '.mm',
  47. '.py',
  48. '.rb',
  49. '.sh',
  50. '.swift'
  51. ]
  52. _DEFINITE_EXTENSIONS = {
  53. '.cc': 'cc',
  54. '.m': 'objc',
  55. '.mm': 'objc',
  56. '.py': 'py',
  57. }
  58. _classify_logger = logging.getLogger('lint.classify')
  59. class LanguageBreakdown:
  60. """Files broken down by source language."""
  61. def __init__(self):
  62. self.cc = []
  63. self.objc = []
  64. self.py = []
  65. self.all = []
  66. self.kinds = {
  67. 'cc': self.cc,
  68. 'objc': self.objc,
  69. 'py': self.py,
  70. }
  71. def classify(self, kind, reason, filename):
  72. _classify_logger.debug('classify %s: %s (%s)' % (kind, filename, reason))
  73. self.kinds[kind].append(filename)
  74. self.all.append(filename)
  75. @staticmethod
  76. def ignore(filename):
  77. _classify_logger.debug('classify ignored: %s' % filename)
  78. def categorize_files(files):
  79. """Breaks down the given list of files by language.
  80. Args:
  81. files: a list of files
  82. Returns:
  83. A LanguageBreakdown instance containing all the files that match a
  84. recognized source language.
  85. """
  86. result = LanguageBreakdown()
  87. for filename in files:
  88. if _in_directories(filename, IGNORE):
  89. continue
  90. ext = os.path.splitext(filename)[1]
  91. definite = _DEFINITE_EXTENSIONS.get(ext)
  92. if definite:
  93. result.classify(definite, 'extension', filename)
  94. continue
  95. if ext == '.h':
  96. if _in_directories(filename, CC_DIRS):
  97. # If a header exists in the C++ core, ignore related files. Some classes
  98. # may transiently have an implementation in a .mm file, but hold the
  99. # header to the higher standard: the implementation should eventually
  100. # be in a .cc, otherwise the file doesn't belong in the core.
  101. result.classify('cc', 'directory', filename)
  102. continue
  103. related_ext = _related_file_ext(filename)
  104. if related_ext == '.cc':
  105. result.classify('cc', 'related file', filename)
  106. continue
  107. if related_ext in ('.m', '.mm'):
  108. result.classify('objc', 'related file', filename)
  109. continue
  110. if _in_directories(filename, OBJC_DIRS):
  111. result.classify('objc', 'directory', filename)
  112. continue
  113. raise NotImplementedError(textwrap.dedent(
  114. """
  115. Don't know how to handle the header %s.
  116. If C++ add a parent directory to CC_DIRS in lib/source.py.
  117. If Objective-C add to OBJC_DIRS or consider changing the default here
  118. and removing this exception.""" % filename))
  119. result.ignore(filename)
  120. return result
  121. def shard(group, num_shards):
  122. """Breaks the group apart into num_shards shards.
  123. Args:
  124. group: a breakdown, perhaps returned from categorize_files.
  125. num_shards: The number of shards into which to break down the group.
  126. Returns:
  127. A list of shards.
  128. """
  129. shards = []
  130. for i in range(num_shards):
  131. shards.append(LanguageBreakdown())
  132. pos = 0
  133. for kind, files in group.kinds.items():
  134. for filename in files:
  135. shards[pos].kinds[kind].append(filename)
  136. pos = (pos + 1) % num_shards
  137. return shards
  138. _PLUS = re.compile(r'\+.*')
  139. def _related_file_ext(header):
  140. """Returns the dominant extension among related files.
  141. A file is related if it starts with the same prefix. Prefix is the basename
  142. without extension, and stripping off any + category names that are common in
  143. Objective-C.
  144. For example: executor.h has related files executor_std.cc and
  145. executor_libdispatch.mm.
  146. If there are multiple related files, the implementation chooses one based
  147. on which language is most restrictive. That is, if a header serves both C++
  148. and Objective-C++ implementations, lint the header as C++ to prevent issues
  149. that might arise in that mode.
  150. Returns:
  151. The file extension (e.g. '.cc')
  152. """
  153. parent = os.path.dirname(header)
  154. basename = os.path.basename(header)
  155. root = os.path.splitext(basename)[0]
  156. root = _PLUS.sub('', root)
  157. root = os.path.join(parent, root)
  158. files = _related_files(root)
  159. exts = {os.path.splitext(f)[1] for f in files}
  160. for ext in ('.cc', '.m', '.mm'):
  161. if ext in exts:
  162. return ext
  163. return None
  164. def _related_files(root):
  165. """Returns a list of files related to the given root.
  166. """
  167. parent = os.path.dirname(root)
  168. if not parent:
  169. # dirname returns empty for filenames that are already a basename.
  170. parent = '.'
  171. pattern = os.path.basename(root) + '*'
  172. return fnmatch.filter(_list_files(parent), pattern)
  173. def _list_files(parent):
  174. """Lists files contained directly in the parent directory."""
  175. result = _list_files.cache.get(parent)
  176. if result is None:
  177. command_trace.log(['ls', parent])
  178. result = os.listdir(parent)
  179. _list_files.cache[parent] = result
  180. return result
  181. _list_files.cache = {}
  182. def _in_directories(filename, dirs):
  183. """Tests whether `filename` is anywhere in any of the given dirs."""
  184. for dirname in dirs:
  185. if (filename.startswith(dirname)
  186. and (len(filename) == len(dirname) or filename[len(dirname)] == '/')):
  187. return True
  188. return False