binary_to_array.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. #!/usr/bin/env python2
  2. # Copyright 2018 Google LLC
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. """Utility to convert binary data into a C/C++ array.
  17. Usage: %s input_file.bin [--output_source=output_source.cc]
  18. [--output_header=output_header.h] [--cpp_namespace=namespace]
  19. [--header_guard=HEADER_GUARD_TEXT] [--array=array_c_identifier]
  20. [--array_size=array_size_c_identifier] [--filename=override_filename]
  21. [--filename_identifier=filename_c_identifier]
  22. By default, the output source file will be named the same as the input file,
  23. but with .cc as the extension; the output header file will be named the
  24. same as the input file but with .h as the extension.
  25. By default, the data will be in an array named $NAME_data and the size will
  26. be in a constant named $NAME_size, and the filename will be stored in
  27. $NAME_filename. In all these cases, $NAME is the input filename (sans path and
  28. extension) with runs of non-alphanumeric characters changed to underscores. The
  29. header guard will be generated from the output header filename in a similar way.
  30. By default, the data will be placed in the root namespace. If the data is placed
  31. in the root namespace, it will be declared as a C array (using extern "C" if
  32. compiled in C++ mode).
  33. The actual size of $NAME_data is $NAME_size + 1, because an extra 0x00 is
  34. appended at the end. When the data is actually text, $NAME_data can be used
  35. directly as a valid C string.
  36. """
  37. from os import path
  38. from re import sub
  39. import argparse
  40. import logging
  41. import os
  42. arg_parser = argparse.ArgumentParser()
  43. arg_parser.add_argument("input",
  44. help="Input file containing binary data to embed.")
  45. arg_parser.add_argument("--output_source",
  46. help="Output source file, defining the array data.")
  47. arg_parser.add_argument("--output_header",
  48. help="Output header file, declaring the array data.")
  49. arg_parser.add_argument("--array", help="Identifier for the array.")
  50. arg_parser.add_argument("--array_size", help="Identifier for the array size.")
  51. arg_parser.add_argument("--filename", help="Override file name in code.")
  52. arg_parser.add_argument("--filename_identifier",
  53. help="Where to put the filename.")
  54. arg_parser.add_argument("--header_guard",
  55. help="Header guard to #define in the output header.")
  56. arg_parser.add_argument("--cpp_namespace",
  57. help="C++ namespace to use. "
  58. "If blank, will generate a C array.")
  59. # How many hex bytes to display in a line. Each "0x00, " takes 6 characters, so
  60. # a width of 12 lets us fit within 80 characters.
  61. WIDTH = 12
  62. def header(header_guard, namespaces, array_name, array_size_name, fileid):
  63. """Return a C/C++ header for the given array.
  64. Args:
  65. header_guard: Name of the HEADER_GUARD to define.
  66. namespaces: List of namespaces, outer to inner.
  67. array_name: Name of the array.
  68. array_size_name: Name of the array size constant.
  69. fileid: Name of the identifier containing the file name.
  70. Returns:
  71. A list of strings containing the C/C++ header file, line-by-line.
  72. """
  73. data = []
  74. data.extend([
  75. "// Copyright 2019 Google Inc. All Rights Reserved.",
  76. "",
  77. "#ifndef %s" % header_guard,
  78. "#define %s" % header_guard,
  79. "",
  80. "#include <cstdlib>",
  81. ""
  82. ])
  83. if namespaces:
  84. data.extend([
  85. "namespace %s {" % ns for ns in namespaces
  86. ])
  87. else:
  88. data.extend([
  89. "#if defined(__cplusplus)",
  90. "extern \"C\" {",
  91. "#endif // defined(__cplusplus)"])
  92. data.extend([
  93. "",
  94. "extern const size_t %s;" % array_size_name,
  95. "extern const unsigned char %s[];" % array_name,
  96. "extern const char %s[];" % fileid,
  97. ])
  98. data.extend([
  99. ""
  100. ])
  101. if namespaces:
  102. data.extend([
  103. "} // namespace %s" % ns for ns in reversed(namespaces)
  104. ])
  105. else:
  106. data.extend([
  107. "#if defined(__cplusplus)",
  108. "} // extern \"C\"",
  109. "#endif // defined(__cplusplus)"
  110. ])
  111. data.extend([
  112. "",
  113. "#endif // %s" % header_guard,
  114. ""
  115. ])
  116. return data
  117. def source(namespaces, array_name, array_size_name, fileid, filename,
  118. input_bytes, include_name):
  119. """Return a C/C++ source file for the given array.
  120. Args:
  121. namespaces: List of namespaces, outer to inner.
  122. array_name: Name of the array.
  123. array_size_name: Name of the array size constant.
  124. fileid: Name of the identifier containing the filename.
  125. filename: The original data filename itself.
  126. input_bytes: Binary data to put into the array.
  127. include_name: Name of the corresponding header file to include.
  128. Returns:
  129. A string containing the C/C++ source file.
  130. """
  131. if os.name == 'nt':
  132. # Force forward slashes on Windows
  133. include_name = include_name.replace('\\', '/')
  134. data = []
  135. data.extend([
  136. "// Copyright 2019 Google Inc. All Rights Reserved.",
  137. "",
  138. "#include \"%s\"" % include_name,
  139. "",
  140. "#include <cstdlib>",
  141. ""
  142. ])
  143. if namespaces:
  144. data.extend([
  145. "namespace %s {" % ns for ns in namespaces
  146. ])
  147. else:
  148. data.extend([
  149. "#if defined(__cplusplus)",
  150. "extern \"C\" {",
  151. "#endif // defined(__cplusplus)"])
  152. data.extend([
  153. "",
  154. "extern const size_t %s;" % array_size_name,
  155. "extern const char %s[];" % fileid,
  156. "extern const unsigned char %s[];" % array_name, "",
  157. "const unsigned char %s[] = {" % array_name
  158. ])
  159. length = len(input_bytes)
  160. line = ""
  161. for idx in range(0, length):
  162. if idx % WIDTH == 0:
  163. line += " "
  164. else:
  165. line += " "
  166. line += "0x%02x," % input_bytes[idx]
  167. if idx % WIDTH == WIDTH - 1:
  168. data.append(line)
  169. line = ""
  170. data.append(line)
  171. data.append(" 0x00 // Extra \\0 to make it a C string")
  172. data.extend([
  173. "};",
  174. "",
  175. "const size_t %s =" % array_size_name,
  176. " sizeof(%s) - 1;" % array_name,
  177. "",
  178. "const char %s[] = \"%s\";" % (fileid, filename),
  179. "",
  180. ])
  181. if namespaces:
  182. data.extend([
  183. "} // namespace %s" % ns for ns in namespaces
  184. ][::-1]) # close namespaces in reverse order
  185. else:
  186. data.extend([
  187. "#if defined(__cplusplus)",
  188. "} // extern \"C\"",
  189. "#endif // defined(__cplusplus)"
  190. ])
  191. data.extend([
  192. ""
  193. ])
  194. return data
  195. def _get_repo_root():
  196. """Returns the root of the source repository.
  197. """
  198. scripts_dir = os.path.abspath(os.path.dirname(__file__))
  199. assert os.path.basename(scripts_dir) == 'scripts'
  200. root_dir = os.path.dirname(scripts_dir)
  201. assert os.path.isdir(os.path.join(root_dir, '.github'))
  202. return root_dir
  203. def main():
  204. """Read an binary input file and output to a C/C++ source file as an array.
  205. """
  206. args = arg_parser.parse_args()
  207. input_file = args.input
  208. input_file_base = os.path.splitext(args.input)[0]
  209. output_source = args.output_source
  210. if not output_source:
  211. output_source = input_file_base + ".cc"
  212. logging.debug("Using default --output_source='%s'", output_source)
  213. output_header = args.output_header
  214. if not output_header:
  215. output_header = input_file_base + ".h"
  216. logging.debug("Using default --output_header='%s'", output_header)
  217. root_dir = _get_repo_root()
  218. absolute_dir = path.dirname(output_header)
  219. relative_dir = path.relpath(absolute_dir, root_dir)
  220. relative_header_path = path.join(relative_dir, path.basename(output_header))
  221. identifier_base = sub("[^0-9a-zA-Z]+", "_", path.basename(input_file_base))
  222. array_name = args.array
  223. if not array_name:
  224. array_name = identifier_base + "_data"
  225. logging.debug("Using default --array='%s'", array_name)
  226. array_size_name = args.array_size
  227. if not array_size_name:
  228. array_size_name = identifier_base + "_size"
  229. logging.debug("Using default --array_size='%s'", array_size_name)
  230. fileid = args.filename_identifier
  231. if not fileid:
  232. fileid = identifier_base + "_filename"
  233. logging.debug("Using default --filename_identifier='%s'", fileid)
  234. filename = args.filename
  235. if filename is None: # but not if it's the empty string
  236. filename = path.basename(input_file)
  237. logging.debug("Using default --filename='%s'", filename)
  238. header_guard = args.header_guard
  239. if not header_guard:
  240. header_guard = sub("[^0-9a-zA-Z]+", "_", relative_header_path).upper() + '_'
  241. # Avoid double underscores to stay compliant with the Standard.
  242. header_guard = sub("[_]+", "_", header_guard)
  243. logging.debug("Using default --header_guard='%s'", header_guard)
  244. namespace = args.cpp_namespace
  245. namespaces = namespace.split("::") if namespace else []
  246. with open(input_file, "rb") as infile:
  247. input_bytes = bytearray(infile.read())
  248. logging.debug("Read %d bytes from %s", len(input_bytes), input_file)
  249. header_text = "\n".join(header(header_guard, namespaces, array_name,
  250. array_size_name, fileid))
  251. source_text = "\n".join(source(namespaces, array_name, array_size_name,
  252. fileid, filename, input_bytes,
  253. relative_header_path))
  254. with open(output_header, "w") as hdr:
  255. hdr.write(header_text)
  256. logging.debug("Wrote header file %s", output_header)
  257. with open(output_source, "w") as src:
  258. src.write(source_text)
  259. logging.debug("Wrote source file %s", output_source)
  260. if __name__ == "__main__":
  261. main()