pull_and_fill_translations.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. import os
  2. import shutil
  3. import subprocess
  4. import xml.dom.minidom as minidom
  5. import xml.etree.ElementTree as ET
  6. from collections import OrderedDict
  7. import re
  8. import requests
  9. SUPPORTED_LANGUAGES = [
  10. "zh", "en", "ar"
  11. ]
  12. TOLGEE_PATH = "./tolgee_json"
  13. TOLGEE_API_URL = "http://47.236.31.243:8090/api"
  14. TOLGEE_API_KEY = "tgpak_hbpxgyltnryguy3lonyxiyrtgruta4rroryhiolooe3q"
  15. PROJECT_ID = "8"
  16. HEADERS = {
  17. "X-Api-Key": TOLGEE_API_KEY,
  18. "Content-Type": "application/json"
  19. }
  20. def fetch_all_cdata_keys():
  21. dataList = [] #key是name,value是id
  22. page = 0
  23. page_size = 1000
  24. while True:
  25. url = f"{TOLGEE_API_URL}/v2/projects/{PROJECT_ID}/keys?page={page}&size={page_size}"
  26. r = requests.get(url, headers=HEADERS)
  27. if r.status_code != 200:
  28. print(f"❌ 拉取失败: {r.status_code} -> {r.text}")
  29. break
  30. data = r.json()
  31. # print(data)
  32. dataList.extend(data['_embedded']['keys'])
  33. if data['page']['totalPages'] == page +1 : # 最后一页
  34. break
  35. page += 1
  36. print(f"✅ 已拉取 {len(dataList)} 个 Tolgee keys")
  37. # print(dataList)
  38. # 只保留包含 CDATA 的 keys,'custom': {'_androidWrapWithCdata': True}
  39. dataList = [item for item in dataList if
  40. item['custom'] and '_androidWrapWithCdata' in item['custom'] and item['custom'][
  41. '_androidWrapWithCdata']]
  42. # 提取name
  43. dataList = [item['name'] for item in dataList if 'name' in item]
  44. print(f"🔍 找到 {len(dataList)} 个包含 CDATA 的 keys")
  45. print(dataList)
  46. return dataList
  47. def run_tolgee_pull():
  48. os.makedirs(TOLGEE_PATH, exist_ok=True)
  49. cmd = [
  50. "tolgee", "pull",
  51. "--format", "ANDROID_XML",
  52. "--path", TOLGEE_PATH,
  53. "--languages"
  54. ] + SUPPORTED_LANGUAGES + [
  55. "--empty-dir",
  56. ]
  57. print(f"📥 拉取 Tolgee 翻译中...")
  58. subprocess.run(cmd, check=True)
  59. print(f"✅ Tolgee 拉取完成:{TOLGEE_PATH}")
  60. def find_en_strings_files():
  61. en_files = []
  62. for root, dirs, files in os.walk("."):
  63. normalized_root = root.replace("\\", "/")
  64. if "lite" in normalized_root or "lite" in files:
  65. continue
  66. if "/build/" in normalized_root or normalized_root.endswith("/build"):
  67. continue
  68. if "strings.xml" in files and "/res/values" in root.replace("\\", "/"):
  69. if "values-" not in root: # 只取英文
  70. en_files.append(os.path.join(root, "strings.xml"))
  71. return en_files
  72. def parse_strings_file(file_path):
  73. try:
  74. tree = ET.parse(file_path)
  75. root = tree.getroot()
  76. result = OrderedDict()
  77. for e in root.findall("string"):
  78. if 'name' in e.attrib:
  79. result[e.attrib['name']] = e.text or ""
  80. return result
  81. except Exception as e:
  82. print(f"❌ 解析失败: {file_path} -> {e}")
  83. return OrderedDict()
  84. # 解析 Tolgee 拉取的翻译,并合并到现有的 strings.xml 中
  85. def merge_translations(lang, base_dict: dict, pulled_dict: dict, existing_dict: dict,
  86. translatable_false_keys):
  87. merged = OrderedDict()
  88. # 先添加原有 key,保持顺序
  89. # 只有en才会有translatable_false_keys的数据
  90. for key in existing_dict:
  91. merged[key] = existing_dict[key]
  92. # if lang == "en" and key in translatable_false_keys:
  93. # merged[key] = pulled_dict.get(key) or existing_dict[key] or base_dict.get(key) or ""
  94. # else:
  95. # merged[key] = pulled_dict.get(key) or existing_dict[key] or ""
  96. # 再补充新 key(存在于 base_dict,但不在原文件中)
  97. for key in base_dict:
  98. #如果 key 不在 pulled_dict 中,跳过
  99. if key not in pulled_dict:
  100. continue
  101. if key in translatable_false_keys and lang != "en":
  102. # 如果是非英文语言且在translatable_false_keys中,则不添加,并且移除掉
  103. merged.pop(key, None)
  104. continue
  105. if pulled_dict.get(key) is not None:
  106. merged[key] = pulled_dict.get(key)
  107. return merged
  108. def find_translatable_false_keys():
  109. result = []
  110. for root, dirs, files in os.walk("."):
  111. normalized_root = root.replace("\\", "/")
  112. if normalized_root.endswith("/res/values") and "strings.xml" in files:
  113. file_path = os.path.join(root, "strings.xml")
  114. try:
  115. tree = ET.parse(file_path)
  116. root_elem = tree.getroot()
  117. for elem in root_elem.findall("string"):
  118. if elem.attrib.get("translatable") == "false":
  119. result.append(elem.attrib["name"])
  120. except Exception as e:
  121. print(f"⚠️ 解析失败:{file_path} -> {e}")
  122. print(f"🔍 找到 {len(result)} 个 translatable=false 的 key")
  123. # print(result)
  124. return result
  125. def write_strings_xml(file_path, data: dict,translatable_false_keys,cdata_keys):
  126. if data is None or not data:
  127. print(f"⚠️ 跳过写入:{file_path},没有数据")
  128. return
  129. os.makedirs(os.path.dirname(file_path), exist_ok=True)
  130. cdata_map = {}
  131. # 创建 <resources> 根节点
  132. resources = ET.Element("resources")
  133. # 遍历 key,构建 string 元素
  134. for key, value in data.items():
  135. attributes = {"name": key}
  136. text = value
  137. if key in translatable_false_keys:
  138. attributes["translatable"] = "false"
  139. if isinstance(value, dict):
  140. text = value.get("value", "")
  141. for attr in ["translatable", "formatted", "product"]:
  142. if attr in value:
  143. attributes[attr] = value[attr]
  144. string_elem = ET.SubElement(resources, "string", attrib=attributes)
  145. if key in cdata_keys:
  146. placeholder = f"__CDATA__{key}__"
  147. string_elem.text = placeholder
  148. cdata_map[key] = text
  149. else:
  150. string_elem.text = text
  151. # 格式化 + 替换 CDATA
  152. rough_string = ET.tostring(resources, encoding="utf-8")
  153. pretty_xml = minidom.parseString(rough_string).toprettyxml(indent=" ")
  154. pretty_xml = "\n".join([line for line in pretty_xml.split("\n") if line.strip()])
  155. # 替换默认声明为标准的 XML 头部
  156. if pretty_xml.startswith('<?xml version="1.0" ?>'):
  157. pretty_xml = pretty_xml.replace(
  158. '<?xml version="1.0" ?>',
  159. '<?xml version="1.0" encoding="utf-8"?>',
  160. 1
  161. )
  162. for key in cdata_map:
  163. placeholder = f"__CDATA__{key}__"
  164. cdata_value = cdata_map[key]
  165. pretty_xml = pretty_xml.replace(placeholder, f"<![CDATA[{cdata_value}]]>")
  166. with open(file_path, "w", encoding="utf-8") as f:
  167. f.write(pretty_xml + "\n")
  168. def get_module_base_path(en_file_path):
  169. return os.path.dirname(os.path.dirname(en_file_path)) # 到 src/main/res
  170. def main():
  171. run_tolgee_pull()
  172. en_files = find_en_strings_files()
  173. translatable_false_keys = find_translatable_false_keys()
  174. cdata_keys = fetch_all_cdata_keys()
  175. for en_path in en_files:
  176. print(en_path)
  177. en_strings = parse_strings_file(en_path)
  178. if not en_strings:
  179. continue
  180. module_res_path = get_module_base_path(en_path)
  181. print(f"处理模块:{module_res_path}")
  182. for lang in SUPPORTED_LANGUAGES:
  183. if lang == "en":
  184. lang_dir = "values"
  185. elif lang == "id":
  186. lang_dir = "values-in"
  187. else:
  188. lang_dir = f"values-{lang}"
  189. pulled_path = os.path.join(TOLGEE_PATH, f"values-{lang}", "strings.xml")
  190. out_path = os.path.join(module_res_path, lang_dir, "strings.xml")
  191. print(f"处理语言:{lang} → {out_path}, 源:{pulled_path}")
  192. pulled_translations = parse_strings_file(pulled_path)
  193. if not pulled_translations:
  194. print(f"⚠️ 跳过:{pulled_path},没有可用翻译")
  195. continue
  196. # print(f"已拉取翻译:{pulled_translations}")
  197. existing_translations = parse_strings_file(out_path)
  198. # print(f"现有翻译:{existing_translations}")
  199. merged = merge_translations(
  200. lang=lang,
  201. base_dict=en_strings,
  202. pulled_dict=pulled_translations,
  203. existing_dict=existing_translations,
  204. translatable_false_keys=translatable_false_keys
  205. )
  206. write_strings_xml(out_path, merged,translatable_false_keys,cdata_keys)
  207. # ✅ 删除中间产物
  208. if os.path.exists(TOLGEE_PATH):
  209. shutil.rmtree(TOLGEE_PATH)
  210. print(f"🧹 已删除中间目录:{TOLGEE_PATH}")
  211. if __name__ == "__main__":
  212. main()