| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255 |
import os
import re
import shutil
import subprocess
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET
from collections import OrderedDict

import requests
# Language codes passed to `tolgee pull --languages` and iterated by main().
SUPPORTED_LANGUAGES = [
    "zh", "en", "ar"
]

# Intermediate directory the Tolgee CLI writes into; main() deletes it at the end.
TOLGEE_PATH = "./tolgee_json"

# NOTE(review): plain HTTP endpoint - the API key below travels unencrypted.
TOLGEE_API_URL = "http://47.236.31.243:8090/api"

# The key can be supplied through the TOLGEE_API_KEY environment variable.
# NOTE(review): the fallback literal is a credential committed to source
# control; rotate it and drop the fallback once the variable is provisioned.
TOLGEE_API_KEY = (
    os.environ.get("TOLGEE_API_KEY")
    or "tgpak_hbpxgyltnryguy3lonyxiyrtgruta4rroryhiolooe3q"
)

PROJECT_ID = "8"

# Headers sent with every REST request issued by this script.
HEADERS = {
    "X-Api-Key": TOLGEE_API_KEY,
    "Content-Type": "application/json",
}
def fetch_all_cdata_keys():
    """Return the names of all Tolgee keys flagged for CDATA wrapping.

    Pages through ``/v2/projects/{PROJECT_ID}/keys`` and keeps the keys whose
    ``custom`` metadata has a truthy ``_androidWrapWithCdata`` entry.

    Returns:
        list[str]: Names of the matching keys. If a page request returns a
        non-200 status, the error is printed and whatever was collected so
        far is used (best effort).

    Raises:
        requests.RequestException: On connection errors or when a request
            exceeds the timeout.
    """
    all_keys = []
    page = 0
    page_size = 1000
    while True:
        url = f"{TOLGEE_API_URL}/v2/projects/{PROJECT_ID}/keys?page={page}&size={page_size}"
        # A timeout keeps an unresponsive server from hanging the script forever.
        response = requests.get(url, headers=HEADERS, timeout=30)
        if response.status_code != 200:
            print(f"❌ 拉取失败: {response.status_code} -> {response.text}")
            break
        data = response.json()
        # Tolerate a response without `_embedded` (e.g. a page with no keys).
        all_keys.extend(data.get("_embedded", {}).get("keys", []))
        total_pages = data.get("page", {}).get("totalPages", 0)
        page += 1
        # `>=` rather than `==` so a project with zero pages terminates too.
        if page >= total_pages:
            break
    print(f"✅ 已拉取 {len(all_keys)} 个 Tolgee keys")

    # Keep only keys marked like 'custom': {'_androidWrapWithCdata': True}.
    cdata_names = [
        item["name"]
        for item in all_keys
        if (item.get("custom") or {}).get("_androidWrapWithCdata") and "name" in item
    ]
    print(f"🔍 找到 {len(cdata_names)} 个包含 CDATA 的 keys")
    print(cdata_names)
    return cdata_names
def run_tolgee_pull():
    """Run ``tolgee pull`` to export Android XML translations into TOLGEE_PATH.

    Creates TOLGEE_PATH if needed, then invokes the ``tolgee`` CLI for every
    language in SUPPORTED_LANGUAGES with ``--empty-dir``.

    Raises:
        subprocess.CalledProcessError: If the CLI exits with a non-zero status.
    """
    os.makedirs(TOLGEE_PATH, exist_ok=True)
    command = [
        "tolgee", "pull",
        "--format", "ANDROID_XML",
        "--path", TOLGEE_PATH,
        "--languages", *SUPPORTED_LANGUAGES,
        "--empty-dir",
    ]
    print("📥 拉取 Tolgee 翻译中...")
    subprocess.run(command, check=True)
    print(f"✅ Tolgee 拉取完成：{TOLGEE_PATH}")
def find_en_strings_files():
    """Collect the default-locale ``strings.xml`` files below the current directory.

    A file is returned when its directory path contains ``/res/values`` and no
    ``values-`` qualifier. Directories whose path contains ``lite``, that hold
    a file named ``lite``, or that are (inside) a ``build`` directory are
    skipped.

    Returns:
        list[str]: Paths to the matching ``strings.xml`` files, in walk order.
    """
    en_files = []
    for root, dirs, files in os.walk("."):
        # Prune subtrees whose every directory would be rejected below, so the
        # walk does not descend into build output or "lite" modules at all.
        dirs[:] = [d for d in dirs if d != "build" and "lite" not in d]

        normalized_root = root.replace("\\", "/")
        # NOTE(review): `"lite" in files` matches a file literally named
        # "lite"; confirm whether `dirs` was intended.
        if "lite" in normalized_root or "lite" in files:
            continue
        if "/build/" in normalized_root or normalized_root.endswith("/build"):
            continue
        if (
            "strings.xml" in files
            and "/res/values" in normalized_root
            and "values-" not in root  # default (English) resources only
        ):
            en_files.append(os.path.join(root, "strings.xml"))
    return en_files
def parse_strings_file(file_path):
    """Read the ``<string>`` entries of an Android resource file.

    Only ``<string>`` elements that are direct children of the root and carry
    a ``name`` attribute are read; each maps to the element's leading text
    (``""`` when it has none).

    Args:
        file_path: Path of the XML file to parse.

    Returns:
        OrderedDict: name -> text in document order. Empty when the file
        cannot be read or parsed (the error is printed, not raised).
    """
    try:
        string_nodes = ET.parse(file_path).getroot().findall("string")
        return OrderedDict(
            (node.attrib["name"], node.text or "")
            for node in string_nodes
            if "name" in node.attrib
        )
    except Exception as error:
        print(f"❌ 解析失败: {file_path} -> {error}")
        return OrderedDict()
def merge_translations(lang, base_dict: dict, pulled_dict: dict, existing_dict: dict,
                       translatable_false_keys):
    """Merge translations pulled from Tolgee into an existing strings mapping.

    Existing entries come first, in their original order. Then, for every key
    of ``base_dict`` that was also pulled, the pulled value overwrites or
    appends the entry, unless the key is untranslatable and ``lang`` is not
    ``"en"``, in which case the entry is removed.

    NOTE(review): an untranslatable key that is absent from ``pulled_dict`` is
    left in place for non-English languages; confirm that this is intended.

    Args:
        lang: Language code of the file being produced.
        base_dict: Strings of the default (English) file; defines which keys
            are eligible for update.
        pulled_dict: Strings pulled from Tolgee for ``lang``.
        existing_dict: Strings currently in the target file.
        translatable_false_keys: Keys marked ``translatable="false"``.

    Returns:
        OrderedDict: The merged name -> value mapping.
    """
    # Seed with what the target file already has so its ordering is preserved.
    merged = OrderedDict((key, existing_dict[key]) for key in existing_dict)
    is_english = lang == "en"

    for key in base_dict:
        if key not in pulled_dict:
            continue
        if key in translatable_false_keys and not is_english:
            # Untranslatable strings must not appear in localized files.
            merged.pop(key, None)
            continue
        pulled_value = pulled_dict.get(key)
        if pulled_value is not None:
            merged[key] = pulled_value
    return merged
def find_translatable_false_keys():
    """Collect names of strings marked ``translatable="false"``.

    Scans every ``strings.xml`` that sits directly in a ``res/values``
    directory below the current directory. Files that fail to parse are
    reported and skipped; ``<string>`` elements without a ``name`` attribute
    are ignored instead of aborting the rest of their file.

    Returns:
        list[str]: Key names in discovery order (duplicates are not removed).
    """
    result = []
    for root, _dirs, files in os.walk("."):
        normalized_root = root.replace("\\", "/")
        if not (normalized_root.endswith("/res/values") and "strings.xml" in files):
            continue
        file_path = os.path.join(root, "strings.xml")
        # Only parsing is guarded, so one bad element cannot hide later keys.
        try:
            root_elem = ET.parse(file_path).getroot()
        except Exception as e:
            print(f"⚠️ 解析失败：{file_path} -> {e}")
            continue
        for elem in root_elem.findall("string"):
            name = elem.attrib.get("name")
            if name is not None and elem.attrib.get("translatable") == "false":
                result.append(name)
    print(f"🔍 找到 {len(result)} 个 translatable=false 的 key")
    return result
def write_strings_xml(file_path, data: dict, translatable_false_keys, cdata_keys):
    """Write a name -> value mapping as an Android ``strings.xml`` file.

    Args:
        file_path: Destination path; missing parent directories are created.
        data: Mapping of string name to its text, or to a dict with a
            ``"value"`` entry plus optional ``translatable`` / ``formatted`` /
            ``product`` attributes. Nothing is written when it is empty.
        translatable_false_keys: Names that get ``translatable="false"``.
        cdata_keys: Names whose text is emitted inside a CDATA section.
    """
    if not data:
        print(f"⚠️ 跳过写入：{file_path}，没有数据")
        return

    # A bare filename has no directory part; os.makedirs("") would raise.
    out_dir = os.path.dirname(file_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    untranslatable = set(translatable_false_keys)
    cdata_names = set(cdata_keys)

    # CDATA texts are swapped for index-based placeholders while the tree is
    # serialized, then restored afterwards. Index placeholders cannot be
    # prefixes of one another, unlike placeholders derived from key names
    # (e.g. keys "a" and "a__b").
    cdata_values = []

    resources = ET.Element("resources")
    for key, value in data.items():
        attributes = {"name": key}
        text = value
        if key in untranslatable:
            attributes["translatable"] = "false"
        if isinstance(value, dict):
            text = value.get("value", "")
            for attr in ("translatable", "formatted", "product"):
                if attr in value:
                    attributes[attr] = value[attr]
        string_elem = ET.SubElement(resources, "string", attrib=attributes)
        if key in cdata_names:
            string_elem.text = f"__CDATA_{len(cdata_values)}__"
            cdata_values.append("" if text is None else str(text))
        else:
            string_elem.text = text

    # Pretty-print, then drop the blank lines minidom may emit.
    # NOTE(review): indent is a single space as found in the source - confirm.
    rough_string = ET.tostring(resources, encoding="utf-8")
    pretty_xml = minidom.parseString(rough_string).toprettyxml(indent=" ")
    pretty_xml = "\n".join(line for line in pretty_xml.split("\n") if line.strip())

    # Replace minidom's default declaration with the standard XML header.
    default_header = '<?xml version="1.0" ?>'
    if pretty_xml.startswith(default_header):
        pretty_xml = pretty_xml.replace(
            default_header,
            '<?xml version="1.0" encoding="utf-8"?>',
            1,
        )

    def _cdata_section(match):
        index = int(match.group(1))
        if index >= len(cdata_values):
            # Not one of our placeholders; leave the text untouched.
            return match.group(0)
        # "]]>" would end the section early, so split it across two sections.
        safe_text = cdata_values[index].replace("]]>", "]]]]><![CDATA[>")
        return f"<![CDATA[{safe_text}]]>"

    # Restored after blank-line stripping, so CDATA content is written verbatim.
    pretty_xml = re.sub(r"__CDATA_(\d+)__", _cdata_section, pretty_xml)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(pretty_xml + "\n")
def get_module_base_path(en_file_path):
    """Return the directory two levels above ``en_file_path``.

    For ``.../res/values/strings.xml`` this is the module's ``res`` directory.
    """
    values_dir, _file_name = os.path.split(en_file_path)
    res_dir, _values_name = os.path.split(values_dir)
    return res_dir
def _values_dir_for(lang):
    """Map a language code to its Android ``values`` resource directory name."""
    if lang == "en":
        return "values"
    if lang == "id":
        # Android resources use the legacy "in" code for Indonesian.
        return "values-in"
    return f"values-{lang}"


def main():
    """Pull translations from Tolgee and merge them into each module's resources.

    For every default-locale ``strings.xml`` found, and for every supported
    language, the pulled translations are merged with the module's existing
    file and written back. The intermediate pull directory is removed
    afterwards, even if a step fails.
    """
    try:
        run_tolgee_pull()
        en_files = find_en_strings_files()
        translatable_false_keys = find_translatable_false_keys()
        cdata_keys = fetch_all_cdata_keys()

        for en_path in en_files:
            print(en_path)
            en_strings = parse_strings_file(en_path)
            if not en_strings:
                continue
            module_res_path = get_module_base_path(en_path)
            print(f"处理模块：{module_res_path}")

            for lang in SUPPORTED_LANGUAGES:
                # The pulled directory always uses the raw language code.
                pulled_path = os.path.join(TOLGEE_PATH, f"values-{lang}", "strings.xml")
                out_path = os.path.join(module_res_path, _values_dir_for(lang), "strings.xml")
                print(f"处理语言：{lang} → {out_path}, 源：{pulled_path}")

                pulled_translations = parse_strings_file(pulled_path)
                if not pulled_translations:
                    print(f"⚠️ 跳过：{pulled_path}，没有可用翻译")
                    continue

                existing_translations = parse_strings_file(out_path)
                merged = merge_translations(
                    lang=lang,
                    base_dict=en_strings,
                    pulled_dict=pulled_translations,
                    existing_dict=existing_translations,
                    translatable_false_keys=translatable_false_keys,
                )
                write_strings_xml(out_path, merged, translatable_false_keys, cdata_keys)
    finally:
        # Remove the intermediate pull output regardless of success or failure.
        if os.path.exists(TOLGEE_PATH):
            shutil.rmtree(TOLGEE_PATH)
            print(f"🧹 已删除中间目录：{TOLGEE_PATH}")


if __name__ == "__main__":
    main()
|