generate_log_report_html.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 生成日志分析HTML可视化报告
  5. 功能特性:
  6. - 自动根据输入文件名生成输出报告文件名
  7. - 统计和标记超过400字符的超长日志
  8. - 可视化展示TAG使用情况和超长日志分布
  9. - 提供优化建议
  10. 使用方法:
  11. # 自动生成报告文件名(推荐)
  12. python generate_log_report_html.py xxx.log
  13. # 输出: xxx_report.html
  14. # 指定输出文件名
  15. python generate_log_report_html.py logcat.txt --output custom_report.html
  16. """
import argparse
import html
import sys
from pathlib import Path

from analyze_log_tags import LogTagAnalyzer
  21. HTML_TEMPLATE = """
  22. <!DOCTYPE html>
  23. <html lang="zh-CN">
  24. <head>
  25. <meta charset="UTF-8">
  26. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  27. <title>日志TAG分析报告</title>
  28. <style>
  29. * {{{{
  30. margin: 0;
  31. padding: 0;
  32. box-sizing: border-box;
  33. }}}}
  34. body {{
  35. font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  36. background: #f5f5f5;
  37. padding: 20px;
  38. color: #333;
  39. }}
  40. .container {{
  41. max-width: 1400px;
  42. margin: 0 auto;
  43. background: white;
  44. border-radius: 8px;
  45. box-shadow: 0 2px 8px rgba(0,0,0,0.1);
  46. padding: 30px;
  47. }}
  48. h1 {{
  49. color: #1a73e8;
  50. margin-bottom: 10px;
  51. font-size: 28px;
  52. }}
  53. .subtitle {{
  54. color: #666;
  55. margin-bottom: 30px;
  56. font-size: 14px;
  57. }}
  58. .summary {{
  59. display: grid;
  60. grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
  61. gap: 20px;
  62. margin-bottom: 30px;
  63. }}
  64. .summary-card {{
  65. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  66. color: white;
  67. padding: 20px;
  68. border-radius: 8px;
  69. box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  70. }}
  71. .summary-card.info {{
  72. background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
  73. }}
  74. .summary-card.success {{
  75. background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
  76. }}
  77. .summary-card.warning {{
  78. background: linear-gradient(135deg, #ffa751 0%, #ffe259 100%);
  79. }}
  80. .summary-card h3 {{
  81. font-size: 14px;
  82. margin-bottom: 10px;
  83. opacity: 0.9;
  84. }}
  85. .summary-card .value {{
  86. font-size: 32px;
  87. font-weight: bold;
  88. }}
  89. .summary-card .sub-value {{
  90. font-size: 14px;
  91. margin-top: 5px;
  92. opacity: 0.9;
  93. }}
  94. .chart-container {{
  95. margin: 30px 0;
  96. background: #fafafa;
  97. padding: 20px;
  98. border-radius: 8px;
  99. }}
  100. .chart-title {{
  101. font-size: 18px;
  102. font-weight: bold;
  103. margin-bottom: 15px;
  104. color: #333;
  105. }}
  106. .bar-chart {{
  107. margin: 20px 0;
  108. }}
  109. .bar-item {{
  110. margin-bottom: 12px;
  111. }}
  112. .bar-label {{
  113. display: flex;
  114. justify-content: space-between;
  115. margin-bottom: 5px;
  116. font-size: 13px;
  117. }}
  118. .tag-name {{
  119. font-weight: 500;
  120. color: #333;
  121. }}
  122. .tag-stats {{
  123. color: #666;
  124. }}
  125. .bar-bg {{
  126. background: #e0e0e0;
  127. height: 24px;
  128. border-radius: 4px;
  129. overflow: hidden;
  130. position: relative;
  131. }}
  132. .bar-fill {{
  133. height: 100%;
  134. background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
  135. display: flex;
  136. align-items: center;
  137. justify-content: flex-end;
  138. padding-right: 8px;
  139. color: white;
  140. font-size: 12px;
  141. font-weight: bold;
  142. transition: width 0.3s ease;
  143. }}
  144. table {{
  145. width: 100%;
  146. border-collapse: collapse;
  147. margin: 20px 0;
  148. font-size: 14px;
  149. }}
  150. th, td {{
  151. padding: 12px;
  152. text-align: left;
  153. border-bottom: 1px solid #e0e0e0;
  154. }}
  155. th {{
  156. background: #f5f5f5;
  157. font-weight: 600;
  158. color: #333;
  159. position: sticky;
  160. top: 0;
  161. }}
  162. tr:hover {{
  163. background: #fafafa;
  164. }}
  165. .rank {{
  166. font-weight: bold;
  167. color: #666;
  168. }}
  169. .percentage {{
  170. font-weight: 600;
  171. }}
  172. .high-freq {{
  173. color: #d32f2f;
  174. }}
  175. .med-freq {{
  176. color: #f57c00;
  177. }}
  178. .low-freq {{
  179. color: #388e3c;
  180. }}
  181. .level-badge {{
  182. display: inline-block;
  183. padding: 2px 6px;
  184. border-radius: 3px;
  185. font-size: 11px;
  186. font-weight: bold;
  187. margin-right: 4px;
  188. }}
  189. .level-v {{ background: #9e9e9e; color: white; }}
  190. .level-d {{ background: #2196f3; color: white; }}
  191. .level-i {{ background: #4caf50; color: white; }}
  192. .level-w {{ background: #ff9800; color: white; }}
  193. .level-e {{ background: #f44336; color: white; }}
  194. .section {{
  195. margin: 40px 0;
  196. }}
  197. .section-title {{
  198. font-size: 20px;
  199. font-weight: bold;
  200. margin-bottom: 20px;
  201. color: #333;
  202. border-left: 4px solid #1a73e8;
  203. padding-left: 12px;
  204. }}
  205. .suggestion-box {{
  206. background: #fff3cd;
  207. border-left: 4px solid #ffc107;
  208. padding: 15px;
  209. border-radius: 4px;
  210. margin: 20px 0;
  211. }}
  212. .suggestion-title {{
  213. font-weight: bold;
  214. margin-bottom: 10px;
  215. color: #856404;
  216. }}
  217. .suggestion-list {{
  218. margin-left: 20px;
  219. color: #856404;
  220. }}
  221. footer {{
  222. margin-top: 40px;
  223. padding-top: 20px;
  224. border-top: 1px solid #e0e0e0;
  225. text-align: center;
  226. color: #666;
  227. font-size: 12px;
  228. }}
  229. </style>
  230. </head>
  231. <body>
  232. <div class="container">
  233. <h1>📊 日志TAG分析报告</h1>
  234. <div class="subtitle">生成时间: {timestamp} | 文件: {filename}</div>
  235. <div class="summary">
  236. <div class="summary-card">
  237. <h3>总日志行数</h3>
  238. <div class="value">{total_lines:,}</div>
  239. </div>
  240. <div class="summary-card info">
  241. <h3>有效日志行</h3>
  242. <div class="value">{matched_lines:,}</div>
  243. </div>
  244. <div class="summary-card success">
  245. <h3>不同TAG数量</h3>
  246. <div class="value">{unique_tags}</div>
  247. </div>
  248. <div class="summary-card warning">
  249. <h3>超长日志数</h3>
  250. <div class="value">{long_logs_count:,}</div>
  251. <div class="sub-value">占比 {long_logs_percentage:.2f}%</div>
  252. </div>
  253. </div>
  254. <div class="section">
  255. <div class="section-title">TOP 20 高频TAG</div>
  256. <div class="chart-container">
  257. <div class="bar-chart">
  258. {top20_bars}
  259. </div>
  260. </div>
  261. </div>
  262. <div class="section">
  263. <div class="section-title">超长日志TOP 10(>400字符)</div>
  264. <div class="chart-container">
  265. <div class="bar-chart">
  266. {long_logs_bars}
  267. </div>
  268. </div>
  269. </div>
  270. <div class="section">
  271. <div class="section-title">完整TAG列表</div>
  272. <table>
  273. <thead>
  274. <tr>
  275. <th>排名</th>
  276. <th>TAG名称</th>
  277. <th>出现次数</th>
  278. <th>占比</th>
  279. <th>超长日志</th>
  280. <th>日志级别分布</th>
  281. </tr>
  282. </thead>
  283. <tbody>
  284. {table_rows}
  285. </tbody>
  286. </table>
  287. </div>
  288. <div class="section">
  289. <div class="suggestion-box">
  290. <div class="suggestion-title">💡 优化建议</div>
  291. <div class="suggestion-list">
  292. {suggestions}
  293. </div>
  294. </div>
  295. </div>
  296. <footer>
  297. 报告由 analyze_log_tags.py 生成
  298. </footer>
  299. </div>
  300. </body>
  301. </html>
  302. """
  303. def generate_html_report(analyzer: LogTagAnalyzer, filename: str, output_path: str):
  304. """生成HTML报告"""
  305. from datetime import datetime
  306. stats = analyzer.get_statistics()
  307. # 计算超长日志统计
  308. total_long_logs = sum(analyzer.long_log_counter.values())
  309. long_logs_percentage = (total_long_logs / analyzer.matched_lines * 100) if analyzer.matched_lines > 0 else 0
  310. # 生成超长日志TOP 10柱状图
  311. long_logs_bars = ""
  312. if analyzer.long_log_counter:
  313. long_log_stats = analyzer.long_log_counter.most_common(10)
  314. max_long_count = long_log_stats[0][1] if long_log_stats else 1
  315. for i, (tag, long_count) in enumerate(long_log_stats, 1):
  316. tag_total = analyzer.tag_counter[tag]
  317. long_percentage_in_tag = (long_count / tag_total) * 100
  318. bar_width = (long_count / max_long_count) * 100
  319. long_logs_bars += f"""
  320. <div class="bar-item">
  321. <div class="bar-label">
  322. <span class="tag-name">{i}. {tag}</span>
  323. <span class="tag-stats high-freq">{long_count:,} 条 ({long_percentage_in_tag:.1f}% 的该TAG)</span>
  324. </div>
  325. <div class="bar-bg">
  326. <div class="bar-fill" style="width: {bar_width}%; background: linear-gradient(90deg, #ff9800 0%, #ff5722 100%);">{long_count:,}</div>
  327. </div>
  328. </div>
  329. """
  330. else:
  331. long_logs_bars = '<p style="color: #666; text-align: center; padding: 20px;">未发现超长日志</p>'
  332. # 生成TOP 20柱状图
  333. top20_bars = ""
  334. for i, (tag, count, percentage) in enumerate(stats[:20], 1):
  335. freq_class = "high-freq" if percentage > 5 else ("med-freq" if percentage > 1 else "low-freq")
  336. top20_bars += f"""
  337. <div class="bar-item">
  338. <div class="bar-label">
  339. <span class="tag-name">{i}. {tag}</span>
  340. <span class="tag-stats {freq_class}">{count:,} 次 ({percentage:.2f}%)</span>
  341. </div>
  342. <div class="bar-bg">
  343. <div class="bar-fill" style="width: {min(percentage * 2, 100)}%">{percentage:.2f}%</div>
  344. </div>
  345. </div>
  346. """
  347. # 生成表格行
  348. table_rows = ""
  349. for i, (tag, count, percentage) in enumerate(stats, 1):
  350. freq_class = "high-freq" if percentage > 5 else ("med-freq" if percentage > 1 else "low-freq")
  351. # 超长日志数量
  352. long_count = analyzer.long_log_counter.get(tag, 0)
  353. if long_count > 0:
  354. long_percentage = (long_count / count) * 100
  355. long_info = f'<span class="high-freq">{long_count:,} ({long_percentage:.1f}%)</span>'
  356. else:
  357. long_info = '<span style="color: #999;">-</span>'
  358. # 级别分布
  359. level_badges = ""
  360. if tag in analyzer.tag_level_counter:
  361. for level in ['V', 'D', 'I', 'W', 'E']:
  362. if level in analyzer.tag_level_counter[tag]:
  363. level_count = analyzer.tag_level_counter[tag][level]
  364. level_badges += f'<span class="level-badge level-{level.lower()}">{level}:{level_count}</span>'
  365. table_rows += f"""
  366. <tr>
  367. <td class="rank">{i}</td>
  368. <td><code>{tag}</code></td>
  369. <td>{count:,}</td>
  370. <td class="percentage {freq_class}">{percentage:.2f}%</td>
  371. <td>{long_info}</td>
  372. <td>{level_badges}</td>
  373. </tr>
  374. """
  375. # 生成优化建议
  376. suggestions_html = "<ul>"
  377. # 超长日志建议
  378. if total_long_logs > 0:
  379. suggestions_html += f"<li><strong>⚠️ 超长日志优化:</strong>发现 {total_long_logs:,} 条超长日志({long_logs_percentage:.2f}%),建议优化以下TAG:"
  380. suggestions_html += "<ul>"
  381. top_long_tags = analyzer.long_log_counter.most_common(5)
  382. for tag, long_count in top_long_tags:
  383. tag_total = analyzer.tag_counter[tag]
  384. long_pct = (long_count / tag_total) * 100
  385. suggestions_html += f"<li>{tag}: {long_count:,} 条超长日志(占该TAG的 {long_pct:.1f}%)</li>"
  386. suggestions_html += "</ul></li>"
  387. # 高频TAG建议
  388. high_freq_tags = [(tag, p) for tag, _, p in stats[:10] if p > 10]
  389. if high_freq_tags:
  390. suggestions_html += "<li><strong>高频TAG优化:</strong>以下TAG占比较高,建议检查是否有冗余日志<ul>"
  391. for tag, percentage in high_freq_tags:
  392. suggestions_html += f"<li>{tag} ({percentage:.2f}%)</li>"
  393. suggestions_html += "</ul></li>"
  394. # 低频TAG建议
  395. low_freq_tags = [tag for tag, count, _ in stats if count < 5]
  396. if len(low_freq_tags) > 10:
  397. suggestions_html += f"<li><strong>低频TAG清理:</strong>发现 {len(low_freq_tags)} 个出现次数 < 5 的TAG,建议review并移除不必要的日志</li>"
  398. # TOP占比分析
  399. if len(stats) >= 20:
  400. top20_percentage = sum(p for _, _, p in stats[:20])
  401. suggestions_html += f"<li><strong>集中度分析:</strong>TOP 20 TAG占总日志的 {top20_percentage:.2f}%,"
  402. if top20_percentage > 80:
  403. suggestions_html += "日志较为集中,重点优化这些TAG即可获得明显效果</li>"
  404. else:
  405. suggestions_html += "日志较为分散,建议统一TAG命名规范</li>"
  406. suggestions_html += "</ul>"
  407. # 填充模板
  408. html = HTML_TEMPLATE.format(
  409. timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
  410. filename=filename,
  411. total_lines=analyzer.total_lines,
  412. matched_lines=analyzer.matched_lines,
  413. unique_tags=len(analyzer.tag_counter),
  414. long_logs_count=total_long_logs,
  415. long_logs_percentage=long_logs_percentage,
  416. long_logs_bars=long_logs_bars,
  417. top20_bars=top20_bars,
  418. table_rows=table_rows,
  419. suggestions=suggestions_html
  420. )
  421. # 写入文件
  422. with open(output_path, 'w', encoding='utf-8') as f:
  423. f.write(html)
  424. print(f"\n✅ HTML报告已生成: {output_path}")
  425. print(f" 用浏览器打开查看: file://{Path(output_path).absolute()}")
  426. def generate_output_filename(input_file: str) -> str:
  427. """根据输入文件名生成输出报告文件名"""
  428. input_path = Path(input_file)
  429. # 获取文件名(不含扩展名)
  430. base_name = input_path.stem
  431. # 移除常见的日志文件后缀
  432. # 例如: com.jiehe.gami_20250922.xlog.log -> com.jiehe.gami_20250922
  433. for suffix in ['.xlog', '.log', '.txt']:
  434. if base_name.endswith(suffix):
  435. base_name = base_name[:-len(suffix)]
  436. # 生成报告文件名: 原文件名_report.html
  437. output_name = f"{base_name}_report.html"
  438. # 如果输入文件在某个目录下,输出到同一目录
  439. if input_path.parent != Path('.'):
  440. output_path = input_path.parent / output_name
  441. else:
  442. output_path = Path(output_name)
  443. return str(output_path)
  444. def main():
  445. parser = argparse.ArgumentParser(description='生成日志TAG分析的HTML可视化报告')
  446. parser.add_argument('logfile', help='日志文件路径')
  447. parser.add_argument('--output', '-o', default=None,
  448. help='输出HTML文件路径(默认: 根据输入文件名自动生成)')
  449. args = parser.parse_args()
  450. # 检查文件
  451. if not Path(args.logfile).exists():
  452. print(f"❌ 错误: 文件不存在: {args.logfile}")
  453. sys.exit(1)
  454. # 确定输出文件名
  455. if args.output:
  456. output_path = args.output
  457. else:
  458. output_path = generate_output_filename(args.logfile)
  459. print(f"📝 自动生成输出文件名: {output_path}")
  460. # 分析日志
  461. print(f"正在分析日志文件: {args.logfile}")
  462. analyzer = LogTagAnalyzer()
  463. analyzer.analyze_file(args.logfile)
  464. # 生成HTML
  465. generate_html_report(analyzer, args.logfile, output_path)
  466. if __name__ == "__main__":
  467. main()