| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 生成日志分析HTML可视化报告
- 功能特性:
- - 自动根据输入文件名生成输出报告文件名
- - 统计和标记超过400字符的超长日志
- - 可视化展示TAG使用情况和超长日志分布
- - 提供优化建议
- 使用方法:
- # 自动生成报告文件名(推荐)
- python generate_log_report_html.py com.partyjoy.yoki_20250922.xlog.log
- # 输出: com.partyjoy.yoki_20250922_report.html
-
- # 指定输出文件名
- python generate_log_report_html.py logcat.txt --output custom_report.html
- """
- import sys
- import argparse
- from pathlib import Path
- from analyze_log_tags import LogTagAnalyzer
# Page skeleton for the report, rendered with ``str.format``.
# Single-brace names ({timestamp}, {table_rows}, ...) are placeholders filled in
# by generate_html_report(). Literal CSS braces must be doubled ("{{" / "}}") so
# that ``format`` emits exactly one brace for each of them.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>日志TAG分析报告</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}

body {{
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
background: #f5f5f5;
padding: 20px;
color: #333;
}}

.container {{
max-width: 1400px;
margin: 0 auto;
background: white;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
padding: 30px;
}}

h1 {{
color: #1a73e8;
margin-bottom: 10px;
font-size: 28px;
}}

.subtitle {{
color: #666;
margin-bottom: 30px;
font-size: 14px;
}}

.summary {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 20px;
margin-bottom: 30px;
}}

.summary-card {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}}

.summary-card.info {{
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
}}

.summary-card.success {{
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
}}

.summary-card.warning {{
background: linear-gradient(135deg, #ffa751 0%, #ffe259 100%);
}}

.summary-card h3 {{
font-size: 14px;
margin-bottom: 10px;
opacity: 0.9;
}}

.summary-card .value {{
font-size: 32px;
font-weight: bold;
}}

.summary-card .sub-value {{
font-size: 14px;
margin-top: 5px;
opacity: 0.9;
}}

.chart-container {{
margin: 30px 0;
background: #fafafa;
padding: 20px;
border-radius: 8px;
}}

.chart-title {{
font-size: 18px;
font-weight: bold;
margin-bottom: 15px;
color: #333;
}}

.bar-chart {{
margin: 20px 0;
}}

.bar-item {{
margin-bottom: 12px;
}}

.bar-label {{
display: flex;
justify-content: space-between;
margin-bottom: 5px;
font-size: 13px;
}}

.tag-name {{
font-weight: 500;
color: #333;
}}

.tag-stats {{
color: #666;
}}

.bar-bg {{
background: #e0e0e0;
height: 24px;
border-radius: 4px;
overflow: hidden;
position: relative;
}}

.bar-fill {{
height: 100%;
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
display: flex;
align-items: center;
justify-content: flex-end;
padding-right: 8px;
color: white;
font-size: 12px;
font-weight: bold;
transition: width 0.3s ease;
}}

table {{
width: 100%;
border-collapse: collapse;
margin: 20px 0;
font-size: 14px;
}}

th, td {{
padding: 12px;
text-align: left;
border-bottom: 1px solid #e0e0e0;
}}

th {{
background: #f5f5f5;
font-weight: 600;
color: #333;
position: sticky;
top: 0;
}}

tr:hover {{
background: #fafafa;
}}

.rank {{
font-weight: bold;
color: #666;
}}

.percentage {{
font-weight: 600;
}}

.high-freq {{
color: #d32f2f;
}}

.med-freq {{
color: #f57c00;
}}

.low-freq {{
color: #388e3c;
}}

.level-badge {{
display: inline-block;
padding: 2px 6px;
border-radius: 3px;
font-size: 11px;
font-weight: bold;
margin-right: 4px;
}}

.level-v {{ background: #9e9e9e; color: white; }}
.level-d {{ background: #2196f3; color: white; }}
.level-i {{ background: #4caf50; color: white; }}
.level-w {{ background: #ff9800; color: white; }}
.level-e {{ background: #f44336; color: white; }}

.section {{
margin: 40px 0;
}}

.section-title {{
font-size: 20px;
font-weight: bold;
margin-bottom: 20px;
color: #333;
border-left: 4px solid #1a73e8;
padding-left: 12px;
}}

.suggestion-box {{
background: #fff3cd;
border-left: 4px solid #ffc107;
padding: 15px;
border-radius: 4px;
margin: 20px 0;
}}

.suggestion-title {{
font-weight: bold;
margin-bottom: 10px;
color: #856404;
}}

.suggestion-list {{
margin-left: 20px;
color: #856404;
}}

footer {{
margin-top: 40px;
padding-top: 20px;
border-top: 1px solid #e0e0e0;
text-align: center;
color: #666;
font-size: 12px;
}}
</style>
</head>
<body>
<div class="container">
<h1>📊 日志TAG分析报告</h1>
<div class="subtitle">生成时间: {timestamp} | 文件: {filename}</div>

<div class="summary">
<div class="summary-card">
<h3>总日志行数</h3>
<div class="value">{total_lines:,}</div>
</div>
<div class="summary-card info">
<h3>有效日志行</h3>
<div class="value">{matched_lines:,}</div>
</div>
<div class="summary-card success">
<h3>不同TAG数量</h3>
<div class="value">{unique_tags}</div>
</div>
<div class="summary-card warning">
<h3>超长日志数</h3>
<div class="value">{long_logs_count:,}</div>
<div class="sub-value">占比 {long_logs_percentage:.2f}%</div>
</div>
</div>

<div class="section">
<div class="section-title">TOP 20 高频TAG</div>
<div class="chart-container">
<div class="bar-chart">
{top20_bars}
</div>
</div>
</div>

<div class="section">
<div class="section-title">超长日志TOP 10(>400字符)</div>
<div class="chart-container">
<div class="bar-chart">
{long_logs_bars}
</div>
</div>
</div>

<div class="section">
<div class="section-title">完整TAG列表</div>
<table>
<thead>
<tr>
<th>排名</th>
<th>TAG名称</th>
<th>出现次数</th>
<th>占比</th>
<th>超长日志</th>
<th>日志级别分布</th>
</tr>
</thead>
<tbody>
{table_rows}
</tbody>
</table>
</div>

<div class="section">
<div class="suggestion-box">
<div class="suggestion-title">💡 优化建议</div>
<div class="suggestion-list">
{suggestions}
</div>
</div>
</div>

<footer>
报告由 analyze_log_tags.py 生成
</footer>
</div>
</body>
</html>
"""
def generate_html_report(analyzer: LogTagAnalyzer, filename: str, output_path: str):
    """Render the analysis results into a standalone HTML report file.

    Args:
        analyzer: Analyzer that has already processed a log file. This function
            calls ``get_statistics()`` (iterated as ``(tag, count, percentage)``
            tuples; the first 20 entries are presented as the "TOP 20", so the
            list is assumed to be ordered by descending count - confirm against
            ``LogTagAnalyzer``) and reads ``total_lines``, ``matched_lines``,
            ``tag_counter``, ``long_log_counter`` and ``tag_level_counter``.
        filename: Name of the analysed log file, shown in the report header.
        output_path: Destination of the HTML file; it is written as UTF-8.

    TAG names and the file name come from the log / command line, so they are
    HTML-escaped before being embedded in the page.
    """
    from datetime import datetime
    from html import escape

    def html_text(value):
        """Return ``value`` as text that is safe to embed in HTML."""
        return escape(str(value))

    def freq_class_for(percentage):
        """Map a TAG's share of all logs to the CSS class used to colour it."""
        if percentage > 5:
            return "high-freq"
        return "med-freq" if percentage > 1 else "low-freq"

    def ratio_percent(part, whole):
        """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is 0."""
        return (part / whole) * 100 if whole else 0.0

    stats = analyzer.get_statistics()

    # Overall over-long log statistics.
    total_long_logs = sum(analyzer.long_log_counter.values())
    long_logs_percentage = (
        (total_long_logs / analyzer.matched_lines * 100)
        if analyzer.matched_lines > 0 else 0
    )

    # Bar chart: TOP 10 TAGs by number of over-long logs.
    top_long_logs = analyzer.long_log_counter.most_common(10)
    if top_long_logs:
        # Bars are scaled relative to the largest entry; avoid dividing by zero.
        max_long_count = top_long_logs[0][1] or 1
        long_bar_parts = []
        for i, (tag, long_count) in enumerate(top_long_logs, 1):
            long_percentage_in_tag = ratio_percent(long_count, analyzer.tag_counter[tag])
            bar_width = (long_count / max_long_count) * 100
            long_bar_parts.append(f"""
            <div class="bar-item">
                <div class="bar-label">
                    <span class="tag-name">{i}. {html_text(tag)}</span>
                    <span class="tag-stats high-freq">{long_count:,} 条 ({long_percentage_in_tag:.1f}% 的该TAG)</span>
                </div>
                <div class="bar-bg">
                    <div class="bar-fill" style="width: {bar_width}%; background: linear-gradient(90deg, #ff9800 0%, #ff5722 100%);">{long_count:,}</div>
                </div>
            </div>
            """)
        long_logs_bars = "".join(long_bar_parts)
    else:
        long_logs_bars = '<p style="color: #666; text-align: center; padding: 20px;">未发现超长日志</p>'

    # Bar chart: TOP 20 TAGs by frequency.
    top20_parts = []
    for i, (tag, count, percentage) in enumerate(stats[:20], 1):
        freq_class = freq_class_for(percentage)
        # Bar width is twice the percentage, capped at the full width.
        top20_parts.append(f"""
        <div class="bar-item">
            <div class="bar-label">
                <span class="tag-name">{i}. {html_text(tag)}</span>
                <span class="tag-stats {freq_class}">{count:,} 次 ({percentage:.2f}%)</span>
            </div>
            <div class="bar-bg">
                <div class="bar-fill" style="width: {min(percentage * 2, 100)}%">{percentage:.2f}%</div>
            </div>
        </div>
        """)
    top20_bars = "".join(top20_parts)

    # Table: one row per TAG.
    row_parts = []
    for i, (tag, count, percentage) in enumerate(stats, 1):
        freq_class = freq_class_for(percentage)

        # Over-long log count for this TAG.
        long_count = analyzer.long_log_counter.get(tag, 0)
        if long_count > 0:
            long_percentage = ratio_percent(long_count, count)
            long_info = f'<span class="high-freq">{long_count:,} ({long_percentage:.1f}%)</span>'
        else:
            long_info = '<span style="color: #999;">-</span>'

        # Log-level distribution, in fixed V/D/I/W/E order.
        badge_parts = []
        if tag in analyzer.tag_level_counter:
            level_counts = analyzer.tag_level_counter[tag]
            for level in ['V', 'D', 'I', 'W', 'E']:
                if level in level_counts:
                    level_count = level_counts[level]
                    badge_parts.append(
                        f'<span class="level-badge level-{level.lower()}">{level}:{level_count}</span>'
                    )
        level_badges = "".join(badge_parts)

        row_parts.append(f"""
        <tr>
            <td class="rank">{i}</td>
            <td><code>{html_text(tag)}</code></td>
            <td>{count:,}</td>
            <td class="percentage {freq_class}">{percentage:.2f}%</td>
            <td>{long_info}</td>
            <td>{level_badges}</td>
        </tr>
        """)
    table_rows = "".join(row_parts)

    # Optimisation suggestions.
    suggestion_parts = ["<ul>"]

    # Suggestion: over-long logs.
    if total_long_logs > 0:
        suggestion_parts.append(
            f"<li><strong>⚠️ 超长日志优化:</strong>发现 {total_long_logs:,} 条超长日志({long_logs_percentage:.2f}%),建议优化以下TAG:"
        )
        suggestion_parts.append("<ul>")
        for tag, long_count in analyzer.long_log_counter.most_common(5):
            long_pct = ratio_percent(long_count, analyzer.tag_counter[tag])
            suggestion_parts.append(
                f"<li>{html_text(tag)}: {long_count:,} 条超长日志(占该TAG的 {long_pct:.1f}%)</li>"
            )
        suggestion_parts.append("</ul></li>")

    # Suggestion: TAGs among the first 10 that each exceed 10% of all logs.
    high_freq_tags = [(tag, p) for tag, _, p in stats[:10] if p > 10]
    if high_freq_tags:
        suggestion_parts.append("<li><strong>高频TAG优化:</strong>以下TAG占比较高,建议检查是否有冗余日志<ul>")
        for tag, percentage in high_freq_tags:
            suggestion_parts.append(f"<li>{html_text(tag)} ({percentage:.2f}%)</li>")
        suggestion_parts.append("</ul></li>")

    # Suggestion: many TAGs that appear fewer than 5 times.
    low_freq_tags = [tag for tag, count, _ in stats if count < 5]
    if len(low_freq_tags) > 10:
        # "<" is written as an entity so it cannot be parsed as the start of a tag.
        suggestion_parts.append(
            f"<li><strong>低频TAG清理:</strong>发现 {len(low_freq_tags)} 个出现次数 &lt; 5 的TAG,建议review并移除不必要的日志</li>"
        )

    # Suggestion: how concentrated the logs are in the first 20 TAGs.
    if len(stats) >= 20:
        top20_percentage = sum(p for _, _, p in stats[:20])
        suggestion_parts.append(
            f"<li><strong>集中度分析:</strong>TOP 20 TAG占总日志的 {top20_percentage:.2f}%,"
        )
        if top20_percentage > 80:
            suggestion_parts.append("日志较为集中,重点优化这些TAG即可获得明显效果</li>")
        else:
            suggestion_parts.append("日志较为分散,建议统一TAG命名规范</li>")

    suggestion_parts.append("</ul>")
    suggestions_html = "".join(suggestion_parts)

    # Fill in the page template.
    report_html = HTML_TEMPLATE.format(
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        filename=html_text(filename),
        total_lines=analyzer.total_lines,
        matched_lines=analyzer.matched_lines,
        unique_tags=len(analyzer.tag_counter),
        long_logs_count=total_long_logs,
        long_logs_percentage=long_logs_percentage,
        long_logs_bars=long_logs_bars,
        top20_bars=top20_bars,
        table_rows=table_rows,
        suggestions=suggestions_html,
    )

    # Write the report to disk.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(report_html)

    print(f"\n✅ HTML报告已生成: {output_path}")
    print(f"   用浏览器打开查看: file://{Path(output_path).absolute()}")
def generate_output_filename(input_file: str) -> str:
    """Derive the report file name from the input log file name.

    The final extension is dropped, then any remaining trailing ``.xlog``,
    ``.log`` or ``.txt`` suffixes are removed (repeatedly, in any order), and
    ``_report.html`` is appended. For example,
    ``com.partyjoy.yoki_20250922.xlog.log`` becomes
    ``com.partyjoy.yoki_20250922_report.html``.

    Args:
        input_file: Path of the log file being analysed.

    Returns:
        Path of the report, located in the same directory as ``input_file``.
    """
    input_path = Path(input_file)
    log_suffixes = ('.xlog', '.log', '.txt')

    # File name without its last extension.
    base_name = input_path.stem

    # Keep stripping until no known suffix is left, so the result does not
    # depend on the order in which the suffixes happen to be stacked.
    while True:
        for suffix in log_suffixes:
            # Never strip the whole name away (e.g. a hidden file called ".log").
            if base_name.endswith(suffix) and len(base_name) > len(suffix):
                base_name = base_name[:-len(suffix)]
                break
        else:
            break

    # Joining with the parent keeps the report next to the input file; for a
    # bare file name the parent is "." and the result is just the file name.
    return str(input_path.parent / f"{base_name}_report.html")
def main():
    """Command-line entry point: analyse a log file and write its HTML report.

    Exits with status 1 when the given log file does not exist. When no
    ``--output`` path is supplied, the report name is derived from the log
    file name.
    """
    cli = argparse.ArgumentParser(description='生成日志TAG分析的HTML可视化报告')
    cli.add_argument('logfile', help='日志文件路径')
    cli.add_argument(
        '--output', '-o',
        default=None,
        help='输出HTML文件路径(默认: 根据输入文件名自动生成)',
    )
    options = cli.parse_args()
    log_path = options.logfile

    # Bail out early if there is nothing to analyse.
    if not Path(log_path).exists():
        print(f"❌ 错误: 文件不存在: {log_path}")
        sys.exit(1)

    # Use the explicit output path if given, otherwise derive one.
    report_path = options.output
    if not report_path:
        report_path = generate_output_filename(log_path)
        print(f"📝 自动生成输出文件名: {report_path}")

    # Run the analysis.
    print(f"正在分析日志文件: {log_path}")
    tag_analyzer = LogTagAnalyzer()
    tag_analyzer.analyze_file(log_path)

    # Produce the HTML report.
    generate_html_report(tag_analyzer, log_path, report_path)


if __name__ == "__main__":
    main()
|