#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
生成日志分析HTML可视化报告
功能特性:
- 自动根据输入文件名生成输出报告文件名
- 统计和标记超过400字符的超长日志
- 可视化展示TAG使用情况和超长日志分布
- 提供优化建议
使用方法:
# 自动生成报告文件名(推荐)
python generate_log_report_html.py com.partyjoy.yoki_20250922.xlog.log
# 输出: com.partyjoy.yoki_20250922_report.html
# 指定输出文件名
python generate_log_report_html.py logcat.txt --output custom_report.html
"""
import sys
import argparse
from pathlib import Path
from analyze_log_tags import LogTagAnalyzer
# Report template, filled by generate_html_report() via str.format().
# NOTE(review): the original HTML markup appears to have been stripped from
# this file; only the text content survives. The placeholders below are
# restored so that every value computed by generate_html_report() (file name,
# line counts, bar sections, suggestions) actually appears in the output
# instead of being silently dropped by str.format().
HTML_TEMPLATE = """
日志TAG分析报告
📊 日志TAG分析报告
生成时间: {timestamp} | 文件: {filename}
总行数: {total_lines:,} | 匹配行数: {matched_lines:,} | 唯一TAG数: {unique_tags:,}
超长日志数
{long_logs_count:,}
占比 {long_logs_percentage:.2f}%
超长日志TOP 10
{long_logs_bars}
TOP 20 TAG
{top20_bars}
完整TAG列表
| 排名 |
TAG名称 |
出现次数 |
占比 |
超长日志 |
日志级别分布 |
{table_rows}
优化建议
{suggestions}
"""
def _render_long_log_bars(analyzer) -> str:
    """Render the TOP-10 list of TAGs that produced over-long log lines."""
    if not analyzer.long_log_counter:
        return '未发现超长日志\n'
    bars = ""
    top10 = analyzer.long_log_counter.most_common(10)
    for rank, (tag, long_count) in enumerate(top10, 1):
        tag_total = analyzer.tag_counter[tag]
        pct_in_tag = (long_count / tag_total) * 100
        # NOTE(review): the original also computed a bar width from the max
        # count, but the surviving template text never uses it — dropped.
        bars += f"""
{rank}. {tag}
{long_count:,} 条 ({pct_in_tag:.1f}% 的该TAG)
"""
    return bars


def _render_top20_bars(stats) -> str:
    """Render the TOP-20 tag-frequency bar list from (tag, count, pct) tuples."""
    bars = ""
    for rank, (tag, count, percentage) in enumerate(stats[:20], 1):
        bars += f"""
{rank}. {tag}
{count:,} 次 ({percentage:.2f}%)
"""
    return bars


def _render_table_rows(analyzer, stats) -> str:
    """Render one table row per TAG: rank, counts, over-long share, level mix."""
    rows = ""
    for rank, (tag, count, percentage) in enumerate(stats, 1):
        # Over-long log count and its share of this TAG's total output.
        long_count = analyzer.long_log_counter.get(tag, 0)
        if long_count > 0:
            long_info = f'{long_count:,} ({(long_count / count) * 100:.1f}%)'
        else:
            long_info = '-'
        # Per-level badge string in fixed severity order.
        level_badges = ""
        if tag in analyzer.tag_level_counter:
            for level in ('V', 'D', 'I', 'W', 'E'):
                if level in analyzer.tag_level_counter[tag]:
                    level_badges += f'{level}:{analyzer.tag_level_counter[tag][level]}'
        rows += f"""
| {rank} |
{tag} |
{count:,} |
{percentage:.2f}% |
{long_info} |
{level_badges} |
"""
    return rows


def _render_suggestions(analyzer, stats, total_long_logs, long_logs_percentage) -> str:
    """Build the optimization-suggestion section from the collected statistics."""
    out = ""
    # Over-long log suggestion: name the worst 5 offending TAGs.
    if total_long_logs > 0:
        out += (f"- ⚠️ 超长日志优化:发现 {total_long_logs:,} 条超长日志"
                f"({long_logs_percentage:.2f}%),建议优化以下TAG:\n")
        for tag, long_count in analyzer.long_log_counter.most_common(5):
            long_pct = (long_count / analyzer.tag_counter[tag]) * 100
            out += f"- {tag}: {long_count:,} 条超长日志(占该TAG的 {long_pct:.1f}%)\n"
        out += "\n"
    # High-frequency suggestion: TOP-10 TAGs that each exceed 10% of all logs.
    high_freq_tags = [(tag, p) for tag, _, p in stats[:10] if p > 10]
    if high_freq_tags:
        out += "- 高频TAG优化:以下TAG占比较高,建议检查是否有冗余日志\n"
        for tag, percentage in high_freq_tags:
            out += f"- {tag} ({percentage:.2f}%)\n"
        out += "\n"
    # Low-frequency cleanup: only worth mentioning past a small threshold.
    low_freq_tags = [tag for tag, count, _ in stats if count < 5]
    if len(low_freq_tags) > 10:
        out += (f"- 低频TAG清理:发现 {len(low_freq_tags)} 个出现次数 < 5 的TAG,"
                f"建议review并移除不必要的日志\n")
    # Concentration analysis: how much of the volume the TOP 20 covers.
    if len(stats) >= 20:
        top20_percentage = sum(p for _, _, p in stats[:20])
        out += f"- 集中度分析:TOP 20 TAG占总日志的 {top20_percentage:.2f}%,"
        if top20_percentage > 80:
            out += "日志较为集中,重点优化这些TAG即可获得明显效果\n"
        else:
            out += "日志较为分散,建议统一TAG命名规范\n"
    return out


def generate_html_report(analyzer: "LogTagAnalyzer", filename: str, output_path: str):
    """Render the analyzer's statistics into an HTML report file.

    Args:
        analyzer: LogTagAnalyzer that has already processed the log file;
            read attributes: total_lines, matched_lines, tag_counter,
            long_log_counter, tag_level_counter, get_statistics().
        filename: original log file name, shown in the report header.
        output_path: destination path for the generated HTML file.

    Side effects: writes ``output_path`` (UTF-8) and prints its location.
    """
    from datetime import datetime

    stats = analyzer.get_statistics()

    # Over-long log totals; guard against a file with zero matched lines.
    total_long_logs = sum(analyzer.long_log_counter.values())
    long_logs_percentage = (
        (total_long_logs / analyzer.matched_lines * 100)
        if analyzer.matched_lines > 0 else 0
    )

    html = HTML_TEMPLATE.format(
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        filename=filename,
        total_lines=analyzer.total_lines,
        matched_lines=analyzer.matched_lines,
        unique_tags=len(analyzer.tag_counter),
        long_logs_count=total_long_logs,
        long_logs_percentage=long_logs_percentage,
        long_logs_bars=_render_long_log_bars(analyzer),
        top20_bars=_render_top20_bars(stats),
        table_rows=_render_table_rows(analyzer, stats),
        suggestions=_render_suggestions(
            analyzer, stats, total_long_logs, long_logs_percentage),
    )

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html)
    print(f"\n✅ HTML报告已生成: {output_path}")
    print(f" 用浏览器打开查看: file://{Path(output_path).absolute()}")
def generate_output_filename(input_file: str) -> str:
    """Derive the report filename ``<base>_report.html`` from a log file path.

    The report is placed alongside the input file when the input lives in a
    subdirectory; otherwise it goes in the current directory.
    """
    source = Path(input_file)
    # Path.stem removes only the last extension; peel off any remaining
    # log-style suffixes, e.g. "app_20250922.xlog.log" -> stem
    # "app_20250922.xlog" -> "app_20250922".
    stem = source.stem
    for ext in ('.xlog', '.log', '.txt'):
        if stem.endswith(ext):
            stem = stem[:len(stem) - len(ext)]
    report_name = f"{stem}_report.html"
    # Keep the report next to the source file when it has a real parent dir.
    if source.parent == Path('.'):
        return str(Path(report_name))
    return str(source.parent / report_name)
def main():
    """CLI entry point: parse arguments, analyze the log, emit the HTML report."""
    parser = argparse.ArgumentParser(description='生成日志TAG分析的HTML可视化报告')
    parser.add_argument('logfile', help='日志文件路径')
    parser.add_argument('--output', '-o', default=None,
                        help='输出HTML文件路径(默认: 根据输入文件名自动生成)')
    args = parser.parse_args()

    # Fail fast on a missing input file.
    if not Path(args.logfile).exists():
        print(f"❌ 错误: 文件不存在: {args.logfile}")
        sys.exit(1)

    # Use the explicit output path when given, otherwise derive one.
    report_path = args.output
    if report_path is None:
        report_path = generate_output_filename(args.logfile)
        print(f"📝 自动生成输出文件名: {report_path}")

    # Run the analysis and render the report.
    print(f"正在分析日志文件: {args.logfile}")
    analyzer = LogTagAnalyzer()
    analyzer.analyze_file(args.logfile)
    generate_html_report(analyzer, args.logfile, report_path)
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()