excel-toolkit/script_templates/pivot_summary.py

73 lines
2.3 KiB
Python

#!/usr/bin/env python3
"""
数据透视汇总模板
按照指定列进行分组聚合统计
"""
import pandas as pd
import sys
from pathlib import Path
def main():
    """Group an Excel sheet by one or more columns and write the summary.

    Reads the workbook at ``file_path`` with ``pd.read_excel``, groups the
    rows by the comma-separated ``group_by`` columns and aggregates
    ``agg_column`` with ``agg_func`` (sum/mean/count/max/min, matched
    case-insensitively). When no aggregate column is configured, the rows of
    each group are counted instead. The result is written to ``output_path``
    and a short report with a ten-row preview is printed.

    An unsupported ``agg_func`` falls back to ``sum``; the fallback is
    announced and the report shows the function that was actually applied.

    Exits with status 1, after printing an error, when no grouping column is
    given or when a configured column is missing from the sheet.

    NOTE(review): this file appears to be a ``str.format`` template -- the
    configuration values below are single-brace placeholders, so every
    f-string field is written with doubled braces and no other single brace
    (set or dict literal included) may appear in the body. Confirm against
    the renderer.
    """
    # Parameter configuration (placeholders filled in at render time).
    file_path = "{file}"
    output_path = "{output}"
    group_by = "{group_by}"  # grouping columns (comma-separated)
    agg_column = "{agg_column}"  # column to aggregate
    agg_func = "{agg_func}"  # aggregation function: sum/mean/count/max/min

    # A tuple, not a set literal: the template must stay free of stray braces.
    supported_funcs = ("sum", "mean", "count", "max", "min")

    # Read the file.
    df = pd.read_excel(file_path)

    # Parse the grouping columns, dropping blanks left by stray commas.
    group_columns = [col.strip() for col in group_by.split(",") if col.strip()]
    if not group_columns:
        print("错误: 未指定分组列")
        print(f"可用列: {{list(df.columns)}}")
        sys.exit(1)

    # Check that the requested columns exist.
    missing_cols = [col for col in group_columns if col not in df.columns]
    if missing_cols:
        print(f"错误: 分组列不存在: {{missing_cols}}")
        print(f"可用列: {{list(df.columns)}}")
        sys.exit(1)
    if agg_column and agg_column not in df.columns:
        # Doubled braces so the name is read from the variable at run time
        # instead of being baked into the generated source.
        print(f"错误: 聚合列 '{{agg_column}}' 不存在")
        print(f"可用列: {{list(df.columns)}}")
        sys.exit(1)

    # Build the pivot.
    effective_func = "sum"
    if agg_column:
        requested_func = agg_func.strip().lower()
        if requested_func in supported_funcs:
            effective_func = requested_func
        elif requested_func:
            # Keep the historical fallback to sum, but do not hide it.
            print(f"提示: 不支持的聚合函数 '{{agg_func}}', 已改用 sum")
        grouped = df.groupby(group_columns)[agg_column]
        pivot = grouped.agg(effective_func).reset_index()
    else:
        # No aggregate column configured: just count the rows per group.
        pivot = df.groupby(group_columns).size().reset_index(name="count")

    # Save the result.
    pivot.to_excel(output_path, index=False)

    print("数据透视完成")
    print(f"文件: {{file_path}}")
    print(f"分组列: {{group_columns}}")
    if agg_column:
        print(f"聚合列: {{agg_column}}")
        print(f"聚合函数: {{effective_func}}")
    print(f"输出: {{output_path}}")
    print(f"\n汇总行数: {{len(pivot)}}")

    # Show the first few rows.
    print("\n预览:")
    print(pivot.head(10).to_string(index=False))
# Script entry point: run the pivot only when executed directly, not on import.
if __name__ == "__main__":
    main()