#!/usr/bin/env python3
"""Pivot-summary template: group rows by the given columns and aggregate.

NOTE(review): the quoted placeholder values in main() and the doubled braces
inside the f-strings suggest this file is rendered with str.format before it
is executed - confirm against the caller. For that reason this file avoids
dict/set literals and keeps every f-string field brace doubled.
"""
import pandas as pd
import sys
from pathlib import Path

# Aggregations this template supports; any other name falls back to "sum".
SUPPORTED_AGG_FUNCS = ("sum", "mean", "count", "max", "min")


def parse_group_columns(group_by):
    """Split a comma-separated column list into stripped, non-empty names.

    Blank entries (empty input, trailing or doubled commas) are dropped so
    they are not later reported as a missing column with an empty name.
    """
    return [name.strip() for name in group_by.split(",") if name.strip()]


def resolve_agg_func(agg_func):
    """Return the name of the aggregation that will actually be applied.

    The requested name is matched case-insensitively, ignoring surrounding
    whitespace. An empty name defaults to "sum". An unsupported name keeps
    the historical fallback to "sum", but prints a warning so the fallback
    is no longer silent.
    """
    normalized = agg_func.strip().lower()
    if normalized in SUPPORTED_AGG_FUNCS:
        return normalized
    if normalized:
        print(f"警告: 不支持的聚合函数 '{{agg_func}}', 已改用 sum")
    return "sum"


def build_pivot(df, group_columns, agg_column, agg_func):
    """Group df by group_columns and return the aggregated frame.

    Args:
        df: Input DataFrame.
        group_columns: List of column names to group by.
        agg_column: Column to aggregate; when empty, rows per group are
            counted into a column named "count" and agg_func is ignored.
        agg_func: One of SUPPORTED_AGG_FUNCS (see resolve_agg_func).

    Returns:
        A DataFrame with the group columns restored as regular columns.
    """
    grouped = df.groupby(group_columns)
    if not agg_column:
        return grouped.size().reset_index(name="count")
    return grouped[agg_column].agg(agg_func).reset_index()


def main():
    """Read the input workbook, pivot it, save the result, print a summary.

    Exits with status 1 when no grouping column is given or when a grouping
    or aggregation column is not present in the input.
    """
    # Parameter configuration (values are filled in by the template renderer).
    file_path = "{file}"
    output_path = "{output}"
    group_by = "{group_by}"  # grouping columns, comma-separated
    agg_column = "{agg_column}"  # column to aggregate; empty means count rows
    agg_func = "{agg_func}"  # aggregation: sum/mean/count/max/min

    df = pd.read_excel(file_path)

    group_columns = parse_group_columns(group_by)
    if not group_columns:
        print("错误: 未指定分组列")
        print(f"可用列: {{list(df.columns)}}")
        sys.exit(1)

    # Validate that every requested column exists before grouping.
    missing_cols = [col for col in group_columns if col not in df.columns]
    if missing_cols:
        print(f"错误: 分组列不存在: {{missing_cols}}")
        print(f"可用列: {{list(df.columns)}}")
        sys.exit(1)

    if agg_column and agg_column not in df.columns:
        # This message embeds the placeholder directly instead of using a
        # doubled-brace f-string field; kept exactly as in the original.
        print(f"错误: 聚合列 '{agg_column}' 不存在")
        print(f"可用列: {{list(df.columns)}}")
        sys.exit(1)

    if agg_column:
        # Resolve once so the summary below reports what was really applied.
        agg_func = resolve_agg_func(agg_func)
    pivot = build_pivot(df, group_columns, agg_column, agg_func)

    # Make sure the destination directory exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    pivot.to_excel(output_path, index=False)

    print("数据透视完成")
    print(f"文件: {{file_path}}")
    print(f"分组列: {{group_columns}}")
    if agg_column:
        print(f"聚合列: {{agg_column}}")
        print(f"聚合函数: {{agg_func}}")
    print(f"输出: {{output_path}}")
    print(f"\n汇总行数: {{len(pivot)}}")

    # Show the first few rows as a preview.
    print("\n预览:")
    print(pivot.head(10).to_string(index=False))


if __name__ == "__main__":
    main()