excel-toolkit/script_templates/merge_columns.py

81 lines
2.4 KiB
Python

#!/usr/bin/env python3
"""
列合并/拆分模板
将多列合并为一列,或将一列拆分为多列
"""
import pandas as pd
import sys
from pathlib import Path
def main():
    """Merge several columns into one, or split one column into several.

    Reads the workbook at ``file_path``, applies the configured operation,
    writes the result to ``output_path`` and prints a short report followed
    by a five-row preview.

    Exits with status 1 when the operation is not ``merge``/``split`` or when
    a required column is missing from the workbook.

    NOTE(review): the single-brace placeholders below and the doubled braces
    in the f-strings suggest this file is rendered with ``str.format`` before
    it is executed -- confirm against the caller.  Under that assumption
    every literal brace added to this file must stay doubled.
    """
    # Parameters
    file_path = "{file}"
    output_path = "{output}"
    operation = "{operation}"  # operation type: merge/split

    # Merge parameters
    merge_columns = "{merge_columns}"  # columns to merge (comma-separated)
    merge_separator = "{merge_separator}"  # separator placed between values
    result_column = "{result_column}"  # name of the merged column

    # Split parameters
    split_column = "{split_column}"  # column to split
    split_separator = "{split_separator}"  # separator to split on
    new_columns = "{new_columns}"  # names of the new columns (comma-separated)

    # Fail fast: an unknown operation used to fall through silently, copy the
    # input to the output unchanged and still report success.
    if operation not in ("merge", "split"):
        print(f"错误: 不支持的操作类型: '{{operation}}' (应为 merge 或 split)")
        sys.exit(1)

    # Load the workbook
    df = pd.read_excel(file_path)
    print(f"原始数据: {{len(df)}} 行, {{len(df.columns)}} 列")

    if operation == "merge":
        columns = [col.strip() for col in merge_columns.split(",")]

        # Every requested column must exist
        missing_cols = [col for col in columns if col not in df.columns]
        if missing_cols:
            print(f"错误: 列不存在: {{missing_cols}}")
            sys.exit(1)

        # Join the row values as text.
        # NOTE: astype(str) renders missing cells as literal text ("nan").
        df[result_column] = df[columns].astype(str).agg(merge_separator.join, axis=1)

        print(f"合并列: {{columns}}")
        print(f"分隔符: '{{merge_separator}}'")
        print(f"结果列: {{result_column}}")
    else:
        if split_column not in df.columns:
            print(f"错误: 列 '{{split_column}}' 不存在")
            sys.exit(1)

        # The .str accessor rejects non-text columns (numbers, dates), so
        # convert non-missing cells to text while leaving missing cells alone.
        source = df[split_column].astype(object)
        source = source.where(source.isna(), source.astype(str))
        split_df = source.str.split(split_separator, expand=True)

        # Assign names to the parts; names beyond the number of parts that
        # the split actually produced cannot be created.
        new_col_names = [col.strip() for col in new_columns.split(",")]
        part_count = split_df.shape[1]
        created = new_col_names[:part_count]
        skipped = new_col_names[part_count:]
        for i, name in enumerate(created):
            df[name] = split_df[i]

        print(f"拆分列: {{split_column}}")
        print(f"分隔符: '{{split_separator}}'")
        print(f"新列: {{created}}")
        if skipped:
            print(f"警告: 拆分结果只有 {{part_count}} 列, 未创建: {{skipped}}")

    # Save the result
    df.to_excel(output_path, index=False)
    print("\n操作完成")
    print(f"输出文件: {{output_path}}")
    print(f"最终数据: {{len(df)}} 行, {{len(df.columns)}} 列")

    # Show the first few rows
    print("\n预览:")
    print(df.head(5).to_string(index=False))
# Run the template only when executed as a script, not when imported.
if __name__ == "__main__":
    main()