81 lines
2.4 KiB
Python
81 lines
2.4 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
Column merge/split template.

Merges multiple columns into a single column, or splits a single column
into multiple columns.
"""
|
|||
|
|
|
|||
|
|
import pandas as pd
|
|||
|
|
import sys
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
def main():
    """Merge several spreadsheet columns into one, or split one column into several.

    NOTE(review): this file looks like a str.format-style template. The
    single-brace placeholders in the configuration section are presumably
    substituted before the script runs, and the doubled braces in the
    f-strings are the escaped form that survives that substitution. Keep
    every runtime f-string field double-braced and do not introduce other
    brace characters in this function.

    Workflow:
        1. Read the input workbook with pandas.
        2. For the merge operation, join the listed columns row by row with
           the merge separator into the result column.
        3. For the split operation, split the chosen column on the literal
           separator and assign the pieces to the new column names, in order.
           Names without a matching piece are skipped; extra pieces are dropped.
        4. Write the frame to the output workbook and print a short preview.

    Raises:
        SystemExit: with status 1 when a requested column does not exist or
            when the operation is neither merge nor split.
    """
    # Parameter configuration
    file_path = "{file}"
    output_path = "{output}"
    operation = "{operation}"  # operation type: merge/split

    # Merge parameters
    merge_columns = "{merge_columns}"  # columns to merge (comma-separated)
    merge_separator = "{merge_separator}"  # separator placed between merged values
    result_column = "{result_column}"  # name of the resulting column

    # Split parameters
    split_column = "{split_column}"  # column to split
    split_separator = "{split_separator}"  # separator to split on
    new_columns = "{new_columns}"  # new column names (comma-separated)

    # Read the input file
    df = pd.read_excel(file_path)

    print(f"原始数据: {{len(df)}} 行, {{len(df.columns)}} 列")

    if operation == "merge":
        columns = [col.strip() for col in merge_columns.split(",")]

        # Refuse to continue if any requested column is absent
        missing_cols = [col for col in columns if col not in df.columns]
        if missing_cols:
            print(f"错误: 列不存在: {{missing_cols}}")
            sys.exit(1)

        # Values are cast to str so that non-text cells can be joined
        df[result_column] = df[columns].astype(str).agg(merge_separator.join, axis=1)

        print(f"合并列: {{columns}}")
        print(f"分隔符: '{{merge_separator}}'")
        print(f"结果列: {{result_column}}")

    elif operation == "split":
        if split_column not in df.columns:
            # Escaped braces so the column name is read at run time, like
            # every other message in this function
            print(f"错误: 列 '{{split_column}}' 不存在")
            sys.exit(1)

        # regex=False: treat the separator literally. With the default,
        # pandas compiles any separator longer than one character as a
        # regular expression, which breaks separators such as "||" or "**".
        split_df = df[split_column].str.split(
            split_separator, expand=True, regex=False
        )

        # Pair names with the produced pieces; zip stops at the shorter side
        new_col_names = [col.strip() for col in new_columns.split(",")]
        for name, part in zip(new_col_names, split_df.columns):
            df[name] = split_df[part]

        print(f"拆分列: {{split_column}}")
        print(f"分隔符: '{{split_separator}}'")
        print(f"新列: {{new_col_names}}")

    else:
        # Previously an unknown operation silently rewrote the file unchanged
        # and reported success
        print(f"错误: 不支持的操作类型: '{{operation}}'")
        sys.exit(1)

    # Save the result
    df.to_excel(output_path, index=False)

    print("\n操作完成")
    print(f"输出文件: {{output_path}}")
    print(f"最终数据: {{len(df)}} 行, {{len(df.columns)}} 列")

    # Show the first few rows
    print("\n预览:")
    print(df.head(5).to_string(index=False))
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Script entry point: run the merge/split job only on direct execution,
    # not when this module is imported.
    main()
|