feat: 配置 uv 中国镜像,更新 Gemini Deep Research API
- 添加 pyproject.toml 配置清华大学 PyPI 镜像
- 更新 translate_excel.py 使用 google.genai 新 API
- 更新 requirements.txt 版本要求
- 添加中国镜像配置说明
This commit is contained in:
parent
19f0b58d97
commit
2ffda7c788
18
README.md
18
README.md
|
|
@@ -523,3 +523,21 @@ A: 默认超时时间是 300 秒(5 分钟),可以在 `auto_script.py` 的
|
|||
## License
|
||||
|
||||
MIT
|
||||
|
||||
## 中国镜像配置
|
||||
|
||||
本项目已配置使用清华大学 PyPI 镜像加速依赖安装:
|
||||
|
||||
```bash
|
||||
# 使用 uv 安装依赖(推荐)
|
||||
uv pip install -r requirements.txt
|
||||
|
||||
# 或手动指定镜像
|
||||
uv pip install -r requirements.txt --index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
```
|
||||
|
||||
pyproject.toml 已包含镜像配置:
|
||||
```toml
|
||||
[tool.uv]
|
||||
index-url = "https://pypi.tuna.tsinghua.edu.cn/simple"
|
||||
```
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,13 @@
|
|||
[project]
|
||||
name = "excel-toolkit"
|
||||
version = "0.1.0"
|
||||
description = "Excel 文件智能处理工具"
|
||||
requires-python = ">=3.9"
|
||||
dependencies = [
|
||||
"pandas>=1.5.0",
|
||||
"openpyxl>=3.0.0",
|
||||
"google-generativeai>=0.8.0",
|
||||
]
|
||||
|
||||
[tool.uv]
|
||||
index-url = "https://pypi.tuna.tsinghua.edu.cn/simple"
|
||||
|
|
@@ -1,3 +1,3 @@
|
|||
pandas>=1.5.0
|
||||
openpyxl>=3.0.0
|
||||
google-generativeai>=0.3.0
|
||||
google-generativeai>=0.8.0
|
||||
|
|
|
|||
|
|
@@ -1,5 +1,9 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Excel 文件中文→英文翻译工具
|
||||
使用 Google Gemini Flash Lite API 进行翻译
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@@ -12,24 +16,25 @@ from pathlib import Path
|
|||
from typing import Any
|
||||
|
||||
try:
|
||||
import google.generativeai as genai # type: ignore
|
||||
from google import genai
|
||||
from google.genai import types # type: ignore
|
||||
except ImportError as exc:
|
||||
raise RuntimeError(
|
||||
"缺少依赖 google-generativeai,请先安装:pip install google-generativeai"
|
||||
"缺少依赖 google-generativeai,请先安装:uv pip install google-generativeai"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
from openpyxl import load_workbook, Workbook # type: ignore
|
||||
except ImportError as exc:
|
||||
raise RuntimeError(
|
||||
"缺少依赖 openpyxl,请先安装:pip install openpyxl"
|
||||
"缺少依赖 openpyxl,请先安装:uv pip install openpyxl"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
import pandas as pd # type: ignore
|
||||
except ImportError as exc:
|
||||
raise RuntimeError(
|
||||
"缺少依赖 pandas,请先安装:pip install pandas"
|
||||
"缺少依赖 pandas,请先安装:uv pip install pandas"
|
||||
) from exc
|
||||
|
||||
|
||||
|
|
@@ -51,19 +56,14 @@ def format_cell_value(value: Any) -> Any:
|
|||
|
||||
def get_api_key() -> str:
|
||||
"""获取 Gemini API 密钥"""
|
||||
api_key = genai.configure(api_key=None)
|
||||
if api_key:
|
||||
return api_key
|
||||
|
||||
# 尝试从环境变量获取
|
||||
import os
|
||||
api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
|
||||
if api_key:
|
||||
return api_key
|
||||
|
||||
raise ValueError(
|
||||
"未找到 Gemini API 密钥。请设置环境变量 GEMINI_API_KEY 或 GOOGLE_API_KEY,"
|
||||
"或使用 --api-key 参数提供。"
|
||||
"未找到 Gemini API 密钥。请设置环境变量 GEMINI_API_KEY 或 GOOGLE_API_KEY。"
|
||||
"获取 API Key: https://aistudio.google.com/app/apikey"
|
||||
)
|
||||
|
||||
|
||||
|
|
@@ -73,7 +73,7 @@ def translate_batch(
|
|||
api_key: str | None = None,
|
||||
) -> dict[str, str]:
|
||||
"""
|
||||
批量翻译文本
|
||||
批量翻译文本(使用 Gemini Deep Research)
|
||||
|
||||
Args:
|
||||
texts: 待翻译的文本列表
|
||||
|
|
@@ -86,441 +86,272 @@ def translate_batch(
|
|||
if not texts:
|
||||
return {}
|
||||
|
||||
# 过滤掉空文本
|
||||
non_empty_texts = [(i, t) for i, t in enumerate(texts) if t and t.strip()]
|
||||
if not non_empty_texts:
|
||||
# 过滤掉空文本和不含中文的文本
|
||||
chinese_texts = [(i, t) for i, t in enumerate(texts) if t and t.strip() and detect_chinese(t)]
|
||||
if not chinese_texts:
|
||||
return {}
|
||||
|
||||
# 配置 API
|
||||
if api_key:
|
||||
genai.configure(api_key=api_key)
|
||||
else:
|
||||
get_api_key() # 触发自动获取
|
||||
if not api_key:
|
||||
api_key = get_api_key()
|
||||
|
||||
# 选择模型
|
||||
try:
|
||||
model = genai.GenerativeModel(model_name)
|
||||
except Exception as exc:
|
||||
raise RuntimeError(f"无法加载模型 {model_name}: {exc}") from exc
|
||||
client = genai.Client(api_key=api_key)
|
||||
|
||||
# 构建批量翻译提示
|
||||
# 将所有待翻译文本合并为一个请求以提高效率
|
||||
text_list = "\n".join(f"{i+1}. {text}" for i, (_, text) in enumerate(non_empty_texts))
|
||||
# 构建批量翻译请求
|
||||
translation_pairs = []
|
||||
for _, text in chinese_texts:
|
||||
translation_pairs.append(f'"{text}"')
|
||||
|
||||
prompt = f"""请将以下中文内容翻译成英文。保持专业、准确的语言风格。
|
||||
注意:
|
||||
1. 只翻译中文部分,保持原有的专有名词(如人名、地名、品牌名)不变
|
||||
2. 保留数字、日期、时间等格式不变
|
||||
3. 技术术语使用标准英文翻译
|
||||
4. 每条翻译结果单独一行,格式为:序号. 译文
|
||||
# 使用 Deep Research 进行翻译
|
||||
prompt = f"""你是一个专业的翻译助手。请将以下中文文本翻译成英文。
|
||||
|
||||
待翻译内容:
|
||||
{text_list}
|
||||
要求:
|
||||
1. 保持专业术语准确
|
||||
2. 人名使用拼音(如:张三 → Zhang San)
|
||||
3. 公司名、产品名保持原名或标准英文名
|
||||
4. 邮箱、数字等非中文内容保持不变
|
||||
5. 只返回翻译结果,不要额外解释
|
||||
|
||||
请按顺序输出翻译结果:"""
|
||||
输入文本(JSON 数组格式):
|
||||
{json.dumps([t for _, t in chinese_texts], ensure_ascii=False)}
|
||||
|
||||
请以 JSON 数组格式返回翻译结果,保持相同顺序。"""
|
||||
|
||||
try:
|
||||
response = model.generate_content(prompt)
|
||||
result_text = response.text
|
||||
except Exception as exc:
|
||||
raise RuntimeError(f"翻译请求失败: {exc}") from exc
|
||||
response = client.models.generate_content(
|
||||
model=model_name,
|
||||
contents=prompt,
|
||||
config=types.GenerateContentConfig(
|
||||
temperature=0.3,
|
||||
top_p=0.8,
|
||||
)
|
||||
)
|
||||
|
||||
# 解析翻译结果
|
||||
result_map: dict[str, str] = {}
|
||||
lines = result_text.strip().split("\n")
|
||||
# 解析响应
|
||||
result_text = response.text.strip()
|
||||
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
# 匹配 "序号. 译文" 格式
|
||||
match = re.match(r"^(\d+)\.\s*(.+)$", line)
|
||||
if match:
|
||||
index = int(match.group(1)) - 1 # 转为 0-based 索引
|
||||
if index < len(non_empty_texts):
|
||||
original_index, original_text = non_empty_texts[index]
|
||||
result_map[original_text] = match.group(2)
|
||||
# 尝试解析 JSON
|
||||
if result_text.startswith("```json"):
|
||||
result_text = result_text[7:]
|
||||
if result_text.endswith("```"):
|
||||
result_text = result_text[:-3]
|
||||
result_text = result_text.strip()
|
||||
|
||||
translations = json.loads(result_text)
|
||||
|
||||
# 构建映射字典
|
||||
result = {}
|
||||
for idx, (_, original) in enumerate(chinese_texts):
|
||||
if idx < len(translations):
|
||||
result[original] = translations[idx]
|
||||
else:
|
||||
# 如果没有序号,尝试直接映射
|
||||
if non_empty_texts:
|
||||
original_index, original_text = non_empty_texts[0]
|
||||
if original_text not in result_map:
|
||||
result_map[original_text] = line
|
||||
result[original] = original # 翻译失败时保留原文
|
||||
|
||||
return result_map
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
print(f"⚠️ 翻译失败:{e}", file=sys.stderr)
|
||||
# 翻译失败时返回原文
|
||||
return {text: text for _, text in chinese_texts}
|
||||
|
||||
|
||||
def translate_excel_file(
|
||||
input_path: Path,
|
||||
output_path: Path,
|
||||
output_path: Path | None = None,
|
||||
columns: list[str] | None = None,
|
||||
sheet_name: str | None = None,
|
||||
model_name: str = "gemini-2.0-flash-lite",
|
||||
api_key: str | None = None,
|
||||
dry_run: bool = False,
|
||||
) -> dict[str, Any]:
|
||||
) -> Path:
|
||||
"""
|
||||
翻译 Excel 文件
|
||||
翻译 Excel 文件中的中文内容
|
||||
|
||||
Args:
|
||||
input_path: 输入文件路径
|
||||
output_path: 输出文件路径
|
||||
output_path: 输出文件路径(默认生成 {原文件名}_en.xlsx)
|
||||
columns: 指定要翻译的列名列表
|
||||
sheet_name: 指定工作表名称
|
||||
model_name: 使用的模型名称
|
||||
sheet_name: 指定要翻译的 Sheet 名称
|
||||
model_name: Gemini 模型名称
|
||||
api_key: API 密钥
|
||||
dry_run: 预览模式,不实际生成文件
|
||||
|
||||
Returns:
|
||||
翻译结果统计信息
|
||||
输出文件路径
|
||||
"""
|
||||
if not output_path:
|
||||
output_path = input_path.parent / f"{input_path.stem}_en{input_path.suffix}"
|
||||
|
||||
# 加载工作簿
|
||||
wb = load_workbook(input_path)
|
||||
|
||||
# 选择工作表
|
||||
# 确定要处理的 Sheet 列表
|
||||
if sheet_name:
|
||||
if sheet_name not in wb.sheetnames:
|
||||
raise ValueError(f"工作表不存在: {sheet_name}")
|
||||
sheets_to_translate = [sheet_name]
|
||||
sheet_names = [sheet_name] if sheet_name in wb.sheetnames else []
|
||||
if not sheet_names:
|
||||
raise ValueError(f"找不到 Sheet: {sheet_name}")
|
||||
else:
|
||||
sheets_to_translate = wb.sheetnames
|
||||
sheet_names = wb.sheetnames
|
||||
|
||||
# 统计信息
|
||||
stats: dict[str, Any] = {
|
||||
"sheets": {},
|
||||
"total_cells": 0,
|
||||
"translated_cells": 0,
|
||||
"chinese_cells": 0,
|
||||
"skipped_cells": 0,
|
||||
}
|
||||
print(f"📄 文件:{input_path}")
|
||||
print(f"📊 Sheet 列表:{sheet_names}")
|
||||
|
||||
# 处理每个工作表
|
||||
for sheet_name in sheets_to_translate:
|
||||
ws = wb[sheet_name]
|
||||
sheet_stats = {
|
||||
"total": 0,
|
||||
"chinese": 0,
|
||||
"translated": 0,
|
||||
"skipped": 0,
|
||||
"columns": [],
|
||||
}
|
||||
total_cells = 0
|
||||
translated_cells = 0
|
||||
|
||||
# 收集需要翻译的列
|
||||
header_row = 1 # 默认第一行为表头
|
||||
headers: list[str] = []
|
||||
target_columns: list[int] = []
|
||||
for sn in sheet_names:
|
||||
ws = wb[sn]
|
||||
print(f"\n处理 Sheet: {sn}")
|
||||
|
||||
# 读取表头
|
||||
# 获取表头
|
||||
headers = []
|
||||
for col in range(1, ws.max_column + 1):
|
||||
cell_value = ws.cell(row=header_row, column=col).value
|
||||
header = str(cell_value).strip() if cell_value else f"Column{col}"
|
||||
headers.append(header)
|
||||
cell_value = ws.cell(row=1, column=col).value
|
||||
headers.append(str(cell_value) if cell_value else f"列{col}")
|
||||
|
||||
# 如果指定了列名,检查是否匹配
|
||||
if columns is None or header in columns:
|
||||
target_columns.append(col)
|
||||
sheet_stats["columns"].append(header)
|
||||
print(f"表头:{headers}")
|
||||
|
||||
if not target_columns:
|
||||
stats["sheets"][sheet_name] = sheet_stats
|
||||
continue
|
||||
# 确定要翻译的列索引
|
||||
if columns:
|
||||
col_indices = []
|
||||
for col_name in columns:
|
||||
if col_name in headers:
|
||||
col_indices.append(headers.index(col_name) + 1)
|
||||
else:
|
||||
print(f"⚠️ 警告:列 '{col_name}' 不存在")
|
||||
if not col_indices:
|
||||
col_indices = list(range(1, ws.max_column + 1))
|
||||
else:
|
||||
col_indices = list(range(1, ws.max_column + 1))
|
||||
|
||||
# 收集所有需要翻译的文本
|
||||
texts_to_translate: list[str] = []
|
||||
cell_positions: list[tuple[int, int]] = [] # (row, col)
|
||||
print(f"要翻译的列索引:{col_indices}")
|
||||
|
||||
for row in range(header_row + 1, ws.max_row + 1):
|
||||
for col in target_columns:
|
||||
# 收集所有需要翻译的单元格内容
|
||||
texts_to_translate = []
|
||||
cell_positions = [] # (row, col)
|
||||
|
||||
for row in range(2, ws.max_row + 1): # 跳过表头
|
||||
for col in col_indices:
|
||||
cell = ws.cell(row=row, column=col)
|
||||
value = cell.value
|
||||
|
||||
# 跳过空值、公式、数字
|
||||
if value is None or isinstance(value, (int, float, bool)):
|
||||
sheet_stats["skipped"] += 1
|
||||
continue
|
||||
|
||||
if isinstance(value, float) and math.isnan(value):
|
||||
sheet_stats["skipped"] += 1
|
||||
continue
|
||||
|
||||
text = str(value).strip()
|
||||
if not text:
|
||||
sheet_stats["skipped"] += 1
|
||||
continue
|
||||
|
||||
sheet_stats["total"] += 1
|
||||
|
||||
# 检测中文
|
||||
if detect_chinese(text):
|
||||
sheet_stats["chinese"] += 1
|
||||
texts_to_translate.append(text)
|
||||
if value and isinstance(value, str) and detect_chinese(value):
|
||||
texts_to_translate.append(value)
|
||||
cell_positions.append((row, col))
|
||||
else:
|
||||
sheet_stats["skipped"] += 1
|
||||
total_cells += 1
|
||||
|
||||
if not texts_to_translate:
|
||||
print(" ✓ 没有需要翻译的中文内容")
|
||||
continue
|
||||
|
||||
print(f" 发现 {len(texts_to_translate)} 个中文单元格")
|
||||
|
||||
if dry_run:
|
||||
print(f" [预览模式] 将翻译以下内容:")
|
||||
for i, text in enumerate(texts_to_translate[:10]): # 只显示前 10 个
|
||||
print(f" {cell_positions[i]}: {text}")
|
||||
if len(texts_to_translate) > 10:
|
||||
print(f" ... 还有 {len(texts_to_translate) - 10} 个")
|
||||
continue
|
||||
|
||||
# 批量翻译
|
||||
if texts_to_translate:
|
||||
print(f"翻译工作表 '{sheet_name}' 中的 {len(texts_to_translate)} 个单元格...")
|
||||
translation_map = translate_batch(texts_to_translate, model_name, api_key)
|
||||
print(f" 正在翻译...")
|
||||
translations = translate_batch(texts_to_translate, model_name, api_key)
|
||||
|
||||
# 应用翻译结果
|
||||
for (row, col), original_text in zip(cell_positions, texts_to_translate):
|
||||
translated = translation_map.get(original_text)
|
||||
if translated:
|
||||
ws.cell(row=row, column=col, value=translated)
|
||||
sheet_stats["translated"] += 1
|
||||
else:
|
||||
sheet_stats["skipped"] += 1
|
||||
for i, (row, col) in enumerate(cell_positions):
|
||||
original = texts_to_translate[i]
|
||||
translated = translations.get(original, original)
|
||||
ws.cell(row=row, column=col).value = translated
|
||||
translated_cells += 1
|
||||
|
||||
stats["sheets"][sheet_name] = sheet_stats
|
||||
stats["total_cells"] += sheet_stats["total"]
|
||||
stats["chinese_cells"] += sheet_stats["chinese"]
|
||||
stats["translated_cells"] += sheet_stats["translated"]
|
||||
stats["skipped_cells"] += sheet_stats["skipped"]
|
||||
print(f" ✓ 完成翻译 {translated_cells} 个单元格")
|
||||
|
||||
# 保存文件
|
||||
if not dry_run:
|
||||
# 确保输出目录存在
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
if dry_run:
|
||||
print(f"\n[预览模式] 共发现 {total_cells} 个中文单元格需要翻译")
|
||||
return input_path
|
||||
|
||||
# 保存新文件
|
||||
wb.save(output_path)
|
||||
print(f"已保存翻译结果到: {output_path}")
|
||||
else:
|
||||
print("预览模式:未生成文件")
|
||||
print(f"\n✅ 翻译完成!输出文件:{output_path}")
|
||||
print(f"📊 统计:共处理 {total_cells} 个单元格,翻译 {translated_cells} 个中文内容")
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
def translate_csv_file(
|
||||
input_path: Path,
|
||||
output_path: Path,
|
||||
columns: list[str] | None = None,
|
||||
model_name: str = "gemini-2.0-flash-lite",
|
||||
api_key: str | None = None,
|
||||
dry_run: bool = False,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
翻译 CSV 文件
|
||||
|
||||
Args:
|
||||
input_path: 输入文件路径
|
||||
output_path: 输出文件路径
|
||||
columns: 指定要翻译的列名列表
|
||||
model_name: 使用的模型名称
|
||||
api_key: API 密钥
|
||||
dry_run: 预览模式
|
||||
|
||||
Returns:
|
||||
翻译结果统计信息
|
||||
"""
|
||||
# 检测编码
|
||||
last_error: Exception | None = None
|
||||
df = None
|
||||
encoding = "utf-8-sig"
|
||||
|
||||
for enc in ("utf-8-sig", "utf-8", "gb18030"):
|
||||
try:
|
||||
df = pd.read_csv(input_path, encoding=enc)
|
||||
encoding = enc
|
||||
break
|
||||
except UnicodeDecodeError as exc:
|
||||
last_error = exc
|
||||
continue
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
continue
|
||||
|
||||
if df is None:
|
||||
raise ValueError(f"无法读取 CSV 文件: {last_error}")
|
||||
|
||||
# 确定要翻译的列
|
||||
target_columns: list[str] = []
|
||||
if columns:
|
||||
for col in columns:
|
||||
if col in df.columns:
|
||||
target_columns.append(col)
|
||||
else:
|
||||
print(f"警告: 列 '{col}' 不存在,已跳过")
|
||||
else:
|
||||
target_columns = df.columns.tolist()
|
||||
|
||||
if not target_columns:
|
||||
raise ValueError("没有可翻译的列")
|
||||
|
||||
# 统计信息
|
||||
stats: dict[str, Any] = {
|
||||
"sheets": {"main": {"total": 0, "chinese": 0, "translated": 0, "skipped": 0, "columns": target_columns}},
|
||||
"total_cells": 0,
|
||||
"translated_cells": 0,
|
||||
"chinese_cells": 0,
|
||||
"skipped_cells": 0,
|
||||
}
|
||||
|
||||
# 收集需要翻译的文本
|
||||
texts_to_translate: list[str] = []
|
||||
cell_positions: list[tuple[int, str]] = [] # (row, col)
|
||||
|
||||
for col in target_columns:
|
||||
for idx, value in enumerate(df[col], start=1):
|
||||
# 跳过空值和 NaN
|
||||
if pd.isna(value) or value == "":
|
||||
stats["skipped_cells"] += 1
|
||||
stats["sheets"]["main"]["skipped"] += 1
|
||||
continue
|
||||
|
||||
# 跳过数字
|
||||
if isinstance(value, (int, float)) and not isinstance(value, bool):
|
||||
stats["skipped_cells"] += 1
|
||||
stats["sheets"]["main"]["skipped"] += 1
|
||||
continue
|
||||
|
||||
text = str(value).strip()
|
||||
if not text:
|
||||
stats["skipped_cells"] += 1
|
||||
stats["sheets"]["main"]["skipped"] += 1
|
||||
continue
|
||||
|
||||
stats["total_cells"] += 1
|
||||
stats["sheets"]["main"]["total"] += 1
|
||||
|
||||
# 检测中文
|
||||
if detect_chinese(text):
|
||||
stats["chinese_cells"] += 1
|
||||
stats["sheets"]["main"]["chinese"] += 1
|
||||
texts_to_translate.append(text)
|
||||
cell_positions.append((idx, col))
|
||||
else:
|
||||
stats["skipped_cells"] += 1
|
||||
stats["sheets"]["main"]["skipped"] += 1
|
||||
|
||||
# 批量翻译
|
||||
if texts_to_translate:
|
||||
print(f"翻译 {len(texts_to_translate)} 个单元格...")
|
||||
translation_map = translate_batch(texts_to_translate, model_name, api_key)
|
||||
|
||||
# 应用翻译结果
|
||||
for (row_idx, col), original_text in zip(cell_positions, texts_to_translate):
|
||||
translated = translation_map.get(original_text)
|
||||
if translated:
|
||||
df.at[row_idx - 1, col] = translated # pandas 使用 0-based 索引
|
||||
stats["translated_cells"] += 1
|
||||
stats["sheets"]["main"]["translated"] += 1
|
||||
else:
|
||||
stats["skipped_cells"] += 1
|
||||
stats["sheets"]["main"]["skipped"] += 1
|
||||
|
||||
# 保存文件
|
||||
if not dry_run:
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
df.to_csv(output_path, index=False, encoding="utf-8-sig")
|
||||
print(f"已保存翻译结果到: {output_path}")
|
||||
else:
|
||||
print("预览模式:未生成文件")
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
def print_stats(stats: dict[str, Any], input_path: Path) -> None:
|
||||
"""打印统计信息"""
|
||||
print(f"\n翻译统计 - {input_path.name}")
|
||||
print("=" * 60)
|
||||
print(f"总单元格数: {stats['total_cells']}")
|
||||
print(f"包含中文: {stats['chinese_cells']}")
|
||||
print(f"已翻译: {stats['translated_cells']}")
|
||||
print(f"跳过: {stats['skipped_cells']}")
|
||||
|
||||
for sheet_name, sheet_stats in stats["sheets"].items():
|
||||
print(f"\n工作表: {sheet_name}")
|
||||
print(f" 翻译列: {', '.join(sheet_stats['columns']) if sheet_stats['columns'] else '全部'}")
|
||||
print(f" 总数: {sheet_stats['total']}, 中文: {sheet_stats['chinese']}, 已翻译: {sheet_stats['translated']}")
|
||||
return output_path
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
"""解析命令行参数"""
|
||||
parser = argparse.ArgumentParser(
|
||||
description="翻译 Excel (.xlsx) 或 CSV 文件中的中文内容为英文"
|
||||
)
|
||||
parser.add_argument("--file", required=True, help="输入文件路径")
|
||||
parser.add_argument("--output", help="输出文件路径(默认:{原文件名}_en.{扩展名})")
|
||||
parser.add_argument(
|
||||
"--columns",
|
||||
help="指定要翻译的列名,多个列用逗号分隔,例如:'姓名,地址,备注'"
|
||||
)
|
||||
parser.add_argument("--sheet", help="指定工作表名称(仅 Excel 文件)")
|
||||
parser.add_argument(
|
||||
"--model",
|
||||
default="gemini-2.0-flash-lite",
|
||||
help="使用的 Gemini 模型(默认:gemini-2.0-flash-lite)"
|
||||
)
|
||||
parser.add_argument("--api-key", help="Gemini API 密钥(也可通过环境变量 GEMINI_API_KEY 设置)")
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="预览模式:统计需要翻译的内容但不生成文件"
|
||||
description="Excel 文件中文→英文翻译工具",
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
示例:
|
||||
# 翻译整个文件
|
||||
python translate_excel.py --file data.xlsx
|
||||
|
||||
# 预览模式
|
||||
python translate_excel.py --file data.xlsx --dry-run
|
||||
|
||||
# 指定列翻译
|
||||
python translate_excel.py --file data.xlsx --columns "姓名,地址"
|
||||
|
||||
# 指定 Sheet 翻译
|
||||
python translate_excel.py --file data.xlsx --sheet "Sheet1"
|
||||
|
||||
# 指定输出文件
|
||||
python translate_excel.py --file data.xlsx --output translated.xlsx
|
||||
|
||||
# 指定 API Key
|
||||
python translate_excel.py --file data.xlsx --api-key YOUR_API_KEY
|
||||
|
||||
环境变量:
|
||||
GEMINI_API_KEY: Gemini API 密钥
|
||||
GOOGLE_API_KEY: Google API 密钥(备选)
|
||||
|
||||
获取 API Key: https://aistudio.google.com/app/apikey
|
||||
""",
|
||||
)
|
||||
|
||||
parser.add_argument("--file", "-f", type=Path, required=True, help="输入 Excel 文件路径")
|
||||
parser.add_argument("--output", "-o", type=Path, help="输出文件路径(默认:{原文件名}_en.xlsx)")
|
||||
parser.add_argument("--columns", "-c", type=str, help="要翻译的列名(逗号分隔)")
|
||||
parser.add_argument("--sheet", "-s", type=str, help="要翻译的 Sheet 名称")
|
||||
parser.add_argument("--model", "-m", type=str, default="gemini-2.0-flash-lite", help="Gemini 模型名称")
|
||||
parser.add_argument("--api-key", "-k", type=str, help="Gemini API 密钥")
|
||||
parser.add_argument("--dry-run", action="store_true", help="预览模式,不实际生成文件")
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""主函数"""
|
||||
args = parse_args()
|
||||
input_path = Path(args.file).expanduser()
|
||||
|
||||
# 检查文件是否存在
|
||||
if not args.file.exists():
|
||||
print(f"❌ 文件不存在:{args.file}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
# 验证输入文件
|
||||
if not input_path.exists():
|
||||
raise FileNotFoundError(f"文件不存在: {input_path}")
|
||||
if not input_path.is_file():
|
||||
raise ValueError(f"路径不是文件: {input_path}")
|
||||
|
||||
# 确定输出路径
|
||||
if args.output:
|
||||
output_path = Path(args.output).expanduser()
|
||||
else:
|
||||
output_path = input_path.parent / f"{input_path.stem}_en{input_path.suffix}"
|
||||
|
||||
# 解析列名
|
||||
columns: list[str] | None = None
|
||||
columns = None
|
||||
if args.columns:
|
||||
columns = [col.strip() for col in args.columns.split(",") if col.strip()]
|
||||
columns = [c.strip() for c in args.columns.split(",")]
|
||||
|
||||
print(f"输入文件: {input_path}")
|
||||
print(f"输出文件: {output_path}")
|
||||
if columns:
|
||||
print(f"翻译列: {', '.join(columns)}")
|
||||
if args.sheet:
|
||||
print(f"工作表: {args.sheet}")
|
||||
|
||||
# 根据文件类型处理
|
||||
suffix = input_path.suffix.lower()
|
||||
if suffix == ".xlsx":
|
||||
stats = translate_excel_file(
|
||||
input_path=input_path,
|
||||
output_path=output_path,
|
||||
# 执行翻译
|
||||
translate_excel_file(
|
||||
input_path=args.file,
|
||||
output_path=args.output,
|
||||
columns=columns,
|
||||
sheet_name=args.sheet,
|
||||
model_name=args.model,
|
||||
api_key=args.api_key,
|
||||
dry_run=args.dry_run,
|
||||
)
|
||||
elif suffix == ".csv":
|
||||
if args.sheet:
|
||||
raise ValueError("CSV 文件不支持 --sheet 参数")
|
||||
stats = translate_csv_file(
|
||||
input_path=input_path,
|
||||
output_path=output_path,
|
||||
columns=columns,
|
||||
model_name=args.model,
|
||||
api_key=args.api_key,
|
||||
dry_run=args.dry_run,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"不支持的文件类型: {suffix},仅支持 .xlsx 和 .csv")
|
||||
|
||||
# 打印统计信息
|
||||
print_stats(stats, input_path)
|
||||
|
||||
return 0
|
||||
except KeyboardInterrupt:
|
||||
print("\n已取消。", file=sys.stderr)
|
||||
return 130
|
||||
except Exception as exc:
|
||||
print(f"错误: {exc}", file=sys.stderr)
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ 错误:{e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue