403 lines
12 KiB
Python
Executable File
403 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Excel 文件中文→英文翻译工具
|
||
使用 Google Gemini Flash Lite API 进行翻译
|
||
|
||
流程:
|
||
1. 提取:收集所有中文内容及其位置映射
|
||
2. 翻译:批量翻译所有中文内容
|
||
3. 应用:将翻译结果写入新 Excel 文件
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import re
|
||
import sys
|
||
from dataclasses import dataclass, field
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
import sys
|
||
sys.path.insert(0, str(Path(__file__).parent))
|
||
from load_env import load_env
|
||
load_env() # 加载 .env 文件
|
||
try:
|
||
import google.generativeai as genai # type: ignore
|
||
except ImportError as exc:
|
||
raise RuntimeError(
|
||
"缺少依赖 google-generativeai,请先安装:uv pip install google-generativeai"
|
||
) from exc
|
||
|
||
try:
|
||
from openpyxl import load_workbook, Workbook # type: ignore
|
||
except ImportError as exc:
|
||
raise RuntimeError(
|
||
"缺少依赖 openpyxl,请先安装:uv pip install openpyxl"
|
||
) from exc
|
||
|
||
try:
|
||
import pandas as pd # type: ignore
|
||
except ImportError as exc:
|
||
raise RuntimeError(
|
||
"缺少依赖 pandas,请先安装:uv pip install pandas"
|
||
) from exc
|
||
|
||
|
||
@dataclass
|
||
class CellPosition:
|
||
"""单元格位置信息"""
|
||
sheet: str
|
||
row: int
|
||
col: int
|
||
col_letter: str = field(init=False)
|
||
|
||
def __post_init__(self):
|
||
self.col_letter = self._index_to_letter(self.col)
|
||
|
||
@staticmethod
|
||
def _index_to_letter(col: int) -> str:
|
||
result = ""
|
||
while col > 0:
|
||
col -= 1
|
||
result = chr(65 + (col % 26)) + result
|
||
col //= 26
|
||
return result
|
||
|
||
@property
|
||
def cell_ref(self) -> str:
|
||
return f"{self.col_letter}{self.row}"
|
||
|
||
@property
|
||
def full_ref(self) -> str:
|
||
return f"{self.sheet}!{self.cell_ref}"
|
||
|
||
|
||
@dataclass
|
||
class TranslationEntry:
|
||
"""翻译条目"""
|
||
position: CellPosition
|
||
original: str
|
||
translated: str = ""
|
||
|
||
|
||
def detect_chinese(text: str) -> bool:
|
||
if not text or not isinstance(text, str):
|
||
return False
|
||
return bool(re.search(r"[\u4e00-\u9fff]", text))
|
||
|
||
|
||
def get_api_key() -> str:
|
||
import os
|
||
api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
|
||
if api_key:
|
||
return api_key
|
||
raise ValueError(
|
||
"未找到 Gemini API 密钥。请设置环境变量 GEMINI_API_KEY 或 GOOGLE_API_KEY。"
|
||
"获取 API Key: https://aistudio.google.com/app/apikey"
|
||
)
|
||
|
||
|
||
def get_model_name() -> str:
|
||
import os
|
||
return os.getenv("GEMINI_MODEL", "gemini-2.0-flash-lite")
|
||
|
||
|
||
def extract_chinese_content(
|
||
input_path: Path,
|
||
columns: list[str] | None = None,
|
||
sheet_name: str | None = None,
|
||
) -> tuple[list[TranslationEntry], dict[str, list[str]]]:
|
||
wb = load_workbook(input_path)
|
||
entries: list[TranslationEntry] = []
|
||
sheet_headers: dict[str, list[str]] = {}
|
||
|
||
if sheet_name:
|
||
sheet_names = [sheet_name] if sheet_name in wb.sheetnames else []
|
||
if not sheet_names:
|
||
raise ValueError(f"找不到 Sheet: {sheet_name}")
|
||
else:
|
||
sheet_names = wb.sheetnames
|
||
|
||
print(f"📄 文件:{input_path}")
|
||
print(f"📊 Sheet 列表:{sheet_names}")
|
||
|
||
for sn in sheet_names:
|
||
ws = wb[sn]
|
||
print(f"\n处理 Sheet: {sn}")
|
||
|
||
headers = []
|
||
for col in range(1, ws.max_column + 1):
|
||
cell_value = ws.cell(row=1, column=col).value
|
||
headers.append(str(cell_value) if cell_value else f"列{col}")
|
||
|
||
sheet_headers[sn] = headers
|
||
print(f"表头:{headers}")
|
||
|
||
if columns:
|
||
col_indices = []
|
||
for col_name in columns:
|
||
if col_name in headers:
|
||
col_indices.append(headers.index(col_name) + 1)
|
||
else:
|
||
print(f"⚠️ 警告:列 '{col_name}' 不存在")
|
||
if not col_indices:
|
||
col_indices = list(range(1, ws.max_column + 1))
|
||
else:
|
||
col_indices = list(range(1, ws.max_column + 1))
|
||
|
||
print(f"要翻译的列索引:{col_indices}")
|
||
|
||
count = 0
|
||
for row in range(2, ws.max_row + 1):
|
||
for col in col_indices:
|
||
cell = ws.cell(row=row, column=col)
|
||
value = cell.value
|
||
if value and isinstance(value, str) and detect_chinese(value):
|
||
pos = CellPosition(sheet=sn, row=row, col=col)
|
||
entry = TranslationEntry(position=pos, original=value)
|
||
entries.append(entry)
|
||
count += 1
|
||
|
||
print(f" ✓ 发现 {count} 个中文单元格")
|
||
|
||
wb.close()
|
||
return entries, sheet_headers
|
||
|
||
|
||
def translate_entries(
|
||
entries: list[TranslationEntry],
|
||
model_name: str | None = None,
|
||
api_key: str | None = None,
|
||
) -> list[TranslationEntry]:
|
||
if model_name is None:
|
||
model_name = get_model_name()
|
||
if not entries:
|
||
print("✓ 没有需要翻译的内容")
|
||
return entries
|
||
|
||
if not api_key:
|
||
api_key = get_api_key()
|
||
|
||
originals = [entry.original for entry in entries]
|
||
print(f"\n🌐 正在翻译 {len(originals)} 个中文内容...")
|
||
print(f" 模型:{model_name}")
|
||
|
||
genai.configure(api_key=api_key)
|
||
|
||
prompt = f"""你是一个专业的翻译助手。请将以下中文文本翻译成英文。
|
||
|
||
要求:
|
||
1. 保持专业术语准确
|
||
2. 人名使用拼音(如:张三 → Zhang San)
|
||
3. 公司名、产品名保持原名或标准英文名
|
||
4. 邮箱、数字、URL 等非中文内容保持不变
|
||
5. 只返回翻译结果,不要额外解释
|
||
|
||
输入文本(JSON 数组格式):
|
||
{json.dumps(originals, ensure_ascii=False)}
|
||
|
||
请以 JSON 数组格式返回翻译结果,保持相同顺序。"""
|
||
|
||
try:
|
||
model = genai.GenerativeModel(model_name)
|
||
response = model.generate_content(prompt)
|
||
|
||
result_text = response.text.strip()
|
||
|
||
if result_text.startswith("```json"):
|
||
result_text = result_text[7:]
|
||
if result_text.endswith("```"):
|
||
result_text = result_text[:-3]
|
||
result_text = result_text.strip()
|
||
|
||
translations = json.loads(result_text)
|
||
|
||
if len(translations) != len(originals):
|
||
print(f"⚠️ 警告:翻译返回 {len(translations)} 条,期望 {len(originals)} 条")
|
||
while len(translations) < len(originals):
|
||
translations.append(originals[len(translations)])
|
||
|
||
for i, entry in enumerate(entries):
|
||
entry.translated = translations[i] if i < len(translations) else entry.original
|
||
|
||
print(f"✅ 翻译完成!")
|
||
translated_count = sum(1 for e in entries if e.translated != e.original)
|
||
print(f" 成功翻译:{translated_count}/{len(entries)}")
|
||
|
||
except Exception as e:
|
||
print(f"❌ 翻译失败:{e}")
|
||
print(f" 保留原文")
|
||
for entry in entries:
|
||
entry.translated = entry.original
|
||
|
||
return entries
|
||
|
||
|
||
def apply_translations(
|
||
input_path: Path,
|
||
output_path: Path,
|
||
entries: list[TranslationEntry],
|
||
sheet_headers: dict[str, list[str]],
|
||
) -> Path:
|
||
print(f"\n💾 保存翻译结果到:{output_path}")
|
||
|
||
wb = load_workbook(input_path)
|
||
|
||
applied_count = 0
|
||
for entry in entries:
|
||
if entry.translated and entry.translated != entry.original:
|
||
ws = wb[entry.position.sheet]
|
||
ws.cell(row=entry.position.row, column=entry.position.col).value = entry.translated
|
||
applied_count += 1
|
||
|
||
wb.save(output_path)
|
||
wb.close()
|
||
|
||
print(f"✅ 完成!共更新 {applied_count} 个单元格")
|
||
|
||
print(f"\n📋 翻译预览(前 10 条):")
|
||
for i, entry in enumerate(entries[:10]):
|
||
if entry.translated != entry.original:
|
||
print(f" {entry.position.full_ref}: {entry.original} → {entry.translated}")
|
||
|
||
if len(entries) > 10:
|
||
print(f" ... 还有 {len(entries) - 10} 条")
|
||
|
||
return output_path
|
||
|
||
|
||
def translate_excel_file(
|
||
input_path: Path,
|
||
output_path: Path | None = None,
|
||
columns: list[str] | None = None,
|
||
sheet_name: str | None = None,
|
||
model_name: str | None = None,
|
||
api_key: str | None = None,
|
||
dry_run: bool = False,
|
||
) -> Path:
|
||
if model_name is None:
|
||
model_name = get_model_name()
|
||
if not output_path:
|
||
output_path = input_path.parent / f"{input_path.stem}_en{input_path.suffix}"
|
||
|
||
print("=" * 60)
|
||
print("Excel 中文→英文翻译工具")
|
||
print("=" * 60)
|
||
|
||
print("\n【阶段 1/3】提取中文内容...")
|
||
entries, sheet_headers = extract_chinese_content(
|
||
input_path=input_path,
|
||
columns=columns,
|
||
sheet_name=sheet_name,
|
||
)
|
||
|
||
if not entries:
|
||
print("\n✓ 没有发现中文内容,无需翻译")
|
||
return input_path
|
||
|
||
print(f"\n共发现 {len(entries)} 个中文单元格")
|
||
|
||
if dry_run:
|
||
print(f"\n[预览模式] 将翻译以下内容:")
|
||
for i, entry in enumerate(entries[:20]):
|
||
print(f" {entry.position.full_ref}: {entry.original}")
|
||
if len(entries) > 20:
|
||
print(f" ... 还有 {len(entries) - 20} 个")
|
||
return input_path
|
||
|
||
print("\n【阶段 2/3】批量翻译...")
|
||
entries = translate_entries(
|
||
entries=entries,
|
||
model_name=model_name,
|
||
api_key=api_key,
|
||
)
|
||
|
||
print("\n【阶段 3/3】应用翻译结果...")
|
||
output_path = apply_translations(
|
||
input_path=input_path,
|
||
output_path=output_path,
|
||
entries=entries,
|
||
sheet_headers=sheet_headers,
|
||
)
|
||
|
||
print("\n" + "=" * 60)
|
||
print("✅ 翻译完成!")
|
||
print("=" * 60)
|
||
|
||
return output_path
|
||
|
||
|
||
def parse_args() -> argparse.Namespace:
|
||
parser = argparse.ArgumentParser(
|
||
description="Excel 文件中文→英文翻译工具",
|
||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||
epilog="""
|
||
示例:
|
||
# 翻译整个文件
|
||
python translate_excel.py --file data.xlsx
|
||
|
||
# 预览模式
|
||
python translate_excel.py --file data.xlsx --dry-run
|
||
|
||
# 指定列翻译
|
||
python translate_excel.py --file data.xlsx --columns "姓名,地址"
|
||
|
||
# 指定 Sheet 翻译
|
||
python translate_excel.py --file data.xlsx --sheet "Sheet1"
|
||
|
||
环境变量:
|
||
GEMINI_API_KEY: Gemini API 密钥
|
||
GOOGLE_API_KEY: Google API 密钥(备选)
|
||
|
||
获取 API Key: https://aistudio.google.com/app/apikey
|
||
""",
|
||
)
|
||
|
||
parser.add_argument("--file", "-f", type=Path, required=True, help="输入 Excel 文件路径")
|
||
parser.add_argument("--output", "-o", type=Path, help="输出文件路径")
|
||
parser.add_argument("--columns", "-c", type=str, help="要翻译的列名(逗号分隔)")
|
||
parser.add_argument("--sheet", "-s", type=str, help="要翻译的 Sheet 名称")
|
||
parser.add_argument("--model", "-m", type=str, default="gemini-2.0-flash-lite", help="Gemini 模型名称")
|
||
parser.add_argument("--api-key", "-k", type=str, help="Gemini API 密钥")
|
||
parser.add_argument("--dry-run", action="store_true", help="预览模式")
|
||
|
||
return parser.parse_args()
|
||
|
||
|
||
def main() -> int:
|
||
args = parse_args()
|
||
|
||
if not args.file.exists():
|
||
print(f"❌ 文件不存在:{args.file}", file=sys.stderr)
|
||
return 1
|
||
|
||
try:
|
||
columns = None
|
||
if args.columns:
|
||
columns = [c.strip() for c in args.columns.split(",")]
|
||
|
||
translate_excel_file(
|
||
input_path=args.file,
|
||
output_path=args.output,
|
||
columns=columns,
|
||
sheet_name=args.sheet,
|
||
model_name=args.model,
|
||
api_key=args.api_key,
|
||
dry_run=args.dry_run,
|
||
)
|
||
|
||
return 0
|
||
|
||
except Exception as e:
|
||
print(f"❌ 错误:{e}", file=sys.stderr)
|
||
import traceback
|
||
traceback.print_exc()
|
||
return 1
|
||
|
||
|
||
if __name__ == "__main__":
|
||
sys.exit(main())
|