#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Excel 文件中文→英文翻译工具 使用 Google Gemini Flash Lite API 进行翻译 流程: 1. 提取:收集所有中文内容及其位置映射 2. 翻译:批量翻译所有中文内容 3. 应用:将翻译结果写入新 Excel 文件 """ from __future__ import annotations import argparse import json import re import sys from dataclasses import dataclass, field from pathlib import Path from typing import Any try: import google.generativeai as genai # type: ignore except ImportError as exc: raise RuntimeError( "缺少依赖 google-generativeai,请先安装:uv pip install google-generativeai" ) from exc try: from openpyxl import load_workbook, Workbook # type: ignore except ImportError as exc: raise RuntimeError( "缺少依赖 openpyxl,请先安装:uv pip install openpyxl" ) from exc try: import pandas as pd # type: ignore except ImportError as exc: raise RuntimeError( "缺少依赖 pandas,请先安装:uv pip install pandas" ) from exc @dataclass class CellPosition: """单元格位置信息""" sheet: str row: int col: int col_letter: str = field(init=False) def __post_init__(self): self.col_letter = self._index_to_letter(self.col) @staticmethod def _index_to_letter(col: int) -> str: result = "" while col > 0: col -= 1 result = chr(65 + (col % 26)) + result col //= 26 return result @property def cell_ref(self) -> str: return f"{self.col_letter}{self.row}" @property def full_ref(self) -> str: return f"{self.sheet}!{self.cell_ref}" @dataclass class TranslationEntry: """翻译条目""" position: CellPosition original: str translated: str = "" def detect_chinese(text: str) -> bool: if not text or not isinstance(text, str): return False return bool(re.search(r"[\u4e00-\u9fff]", text)) def get_api_key() -> str: import os api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY") if api_key: return api_key raise ValueError( "未找到 Gemini API 密钥。请设置环境变量 GEMINI_API_KEY 或 GOOGLE_API_KEY。" "获取 API Key: https://aistudio.google.com/app/apikey" ) def extract_chinese_content( input_path: Path, columns: list[str] | None = None, sheet_name: str | None = None, ) -> tuple[list[TranslationEntry], dict[str, list[str]]]: wb = load_workbook(input_path) entries: list[TranslationEntry] = [] sheet_headers: dict[str, list[str]] = {} if sheet_name: sheet_names = [sheet_name] if sheet_name in wb.sheetnames else [] if not sheet_names: raise ValueError(f"找不到 Sheet: {sheet_name}") else: sheet_names = wb.sheetnames print(f"📄 文件:{input_path}") print(f"📊 Sheet 列表:{sheet_names}") for sn in sheet_names: ws = wb[sn] print(f"\n处理 Sheet: {sn}") headers = [] for col in range(1, ws.max_column + 1): cell_value = ws.cell(row=1, column=col).value headers.append(str(cell_value) if cell_value else f"列{col}") sheet_headers[sn] = headers print(f"表头:{headers}") if columns: col_indices = [] for col_name in columns: if col_name in headers: col_indices.append(headers.index(col_name) + 1) else: print(f"⚠️ 警告:列 '{col_name}' 不存在") if not col_indices: col_indices = list(range(1, ws.max_column + 1)) else: col_indices = list(range(1, ws.max_column + 1)) print(f"要翻译的列索引:{col_indices}") count = 0 for row in range(2, ws.max_row + 1): for col in col_indices: cell = ws.cell(row=row, column=col) value = cell.value if value and isinstance(value, str) and detect_chinese(value): pos = CellPosition(sheet=sn, row=row, col=col) entry = TranslationEntry(position=pos, original=value) entries.append(entry) count += 1 print(f" ✓ 发现 {count} 个中文单元格") wb.close() return entries, sheet_headers def translate_entries( entries: list[TranslationEntry], model_name: str = "gemini-2.0-flash-lite", api_key: str | None = None, ) -> list[TranslationEntry]: if not entries: print("✓ 没有需要翻译的内容") return entries if not api_key: api_key = get_api_key() originals = [entry.original for entry in entries] print(f"\n🌐 正在翻译 {len(originals)} 个中文内容...") print(f" 模型:{model_name}") genai.configure(api_key=api_key) prompt = f"""你是一个专业的翻译助手。请将以下中文文本翻译成英文。 要求: 1. 保持专业术语准确 2. 人名使用拼音(如:张三 → Zhang San) 3. 公司名、产品名保持原名或标准英文名 4. 邮箱、数字、URL 等非中文内容保持不变 5. 只返回翻译结果,不要额外解释 输入文本(JSON 数组格式): {json.dumps(originals, ensure_ascii=False)} 请以 JSON 数组格式返回翻译结果,保持相同顺序。""" try: model = genai.GenerativeModel(model_name) response = model.generate_content(prompt) result_text = response.text.strip() if result_text.startswith("```json"): result_text = result_text[7:] if result_text.endswith("```"): result_text = result_text[:-3] result_text = result_text.strip() translations = json.loads(result_text) if len(translations) != len(originals): print(f"⚠️ 警告:翻译返回 {len(translations)} 条,期望 {len(originals)} 条") while len(translations) < len(originals): translations.append(originals[len(translations)]) for i, entry in enumerate(entries): entry.translated = translations[i] if i < len(translations) else entry.original print(f"✅ 翻译完成!") translated_count = sum(1 for e in entries if e.translated != e.original) print(f" 成功翻译:{translated_count}/{len(entries)}") except Exception as e: print(f"❌ 翻译失败:{e}") print(f" 保留原文") for entry in entries: entry.translated = entry.original return entries def apply_translations( input_path: Path, output_path: Path, entries: list[TranslationEntry], sheet_headers: dict[str, list[str]], ) -> Path: print(f"\n💾 保存翻译结果到:{output_path}") wb = load_workbook(input_path) applied_count = 0 for entry in entries: if entry.translated and entry.translated != entry.original: ws = wb[entry.position.sheet] ws.cell(row=entry.position.row, column=entry.position.col).value = entry.translated applied_count += 1 wb.save(output_path) wb.close() print(f"✅ 完成!共更新 {applied_count} 个单元格") print(f"\n📋 翻译预览(前 10 条):") for i, entry in enumerate(entries[:10]): if entry.translated != entry.original: print(f" {entry.position.full_ref}: {entry.original} → {entry.translated}") if len(entries) > 10: print(f" ... 还有 {len(entries) - 10} 条") return output_path def translate_excel_file( input_path: Path, output_path: Path | None = None, columns: list[str] | None = None, sheet_name: str | None = None, model_name: str = "gemini-2.0-flash-lite", api_key: str | None = None, dry_run: bool = False, ) -> Path: if not output_path: output_path = input_path.parent / f"{input_path.stem}_en{input_path.suffix}" print("=" * 60) print("Excel 中文→英文翻译工具") print("=" * 60) print("\n【阶段 1/3】提取中文内容...") entries, sheet_headers = extract_chinese_content( input_path=input_path, columns=columns, sheet_name=sheet_name, ) if not entries: print("\n✓ 没有发现中文内容,无需翻译") return input_path print(f"\n共发现 {len(entries)} 个中文单元格") if dry_run: print(f"\n[预览模式] 将翻译以下内容:") for i, entry in enumerate(entries[:20]): print(f" {entry.position.full_ref}: {entry.original}") if len(entries) > 20: print(f" ... 还有 {len(entries) - 20} 个") return input_path print("\n【阶段 2/3】批量翻译...") entries = translate_entries( entries=entries, model_name=model_name, api_key=api_key, ) print("\n【阶段 3/3】应用翻译结果...") output_path = apply_translations( input_path=input_path, output_path=output_path, entries=entries, sheet_headers=sheet_headers, ) print("\n" + "=" * 60) print("✅ 翻译完成!") print("=" * 60) return output_path def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Excel 文件中文→英文翻译工具", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" 示例: # 翻译整个文件 python translate_excel.py --file data.xlsx # 预览模式 python translate_excel.py --file data.xlsx --dry-run # 指定列翻译 python translate_excel.py --file data.xlsx --columns "姓名,地址" # 指定 Sheet 翻译 python translate_excel.py --file data.xlsx --sheet "Sheet1" 环境变量: GEMINI_API_KEY: Gemini API 密钥 GOOGLE_API_KEY: Google API 密钥(备选) 获取 API Key: https://aistudio.google.com/app/apikey """, ) parser.add_argument("--file", "-f", type=Path, required=True, help="输入 Excel 文件路径") parser.add_argument("--output", "-o", type=Path, help="输出文件路径") parser.add_argument("--columns", "-c", type=str, help="要翻译的列名(逗号分隔)") parser.add_argument("--sheet", "-s", type=str, help="要翻译的 Sheet 名称") parser.add_argument("--model", "-m", type=str, default="gemini-2.0-flash-lite", help="Gemini 模型名称") parser.add_argument("--api-key", "-k", type=str, help="Gemini API 密钥") parser.add_argument("--dry-run", action="store_true", help="预览模式") return parser.parse_args() def main() -> int: args = parse_args() if not args.file.exists(): print(f"❌ 文件不存在:{args.file}", file=sys.stderr) return 1 try: columns = None if args.columns: columns = [c.strip() for c in args.columns.split(",")] translate_excel_file( input_path=args.file, output_path=args.output, columns=columns, sheet_name=args.sheet, model_name=args.model, api_key=args.api_key, dry_run=args.dry_run, ) return 0 except Exception as e: print(f"❌ 错误:{e}", file=sys.stderr) import traceback traceback.print_exc() return 1 if __name__ == "__main__": sys.exit(main())