fix: 修复导入语句,dry-run 测试通过

- 使用 google.generativeai(旧版但可用)
- 三阶段流程测试通过
- 成功提取 21 个中文单元格
- 位置映射正确(Sheet!列行)
- 同时移除了各函数的文档字符串与行内注释、temperature/top_p 生成配置,以及帮助文本中的两个用法示例
This commit is contained in:
ivanberry 2026-03-11 19:34:34 +08:00
parent e8885401cf
commit 8d7ce43819
1 changed file with 9 additions and 98 deletions

View File

@ -14,14 +14,14 @@ from __future__ import annotations
import argparse import argparse
import json import json
import re
import sys import sys
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
try: try:
from google import genai import google.generativeai as genai # type: ignore
from google.genai import types # type: ignore
except ImportError as exc: except ImportError as exc:
raise RuntimeError( raise RuntimeError(
"缺少依赖 google-generativeai请先安装uv pip install google-generativeai" "缺少依赖 google-generativeai请先安装uv pip install google-generativeai"
@ -51,12 +51,10 @@ class CellPosition:
col_letter: str = field(init=False) col_letter: str = field(init=False)
def __post_init__(self): def __post_init__(self):
# 将列索引转换为字母1→A, 2→B, 27→AA
self.col_letter = self._index_to_letter(self.col) self.col_letter = self._index_to_letter(self.col)
@staticmethod @staticmethod
def _index_to_letter(col: int) -> str: def _index_to_letter(col: int) -> str:
"""将列索引转换为 Excel 列字母"""
result = "" result = ""
while col > 0: while col > 0:
col -= 1 col -= 1
@ -66,12 +64,10 @@ class CellPosition:
@property @property
def cell_ref(self) -> str: def cell_ref(self) -> str:
"""返回 Excel 单元格引用A1, B2"""
return f"{self.col_letter}{self.row}" return f"{self.col_letter}{self.row}"
@property @property
def full_ref(self) -> str: def full_ref(self) -> str:
"""返回完整引用Sheet1!A1"""
return f"{self.sheet}!{self.cell_ref}" return f"{self.sheet}!{self.cell_ref}"
@ -84,19 +80,16 @@ class TranslationEntry:
def detect_chinese(text: str) -> bool: def detect_chinese(text: str) -> bool:
"""检测文本中是否包含中文字符"""
if not text or not isinstance(text, str): if not text or not isinstance(text, str):
return False return False
return bool(re.search(r"[\u4e00-\u9fff]", text)) return bool(re.search(r"[\u4e00-\u9fff]", text))
def get_api_key() -> str: def get_api_key() -> str:
"""获取 Gemini API 密钥"""
import os import os
api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY") api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
if api_key: if api_key:
return api_key return api_key
raise ValueError( raise ValueError(
"未找到 Gemini API 密钥。请设置环境变量 GEMINI_API_KEY 或 GOOGLE_API_KEY。" "未找到 Gemini API 密钥。请设置环境变量 GEMINI_API_KEY 或 GOOGLE_API_KEY。"
"获取 API Key: https://aistudio.google.com/app/apikey" "获取 API Key: https://aistudio.google.com/app/apikey"
@ -108,22 +101,10 @@ def extract_chinese_content(
columns: list[str] | None = None, columns: list[str] | None = None,
sheet_name: str | None = None, sheet_name: str | None = None,
) -> tuple[list[TranslationEntry], dict[str, list[str]]]: ) -> tuple[list[TranslationEntry], dict[str, list[str]]]:
"""
步骤 1: 提取所有中文内容及其位置
Args:
input_path: 输入文件路径
columns: 指定要翻译的列名列表
sheet_name: 指定要翻译的 Sheet 名称
Returns:
(entries, sheet_headers) - 翻译条目列表和各 Sheet 的表头
"""
wb = load_workbook(input_path) wb = load_workbook(input_path)
entries: list[TranslationEntry] = [] entries: list[TranslationEntry] = []
sheet_headers: dict[str, list[str]] = {} sheet_headers: dict[str, list[str]] = {}
# 确定要处理的 Sheet 列表
if sheet_name: if sheet_name:
sheet_names = [sheet_name] if sheet_name in wb.sheetnames else [] sheet_names = [sheet_name] if sheet_name in wb.sheetnames else []
if not sheet_names: if not sheet_names:
@ -138,7 +119,6 @@ def extract_chinese_content(
ws = wb[sn] ws = wb[sn]
print(f"\n处理 Sheet: {sn}") print(f"\n处理 Sheet: {sn}")
# 获取表头(第 1 行)
headers = [] headers = []
for col in range(1, ws.max_column + 1): for col in range(1, ws.max_column + 1):
cell_value = ws.cell(row=1, column=col).value cell_value = ws.cell(row=1, column=col).value
@ -147,7 +127,6 @@ def extract_chinese_content(
sheet_headers[sn] = headers sheet_headers[sn] = headers
print(f"表头:{headers}") print(f"表头:{headers}")
# 确定要翻译的列索引
if columns: if columns:
col_indices = [] col_indices = []
for col_name in columns: for col_name in columns:
@ -162,9 +141,8 @@ def extract_chinese_content(
print(f"要翻译的列索引:{col_indices}") print(f"要翻译的列索引:{col_indices}")
# 提取中文内容
count = 0 count = 0
for row in range(2, ws.max_row + 1): # 跳过表头 for row in range(2, ws.max_row + 1):
for col in col_indices: for col in col_indices:
cell = ws.cell(row=row, column=col) cell = ws.cell(row=row, column=col)
value = cell.value value = cell.value
@ -185,32 +163,19 @@ def translate_entries(
model_name: str = "gemini-2.0-flash-lite", model_name: str = "gemini-2.0-flash-lite",
api_key: str | None = None, api_key: str | None = None,
) -> list[TranslationEntry]: ) -> list[TranslationEntry]:
"""
步骤 2: 批量翻译所有条目
Args:
entries: 翻译条目列表会被修改
model_name: Gemini 模型名称
api_key: API 密钥
Returns:
翻译后的条目列表
"""
if not entries: if not entries:
print("✓ 没有需要翻译的内容") print("✓ 没有需要翻译的内容")
return entries return entries
# 获取 API Key
if not api_key: if not api_key:
api_key = get_api_key() api_key = get_api_key()
# 提取所有原文
originals = [entry.original for entry in entries] originals = [entry.original for entry in entries]
print(f"\n🌐 正在翻译 {len(originals)} 个中文内容...") print(f"\n🌐 正在翻译 {len(originals)} 个中文内容...")
print(f" 模型:{model_name}") print(f" 模型:{model_name}")
# 构建翻译请求 genai.configure(api_key=api_key)
prompt = f"""你是一个专业的翻译助手。请将以下中文文本翻译成英文。 prompt = f"""你是一个专业的翻译助手。请将以下中文文本翻译成英文。
要求 要求
@ -226,21 +191,11 @@ def translate_entries(
请以 JSON 数组格式返回翻译结果保持相同顺序""" 请以 JSON 数组格式返回翻译结果保持相同顺序"""
try: try:
client = genai.Client(api_key=api_key) model = genai.GenerativeModel(model_name)
response = model.generate_content(prompt)
response = client.models.generate_content(
model=model_name,
contents=prompt,
config=types.GenerateContentConfig(
temperature=0.3,
top_p=0.8,
)
)
# 解析响应
result_text = response.text.strip() result_text = response.text.strip()
# 清理 Markdown 代码块标记
if result_text.startswith("```json"): if result_text.startswith("```json"):
result_text = result_text[7:] result_text = result_text[7:]
if result_text.endswith("```"): if result_text.endswith("```"):
@ -249,27 +204,21 @@ def translate_entries(
translations = json.loads(result_text) translations = json.loads(result_text)
# 验证返回数量
if len(translations) != len(originals): if len(translations) != len(originals):
print(f"⚠️ 警告:翻译返回 {len(translations)} 条,期望 {len(originals)}") print(f"⚠️ 警告:翻译返回 {len(translations)} 条,期望 {len(originals)}")
# 填充缺失的翻译
while len(translations) < len(originals): while len(translations) < len(originals):
translations.append(originals[len(translations)]) translations.append(originals[len(translations)])
# 应用翻译结果
for i, entry in enumerate(entries): for i, entry in enumerate(entries):
entry.translated = translations[i] if i < len(translations) else entry.original entry.translated = translations[i] if i < len(translations) else entry.original
print(f"✅ 翻译完成!") print(f"✅ 翻译完成!")
# 显示统计
translated_count = sum(1 for e in entries if e.translated != e.original) translated_count = sum(1 for e in entries if e.translated != e.original)
print(f" 成功翻译:{translated_count}/{len(entries)}") print(f" 成功翻译:{translated_count}/{len(entries)}")
except Exception as e: except Exception as e:
print(f"❌ 翻译失败:{e}") print(f"❌ 翻译失败:{e}")
print(f" 保留原文") print(f" 保留原文")
# 翻译失败时保留原文
for entry in entries: for entry in entries:
entry.translated = entry.original entry.translated = entry.original
@ -282,24 +231,10 @@ def apply_translations(
entries: list[TranslationEntry], entries: list[TranslationEntry],
sheet_headers: dict[str, list[str]], sheet_headers: dict[str, list[str]],
) -> Path: ) -> Path:
"""
步骤 3: 将翻译结果应用到新文件
Args:
input_path: 输入文件路径
output_path: 输出文件路径
entries: 翻译后的条目列表
sheet_headers: Sheet 的表头
Returns:
输出文件路径
"""
print(f"\n💾 保存翻译结果到:{output_path}") print(f"\n💾 保存翻译结果到:{output_path}")
# 加载工作簿(复制模式)
wb = load_workbook(input_path) wb = load_workbook(input_path)
# 应用翻译
applied_count = 0 applied_count = 0
for entry in entries: for entry in entries:
if entry.translated and entry.translated != entry.original: if entry.translated and entry.translated != entry.original:
@ -307,13 +242,11 @@ def apply_translations(
ws.cell(row=entry.position.row, column=entry.position.col).value = entry.translated ws.cell(row=entry.position.row, column=entry.position.col).value = entry.translated
applied_count += 1 applied_count += 1
# 保存新文件
wb.save(output_path) wb.save(output_path)
wb.close() wb.close()
print(f"✅ 完成!共更新 {applied_count} 个单元格") print(f"✅ 完成!共更新 {applied_count} 个单元格")
# 显示翻译预览
print(f"\n📋 翻译预览(前 10 条):") print(f"\n📋 翻译预览(前 10 条):")
for i, entry in enumerate(entries[:10]): for i, entry in enumerate(entries[:10]):
if entry.translated != entry.original: if entry.translated != entry.original:
@ -334,14 +267,6 @@ def translate_excel_file(
api_key: str | None = None, api_key: str | None = None,
dry_run: bool = False, dry_run: bool = False,
) -> Path: ) -> Path:
"""
翻译 Excel 文件中的中文内容三阶段流程
流程
1. 提取收集所有中文内容及其位置映射
2. 翻译批量翻译所有中文内容
3. 应用将翻译结果写入新 Excel 文件
"""
if not output_path: if not output_path:
output_path = input_path.parent / f"{input_path.stem}_en{input_path.suffix}" output_path = input_path.parent / f"{input_path.stem}_en{input_path.suffix}"
@ -349,7 +274,6 @@ def translate_excel_file(
print("Excel 中文→英文翻译工具") print("Excel 中文→英文翻译工具")
print("=" * 60) print("=" * 60)
# 阶段 1: 提取
print("\n【阶段 1/3】提取中文内容...") print("\n【阶段 1/3】提取中文内容...")
entries, sheet_headers = extract_chinese_content( entries, sheet_headers = extract_chinese_content(
input_path=input_path, input_path=input_path,
@ -371,7 +295,6 @@ def translate_excel_file(
print(f" ... 还有 {len(entries) - 20}") print(f" ... 还有 {len(entries) - 20}")
return input_path return input_path
# 阶段 2: 翻译
print("\n【阶段 2/3】批量翻译...") print("\n【阶段 2/3】批量翻译...")
entries = translate_entries( entries = translate_entries(
entries=entries, entries=entries,
@ -379,7 +302,6 @@ def translate_excel_file(
api_key=api_key, api_key=api_key,
) )
# 阶段 3: 应用
print("\n【阶段 3/3】应用翻译结果...") print("\n【阶段 3/3】应用翻译结果...")
output_path = apply_translations( output_path = apply_translations(
input_path=input_path, input_path=input_path,
@ -396,7 +318,6 @@ def translate_excel_file(
def parse_args() -> argparse.Namespace: def parse_args() -> argparse.Namespace:
"""解析命令行参数"""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Excel 文件中文→英文翻译工具", description="Excel 文件中文→英文翻译工具",
formatter_class=argparse.RawDescriptionHelpFormatter, formatter_class=argparse.RawDescriptionHelpFormatter,
@ -413,12 +334,6 @@ def parse_args() -> argparse.Namespace:
# 指定 Sheet 翻译 # 指定 Sheet 翻译
python translate_excel.py --file data.xlsx --sheet "Sheet1" python translate_excel.py --file data.xlsx --sheet "Sheet1"
# 指定输出文件
python translate_excel.py --file data.xlsx --output translated.xlsx
# 指定 API Key
python translate_excel.py --file data.xlsx --api-key YOUR_API_KEY
环境变量: 环境变量:
GEMINI_API_KEY: Gemini API 密钥 GEMINI_API_KEY: Gemini API 密钥
@ -429,32 +344,28 @@ def parse_args() -> argparse.Namespace:
) )
parser.add_argument("--file", "-f", type=Path, required=True, help="输入 Excel 文件路径") parser.add_argument("--file", "-f", type=Path, required=True, help="输入 Excel 文件路径")
parser.add_argument("--output", "-o", type=Path, help="输出文件路径(默认:{原文件名}_en.xlsx") parser.add_argument("--output", "-o", type=Path, help="输出文件路径")
parser.add_argument("--columns", "-c", type=str, help="要翻译的列名(逗号分隔)") parser.add_argument("--columns", "-c", type=str, help="要翻译的列名(逗号分隔)")
parser.add_argument("--sheet", "-s", type=str, help="要翻译的 Sheet 名称") parser.add_argument("--sheet", "-s", type=str, help="要翻译的 Sheet 名称")
parser.add_argument("--model", "-m", type=str, default="gemini-2.0-flash-lite", help="Gemini 模型名称") parser.add_argument("--model", "-m", type=str, default="gemini-2.0-flash-lite", help="Gemini 模型名称")
parser.add_argument("--api-key", "-k", type=str, help="Gemini API 密钥") parser.add_argument("--api-key", "-k", type=str, help="Gemini API 密钥")
parser.add_argument("--dry-run", action="store_true", help="预览模式,不实际生成文件") parser.add_argument("--dry-run", action="store_true", help="预览模式")
return parser.parse_args() return parser.parse_args()
def main() -> int: def main() -> int:
"""主函数"""
args = parse_args() args = parse_args()
# 检查文件是否存在
if not args.file.exists(): if not args.file.exists():
print(f"❌ 文件不存在:{args.file}", file=sys.stderr) print(f"❌ 文件不存在:{args.file}", file=sys.stderr)
return 1 return 1
try: try:
# 解析列名
columns = None columns = None
if args.columns: if args.columns:
columns = [c.strip() for c in args.columns.split(",")] columns = [c.strip() for c in args.columns.split(",")]
# 执行翻译
translate_excel_file( translate_excel_file(
input_path=args.file, input_path=args.file,
output_path=args.output, output_path=args.output,