diff --git a/SKILL.md b/SKILL.md index e895188..df154b9 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,26 +1,34 @@ --- -name: my-skill -description: "TODO: describe what this skill does and when to use it." +name: gemini-image-translator +description: Translate text inside images with Gemini 2.5 Flash, then render the translated text back onto the original image. Use this skill when given a local image path and an optional target language. --- -# my-skill +# gemini-image-translator -TODO: one-line description. +Translate text in a local image with Gemini 2.5 Flash OCR + translation, then write the translated text back onto the image with `sharp`. -> Auth (CLIENT_KEY) is loaded automatically from `~/.openclaw/.env`. +Set `GEMINI_API_KEY` in the environment before running. ## Run ```bash -bun scripts/run.ts [args] [--dry-run] +bun scripts/run.ts translate <image-path> [target-language] [output-path] [--dry-run] ``` ## Commands | Command | Description | |---------|-------------| -| `run ` | TODO: describe | +| `translate <image-path> [target-language] [output-path]` | OCR the image at `<image-path>` (required), translate all detected text to the target language (default `zh-CN`), and save a new image with translated text overlaid. | ## Output -Returns JSON: `{ "status": "success" | "failed", "data": ... }` +Returns JSON with `status`, `command`, `dryRun`, and `data` (or `error` instead of `data` when `status` is `failed`). 
+ +On success, `data` includes: + +- `inputPath`: resolved source image path +- `outputPath`: generated output image path +- `targetLanguage`: requested target language +- `width` / `height`: image dimensions +- `regions`: detected OCR regions with translated text and pixel bounds diff --git a/bun.lock b/bun.lock new file mode 100644 index 0000000..a357c29 --- /dev/null +++ b/bun.lock @@ -0,0 +1,73 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "gemini-image-translator", + "dependencies": { + "sharp": "^0.34.3", + }, + }, + }, + "packages": { + "@emnapi/runtime": ["@emnapi/runtime@1.9.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-QN75eB0IH2ywSpRpNddCRfQIhmJYBCJ1x5Lb3IscKAL8bMnVAKnRg8dCoXbHzVLLH7P38N2Z3mtulB7W0J0FKw=="], + + "@img/colour": ["@img/colour@1.1.0", "", {}, "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ=="], + + "@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="], + + "@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="], + + "@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="], + + "@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="], + + "@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { 
"os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="], + + "@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="], + + "@img/sharp-libvips-linux-ppc64": ["@img/sharp-libvips-linux-ppc64@1.2.4", "", { "os": "linux", "cpu": "ppc64" }, "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA=="], + + "@img/sharp-libvips-linux-riscv64": ["@img/sharp-libvips-linux-riscv64@1.2.4", "", { "os": "linux", "cpu": "none" }, "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA=="], + + "@img/sharp-libvips-linux-s390x": ["@img/sharp-libvips-linux-s390x@1.2.4", "", { "os": "linux", "cpu": "s390x" }, "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ=="], + + "@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="], + + "@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="], + + "@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="], + + "@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="], + + "@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { 
"optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="], + + "@img/sharp-linux-ppc64": ["@img/sharp-linux-ppc64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-ppc64": "1.2.4" }, "os": "linux", "cpu": "ppc64" }, "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA=="], + + "@img/sharp-linux-riscv64": ["@img/sharp-linux-riscv64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-riscv64": "1.2.4" }, "os": "linux", "cpu": "none" }, "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw=="], + + "@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.2.4" }, "os": "linux", "cpu": "s390x" }, "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg=="], + + "@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="], + + "@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="], + + "@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="], + + "@img/sharp-wasm32": ["@img/sharp-wasm32@0.34.5", "", { "dependencies": { "@emnapi/runtime": "^1.7.0" }, "cpu": "none" }, 
"sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw=="], + + "@img/sharp-win32-arm64": ["@img/sharp-win32-arm64@0.34.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g=="], + + "@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.34.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg=="], + + "@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="], + + "detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="], + + "semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="], + + "sharp": ["sharp@0.34.5", "", { "dependencies": { "@img/colour": "^1.0.0", "detect-libc": "^2.1.2", "semver": "^7.7.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.34.5", "@img/sharp-darwin-x64": "0.34.5", "@img/sharp-libvips-darwin-arm64": "1.2.4", "@img/sharp-libvips-darwin-x64": "1.2.4", "@img/sharp-libvips-linux-arm": "1.2.4", "@img/sharp-libvips-linux-arm64": "1.2.4", "@img/sharp-libvips-linux-ppc64": "1.2.4", "@img/sharp-libvips-linux-riscv64": "1.2.4", "@img/sharp-libvips-linux-s390x": "1.2.4", "@img/sharp-libvips-linux-x64": "1.2.4", "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", "@img/sharp-libvips-linuxmusl-x64": "1.2.4", "@img/sharp-linux-arm": "0.34.5", "@img/sharp-linux-arm64": "0.34.5", "@img/sharp-linux-ppc64": "0.34.5", "@img/sharp-linux-riscv64": "0.34.5", "@img/sharp-linux-s390x": "0.34.5", "@img/sharp-linux-x64": "0.34.5", "@img/sharp-linuxmusl-arm64": "0.34.5", "@img/sharp-linuxmusl-x64": "0.34.5", "@img/sharp-wasm32": 
"0.34.5", "@img/sharp-win32-arm64": "0.34.5", "@img/sharp-win32-ia32": "0.34.5", "@img/sharp-win32-x64": "0.34.5" } }, "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg=="], + + "tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], + } +} diff --git a/package.json b/package.json index 5fd4e73..ba5da3d 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "my-skill", + "name": "gemini-image-translator", "version": "0.1.0", "type": "module", "scripts": { @@ -7,6 +7,6 @@ "build": "bun build scripts/run.ts --outfile dist/run.js --target bun" }, "dependencies": { - "@clawd/auth-runtime": "git+http://192.168.0.108:3030/agent-skills/auth-runtime.git" + "sharp": "^0.34.3" } } diff --git a/scripts/run.ts b/scripts/run.ts index f2dfab8..67e9638 100644 --- a/scripts/run.ts +++ b/scripts/run.ts @@ -1,15 +1,13 @@ #!/usr/bin/env bun -import type { Command } from '../src/index.ts'; -import { run } from '../src/index.ts'; - function printUsage(): void { console.error(`Usage: - bun scripts/run.ts [--api-base=] [args...] 
[--dry-run] + bun scripts/run.ts translate [target-language] [output-path] [--dry-run] Commands: - run + translate [target-language] [output-path] -Config: ~/.openclaw/.env (CLIENT_KEY, API_BASE) +Config: + GEMINI_API_KEY= `); } @@ -20,8 +18,6 @@ async function main(): Promise { for (const arg of process.argv.slice(2)) { if (arg === '--dry-run') { dryRun = true; - } else if (arg.startsWith('--api-base=')) { - process.env.API_BASE = arg.slice('--api-base='.length).trim(); } else if (arg === '-h' || arg === '--help') { printUsage(); process.exit(0); } else { @@ -31,7 +27,8 @@ async function main(): Promise { if (positionals.length < 1) { printUsage(); process.exit(1); } - const result = await run(positionals[0] as Command, positionals.slice(1), dryRun); + const { run } = await import('../src/index.ts'); + const result = await run(positionals[0] as 'translate', positionals.slice(1), dryRun); console.log(JSON.stringify(result, null, 2)); } diff --git a/src/index.ts b/src/index.ts index 086be5f..57b306e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,42 +1,348 @@ -import { - createEnvConfig, - requestApiWithAutoRefresh, - type ApiResponse, -} from '@clawd/auth-runtime'; +import { basename, dirname, extname, join, resolve } from 'node:path'; +import { readFile } from 'node:fs/promises'; +import sharp from 'sharp'; -export type Command = 'run'; // TODO: add your commands +const GEMINI_MODEL = 'gemini-2.5-flash'; +const GEMINI_API_URL = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent`; +const DEFAULT_TARGET_LANGUAGE = 'zh-CN'; + +export type Command = 'translate'; + +export interface TextRegion { + x: number; + y: number; + width: number; + height: number; + sourceText: string; + translatedText: string; +} export interface RunResult { status: 'success' | 'failed'; command: Command; dryRun: boolean; - data?: unknown; + data?: { + inputPath: string; + outputPath: string; + targetLanguage: string; + width: number; + height: number; 
+ regions: TextRegion[]; + }; error?: string; } +interface GeminiRegion { + x: number; + y: number; + width: number; + height: number; + sourceText: string; + translatedText: string; +} + +interface GeminiResponse { + regions?: GeminiRegion[]; +} + +function ensureApiKey(): string { + const apiKey = process.env.GEMINI_API_KEY?.trim(); + if (!apiKey) { + throw new Error('Missing GEMINI_API_KEY'); + } + return apiKey; +} + +function buildPrompt(targetLanguage: string): string { + return [ + 'You are an OCR and image translation engine.', + `Detect all visible text in the image and translate it into ${targetLanguage}.`, + 'Return strict JSON only. No markdown, no commentary.', + 'Use this exact schema:', + '{"regions":[{"x":0,"y":0,"width":0,"height":0,"sourceText":"","translatedText":""}]}', + 'Coordinates must be normalized floats between 0 and 1 relative to the full image.', + 'Each region should cover one coherent text block.', + 'If text is already in the target language, keep translatedText close to sourceText.', + 'If no text is found, return {"regions":[]}.', + ].join('\n'); +} + +function extractJson(text: string): GeminiResponse { + const trimmed = text.trim(); + const fenced = trimmed.match(/```json\s*([\s\S]*?)\s*```/i) ?? trimmed.match(/```\s*([\s\S]*?)\s*```/); + const jsonText = fenced ? 
fenced[1] : trimmed; + return JSON.parse(jsonText) as GeminiResponse; +} + +function clampUnit(value: number): number { + if (!Number.isFinite(value)) { + return 0; + } + if (value < 0) { + return 0; + } + if (value > 1) { + return 1; + } + return value; +} + +function sanitizeRegions(regions: GeminiRegion[], width: number, height: number): TextRegion[] { + return regions + .map((region) => { + const x = clampUnit(region.x); + const y = clampUnit(region.y); + const maxWidth = 1 - x; + const maxHeight = 1 - y; + const normalizedWidth = Math.min(clampUnit(region.width), maxWidth); + const normalizedHeight = Math.min(clampUnit(region.height), maxHeight); + const sourceText = String(region.sourceText ?? '').trim(); + const translatedText = String(region.translatedText ?? '').trim(); + + return { + x: Math.round(x * width), + y: Math.round(y * height), + width: Math.max(1, Math.round(normalizedWidth * width)), + height: Math.max(1, Math.round(normalizedHeight * height)), + sourceText, + translatedText, + }; + }) + .filter((region) => region.sourceText && region.translatedText); +} + +function escapeXml(text: string): string { + return text + .replaceAll('&', '&') + .replaceAll('<', '<') + .replaceAll('>', '>') + .replaceAll('"', '"') + .replaceAll("'", '''); +} + +function createWrappedText(text: string, maxCharsPerLine: number): string[] { + const collapsed = text.replace(/\s+/g, ' ').trim(); + if (!collapsed) { + return []; + } + + const useWordBoundaries = collapsed.includes(' '); + const words = useWordBoundaries ? collapsed.split(' ') : Array.from(collapsed); + const lines: string[] = []; + let current = ''; + + for (const word of words) { + const nextLine = useWordBoundaries + ? (current ? 
`${current} ${word}` : word) + : `${current}${word}`; + if (nextLine.length <= maxCharsPerLine || !current) { + current = nextLine; + } else { + lines.push(current); + current = word; + } + } + + if (current) { + lines.push(current); + } + + return lines; +} + +function buildRegionSvg(region: TextRegion): string { + const padding = Math.max(6, Math.round(Math.min(region.width, region.height) * 0.06)); + const innerWidth = Math.max(1, region.width - padding * 2); + const innerHeight = Math.max(1, region.height - padding * 2); + const fontSize = Math.max(14, Math.round(region.height * 0.22)); + const lineHeight = Math.max(fontSize * 1.2, 16); + const maxCharsPerLine = Math.max(6, Math.floor(innerWidth / Math.max(fontSize * 0.55, 1))); + const rawLines = createWrappedText(region.translatedText, maxCharsPerLine); + const maxLines = Math.max(1, Math.floor(innerHeight / lineHeight)); + const lines = rawLines.slice(0, maxLines); + + if (rawLines.length > maxLines && lines.length > 0) { + const last = lines[lines.length - 1]; + lines[lines.length - 1] = `${last.slice(0, Math.max(1, last.length - 1)).trimEnd()}…`; + } + + const textHeight = lines.length * lineHeight; + const startY = padding + Math.max(fontSize, (innerHeight - textHeight) / 2 + fontSize * 0.9); + const tspans = lines + .map((line, index) => { + const dy = index === 0 ? 
0 : lineHeight; + return `${escapeXml(line)}`; + }) + .join(''); + + return [ + ``, + ``, + '', + tspans, + '', + '', + ].join(''); +} + +async function callGemini(imageBuffer: Buffer, mimeType: string, targetLanguage: string): Promise { + const apiKey = ensureApiKey(); + const response = await fetch(`${GEMINI_API_URL}?key=${encodeURIComponent(apiKey)}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + contents: [ + { + parts: [ + { text: buildPrompt(targetLanguage) }, + { + inlineData: { + mimeType, + data: imageBuffer.toString('base64'), + }, + }, + ], + }, + ], + generationConfig: { + temperature: 0.1, + responseMimeType: 'application/json', + }, + }), + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error(`Gemini API request failed: HTTP ${response.status}: ${errorText}`); + } + + const payload = await response.json() as { + candidates?: Array<{ + content?: { + parts?: Array<{ text?: string }>; + }; + }>; + }; + + const text = payload.candidates?.[0]?.content?.parts?.map((part) => part.text ?? 
'').join('').trim(); + if (!text) { + throw new Error('Gemini API returned an empty response'); + } + + return extractJson(text); +} + +function inferMimeType(inputPath: string): string { + const extension = extname(inputPath).toLowerCase(); + if (extension === '.png') { + return 'image/png'; + } + if (extension === '.webp') { + return 'image/webp'; + } + if (extension === '.gif') { + return 'image/gif'; + } + return 'image/jpeg'; +} + +function deriveOutputPath(inputPath: string, targetLanguage: string): string { + const extension = extname(inputPath); + const name = basename(inputPath, extension); + const safeLanguage = targetLanguage.replace(/[^a-z0-9]+/gi, '-').replace(/^-+|-+$/g, '').toLowerCase() || 'translated'; + return join(dirname(inputPath), `${name}.${safeLanguage}.translated${extension || '.png'}`); +} + +async function translateImage( + inputPath: string, + targetLanguage: string, + outputPath?: string, +): Promise { + const resolvedInputPath = resolve(inputPath); + const resolvedOutputPath = resolve(outputPath ? outputPath : deriveOutputPath(resolvedInputPath, targetLanguage)); + const imageBuffer = await readFile(resolvedInputPath); + const baseImage = sharp(imageBuffer); + const metadata = await baseImage.metadata(); + const width = metadata.width; + const height = metadata.height; + + if (!width || !height) { + throw new Error('Unable to determine image dimensions'); + } + + const geminiResponse = await callGemini(imageBuffer, inferMimeType(resolvedInputPath), targetLanguage); + const regions = sanitizeRegions(geminiResponse.regions ?? 
[], width, height); + const overlays = regions.map((region) => ({ + input: Buffer.from(buildRegionSvg(region)), + left: region.x, + top: region.y, + })); + + await baseImage.composite(overlays).toFile(resolvedOutputPath); + + return { + inputPath: resolvedInputPath, + outputPath: resolvedOutputPath, + targetLanguage, + width, + height, + regions, + }; +} + export async function run( command: Command, args: string[], dryRun: boolean, ): Promise { - const config = createEnvConfig(); - const apiBase = (process.env.API_BASE ?? 'https://api-gw-test.yuanwei-lnc.com').replace(/\/$/, ''); - - if (command === 'run') { - const response: ApiResponse = await requestApiWithAutoRefresh( - 'POST', - `${apiBase}/your/endpoint`, - dryRun, - config, - JSON.stringify({ param: args[0] }), - ); - - if (response.status < 200 || response.status >= 300) { - return { status: 'failed', command, dryRun, error: `HTTP ${response.status}: ${response.body}` }; - } - - return { status: 'success', command, dryRun, data: JSON.parse(response.body) }; + if (command !== 'translate') { + return { status: 'failed', command, dryRun, error: `unknown command: ${command}` }; } - return { status: 'failed', command, dryRun, error: `unknown command: ${command}` }; + const [inputPath, targetLanguage = DEFAULT_TARGET_LANGUAGE, outputPath] = args; + if (!inputPath) { + return { + status: 'failed', + command, + dryRun, + error: 'missing image path: translate [target-language] [output-path]', + }; + } + + if (dryRun) { + return { + status: 'success', + command, + dryRun, + data: { + inputPath: resolve(inputPath), + outputPath: resolve(outputPath ? 
outputPath : deriveOutputPath(resolve(inputPath), targetLanguage)), + targetLanguage, + width: 0, + height: 0, + regions: [], + }, + }; + } + + try { + const data = await translateImage(inputPath, targetLanguage, outputPath); + return { status: 'success', command, dryRun, data }; + } catch (error) { + return { + status: 'failed', + command, + dryRun, + error: error instanceof Error ? error.message : String(error), + }; + } }