feat: initial skill implementation

This commit is contained in:
ivanberry 2026-03-14 11:31:25 +08:00
parent 2c2c24a310
commit 1ce61a50b8
5 changed files with 428 additions and 44 deletions

View File

@ -1,26 +1,34 @@
---
name: my-skill
description: "TODO: describe what this skill does and when to use it."
name: gemini-image-translator
description: Translate text inside images with Gemini 2.5 Flash, then render the translated text back onto the original image. Use this skill when given a local image path and an optional target language.
---
# my-skill
# gemini-image-translator
TODO: one-line description.
Translate text in a local image with Gemini 2.5 Flash OCR + translation, then write the translated text back onto the image with `sharp`.
> Auth (CLIENT_KEY) is loaded automatically from `~/.openclaw/.env`.
Set `GEMINI_API_KEY` in the environment before running.
## Run
```bash
bun scripts/run.ts <command> [args] [--dry-run]
bun scripts/run.ts translate <image-path> [target-language] [output-path] [--dry-run]
```
## Commands
| Command | Description |
|---------|-------------|
| `run <arg>` | TODO: describe |
| `translate <image-path> [target-language] [output-path]` | OCR the image, translate all detected text to the target language (default `zh-CN`), and save a new image with translated text overlaid. |
## Output
Returns JSON: `{ "status": "success" | "failed", "data": ... }`
Returns JSON with `status`, `command`, `dryRun`, and either `data` (on success) or `error` (a message string, on failure).
On success, `data` includes:
- `inputPath`: resolved source image path
- `outputPath`: generated output image path
- `targetLanguage`: requested target language
- `width` / `height`: image dimensions in pixels (reported as `0` with `--dry-run`, since the image is not read)
- `regions`: detected OCR regions with translated text and pixel bounds

73
bun.lock Normal file
View File

@ -0,0 +1,73 @@
{
"lockfileVersion": 1,
"configVersion": 1,
"workspaces": {
"": {
"name": "gemini-image-translator",
"dependencies": {
"sharp": "^0.34.3",
},
},
},
"packages": {
"@emnapi/runtime": ["@emnapi/runtime@1.9.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-QN75eB0IH2ywSpRpNddCRfQIhmJYBCJ1x5Lb3IscKAL8bMnVAKnRg8dCoXbHzVLLH7P38N2Z3mtulB7W0J0FKw=="],
"@img/colour": ["@img/colour@1.1.0", "", {}, "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ=="],
"@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="],
"@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="],
"@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="],
"@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="],
"@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { "os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="],
"@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="],
"@img/sharp-libvips-linux-ppc64": ["@img/sharp-libvips-linux-ppc64@1.2.4", "", { "os": "linux", "cpu": "ppc64" }, "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA=="],
"@img/sharp-libvips-linux-riscv64": ["@img/sharp-libvips-linux-riscv64@1.2.4", "", { "os": "linux", "cpu": "none" }, "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA=="],
"@img/sharp-libvips-linux-s390x": ["@img/sharp-libvips-linux-s390x@1.2.4", "", { "os": "linux", "cpu": "s390x" }, "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ=="],
"@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="],
"@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="],
"@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="],
"@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="],
"@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="],
"@img/sharp-linux-ppc64": ["@img/sharp-linux-ppc64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-ppc64": "1.2.4" }, "os": "linux", "cpu": "ppc64" }, "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA=="],
"@img/sharp-linux-riscv64": ["@img/sharp-linux-riscv64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-riscv64": "1.2.4" }, "os": "linux", "cpu": "none" }, "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw=="],
"@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.2.4" }, "os": "linux", "cpu": "s390x" }, "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg=="],
"@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="],
"@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="],
"@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="],
"@img/sharp-wasm32": ["@img/sharp-wasm32@0.34.5", "", { "dependencies": { "@emnapi/runtime": "^1.7.0" }, "cpu": "none" }, "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw=="],
"@img/sharp-win32-arm64": ["@img/sharp-win32-arm64@0.34.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g=="],
"@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.34.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg=="],
"@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="],
"detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="],
"semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="],
"sharp": ["sharp@0.34.5", "", { "dependencies": { "@img/colour": "^1.0.0", "detect-libc": "^2.1.2", "semver": "^7.7.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.34.5", "@img/sharp-darwin-x64": "0.34.5", "@img/sharp-libvips-darwin-arm64": "1.2.4", "@img/sharp-libvips-darwin-x64": "1.2.4", "@img/sharp-libvips-linux-arm": "1.2.4", "@img/sharp-libvips-linux-arm64": "1.2.4", "@img/sharp-libvips-linux-ppc64": "1.2.4", "@img/sharp-libvips-linux-riscv64": "1.2.4", "@img/sharp-libvips-linux-s390x": "1.2.4", "@img/sharp-libvips-linux-x64": "1.2.4", "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", "@img/sharp-libvips-linuxmusl-x64": "1.2.4", "@img/sharp-linux-arm": "0.34.5", "@img/sharp-linux-arm64": "0.34.5", "@img/sharp-linux-ppc64": "0.34.5", "@img/sharp-linux-riscv64": "0.34.5", "@img/sharp-linux-s390x": "0.34.5", "@img/sharp-linux-x64": "0.34.5", "@img/sharp-linuxmusl-arm64": "0.34.5", "@img/sharp-linuxmusl-x64": "0.34.5", "@img/sharp-wasm32": "0.34.5", "@img/sharp-win32-arm64": "0.34.5", "@img/sharp-win32-ia32": "0.34.5", "@img/sharp-win32-x64": "0.34.5" } }, "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg=="],
"tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
}
}

View File

@ -1,5 +1,5 @@
{
"name": "my-skill",
"name": "gemini-image-translator",
"version": "0.1.0",
"type": "module",
"scripts": {
@ -7,6 +7,6 @@
"build": "bun build scripts/run.ts --outfile dist/run.js --target bun"
},
"dependencies": {
"@clawd/auth-runtime": "git+http://192.168.0.108:3030/agent-skills/auth-runtime.git"
"sharp": "^0.34.3"
}
}

View File

@ -1,15 +1,13 @@
#!/usr/bin/env bun
import type { Command } from '../src/index.ts';
import { run } from '../src/index.ts';
function printUsage(): void {
console.error(`Usage:
bun scripts/run.ts [--api-base=<url>] <command> [args...] [--dry-run]
bun scripts/run.ts translate <image-path> [target-language] [output-path] [--dry-run]
Commands:
run <arg>
translate <image-path> [target-language] [output-path]
Config: ~/.openclaw/.env (CLIENT_KEY, API_BASE)
Config:
GEMINI_API_KEY=<your-api-key>
`);
}
@ -20,8 +18,6 @@ async function main(): Promise<void> {
for (const arg of process.argv.slice(2)) {
if (arg === '--dry-run') {
dryRun = true;
} else if (arg.startsWith('--api-base=')) {
process.env.API_BASE = arg.slice('--api-base='.length).trim();
} else if (arg === '-h' || arg === '--help') {
printUsage(); process.exit(0);
} else {
@ -31,7 +27,8 @@ async function main(): Promise<void> {
if (positionals.length < 1) { printUsage(); process.exit(1); }
const result = await run(positionals[0] as Command, positionals.slice(1), dryRun);
const { run } = await import('../src/index.ts');
const result = await run(positionals[0] as 'translate', positionals.slice(1), dryRun);
console.log(JSON.stringify(result, null, 2));
}

View File

@ -1,42 +1,348 @@
import {
createEnvConfig,
requestApiWithAutoRefresh,
type ApiResponse,
} from '@clawd/auth-runtime';
import { basename, dirname, extname, join, resolve } from 'node:path';
import { readFile } from 'node:fs/promises';
import sharp from 'sharp';
export type Command = 'run'; // TODO: add your commands
/** Gemini model identifier interpolated into the request URL. */
const GEMINI_MODEL = 'gemini-2.5-flash';
/** `generateContent` endpoint (v1beta API) for {@link GEMINI_MODEL}. */
const GEMINI_API_URL = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent`;
/** Target language used when the caller does not supply one. */
const DEFAULT_TARGET_LANGUAGE = 'zh-CN';
export type Command = 'translate';
/**
 * One detected text block, as reported to callers and used for rendering.
 *
 * Instances produced by `sanitizeRegions` carry pixel coordinates (normalized
 * model output multiplied by the image dimensions and rounded).
 */
export interface TextRegion {
  /** Left edge of the region. */
  x: number;
  /** Top edge of the region. */
  y: number;
  /** Horizontal extent of the region. */
  width: number;
  /** Vertical extent of the region. */
  height: number;
  /** Text as detected in the original image. */
  sourceText: string;
  /** Translation of `sourceText` that gets drawn over the region. */
  translatedText: string;
}
export interface RunResult {
status: 'success' | 'failed';
command: Command;
dryRun: boolean;
data?: unknown;
data?: {
inputPath: string;
outputPath: string;
targetLanguage: string;
width: number;
height: number;
regions: TextRegion[];
};
error?: string;
}
/**
 * One region as it appears in the model's JSON reply.
 *
 * The prompt asks for coordinates as normalized floats in [0, 1] relative to
 * the full image; the reply is model output, so consumers should not assume
 * the values actually respect that range.
 */
interface GeminiRegion {
  /** Left edge (requested as a 0..1 fraction of the image width). */
  x: number;
  /** Top edge (requested as a 0..1 fraction of the image height). */
  y: number;
  /** Width (requested as a 0..1 fraction of the image width). */
  width: number;
  /** Height (requested as a 0..1 fraction of the image height). */
  height: number;
  /** Text detected in the image. */
  sourceText: string;
  /** Translation of `sourceText` into the requested target language. */
  translatedText: string;
}
/** Top-level shape of the JSON document the prompt asks the model to return. */
interface GeminiResponse {
  /** Detected regions; may be absent, in which case callers fall back to an empty list. */
  regions?: GeminiRegion[];
}
/**
 * Reads the Gemini API key from the `GEMINI_API_KEY` environment variable.
 *
 * @returns The key with surrounding whitespace removed.
 * @throws Error when the variable is unset, empty, or whitespace-only.
 */
function ensureApiKey(): string {
  const key = (process.env.GEMINI_API_KEY ?? '').trim();
  if (key.length > 0) {
    return key;
  }
  throw new Error('Missing GEMINI_API_KEY');
}
/**
 * Builds the instruction text sent to the model alongside the image.
 *
 * The prompt asks for OCR plus translation and pins the reply to a strict
 * JSON schema with normalized (0..1) coordinates.
 *
 * @param targetLanguage Language the detected text should be translated into.
 * @returns The newline-separated prompt.
 */
function buildPrompt(targetLanguage: string): string {
  const role = 'You are an OCR and image translation engine.';
  const task = `Detect all visible text in the image and translate it into ${targetLanguage}.`;
  const outputRules =
    'Return strict JSON only. No markdown, no commentary.\n' +
    'Use this exact schema:\n' +
    '{"regions":[{"x":0,"y":0,"width":0,"height":0,"sourceText":"","translatedText":""}]}\n' +
    'Coordinates must be normalized floats between 0 and 1 relative to the full image.\n' +
    'Each region should cover one coherent text block.\n' +
    'If text is already in the target language, keep translatedText close to sourceText.\n' +
    'If no text is found, return {"regions":[]}.';
  return `${role}\n${task}\n${outputRules}`;
}
/**
 * Parses the model's text reply into a validated {@link GeminiResponse}.
 *
 * The reply may be bare JSON or JSON wrapped in a Markdown code fence
 * (with or without a `json` tag); the fence is stripped when present.
 *
 * The parsed value is model output and therefore untrusted, so its shape is
 * checked here instead of being cast blindly: a non-object document or a
 * non-array `regions` is rejected with a descriptive error rather than
 * surfacing later as an unrelated `TypeError`, and entries of `regions` that
 * are not objects are dropped.
 *
 * @param text Raw text returned by the model.
 * @returns An object whose `regions`, when present, is an array of objects.
 * @throws SyntaxError when the text is not valid JSON.
 * @throws TypeError when the JSON is not an object or `regions` is not an array.
 */
function extractJson(text: string): GeminiResponse {
  const trimmed = text.trim();
  const fenced = trimmed.match(/```json\s*([\s\S]*?)\s*```/i) ?? trimmed.match(/```\s*([\s\S]*?)\s*```/);
  const jsonText = fenced?.[1] ?? trimmed;

  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonText);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new SyntaxError(`Gemini response is not valid JSON: ${reason}`);
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new TypeError('Gemini response JSON must be an object with a "regions" array');
  }

  const { regions } = parsed as { regions?: unknown };
  if (regions === undefined || regions === null) {
    // Missing regions is tolerated; callers treat it as "no text found".
    return {};
  }
  if (!Array.isArray(regions)) {
    throw new TypeError('Gemini response "regions" must be an array');
  }

  // Drop null/scalar entries so downstream property access cannot throw.
  const result: GeminiResponse = {
    regions: regions.filter((entry) => typeof entry === 'object' && entry !== null),
  };
  return result;
}
/**
 * Clamps a number to the closed unit interval [0, 1].
 *
 * @param value Candidate value; may be NaN or infinite.
 * @returns 0 for non-finite or negative input, 1 for input above 1,
 *          otherwise the value unchanged.
 */
function clampUnit(value: number): number {
  // Non-finite values (NaN, ±Infinity) collapse to 0 together with negatives.
  if (!Number.isFinite(value) || value < 0) {
    return 0;
  }
  return value > 1 ? 1 : value;
}
/**
 * Converts the model's normalized regions into pixel-space regions that are
 * guaranteed to lie inside the image.
 *
 * Each coordinate is clamped to [0, 1], scaled by the image size and rounded.
 * Because origin and size are rounded independently, their sum could exceed
 * the image by a pixel (e.g. 0.5 + 0.5 on an odd dimension), and an origin of
 * exactly 1.0 would land one pixel past the last column/row. Both cases are
 * clamped here so that `x + width <= width` and `y + height <= height` always
 * hold and every region is at least 1x1.
 *
 * Regions whose source or translated text is empty after trimming are dropped.
 *
 * @param regions Regions as returned by the model (normalized coordinates).
 * @param width   Image width in pixels.
 * @param height  Image height in pixels.
 * @returns Pixel-space regions with trimmed, non-empty text.
 */
function sanitizeRegions(regions: GeminiRegion[], width: number, height: number): TextRegion[] {
  return regions
    .map((region) => {
      const x = clampUnit(region.x);
      const y = clampUnit(region.y);
      const normalizedWidth = Math.min(clampUnit(region.width), 1 - x);
      const normalizedHeight = Math.min(clampUnit(region.height), 1 - y);
      const sourceText = String(region.sourceText ?? '').trim();
      const translatedText = String(region.translatedText ?? '').trim();

      // Keep the origin on a real pixel: index `width`/`height` is out of bounds.
      const left = Math.min(Math.round(x * width), Math.max(0, width - 1));
      const top = Math.min(Math.round(y * height), Math.max(0, height - 1));

      // At least 1px, but never extending past the right/bottom image edge.
      const pixelWidth = Math.min(
        Math.max(1, Math.round(normalizedWidth * width)),
        Math.max(1, width - left),
      );
      const pixelHeight = Math.min(
        Math.max(1, Math.round(normalizedHeight * height)),
        Math.max(1, height - top),
      );

      return {
        x: left,
        y: top,
        width: pixelWidth,
        height: pixelHeight,
        sourceText,
        translatedText,
      };
    })
    .filter((region) => region.sourceText !== '' && region.translatedText !== '');
}
/**
 * Makes arbitrary text safe to embed in XML/SVG character data or attributes.
 *
 * Two steps are applied:
 * 1. Characters that XML 1.0 forbids outright (C0 controls other than tab,
 *    LF and CR, plus U+FFFE/U+FFFF) are removed. They cannot be escaped, and
 *    a single one makes the whole SVG document unparseable.
 * 2. The five XML special characters are replaced by their predefined
 *    entities in a single pass, so an `&` produced by one replacement is
 *    never escaped again.
 *
 * @param text Untrusted text (e.g. model output).
 * @returns The escaped text.
 */
function escapeXml(text: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  };
  return text
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, '')
    .replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
}
/**
 * Greedily wraps text into lines of at most `maxCharsPerLine` UTF-16 units.
 *
 * Whitespace runs are collapsed to single spaces first. If the collapsed text
 * contains a space it is wrapped at word boundaries; otherwise (e.g. CJK text
 * without spaces) it is wrapped between individual code points. A single
 * token longer than the limit is kept intact on its own line.
 *
 * @param text            Text to wrap.
 * @param maxCharsPerLine Maximum line length, measured with `String.length`.
 * @returns The wrapped lines, or an empty array for blank input.
 */
function createWrappedText(text: string, maxCharsPerLine: number): string[] {
  const normalized = text.replace(/\s+/g, ' ').trim();
  if (normalized.length === 0) {
    return [];
  }

  const hasSpaces = normalized.includes(' ');
  const tokens = hasSpaces ? normalized.split(' ') : Array.from(normalized);
  const separator = hasSpaces ? ' ' : '';

  const wrapped: string[] = [];
  let pending = '';
  tokens.forEach((token) => {
    // The first token of a line is always accepted, even if it is too long.
    if (pending === '') {
      pending = token;
      return;
    }
    const candidate = pending + separator + token;
    if (candidate.length <= maxCharsPerLine) {
      pending = candidate;
      return;
    }
    wrapped.push(pending);
    pending = token;
  });

  if (pending !== '') {
    wrapped.push(pending);
  }
  return wrapped;
}
/**
 * Renders one translated region as a standalone SVG overlay: a rounded,
 * mostly opaque white box the size of the region with the translated text
 * wrapped and vertically centred inside it.
 *
 * Layout notes:
 * - The characters-per-line budget is derived from an estimated glyph
 *   advance. Latin glyphs are assumed to be ~0.55 em wide; full-width CJK
 *   glyphs are ~1 em wide, so the estimate is scaled by the share of wide
 *   characters in the text. Pure Latin text gets the same budget as a flat
 *   0.55 em estimate; CJK text gets a proportionally smaller budget so it
 *   does not overflow the box.
 * - When the wrapped text has more lines than fit, the surplus lines are
 *   dropped and the last visible line ends with an ellipsis so the
 *   truncation is visible to the reader.
 *
 * @param region Pixel-space region with the text to draw.
 * @returns SVG markup sized `region.width` x `region.height`.
 */
function buildRegionSvg(region: TextRegion): string {
  const padding = Math.max(6, Math.round(Math.min(region.width, region.height) * 0.06));
  const innerWidth = Math.max(1, region.width - padding * 2);
  const innerHeight = Math.max(1, region.height - padding * 2);
  const fontSize = Math.max(14, Math.round(region.height * 0.22));
  const lineHeight = Math.max(fontSize * 1.2, 16);

  // Estimate the average glyph advance (in em) for this particular text.
  const text = region.translatedText;
  const wideGlyphs = /[\u1100-\u115F\u2E80-\uA4CF\uAC00-\uD7A3\uF900-\uFAFF\uFE30-\uFE4F\uFF00-\uFF60\uFFE0-\uFFE6]/g;
  const wideCount = (text.match(wideGlyphs) ?? []).length;
  const wideShare = text.length > 0 ? wideCount / text.length : 0;
  const averageAdvanceEm = 0.55 + 0.45 * wideShare;

  const maxCharsPerLine = Math.max(6, Math.floor(innerWidth / Math.max(fontSize * averageAdvanceEm, 1)));
  const rawLines = createWrappedText(text, maxCharsPerLine);
  const maxLines = Math.max(1, Math.floor(innerHeight / lineHeight));
  const lines = rawLines.slice(0, maxLines);

  if (rawLines.length > maxLines && lines.length > 0) {
    // Trim by code point (never through a surrogate pair) to make room for
    // the ellipsis, always keeping at least one visible character.
    const glyphs = Array.from(lines[lines.length - 1] ?? '');
    const kept = glyphs.slice(0, Math.max(1, glyphs.length - 1)).join('').trimEnd();
    lines[lines.length - 1] = `${kept}\u2026`;
  }

  const textHeight = lines.length * lineHeight;
  // Baseline of the first line: vertically centred, but never above the box.
  const startY = padding + Math.max(fontSize, (innerHeight - textHeight) / 2 + fontSize * 0.9);
  const tspans = lines
    .map((line, index) => {
      const dy = index === 0 ? 0 : lineHeight;
      return `<tspan x="${padding}" dy="${dy}">${escapeXml(line)}</tspan>`;
    })
    .join('');

  return [
    `<svg width="${region.width}" height="${region.height}" xmlns="http://www.w3.org/2000/svg">`,
    `<rect width="100%" height="100%" rx="${Math.max(4, Math.round(padding * 0.6))}" fill="white" fill-opacity="0.92"/>`,
    '<text',
    ` x="${padding}"`,
    ` y="${startY}"`,
    ` font-size="${fontSize}"`,
    ' font-family="Arial, PingFang SC, Microsoft YaHei, Noto Sans CJK SC, sans-serif"',
    ' font-weight="600"',
    ' fill="#111827"',
    '>',
    tspans,
    '</text>',
    '</svg>',
  ].join('');
}
/**
 * Sends the image and the OCR/translation prompt to the Gemini
 * `generateContent` endpoint and returns the parsed region list.
 *
 * The API key is sent in the `x-goog-api-key` request header rather than as a
 * `?key=` query parameter, so it does not end up in URLs that proxies, access
 * logs or error traces may record.
 *
 * @param imageBuffer    Raw bytes of the source image.
 * @param mimeType       MIME type announced for `imageBuffer`.
 * @param targetLanguage Language to translate detected text into.
 * @returns The model's reply parsed by `extractJson`.
 * @throws Error when the API key is missing, the HTTP status is not 2xx, the
 *         reply contains no text (the block/finish reason is included when
 *         the API reports one), or the reply cannot be parsed.
 */
async function callGemini(imageBuffer: Buffer, mimeType: string, targetLanguage: string): Promise<GeminiResponse> {
  const apiKey = ensureApiKey();
  const response = await fetch(GEMINI_API_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      contents: [
        {
          parts: [
            { text: buildPrompt(targetLanguage) },
            {
              inlineData: {
                mimeType,
                data: imageBuffer.toString('base64'),
              },
            },
          ],
        },
      ],
      generationConfig: {
        // Low temperature keeps OCR output and coordinates as stable as possible.
        temperature: 0.1,
        responseMimeType: 'application/json',
      },
    }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Gemini API request failed: HTTP ${response.status}: ${errorText}`);
  }

  const payload = await response.json() as {
    candidates?: Array<{
      content?: {
        parts?: Array<{ text?: string }>;
      };
      finishReason?: string;
    }>;
    promptFeedback?: {
      blockReason?: string;
    };
  };

  // Only the first candidate is used; its text parts are concatenated.
  const candidate = payload.candidates?.[0];
  const text = candidate?.content?.parts?.map((part) => part.text ?? '').join('').trim();
  if (!text) {
    // Surface why the reply is empty (e.g. a safety block) when the API says so.
    const reason = payload.promptFeedback?.blockReason ?? candidate?.finishReason;
    throw new Error(
      reason
        ? `Gemini API returned an empty response (${reason})`
        : 'Gemini API returned an empty response',
    );
  }

  return extractJson(text);
}
/**
 * Guesses an image MIME type from a file path's extension (case-insensitive).
 *
 * @param inputPath Path of the image file.
 * @returns `image/png`, `image/webp` or `image/gif` for the matching
 *          extensions; `image/jpeg` for everything else, including paths
 *          without an extension.
 */
function inferMimeType(inputPath: string): string {
  switch (extname(inputPath).toLowerCase()) {
    case '.png':
      return 'image/png';
    case '.webp':
      return 'image/webp';
    case '.gif':
      return 'image/gif';
    default:
      // JPEG is the fallback for .jpg/.jpeg and any unrecognised extension.
      return 'image/jpeg';
  }
}
/**
 * Derives the default output path next to the input file, in the form
 * `<name>.<language>.translated<ext>`.
 *
 * The language is reduced to a lowercase slug of ASCII letters and digits
 * separated by single hyphens; if nothing remains, `translated` is used
 * instead. Inputs without an extension get `.png`.
 *
 * @param inputPath      Path of the source image.
 * @param targetLanguage Requested target language.
 * @returns The derived output path in the input file's directory.
 */
function deriveOutputPath(inputPath: string, targetLanguage: string): string {
  const ext = extname(inputPath);
  const stem = basename(inputPath, ext);

  const slug = targetLanguage
    .replace(/[^a-z0-9]+/gi, '-')
    .replace(/^-+|-+$/g, '')
    .toLowerCase();
  const languageTag = slug === '' ? 'translated' : slug;
  const suffix = ext === '' ? '.png' : ext;

  const fileName = [stem, languageTag, 'translated'].join('.') + suffix;
  return join(dirname(inputPath), fileName);
}
/**
 * Runs the full pipeline for one image: read it, ask Gemini for translated
 * text regions, draw an SVG overlay per region, and write the composited
 * result to disk.
 *
 * @param inputPath      Path of the source image (resolved to absolute).
 * @param targetLanguage Language to translate the detected text into.
 * @param outputPath     Optional destination; derived from the input path
 *                       and language when omitted.
 * @returns Resolved paths, image dimensions and the pixel-space regions.
 * @throws Error when the image dimensions cannot be determined; errors from
 *         reading, the Gemini call, or writing propagate unchanged.
 */
async function translateImage(
  inputPath: string,
  targetLanguage: string,
  outputPath?: string,
): Promise<RunResult['data']> {
  const sourcePath = resolve(inputPath);
  const destinationPath = resolve(outputPath ? outputPath : deriveOutputPath(sourcePath, targetLanguage));

  const bytes = await readFile(sourcePath);
  const image = sharp(bytes);
  const { width, height } = await image.metadata();
  if (!width || !height) {
    throw new Error('Unable to determine image dimensions');
  }

  const reply = await callGemini(bytes, inferMimeType(sourcePath), targetLanguage);
  const regions = sanitizeRegions(reply.regions ?? [], width, height);

  // One SVG overlay per region, positioned at the region's top-left corner.
  const overlays: Array<{ input: Buffer; left: number; top: number }> = [];
  for (const region of regions) {
    overlays.push({
      input: Buffer.from(buildRegionSvg(region)),
      left: region.x,
      top: region.y,
    });
  }

  await image.composite(overlays).toFile(destinationPath);

  return {
    inputPath: sourcePath,
    outputPath: destinationPath,
    targetLanguage,
    width,
    height,
    regions,
  };
}
export async function run(
command: Command,
args: string[],
dryRun: boolean,
): Promise<RunResult> {
const config = createEnvConfig();
const apiBase = (process.env.API_BASE ?? 'https://api-gw-test.yuanwei-lnc.com').replace(/\/$/, '');
if (command === 'run') {
const response: ApiResponse = await requestApiWithAutoRefresh(
'POST',
`${apiBase}/your/endpoint`,
dryRun,
config,
JSON.stringify({ param: args[0] }),
);
if (response.status < 200 || response.status >= 300) {
return { status: 'failed', command, dryRun, error: `HTTP ${response.status}: ${response.body}` };
}
return { status: 'success', command, dryRun, data: JSON.parse(response.body) };
if (command !== 'translate') {
return { status: 'failed', command, dryRun, error: `unknown command: ${command}` };
}
return { status: 'failed', command, dryRun, error: `unknown command: ${command}` };
const [inputPath, targetLanguage = DEFAULT_TARGET_LANGUAGE, outputPath] = args;
if (!inputPath) {
return {
status: 'failed',
command,
dryRun,
error: 'missing image path: translate <image-path> [target-language] [output-path]',
};
}
if (dryRun) {
return {
status: 'success',
command,
dryRun,
data: {
inputPath: resolve(inputPath),
outputPath: resolve(outputPath ? outputPath : deriveOutputPath(resolve(inputPath), targetLanguage)),
targetLanguage,
width: 0,
height: 0,
regions: [],
},
};
}
try {
const data = await translateImage(inputPath, targetLanguage, outputPath);
return { status: 'success', command, dryRun, data };
} catch (error) {
return {
status: 'failed',
command,
dryRun,
error: error instanceof Error ? error.message : String(error),
};
}
}