generated from agent-skills/template-skill
feat: initial skill implementation
This commit is contained in:
parent
2c2c24a310
commit
1ce61a50b8
24
SKILL.md
24
SKILL.md
|
|
@ -1,26 +1,34 @@
|
||||||
---
|
---
|
||||||
name: my-skill
|
name: gemini-image-translator
|
||||||
description: "TODO: describe what this skill does and when to use it."
|
description: Translate text inside images with Gemini 2.5 Flash, then render the translated text back onto the original image. Use this skill when given a local image path and an optional target language.
|
||||||
---
|
---
|
||||||
|
|
||||||
# my-skill
|
# gemini-image-translator
|
||||||
|
|
||||||
TODO: one-line description.
|
Translate text in a local image with Gemini 2.5 Flash OCR + translation, then write the translated text back onto the image with `sharp`.
|
||||||
|
|
||||||
> Auth (CLIENT_KEY) is loaded automatically from `~/.openclaw/.env`.
|
Set `GEMINI_API_KEY` in the environment before running.
|
||||||
|
|
||||||
## Run
|
## Run
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bun scripts/run.ts <command> [args] [--dry-run]
|
bun scripts/run.ts translate <image-path> [target-language] [output-path] [--dry-run]
|
||||||
```
|
```
|
||||||
|
|
||||||
## Commands
|
## Commands
|
||||||
|
|
||||||
| Command | Description |
|
| Command | Description |
|
||||||
|---------|-------------|
|
|---------|-------------|
|
||||||
| `run <arg>` | TODO: describe |
|
| `translate <image-path> [target-language] [output-path]` | OCR the image, translate all detected text to the target language (default `zh-CN`), and save a new image with translated text overlaid. |
|
||||||
|
|
||||||
## Output
|
## Output
|
||||||
|
|
||||||
Returns JSON: `{ "status": "success" | "failed", "data": ... }`
|
Returns JSON with `status`, `command`, `dryRun`, and either `data` (on success) or `error` (a message, on failure).
|
||||||
|
|
||||||
|
On success, `data` includes:
|
||||||
|
|
||||||
|
- `inputPath`: resolved source image path
|
||||||
|
- `outputPath`: generated output image path
|
||||||
|
- `targetLanguage`: requested target language
|
||||||
|
- `width` / `height`: image dimensions in pixels (both `0` with `--dry-run`, which does not read the image)
|
||||||
|
- `regions`: detected OCR regions with translated text and pixel bounds
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,73 @@
|
||||||
|
{
|
||||||
|
"lockfileVersion": 1,
|
||||||
|
"configVersion": 1,
|
||||||
|
"workspaces": {
|
||||||
|
"": {
|
||||||
|
"name": "gemini-image-translator",
|
||||||
|
"dependencies": {
|
||||||
|
"sharp": "^0.34.3",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"packages": {
|
||||||
|
"@emnapi/runtime": ["@emnapi/runtime@1.9.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-QN75eB0IH2ywSpRpNddCRfQIhmJYBCJ1x5Lb3IscKAL8bMnVAKnRg8dCoXbHzVLLH7P38N2Z3mtulB7W0J0FKw=="],
|
||||||
|
|
||||||
|
"@img/colour": ["@img/colour@1.1.0", "", {}, "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ=="],
|
||||||
|
|
||||||
|
"@img/sharp-darwin-arm64": ["@img/sharp-darwin-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-arm64": "1.2.4" }, "os": "darwin", "cpu": "arm64" }, "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w=="],
|
||||||
|
|
||||||
|
"@img/sharp-darwin-x64": ["@img/sharp-darwin-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-darwin-x64": "1.2.4" }, "os": "darwin", "cpu": "x64" }, "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-darwin-arm64": ["@img/sharp-libvips-darwin-arm64@1.2.4", "", { "os": "darwin", "cpu": "arm64" }, "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-darwin-x64": ["@img/sharp-libvips-darwin-x64@1.2.4", "", { "os": "darwin", "cpu": "x64" }, "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linux-arm": ["@img/sharp-libvips-linux-arm@1.2.4", "", { "os": "linux", "cpu": "arm" }, "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linux-arm64": ["@img/sharp-libvips-linux-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linux-ppc64": ["@img/sharp-libvips-linux-ppc64@1.2.4", "", { "os": "linux", "cpu": "ppc64" }, "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linux-riscv64": ["@img/sharp-libvips-linux-riscv64@1.2.4", "", { "os": "linux", "cpu": "none" }, "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linux-s390x": ["@img/sharp-libvips-linux-s390x@1.2.4", "", { "os": "linux", "cpu": "s390x" }, "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linux-x64": ["@img/sharp-libvips-linux-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linuxmusl-arm64": ["@img/sharp-libvips-linuxmusl-arm64@1.2.4", "", { "os": "linux", "cpu": "arm64" }, "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw=="],
|
||||||
|
|
||||||
|
"@img/sharp-libvips-linuxmusl-x64": ["@img/sharp-libvips-linuxmusl-x64@1.2.4", "", { "os": "linux", "cpu": "x64" }, "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg=="],
|
||||||
|
|
||||||
|
"@img/sharp-linux-arm": ["@img/sharp-linux-arm@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm": "1.2.4" }, "os": "linux", "cpu": "arm" }, "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw=="],
|
||||||
|
|
||||||
|
"@img/sharp-linux-arm64": ["@img/sharp-linux-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg=="],
|
||||||
|
|
||||||
|
"@img/sharp-linux-ppc64": ["@img/sharp-linux-ppc64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-ppc64": "1.2.4" }, "os": "linux", "cpu": "ppc64" }, "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA=="],
|
||||||
|
|
||||||
|
"@img/sharp-linux-riscv64": ["@img/sharp-linux-riscv64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-riscv64": "1.2.4" }, "os": "linux", "cpu": "none" }, "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw=="],
|
||||||
|
|
||||||
|
"@img/sharp-linux-s390x": ["@img/sharp-linux-s390x@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-s390x": "1.2.4" }, "os": "linux", "cpu": "s390x" }, "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg=="],
|
||||||
|
|
||||||
|
"@img/sharp-linux-x64": ["@img/sharp-linux-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linux-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ=="],
|
||||||
|
|
||||||
|
"@img/sharp-linuxmusl-arm64": ["@img/sharp-linuxmusl-arm64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-arm64": "1.2.4" }, "os": "linux", "cpu": "arm64" }, "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg=="],
|
||||||
|
|
||||||
|
"@img/sharp-linuxmusl-x64": ["@img/sharp-linuxmusl-x64@0.34.5", "", { "optionalDependencies": { "@img/sharp-libvips-linuxmusl-x64": "1.2.4" }, "os": "linux", "cpu": "x64" }, "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q=="],
|
||||||
|
|
||||||
|
"@img/sharp-wasm32": ["@img/sharp-wasm32@0.34.5", "", { "dependencies": { "@emnapi/runtime": "^1.7.0" }, "cpu": "none" }, "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw=="],
|
||||||
|
|
||||||
|
"@img/sharp-win32-arm64": ["@img/sharp-win32-arm64@0.34.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g=="],
|
||||||
|
|
||||||
|
"@img/sharp-win32-ia32": ["@img/sharp-win32-ia32@0.34.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg=="],
|
||||||
|
|
||||||
|
"@img/sharp-win32-x64": ["@img/sharp-win32-x64@0.34.5", "", { "os": "win32", "cpu": "x64" }, "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw=="],
|
||||||
|
|
||||||
|
"detect-libc": ["detect-libc@2.1.2", "", {}, "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ=="],
|
||||||
|
|
||||||
|
"semver": ["semver@7.7.4", "", { "bin": { "semver": "bin/semver.js" } }, "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA=="],
|
||||||
|
|
||||||
|
"sharp": ["sharp@0.34.5", "", { "dependencies": { "@img/colour": "^1.0.0", "detect-libc": "^2.1.2", "semver": "^7.7.3" }, "optionalDependencies": { "@img/sharp-darwin-arm64": "0.34.5", "@img/sharp-darwin-x64": "0.34.5", "@img/sharp-libvips-darwin-arm64": "1.2.4", "@img/sharp-libvips-darwin-x64": "1.2.4", "@img/sharp-libvips-linux-arm": "1.2.4", "@img/sharp-libvips-linux-arm64": "1.2.4", "@img/sharp-libvips-linux-ppc64": "1.2.4", "@img/sharp-libvips-linux-riscv64": "1.2.4", "@img/sharp-libvips-linux-s390x": "1.2.4", "@img/sharp-libvips-linux-x64": "1.2.4", "@img/sharp-libvips-linuxmusl-arm64": "1.2.4", "@img/sharp-libvips-linuxmusl-x64": "1.2.4", "@img/sharp-linux-arm": "0.34.5", "@img/sharp-linux-arm64": "0.34.5", "@img/sharp-linux-ppc64": "0.34.5", "@img/sharp-linux-riscv64": "0.34.5", "@img/sharp-linux-s390x": "0.34.5", "@img/sharp-linux-x64": "0.34.5", "@img/sharp-linuxmusl-arm64": "0.34.5", "@img/sharp-linuxmusl-x64": "0.34.5", "@img/sharp-wasm32": "0.34.5", "@img/sharp-win32-arm64": "0.34.5", "@img/sharp-win32-ia32": "0.34.5", "@img/sharp-win32-x64": "0.34.5" } }, "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg=="],
|
||||||
|
|
||||||
|
"tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
{
|
{
|
||||||
"name": "my-skill",
|
"name": "gemini-image-translator",
|
||||||
"version": "0.1.0",
|
"version": "0.1.0",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
|
@ -7,6 +7,6 @@
|
||||||
"build": "bun build scripts/run.ts --outfile dist/run.js --target bun"
|
"build": "bun build scripts/run.ts --outfile dist/run.js --target bun"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@clawd/auth-runtime": "git+http://192.168.0.108:3030/agent-skills/auth-runtime.git"
|
"sharp": "^0.34.3"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,13 @@
|
||||||
#!/usr/bin/env bun
|
#!/usr/bin/env bun
|
||||||
import type { Command } from '../src/index.ts';
|
|
||||||
import { run } from '../src/index.ts';
|
|
||||||
|
|
||||||
function printUsage(): void {
|
function printUsage(): void {
|
||||||
console.error(`Usage:
|
console.error(`Usage:
|
||||||
bun scripts/run.ts [--api-base=<url>] <command> [args...] [--dry-run]
|
bun scripts/run.ts translate <image-path> [target-language] [output-path] [--dry-run]
|
||||||
|
|
||||||
Commands:
|
Commands:
|
||||||
run <arg>
|
translate <image-path> [target-language] [output-path]
|
||||||
|
|
||||||
Config: ~/.openclaw/.env (CLIENT_KEY, API_BASE)
|
Config:
|
||||||
|
GEMINI_API_KEY=<your-api-key>
|
||||||
`);
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -20,8 +18,6 @@ async function main(): Promise<void> {
|
||||||
for (const arg of process.argv.slice(2)) {
|
for (const arg of process.argv.slice(2)) {
|
||||||
if (arg === '--dry-run') {
|
if (arg === '--dry-run') {
|
||||||
dryRun = true;
|
dryRun = true;
|
||||||
} else if (arg.startsWith('--api-base=')) {
|
|
||||||
process.env.API_BASE = arg.slice('--api-base='.length).trim();
|
|
||||||
} else if (arg === '-h' || arg === '--help') {
|
} else if (arg === '-h' || arg === '--help') {
|
||||||
printUsage(); process.exit(0);
|
printUsage(); process.exit(0);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -31,7 +27,8 @@ async function main(): Promise<void> {
|
||||||
|
|
||||||
if (positionals.length < 1) { printUsage(); process.exit(1); }
|
if (positionals.length < 1) { printUsage(); process.exit(1); }
|
||||||
|
|
||||||
const result = await run(positionals[0] as Command, positionals.slice(1), dryRun);
|
const { run } = await import('../src/index.ts');
|
||||||
|
const result = await run(positionals[0] as 'translate', positionals.slice(1), dryRun);
|
||||||
console.log(JSON.stringify(result, null, 2));
|
console.log(JSON.stringify(result, null, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
356
src/index.ts
356
src/index.ts
|
|
@ -1,42 +1,348 @@
|
||||||
import {
|
import { basename, dirname, extname, join, resolve } from 'node:path';
|
||||||
createEnvConfig,
|
import { readFile } from 'node:fs/promises';
|
||||||
requestApiWithAutoRefresh,
|
import sharp from 'sharp';
|
||||||
type ApiResponse,
|
|
||||||
} from '@clawd/auth-runtime';
|
|
||||||
|
|
||||||
export type Command = 'run'; // TODO: add your commands
|
const GEMINI_MODEL = 'gemini-2.5-flash';
|
||||||
|
const GEMINI_API_URL = `https://generativelanguage.googleapis.com/v1beta/models/${GEMINI_MODEL}:generateContent`;
|
||||||
|
const DEFAULT_TARGET_LANGUAGE = 'zh-CN';
|
||||||
|
|
||||||
|
export type Command = 'translate';
|
||||||
|
|
||||||
|
/**
 * One block of text located in the image, positioned in pixels.
 *
 * Instances are produced by `sanitizeRegions`, which scales the model's
 * normalized coordinates by the image dimensions.
 */
export interface TextRegion {
  /** Text as it was read from the source image. */
  sourceText: string;
  /** Translation of `sourceText` that is rendered onto the output image. */
  translatedText: string;
  /** Left edge of the block, in pixels. */
  x: number;
  /** Top edge of the block, in pixels. */
  y: number;
  /** Block width, in pixels. */
  width: number;
  /** Block height, in pixels. */
  height: number;
}
|
||||||
|
|
||||||
export interface RunResult {
|
export interface RunResult {
|
||||||
status: 'success' | 'failed';
|
status: 'success' | 'failed';
|
||||||
command: Command;
|
command: Command;
|
||||||
dryRun: boolean;
|
dryRun: boolean;
|
||||||
data?: unknown;
|
data?: {
|
||||||
|
inputPath: string;
|
||||||
|
outputPath: string;
|
||||||
|
targetLanguage: string;
|
||||||
|
width: number;
|
||||||
|
height: number;
|
||||||
|
regions: TextRegion[];
|
||||||
|
};
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One region as described by the model's JSON reply.
 *
 * The prompt asks for coordinates as normalized floats in [0, 1] relative to
 * the full image; the values are model output, so that is a request rather
 * than a guarantee.
 */
interface GeminiRegion {
  /** Text the model read from the image. */
  sourceText: string;
  /** The model's translation of `sourceText`. */
  translatedText: string;
  /** Normalized left edge. */
  x: number;
  /** Normalized top edge. */
  y: number;
  /** Normalized width. */
  width: number;
  /** Normalized height. */
  height: number;
}
|
||||||
|
|
||||||
|
/**
 * Top-level shape of the JSON document the model is asked to return.
 */
interface GeminiResponse {
  /** Detected text regions; may be absent, in which case callers treat it as empty. */
  regions?: GeminiRegion[];
}
|
||||||
|
|
||||||
|
/**
 * Reads the Gemini API key from the `GEMINI_API_KEY` environment variable.
 *
 * @returns The key with surrounding whitespace removed.
 * @throws Error when the variable is unset or contains only whitespace.
 */
function ensureApiKey(): string {
  const key = process.env.GEMINI_API_KEY?.trim();
  if (key) {
    return key;
  }
  throw new Error('Missing GEMINI_API_KEY');
}
|
||||||
|
|
||||||
|
/**
 * Builds the instruction text sent to the model alongside the image.
 *
 * The prompt requests OCR plus translation and pins the reply to a fixed JSON
 * schema with normalized coordinates.
 *
 * @param targetLanguage Language the detected text should be translated into.
 * @returns The prompt, one instruction per line, without a trailing newline.
 */
function buildPrompt(targetLanguage: string): string {
  const schemaExample =
    '{"regions":[{"x":0,"y":0,"width":0,"height":0,"sourceText":"","translatedText":""}]}';

  let prompt = 'You are an OCR and image translation engine.\n';
  prompt += `Detect all visible text in the image and translate it into ${targetLanguage}.\n`;
  prompt += 'Return strict JSON only. No markdown, no commentary.\n';
  prompt += 'Use this exact schema:\n';
  prompt += `${schemaExample}\n`;
  prompt += 'Coordinates must be normalized floats between 0 and 1 relative to the full image.\n';
  prompt += 'Each region should cover one coherent text block.\n';
  prompt += 'If text is already in the target language, keep translatedText close to sourceText.\n';
  prompt += 'If no text is found, return {"regions":[]}.';
  return prompt;
}
|
||||||
|
|
||||||
|
/**
 * Extracts and validates the JSON document contained in a model reply.
 *
 * Accepts bare JSON, JSON inside a Markdown code fence, or (as a fallback)
 * JSON surrounded by stray prose, in which case the outermost `{ ... }` span
 * is parsed.
 *
 * @param text Raw text returned by the model.
 * @returns The parsed reply; `regions`, when present, is an array.
 * @throws Error when no JSON object can be parsed, when the parsed value is
 *   not a plain object, or when `regions` is present but not an array.
 */
function extractJson(text: string): GeminiResponse {
  const trimmed = text.trim();
  // Prefer an explicit ```json fence, then any fence, then the raw text.
  const fenced = trimmed.match(/```json\s*([\s\S]*?)\s*```/i) ?? trimmed.match(/```\s*([\s\S]*?)\s*```/);
  const candidate = fenced?.[1] ?? trimmed;

  let parsed: unknown;
  try {
    parsed = JSON.parse(candidate);
  } catch (error) {
    // The model sometimes wraps the object in commentary despite the prompt;
    // retry on the outermost brace-delimited span before giving up.
    const start = candidate.indexOf('{');
    const end = candidate.lastIndexOf('}');
    const reason = error instanceof Error ? error.message : String(error);
    if (start === -1 || end <= start) {
      throw new Error(`Gemini response is not valid JSON: ${reason}`);
    }
    try {
      parsed = JSON.parse(candidate.slice(start, end + 1));
    } catch {
      throw new Error(`Gemini response is not valid JSON: ${reason}`);
    }
  }

  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new Error('Gemini response JSON must be an object');
  }

  const regions = (parsed as { regions?: unknown }).regions;
  if (regions !== undefined && regions !== null && !Array.isArray(regions)) {
    throw new Error('Gemini response "regions" must be an array');
  }

  return parsed as GeminiResponse;
}
|
||||||
|
|
||||||
|
/**
 * Clamps a value to the unit interval [0, 1].
 *
 * @param value Number to clamp.
 * @returns `0` for non-finite or negative input, `1` for input above one,
 *   otherwise `value` unchanged.
 */
function clampUnit(value: number): number {
  if (!Number.isFinite(value) || value < 0) {
    return 0;
  }
  return value > 1 ? 1 : value;
}
|
||||||
|
|
||||||
|
/**
 * Converts the model's normalized regions into pixel-space regions that lie
 * entirely inside the image.
 *
 * Model output is untrusted: a non-array `regions` yields an empty list,
 * entries that are not objects are skipped, coordinates are clamped to [0, 1]
 * and regions without both source and translated text are dropped.
 *
 * @param regions Regions as returned by the model.
 * @param width Image width in pixels (expected to be at least 1).
 * @param height Image height in pixels (expected to be at least 1).
 * @returns Regions with integer pixel bounds, each at least 1x1 and contained
 *   in the `width` x `height` image.
 */
function sanitizeRegions(regions: GeminiRegion[], width: number, height: number): TextRegion[] {
  if (!Array.isArray(regions)) {
    return [];
  }

  const sanitized: TextRegion[] = [];
  for (const region of regions) {
    if (typeof region !== 'object' || region === null) {
      continue;
    }

    const sourceText = String(region.sourceText ?? '').trim();
    const translatedText = String(region.translatedText ?? '').trim();
    if (!sourceText || !translatedText) {
      continue;
    }

    const unitX = clampUnit(region.x);
    const unitY = clampUnit(region.y);
    const unitWidth = Math.min(clampUnit(region.width), 1 - unitX);
    const unitHeight = Math.min(clampUnit(region.height), 1 - unitY);

    // Rounding position and size independently can overshoot the edge by a
    // pixel (and x = 1 lands on `width` itself), so clamp again in pixels.
    const x = Math.min(Math.round(unitX * width), Math.max(0, width - 1));
    const y = Math.min(Math.round(unitY * height), Math.max(0, height - 1));

    sanitized.push({
      x,
      y,
      width: Math.max(1, Math.min(Math.round(unitWidth * width), width - x)),
      height: Math.max(1, Math.min(Math.round(unitHeight * height), height - y)),
      sourceText,
      translatedText,
    });
  }
  return sanitized;
}
|
||||||
|
|
||||||
|
/**
 * Escapes the five XML special characters so arbitrary text can be embedded
 * in SVG markup as character data or inside a quoted attribute.
 *
 * @param text Text to escape.
 * @returns `text` with `&`, `<`, `>`, `"` and `'` replaced by their
 *   predefined XML entities.
 */
function escapeXml(text: string): string {
  return text
    // `&` must be handled first so the entities added below are not re-escaped.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}
|
||||||
|
|
||||||
|
/**
 * Wraps text into lines of at most `maxCharsPerLine` characters.
 *
 * Whitespace runs are collapsed to single spaces. Lines break at spaces where
 * possible; a token that is longer than the limit on its own (a long word, or
 * a run of CJK text, which has no spaces) is split at the limit so no line
 * ever exceeds it. Length is counted in Unicode code points, so characters
 * outside the BMP are never cut in half.
 *
 * @param text Text to wrap.
 * @param maxCharsPerLine Maximum characters per line; values below 1 (or NaN)
 *   are treated as 1.
 * @returns The wrapped lines, or an empty array for blank input.
 */
function createWrappedText(text: string, maxCharsPerLine: number): string[] {
  const collapsed = text.replace(/\s+/g, ' ').trim();
  if (!collapsed) {
    return [];
  }

  const floored = Math.floor(maxCharsPerLine);
  const limit = floored >= 1 ? floored : 1;

  const lines: string[] = [];
  let current = '';
  let currentLength = 0;

  for (const token of collapsed.split(' ')) {
    let chars = Array.from(token);

    // Token fits on the current line after a separating space.
    if (current && currentLength + 1 + chars.length <= limit) {
      current = `${current} ${token}`;
      currentLength += 1 + chars.length;
      continue;
    }

    if (current) {
      lines.push(current);
    }

    // Hard-wrap tokens that cannot fit on a line by themselves.
    while (chars.length > limit) {
      lines.push(chars.slice(0, limit).join(''));
      chars = chars.slice(limit);
    }

    current = chars.join('');
    currentLength = chars.length;
  }

  if (current) {
    lines.push(current);
  }

  return lines;
}
|
||||||
|
|
||||||
|
/**
 * Renders one translated region as a standalone SVG document sized to the
 * region: a rounded, mostly opaque white rectangle with the translated text
 * drawn on top.
 *
 * Font size, padding and the wrap width are derived from the region's pixel
 * size. Text that needs more lines than fit vertically is cut off and the
 * last visible line ends with an ellipsis.
 *
 * @param region Pixel-space region whose `translatedText` is rendered.
 * @returns SVG markup as a single string.
 */
function buildRegionSvg(region: TextRegion): string {
  const { width: boxWidth, height: boxHeight, translatedText } = region;

  // Layout metrics, all in pixels.
  const pad = Math.max(6, Math.round(Math.min(boxWidth, boxHeight) * 0.06));
  const usableWidth = Math.max(1, boxWidth - pad * 2);
  const usableHeight = Math.max(1, boxHeight - pad * 2);
  const fontPx = Math.max(14, Math.round(boxHeight * 0.22));
  const linePx = Math.max(fontPx * 1.2, 16);
  // Wrap width is estimated from an average glyph width of 0.55 em.
  const charsPerLine = Math.max(6, Math.floor(usableWidth / Math.max(fontPx * 0.55, 1)));
  const lineBudget = Math.max(1, Math.floor(usableHeight / linePx));

  const wrapped = createWrappedText(translatedText, charsPerLine);
  const visible = wrapped.slice(0, lineBudget);
  if (wrapped.length > lineBudget && visible.length > 0) {
    // Swap the final character of the last visible line for an ellipsis.
    const lastIndex = visible.length - 1;
    const tail = visible[lastIndex];
    const keep = Math.max(1, tail.length - 1);
    visible[lastIndex] = `${tail.slice(0, keep).trimEnd()}…`;
  }

  // Baseline of the first line: centred vertically, but never above the top padding.
  const blockHeight = visible.length * linePx;
  const baselineY = pad + Math.max(fontPx, (usableHeight - blockHeight) / 2 + fontPx * 0.9);

  let tspans = '';
  visible.forEach((line, index) => {
    const offset = index === 0 ? 0 : linePx;
    tspans += `<tspan x="${pad}" dy="${offset}">${escapeXml(line)}</tspan>`;
  });

  const cornerRadius = Math.max(4, Math.round(pad * 0.6));
  const fontFamily = 'Arial, PingFang SC, Microsoft YaHei, Noto Sans CJK SC, sans-serif';

  return (
    `<svg width="${boxWidth}" height="${boxHeight}" xmlns="http://www.w3.org/2000/svg">` +
    `<rect width="100%" height="100%" rx="${cornerRadius}" fill="white" fill-opacity="0.92"/>` +
    `<text x="${pad}" y="${baselineY}" font-size="${fontPx}" font-family="${fontFamily}" font-weight="600" fill="#111827">` +
    tspans +
    '</text></svg>'
  );
}
|
||||||
|
|
||||||
|
/**
 * Sends the image and the OCR/translation prompt to the Gemini
 * `generateContent` endpoint and returns the parsed JSON reply.
 *
 * The API key is sent in the `x-goog-api-key` header rather than the query
 * string, so it does not become part of the request URL.
 *
 * @param imageBuffer Raw bytes of the image file.
 * @param mimeType MIME type declared for the image.
 * @param targetLanguage Language to translate the detected text into.
 * @returns The model's reply parsed by `extractJson`.
 * @throws Error when the API key is missing, the HTTP status is not 2xx, the
 *   first candidate contains no text, or the text cannot be parsed.
 */
async function callGemini(imageBuffer: Buffer, mimeType: string, targetLanguage: string): Promise<GeminiResponse> {
  const apiKey = ensureApiKey();
  const response = await fetch(GEMINI_API_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      contents: [
        {
          parts: [
            { text: buildPrompt(targetLanguage) },
            {
              inlineData: {
                mimeType,
                data: imageBuffer.toString('base64'),
              },
            },
          ],
        },
      ],
      generationConfig: {
        // Low temperature keeps the OCR/translation output close to deterministic.
        temperature: 0.1,
        responseMimeType: 'application/json',
      },
    }),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`Gemini API request failed: HTTP ${response.status}: ${errorText}`);
  }

  const payload = await response.json() as {
    candidates?: Array<{
      content?: {
        parts?: Array<{ text?: string }>;
      };
    }>;
  };

  // Only the first candidate is used; its text parts are concatenated in order.
  const text = payload.candidates?.[0]?.content?.parts?.map((part) => part.text ?? '').join('').trim();
  if (!text) {
    throw new Error('Gemini API returned an empty response');
  }

  return extractJson(text);
}
|
||||||
|
|
||||||
|
/**
 * Guesses an image MIME type from the file extension (case-insensitive).
 *
 * @param inputPath Path of the image file; only the extension is inspected.
 * @returns `image/png`, `image/webp` or `image/gif` for the matching
 *   extension, and `image/jpeg` for everything else.
 */
function inferMimeType(inputPath: string): string {
  switch (extname(inputPath).toLowerCase()) {
    case '.png':
      return 'image/png';
    case '.webp':
      return 'image/webp';
    case '.gif':
      return 'image/gif';
    default:
      return 'image/jpeg';
  }
}
|
||||||
|
|
||||||
|
/**
 * Derives the default output path, placed next to the input file:
 * `<name>.<language>.translated<ext>`.
 *
 * The language tag is lower-cased and reduced to letters and digits separated
 * by single dashes; if nothing remains, `translated` is used instead. Input
 * paths without an extension get `.png`.
 *
 * @param inputPath Path of the source image.
 * @param targetLanguage Requested target language.
 * @returns The derived output path in the same directory as `inputPath`.
 */
function deriveOutputPath(inputPath: string, targetLanguage: string): string {
  const ext = extname(inputPath);
  const stem = basename(inputPath, ext);

  const slug = targetLanguage
    .replace(/[^a-z0-9]+/gi, '-')
    .replace(/^-+|-+$/g, '')
    .toLowerCase();
  const languageTag = slug || 'translated';
  const suffix = ext || '.png';

  return join(dirname(inputPath), `${stem}.${languageTag}.translated${suffix}`);
}
|
||||||
|
|
||||||
|
/**
 * Runs the full pipeline for one image: read the file, ask the model for
 * translated text regions, draw one SVG overlay per region onto the image,
 * and write the result to disk.
 *
 * @param inputPath Path of the source image; resolved to an absolute path.
 * @param targetLanguage Language to translate the detected text into.
 * @param outputPath Optional destination; when omitted (or empty) the path
 *   from `deriveOutputPath` is used.
 * @returns Resolved paths, image dimensions and the rendered regions.
 * @throws Error when the image dimensions cannot be determined; errors from
 *   reading the file, the API call or writing the output propagate unchanged.
 */
async function translateImage(
  inputPath: string,
  targetLanguage: string,
  outputPath?: string,
): Promise<RunResult['data']> {
  const sourcePath = resolve(inputPath);
  const destinationPath = resolve(outputPath || deriveOutputPath(sourcePath, targetLanguage));

  const bytes = await readFile(sourcePath);
  const image = sharp(bytes);
  const { width, height } = await image.metadata();
  if (!width || !height) {
    throw new Error('Unable to determine image dimensions');
  }

  const detected = await callGemini(bytes, inferMimeType(sourcePath), targetLanguage);
  const regions = sanitizeRegions(detected.regions ?? [], width, height);

  // One SVG overlay per region, positioned at the region's top-left pixel.
  const overlays: Array<{ input: Buffer; left: number; top: number }> = [];
  for (const region of regions) {
    overlays.push({
      input: Buffer.from(buildRegionSvg(region)),
      left: region.x,
      top: region.y,
    });
  }

  await image.composite(overlays).toFile(destinationPath);

  return {
    inputPath: sourcePath,
    outputPath: destinationPath,
    targetLanguage,
    width,
    height,
    regions,
  };
}
|
||||||
|
|
||||||
export async function run(
|
export async function run(
|
||||||
command: Command,
|
command: Command,
|
||||||
args: string[],
|
args: string[],
|
||||||
dryRun: boolean,
|
dryRun: boolean,
|
||||||
): Promise<RunResult> {
|
): Promise<RunResult> {
|
||||||
const config = createEnvConfig();
|
if (command !== 'translate') {
|
||||||
const apiBase = (process.env.API_BASE ?? 'https://api-gw-test.yuanwei-lnc.com').replace(/\/$/, '');
|
return { status: 'failed', command, dryRun, error: `unknown command: ${command}` };
|
||||||
|
|
||||||
if (command === 'run') {
|
|
||||||
const response: ApiResponse = await requestApiWithAutoRefresh(
|
|
||||||
'POST',
|
|
||||||
`${apiBase}/your/endpoint`,
|
|
||||||
dryRun,
|
|
||||||
config,
|
|
||||||
JSON.stringify({ param: args[0] }),
|
|
||||||
);
|
|
||||||
|
|
||||||
if (response.status < 200 || response.status >= 300) {
|
|
||||||
return { status: 'failed', command, dryRun, error: `HTTP ${response.status}: ${response.body}` };
|
|
||||||
}
|
|
||||||
|
|
||||||
return { status: 'success', command, dryRun, data: JSON.parse(response.body) };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return { status: 'failed', command, dryRun, error: `unknown command: ${command}` };
|
const [inputPath, targetLanguage = DEFAULT_TARGET_LANGUAGE, outputPath] = args;
|
||||||
|
if (!inputPath) {
|
||||||
|
return {
|
||||||
|
status: 'failed',
|
||||||
|
command,
|
||||||
|
dryRun,
|
||||||
|
error: 'missing image path: translate <image-path> [target-language] [output-path]',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dryRun) {
|
||||||
|
return {
|
||||||
|
status: 'success',
|
||||||
|
command,
|
||||||
|
dryRun,
|
||||||
|
data: {
|
||||||
|
inputPath: resolve(inputPath),
|
||||||
|
outputPath: resolve(outputPath ? outputPath : deriveOutputPath(resolve(inputPath), targetLanguage)),
|
||||||
|
targetLanguage,
|
||||||
|
width: 0,
|
||||||
|
height: 0,
|
||||||
|
regions: [],
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const data = await translateImage(inputPath, targetLanguage, outputPath);
|
||||||
|
return { status: 'success', command, dryRun, data };
|
||||||
|
} catch (error) {
|
||||||
|
return {
|
||||||
|
status: 'failed',
|
||||||
|
command,
|
||||||
|
dryRun,
|
||||||
|
error: error instanceof Error ? error.message : String(error),
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue