refactor: load vision config and search endpoints from auth client-config, no skill-level envs needed

This commit is contained in:
ywkj 2026-04-20 12:14:43 +08:00
parent fa03962216
commit 23bb268d75
5 changed files with 107 additions and 128 deletions

View File

@ -1,45 +1,29 @@
# =============================================================================
# video-product-snapshot 环境变量配置
# 复制为 .env 并填入真实值:cp .env.example .env
# =============================================================================
#
# 只需在 ~/.openclaw/.env 中配置 CLIENT_KEY
# CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx
#
# Vision API key、图搜接口等均通过 auth-rt client-config 自动获取,
# 无需在此手动填写。
# =============================================================================
# -----------------------------------------------------------------------------
# Vision API 配置(用于商品帧检测)
# 兼容任何 OpenAI 格式接口(OpenAI / Groq / Together / 本地 Ollama 等)
# 可选覆盖(通常不需要)
# -----------------------------------------------------------------------------
# API Key必填
VISION_API_KEY=your-api-key-here
# API Base URL(可选,留空则使用 OpenAI 官方地址)
# VISION_API_BASE=https://api.groq.com/openai/v1
# VISION_API_BASE=http://localhost:11434/v1
# 模型名称(可选,默认 gpt-4o-mini)
# 覆盖 Vision 模型(默认来自 client config,fallback 为 gpt-4o-mini)
# VISION_MODEL=gpt-4o-mini
# VISION_MODEL=meta-llama/llama-4-scout-17b-16e-instruct
# VISION_MODEL=llava:13b
# -----------------------------------------------------------------------------
# 1688 图搜配置(via woo-data-scrawler 本地服务,端口 3202)
# 所有 Onebound 调用均通过本地服务代理,无需持有 API 密钥
# -----------------------------------------------------------------------------
# 覆盖 Vision API base URL(默认来自 client config metadata.provider.base_url)
# VISION_API_BASE=https://your-llm-endpoint/v1
# 上传图片接口(将本地图片上传到公共存储,获取可访问 URL)
ONEBOUND_UPLOAD_ENDPOINT=http://localhost:3202/api/v1/tasks/upload-image
# 覆盖 Vision API key(默认来自 client config metadata.provider.api_key)
# VISION_API_KEY=sk-...
# 以图搜图接口
ONEBOUND_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/search-by-image
# 覆盖 auth-rt 二进制路径
# AUTH_RT_BIN=/custom/path/to/auth-rt
# 关键词搜索接口(用于 rerank 二次过滤)
ONEBOUND_KEYWORD_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/keyword-search
# -----------------------------------------------------------------------------
# Auth由 auth-rt 自动处理,配置见 ~/.openclaw/.env
# 只需在 ~/.openclaw/.env 中设置 CLIENT_KEY=sk_xxx
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# 遥测(可选)— 上报 skill 执行结果到服务端 Loki(用于调试)
# -----------------------------------------------------------------------------
# TELEMETRY_ENDPOINT=http://localhost:3202/api/v1/tasks/telemetry
# 遥测上报(可选)
# TELEMETRY_ENDPOINT=https://api-gw-test.yuanwei-lnc.com/ecom/tasks/telemetry

View File

@ -79,32 +79,23 @@ All commands return JSON to stdout.
## Environment variables
Copy `.env.example` to `.env` and fill in the values.
The only required configuration is `CLIENT_KEY` in `~/.openclaw/.env`:
### Vision (required for `detect`)
```
CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx
```
| Variable | Required | Description |
|----------|----------|-------------|
| `VISION_API_KEY` | Yes | API key for the vision model |
| `VISION_API_BASE` | No | OpenAI-compatible base URL (default: OpenAI) |
| `VISION_MODEL` | No | Model name (default: `gpt-4o-mini`) |
Everything else — vision API key, image search endpoints — is fetched automatically from the client config via `auth-rt`. No per-skill env vars needed.
### Image search (required for `search` / `detect-and-search`)
### Optional overrides
| Variable | Required | Description |
|----------|----------|-------------|
| `ONEBOUND_UPLOAD_ENDPOINT` | Yes | Endpoint to upload a local image and get a public URL |
| `ONEBOUND_SEARCH_ENDPOINT` | Yes | Reverse image search endpoint |
| `ONEBOUND_KEYWORD_SEARCH_ENDPOINT` | No | Keyword search endpoint for re-ranking results |
These proxy through a local `woo-data-scrawler` instance — no Onebound API key needed directly.
### Other
| Variable | Required | Description |
|----------|----------|-------------|
| `AUTH_RT_BIN` | No | Override path to the `auth-rt` binary |
| `TELEMETRY_ENDPOINT` | No | POST skill execution results to a Loki-compatible endpoint |
| Variable | Description |
|----------|-------------|
| `VISION_MODEL` | Override model name (default: `gpt-4o-mini`) |
| `VISION_API_BASE` | Override vision API base URL |
| `VISION_API_KEY` | Override vision API key |
| `AUTH_RT_BIN` | Override path to the `auth-rt` binary |
| `TELEMETRY_ENDPOINT` | POST execution results to a telemetry endpoint |
## Prerequisites

View File

@ -43,6 +43,20 @@ export interface SessionResponse {
hookToken?: string;
}
// Per-client configuration returned by the auth-rt `client-config` CLI
// command. Supplies vision provider credentials and search config so
// individual skills do not need their own env vars.
export interface ClientConfig {
// Unique client identifier assigned by the auth service.
clientId: string;
// Human-readable client name.
name: string;
// Account status string (e.g. 'active'); exact value set is server-defined — TODO confirm.
status: string;
// Free-form metadata bag. `provider` optionally carries OpenAI-compatible
// vision credentials (api_key / base_url / model); other keys are opaque here.
metadata: {
provider?: {
api_key?: string;
base_url?: string;
model?: string;
};
[key: string]: unknown;
};
}
export interface SkillClientOptions {
apiBase?: string;
dryRun?: boolean;
@ -79,6 +93,13 @@ export class SkillClient {
return JSON.parse(runCli('session'));
}
// Fetch this client's configuration via the `auth-rt client-config` CLI.
// In dry-run mode, returns a placeholder config with empty metadata so
// callers can proceed without invoking the CLI.
// NOTE(review): the CLI output is JSON.parse'd without schema validation
// (same pattern as session() above) — assumes auth-rt emits well-formed
// ClientConfig JSON; confirm the upstream contract.
async clientConfig(): Promise<ClientConfig> {
if (this.dryRun) {
return { clientId: '<dry-run>', name: '<dry-run>', status: 'active', metadata: {} };
}
return JSON.parse(runCli('client-config'));
}
async get(urlPath: string): Promise<ApiResponse> {
return this.request('GET', urlPath);
}

View File

@ -8,6 +8,23 @@ import { imageToBase64 } from './frame-extractor.ts';
import { generateText } from 'ai';
import { createOpenAI } from '@ai-sdk/openai';
// Resolved configuration for the OpenAI-compatible vision model.
export interface VisionConfig {
// API key for the vision endpoint (required).
apiKey: string;
// Base URL of an OpenAI-compatible endpoint; undefined = provider default.
baseURL?: string;
// Model name passed to the provider (e.g. 'gpt-4o-mini').
model: string;
}
// Resolve the vision model configuration for this client.
//
// Precedence (per .env.example / README "Optional overrides"): local env
// vars OVERRIDE values fetched from the auth client config; the client
// config is the normal source and env vars are escape hatches.
//
// @param client skill client used to fetch the auth-rt client config
// @returns resolved VisionConfig
// @throws when no API key is available from either env or client config
async function loadVisionConfig(client: ReturnType<typeof createSkillClient>): Promise<VisionConfig> {
const cfg = await client.clientConfig();
const provider = cfg.metadata?.provider;
// BUG FIX: env vars are documented as overrides, so they must win over the
// client config. Previously api_key/base_url were resolved config-first
// (env could never override them), contradicting both the docs and the
// env-first handling of VISION_MODEL below.
const apiKey = process.env.VISION_API_KEY ?? provider?.api_key;
if (!apiKey) throw new Error('Vision API key not found in client config (metadata.provider.api_key)');
return {
apiKey,
baseURL: process.env.VISION_API_BASE ?? provider?.base_url,
model: process.env.VISION_MODEL ?? provider?.model ?? 'gpt-4o-mini',
};
}
export async function run(
command: Command,
args: string[],
@ -50,8 +67,11 @@ async function runDetect(args: string[], dryRun: boolean): Promise<DetectResult>
};
}
const client = createSkillClient();
const visionConfig = await loadVisionConfig(client);
const frames = extractFrames(videoPath, opts.outputDir, opts.intervalSeconds, opts.maxFrames);
const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency);
const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency, visionConfig);
return {
status: 'success',
@ -64,28 +84,16 @@ async function runDetect(args: string[], dryRun: boolean): Promise<DetectResult>
};
}
async function uploadImage(imagePath: string): Promise<string> {
const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT;
if (!searchEndpoint) throw new Error('ONEBOUND_SEARCH_ENDPOINT not set');
const uploadEndpoint = process.env.ONEBOUND_UPLOAD_ENDPOINT;
if (!uploadEndpoint) throw new Error('ONEBOUND_UPLOAD_ENDPOINT not set');
async function uploadImage(client: ReturnType<typeof createSkillClient>, imagePath: string): Promise<string> {
const imageBuffer = fs.readFileSync(imagePath);
const filename = `video-snapshot-${Date.now()}.jpg`;
const response = await fetch(uploadEndpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
const res = await client.post('/ecom/tasks/upload-image', {
data: imageBuffer.toString('base64'),
filename,
contentType: 'image/jpeg',
}),
});
if (!response.ok) throw new Error(`Upload failed: HTTP ${response.status}`);
const json = await response.json() as { url?: string };
if (res.status >= 400) throw new Error(`Upload failed: HTTP ${res.status}`);
const json = JSON.parse(res.body) as { url?: string };
if (!json.url) throw new Error('Upload response missing url');
return json.url;
}
@ -95,29 +103,22 @@ async function runSearch(args: string[], dryRun: boolean): Promise<SearchResult>
if (!imagePath) return { status: 'failed', command: 'search', dryRun, error: 'search requires <image-path>' };
if (!fs.existsSync(imagePath)) return { status: 'failed', command: 'search', dryRun, error: `image not found: ${imagePath}` };
const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT;
if (!searchEndpoint) return { status: 'failed', command: 'search', dryRun, error: 'ONEBOUND_SEARCH_ENDPOINT not set' };
if (dryRun) {
return { status: 'success', command: 'search', dryRun, imagePath, searchHttpStatus: 0, searchBody: null };
}
// If given a local file, upload it first to get a public URL
const client = createSkillClient();
let imgid = imagePath;
if (!imagePath.startsWith('http')) {
imgid = await uploadImage(imagePath);
imgid = await uploadImage(client, imagePath);
}
const response = await fetch(searchEndpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ imgid, page: 1 }),
});
const res = await client.post('/ecom/tasks/search-by-image', { imgid, page: 1 });
const searchHttpStatus = res.status;
const body = JSON.parse(res.body);
const searchHttpStatus = response.status;
const body = await response.json();
if (!response.ok) {
if (res.status >= 400) {
return { status: 'failed', command: 'search', dryRun, imagePath, searchHttpStatus, error: JSON.stringify(body) };
}
@ -133,23 +134,19 @@ async function runDetectAndSearch(args: string[], dryRun: boolean): Promise<Outp
}
const best = detectResult.bestSnapshot;
// Use cropped image if available, otherwise full frame
const imageForSearch = best.croppedImagePath || best.imagePath;
const searchResult = await runSearch([imageForSearch], dryRun) as SearchResult;
// Auto-rerank using product description to generate Chinese keyword
let rerankResult: any = undefined;
if (!dryRun && searchResult.status === 'success' && searchResult.searchBody) {
// Save search body to temp file for rerank
const tmpFile = path.join(path.dirname(imageForSearch), `search_body_${Date.now()}.json`);
try {
fs.writeFileSync(tmpFile, JSON.stringify(searchResult.searchBody));
const rerankArgs = [
rerankResult = await runRerank([
`--image-results=${tmpFile}`,
`--description=${best.description}`,
'--top=10',
];
rerankResult = await runRerank(rerankArgs, dryRun);
], dryRun);
} catch (e: any) {
rerankResult = { error: e.message };
} finally {
@ -190,17 +187,13 @@ function getFlag(args: string[], flag: string): string | undefined {
return undefined;
}
function createVisionModel() {
const apiKey = process.env.VISION_API_KEY;
if (!apiKey) throw new Error('VISION_API_KEY not set');
const baseURL = process.env.VISION_API_BASE || undefined;
const modelName = process.env.VISION_MODEL || 'gpt-4o-mini';
const openai = createOpenAI({ apiKey, baseURL });
return openai(modelName);
// Build an AI SDK model handle for the configured OpenAI-compatible
// vision provider (key/URL/model already resolved by loadVisionConfig).
function createVisionModel(config: VisionConfig) {
const { apiKey, baseURL, model } = config;
return createOpenAI({ apiKey, baseURL })(model);
}
async function generateChineseKeyword(description: string): Promise<string> {
const model = createVisionModel();
async function generateChineseKeyword(description: string, visionConfig: VisionConfig): Promise<string> {
const model = createVisionModel(visionConfig);
const { text } = await generateText({
model,
prompt: `You are generating a 1688.com (Chinese B2B wholesale) product search keyword.
@ -217,16 +210,9 @@ Output only the search query:`,
return text.trim().replace(/[^\u4e00-\u9fff\u3400-\u4dbf]/g, '').trim();
}
async function keywordSearch(keyword: string, page = 1): Promise<SearchItem[]> {
const endpoint = process.env.ONEBOUND_KEYWORD_SEARCH_ENDPOINT;
if (!endpoint) throw new Error('ONEBOUND_KEYWORD_SEARCH_ENDPOINT not set');
const res = await fetch(endpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ keyword, page }),
});
const json = await res.json() as any;
// Keyword search on 1688 via the server-side proxy endpoint.
//
// @param client  authenticated skill client (auth-rt backed)
// @param keyword search keyword (Chinese, for 1688)
// @param page    1-based result page, defaults to 1
// @returns the item list, or [] when the response carries no items
// @throws on HTTP error responses so the caller's try/catch surfaces the
//         failure (consistent with uploadImage's status handling)
async function keywordSearch(client: ReturnType<typeof createSkillClient>, keyword: string, page = 1): Promise<SearchItem[]> {
const res = await client.post('/ecom/tasks/keyword-search', { keyword, page });
// Fail loudly on HTTP errors instead of silently returning [] —
// previously a 4xx/5xx JSON error body was treated as "no results".
if (res.status >= 400) throw new Error(`Keyword search failed: HTTP ${res.status}`);
// Minimal structural type instead of `as any`: only the path we read.
const json = JSON.parse(res.body) as { data?: { items?: { item?: SearchItem[] } } };
return json?.data?.items?.item ?? [];
}
@ -265,7 +251,9 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
if (dryRun) return { status: 'success', command: 'rerank', dryRun } as any;
// Load image search results
const client = createSkillClient();
const visionConfig = await loadVisionConfig(client);
let imageItems: SearchItem[];
try {
const raw = fs.existsSync(imageResultsArg)
@ -289,7 +277,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
// Prefer product description for accurate translation; fall back to image titles
const sourceText = description || keyword || extractKeywordsFromTitles(imageItems);
try {
autoGeneratedKeyword = await generateChineseKeyword(sourceText);
autoGeneratedKeyword = await generateChineseKeyword(sourceText, visionConfig);
} catch {
autoGeneratedKeyword = extractKeywordsFromTitles(imageItems);
}
@ -299,7 +287,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
// Keyword search on 1688
let keywordItems: SearchItem[] = [];
try {
keywordItems = await keywordSearch(keyword);
keywordItems = await keywordSearch(client, keyword);
} catch (e: any) {
return { status: 'failed', command: 'rerank', dryRun, error: `keyword search failed: ${e.message}` };
}

View File

@ -4,6 +4,7 @@ import { z } from 'zod';
import type { ExtractedFrame } from './frame-extractor.ts';
import type { ProductFrame } from './types.ts';
import { imageToBase64 } from './frame-extractor.ts';
import type { VisionConfig } from './index.ts';
// Pass 1: quick filter — discard frames that clearly have no product
const FilterSchema = z.object({
@ -43,16 +44,9 @@ Return:
- reasoning: one sentence explaining why this frame was chosen
- boundingBox: tight bounding box of the HERO PRODUCT ONLY in the chosen frame as [x1, y1, x2, y2] normalized 0.01.0 (top-left origin). Exclude hands, background, and unrelated objects. The product is assumed to be near the center.`;
function createVisionModel() {
const apiKey = process.env.VISION_API_KEY;
if (!apiKey) throw new Error('VISION_API_KEY not set');
const provider = createOpenAI({
apiKey,
baseURL: process.env.VISION_API_BASE,
});
return provider(process.env.VISION_MODEL ?? 'gpt-4o-mini');
// Construct the two-pass detector's vision model from the resolved
// VisionConfig (OpenAI-compatible provider; resolved by the caller).
function createVisionModel(config: VisionConfig) {
const { apiKey, baseURL, model } = config;
return createOpenAI({ apiKey, baseURL })(model);
}
async function filterFrame(
@ -145,8 +139,9 @@ export async function detectProductFrames(
frames: ExtractedFrame[],
minConfidence: number,
concurrency: number = 5,
visionConfig: VisionConfig,
): Promise<ProductFrame[]> {
const model = createVisionModel();
const model = createVisionModel(visionConfig);
// Pass 1: parallel filter — discard junk frames
const keepFlags: boolean[] = [];