diff --git a/.env.example b/.env.example index cff5c49..50f0f5f 100644 --- a/.env.example +++ b/.env.example @@ -1,45 +1,29 @@ # ============================================================================= # video-product-snapshot 环境变量配置 -# 复制为 .env 并填入真实值:cp .env.example .env +# ============================================================================= +# +# 只需在 ~/.openclaw/.env 中配置 CLIENT_KEY: +# CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx +# +# Vision API key、图搜接口等均通过 auth-rt client-config 自动获取, +# 无需在此手动填写。 # ============================================================================= # ----------------------------------------------------------------------------- -# Vision API 配置(用于商品帧检测) -# 兼容任何 OpenAI 格式接口:OpenAI / Groq / Together / 本地 Ollama 等 +# 可选覆盖(通常不需要) # ----------------------------------------------------------------------------- -# API Key(必填) -VISION_API_KEY=your-api-key-here - -# API Base URL(可选,留空则使用 OpenAI 官方地址) -# VISION_API_BASE=https://api.groq.com/openai/v1 -# VISION_API_BASE=http://localhost:11434/v1 - -# 模型名称(可选,默认 gpt-4o-mini) +# 覆盖 Vision 模型(默认来自 client config,fallback 为 gpt-4o-mini) # VISION_MODEL=gpt-4o-mini -# VISION_MODEL=meta-llama/llama-4-scout-17b-16e-instruct -# VISION_MODEL=llava:13b -# ----------------------------------------------------------------------------- -# 1688 图搜配置(via woo-data-scrawler 本地服务,端口 3202) -# 所有 Onebound 调用均通过本地服务代理,无需持有 API 密钥 -# ----------------------------------------------------------------------------- +# 覆盖 Vision API base URL(默认来自 client config metadata.provider.base_url) +# VISION_API_BASE=https://your-llm-endpoint/v1 -# 上传图片接口(将本地图片上传到公共存储,获取可访问 URL) -ONEBOUND_UPLOAD_ENDPOINT=http://localhost:3202/api/v1/tasks/upload-image +# 覆盖 Vision API key(默认来自 client config metadata.provider.api_key) +# VISION_API_KEY=sk-... 
-# 以图搜图接口 -ONEBOUND_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/search-by-image +# 覆盖 auth-rt 二进制路径 +# AUTH_RT_BIN=/custom/path/to/auth-rt -# 关键词搜索接口(用于 rerank 二次过滤) -ONEBOUND_KEYWORD_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/keyword-search - -# ----------------------------------------------------------------------------- -# Auth(由 auth-rt 自动处理,配置见 ~/.openclaw/.env) -# 只需在 ~/.openclaw/.env 中设置 CLIENT_KEY=sk_xxx -# ----------------------------------------------------------------------------- - -# ----------------------------------------------------------------------------- -# 遥测(可选)— 上报 skill 执行结果到服务端 Loki,用于调试 -# ----------------------------------------------------------------------------- -# TELEMETRY_ENDPOINT=http://localhost:3202/api/v1/tasks/telemetry +# 遥测上报(可选) +# TELEMETRY_ENDPOINT=https://api-gw-test.yuanwei-lnc.com/ecom/tasks/telemetry diff --git a/README.md b/README.md index a9eb1e1..e83c636 100644 --- a/README.md +++ b/README.md @@ -79,32 +79,23 @@ All commands return JSON to stdout. ## Environment variables -Copy `.env.example` to `.env` and fill in the values. +The only required configuration is `CLIENT_KEY` in `~/.openclaw/.env`: -### Vision (required for `detect`) +``` +CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx +``` -| Variable | Required | Description | -|----------|----------|-------------| -| `VISION_API_KEY` | Yes | API key for the vision model | -| `VISION_API_BASE` | No | OpenAI-compatible base URL (default: OpenAI) | -| `VISION_MODEL` | No | Model name (default: `gpt-4o-mini`) | +Everything else — vision API key, image search endpoints — is fetched automatically from the client config via `auth-rt`. No per-skill env vars needed. 
-### Image search (required for `search` / `detect-and-search`) +### Optional overrides -| Variable | Required | Description | -|----------|----------|-------------| -| `ONEBOUND_UPLOAD_ENDPOINT` | Yes | Endpoint to upload a local image and get a public URL | -| `ONEBOUND_SEARCH_ENDPOINT` | Yes | Reverse image search endpoint | -| `ONEBOUND_KEYWORD_SEARCH_ENDPOINT` | No | Keyword search endpoint for re-ranking results | - -These proxy through a local `woo-data-scrawler` instance — no Onebound API key needed directly. - -### Other - -| Variable | Required | Description | -|----------|----------|-------------| -| `AUTH_RT_BIN` | No | Override path to the `auth-rt` binary | -| `TELEMETRY_ENDPOINT` | No | POST skill execution results to a Loki-compatible endpoint | +| Variable | Description | +|----------|-------------| +| `VISION_MODEL` | Override model name (default: `gpt-4o-mini`) | +| `VISION_API_BASE` | Override vision API base URL | +| `VISION_API_KEY` | Override vision API key | +| `AUTH_RT_BIN` | Override path to the `auth-rt` binary | +| `TELEMETRY_ENDPOINT` | POST execution results to a telemetry endpoint | ## Prerequisites diff --git a/src/auth-cli.ts b/src/auth-cli.ts index d072a88..07fdd09 100644 --- a/src/auth-cli.ts +++ b/src/auth-cli.ts @@ -43,6 +43,20 @@ export interface SessionResponse { hookToken?: string; } +export interface ClientConfig { + clientId: string; + name: string; + status: string; + metadata: { + provider?: { + api_key?: string; + base_url?: string; + model?: string; + }; + [key: string]: unknown; + }; +} + export interface SkillClientOptions { apiBase?: string; dryRun?: boolean; @@ -79,6 +93,13 @@ export class SkillClient { return JSON.parse(runCli('session')); } + async clientConfig(): Promise<ClientConfig> { + if (this.dryRun) { + return { clientId: '', name: '', status: 'active', metadata: {} }; + } + return JSON.parse(runCli('client-config')); + } + async get(urlPath: string): Promise { return this.request('GET', urlPath); } diff --git 
a/src/index.ts b/src/index.ts index ee2722b..be9d94a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,6 +8,23 @@ import { imageToBase64 } from './frame-extractor.ts'; import { generateText } from 'ai'; import { createOpenAI } from '@ai-sdk/openai'; +export interface VisionConfig { + apiKey: string; + baseURL?: string; + model: string; +} + +async function loadVisionConfig(client: ReturnType<typeof createSkillClient>): Promise<VisionConfig> { + const cfg = await client.clientConfig(); + const apiKey = process.env.VISION_API_KEY ?? cfg.metadata?.provider?.api_key; + if (!apiKey) throw new Error('Vision API key not found in client config (metadata.provider.api_key)'); + return { + apiKey, + baseURL: process.env.VISION_API_BASE ?? cfg.metadata?.provider?.base_url, + model: process.env.VISION_MODEL ?? cfg.metadata?.provider?.model ?? 'gpt-4o-mini', + }; +} + export async function run( command: Command, args: string[], @@ -50,8 +67,11 @@ async function runDetect(args: string[], dryRun: boolean): Promise }; } + const client = createSkillClient(); + const visionConfig = await loadVisionConfig(client); + const frames = extractFrames(videoPath, opts.outputDir, opts.intervalSeconds, opts.maxFrames); - const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency); + const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency, visionConfig); return { status: 'success', @@ -64,28 +84,16 @@ }; } -async function uploadImage(imagePath: string): Promise<string> { - const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT; - if (!searchEndpoint) throw new Error('ONEBOUND_SEARCH_ENDPOINT not set'); - - const uploadEndpoint = process.env.ONEBOUND_UPLOAD_ENDPOINT; - if (!uploadEndpoint) throw new Error('ONEBOUND_UPLOAD_ENDPOINT not set'); - +async function uploadImage(client: ReturnType<typeof createSkillClient>, imagePath: string): Promise<string> { const imageBuffer = fs.readFileSync(imagePath); const filename = 
`video-snapshot-${Date.now()}.jpg`; - - const response = await fetch(uploadEndpoint, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - data: imageBuffer.toString('base64'), - filename, - contentType: 'image/jpeg', - }), + const res = await client.post('/ecom/tasks/upload-image', { + data: imageBuffer.toString('base64'), + filename, + contentType: 'image/jpeg', }); - - if (!response.ok) throw new Error(`Upload failed: HTTP ${response.status}`); - const json = await response.json() as { url?: string }; + if (res.status >= 400) throw new Error(`Upload failed: HTTP ${res.status}`); + const json = JSON.parse(res.body) as { url?: string }; if (!json.url) throw new Error('Upload response missing url'); return json.url; } @@ -95,29 +103,22 @@ async function runSearch(args: string[], dryRun: boolean): Promise if (!imagePath) return { status: 'failed', command: 'search', dryRun, error: 'search requires <image-path>' }; if (!fs.existsSync(imagePath)) return { status: 'failed', command: 'search', dryRun, error: `image not found: ${imagePath}` }; - const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT; - if (!searchEndpoint) return { status: 'failed', command: 'search', dryRun, error: 'ONEBOUND_SEARCH_ENDPOINT not set' }; - if (dryRun) { return { status: 'success', command: 'search', dryRun, imagePath, searchHttpStatus: 0, searchBody: null }; } - // If given a local file, upload it first to get a public URL + const client = createSkillClient(); + let imgid = imagePath; if (!imagePath.startsWith('http')) { - imgid = await uploadImage(imagePath); + imgid = await uploadImage(client, imagePath); } - const response = await fetch(searchEndpoint, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ imgid, page: 1 }), - }); + const res = await client.post('/ecom/tasks/search-by-image', { imgid, page: 1 }); + const searchHttpStatus = res.status; + const body = JSON.parse(res.body); - const 
searchHttpStatus = response.status; - const body = await response.json(); - - if (!response.ok) { + if (res.status >= 400) { return { status: 'failed', command: 'search', dryRun, imagePath, searchHttpStatus, error: JSON.stringify(body) }; } @@ -133,23 +134,19 @@ async function runDetectAndSearch(args: string[], dryRun: boolean): Promise { - const model = createVisionModel(); +async function generateChineseKeyword(description: string, visionConfig: VisionConfig): Promise<string> { + const model = createVisionModel(visionConfig); const { text } = await generateText({ model, prompt: `You are generating a 1688.com (Chinese B2B wholesale) product search keyword. @@ -217,16 +210,9 @@ Output only the search query:`, return text.trim().replace(/[^\u4e00-\u9fff\u3400-\u4dbf]/g, '').trim(); } -async function keywordSearch(keyword: string, page = 1): Promise<SearchItem[]> { - const endpoint = process.env.ONEBOUND_KEYWORD_SEARCH_ENDPOINT; - if (!endpoint) throw new Error('ONEBOUND_KEYWORD_SEARCH_ENDPOINT not set'); - - const res = await fetch(endpoint, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ keyword, page }), - }); - const json = await res.json() as any; +async function keywordSearch(client: ReturnType<typeof createSkillClient>, keyword: string, page = 1): Promise<SearchItem[]> { + const res = await client.post('/ecom/tasks/keyword-search', { keyword, page }); + const json = JSON.parse(res.body) as any; return (json?.data?.items?.item ?? 
[]) as SearchItem[]; } @@ -265,7 +251,9 @@ async function runRerank(args: string[], dryRun: boolean): Promise if (dryRun) return { status: 'success', command: 'rerank', dryRun } as any; - // Load image search results + const client = createSkillClient(); + const visionConfig = await loadVisionConfig(client); + let imageItems: SearchItem[]; try { const raw = fs.existsSync(imageResultsArg) @@ -289,7 +277,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise // Prefer product description for accurate translation; fall back to image titles const sourceText = description || keyword || extractKeywordsFromTitles(imageItems); try { - autoGeneratedKeyword = await generateChineseKeyword(sourceText); + autoGeneratedKeyword = await generateChineseKeyword(sourceText, visionConfig); } catch { autoGeneratedKeyword = extractKeywordsFromTitles(imageItems); } @@ -299,7 +287,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise // Keyword search on 1688 let keywordItems: SearchItem[] = []; try { - keywordItems = await keywordSearch(keyword); + keywordItems = await keywordSearch(client, keyword); } catch (e: any) { return { status: 'failed', command: 'rerank', dryRun, error: `keyword search failed: ${e.message}` }; } diff --git a/src/product-detector.ts b/src/product-detector.ts index bd34eb1..6bb673f 100644 --- a/src/product-detector.ts +++ b/src/product-detector.ts @@ -4,6 +4,7 @@ import { z } from 'zod'; import type { ExtractedFrame } from './frame-extractor.ts'; import type { ProductFrame } from './types.ts'; import { imageToBase64 } from './frame-extractor.ts'; +import type { VisionConfig } from './index.ts'; // Pass 1: quick filter — discard frames that clearly have no product const FilterSchema = z.object({ @@ -43,16 +44,9 @@ Return: - reasoning: one sentence explaining why this frame was chosen - boundingBox: tight bounding box of the HERO PRODUCT ONLY in the chosen frame as [x1, y1, x2, y2] normalized 0.0–1.0 (top-left origin). 
Exclude hands, background, and unrelated objects. The product is assumed to be near the center.`; -function createVisionModel() { - const apiKey = process.env.VISION_API_KEY; - if (!apiKey) throw new Error('VISION_API_KEY not set'); - - const provider = createOpenAI({ - apiKey, - baseURL: process.env.VISION_API_BASE, - }); - - return provider(process.env.VISION_MODEL ?? 'gpt-4o-mini'); +function createVisionModel(config: VisionConfig) { + const provider = createOpenAI({ apiKey: config.apiKey, baseURL: config.baseURL }); + return provider(config.model); } async function filterFrame( @@ -145,8 +139,9 @@ export async function detectProductFrames( frames: ExtractedFrame[], minConfidence: number, concurrency: number = 5, + visionConfig: VisionConfig, ): Promise<ProductFrame[]> { - const model = createVisionModel(); + const model = createVisionModel(visionConfig); // Pass 1: parallel filter — discard junk frames const keepFlags: boolean[] = [];