refactor: load vision config and search endpoints from auth client-config, no skill-level envs needed

This commit is contained in:
ywkj 2026-04-20 12:14:43 +08:00
parent fa03962216
commit 23bb268d75
5 changed files with 107 additions and 128 deletions

View File

@ -1,45 +1,29 @@
# =============================================================================
# video-product-snapshot 环境变量配置
# 复制为 .env 并填入真实值:cp .env.example .env
# =============================================================================
#
# 只需在 ~/.openclaw/.env 中配置 CLIENT_KEY
# CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx
#
# Vision API key、图搜接口等均通过 auth-rt client-config 自动获取,
# 无需在此手动填写。
# =============================================================================
# -----------------------------------------------------------------------------
# Vision API 配置(用于商品帧检测)
# 兼容任何 OpenAI 格式接口(OpenAI / Groq / Together / 本地 Ollama 等)
# 可选覆盖(通常不需要)
# -----------------------------------------------------------------------------
# API Key必填
VISION_API_KEY=your-api-key-here
# API Base URL(可选,留空则使用 OpenAI 官方地址)
# VISION_API_BASE=https://api.groq.com/openai/v1
# VISION_API_BASE=http://localhost:11434/v1
# 模型名称(可选,默认 gpt-4o-mini)
# 覆盖 Vision 模型(默认来自 client config,fallback 为 gpt-4o-mini)
# VISION_MODEL=gpt-4o-mini
# VISION_MODEL=meta-llama/llama-4-scout-17b-16e-instruct
# VISION_MODEL=llava:13b
# -----------------------------------------------------------------------------
# 1688 图搜配置(via woo-data-scrawler 本地服务,端口 3202)
# 所有 Onebound 调用均通过本地服务代理,无需持有 API 密钥
# -----------------------------------------------------------------------------
# 覆盖 Vision API base URL(默认来自 client config metadata.provider.base_url)
# VISION_API_BASE=https://your-llm-endpoint/v1
# 上传图片接口(将本地图片上传到公共存储,获取可访问 URL)
ONEBOUND_UPLOAD_ENDPOINT=http://localhost:3202/api/v1/tasks/upload-image
# 覆盖 Vision API key(默认来自 client config metadata.provider.api_key)
# VISION_API_KEY=sk-...
# 以图搜图接口
ONEBOUND_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/search-by-image
# 覆盖 auth-rt 二进制路径
# AUTH_RT_BIN=/custom/path/to/auth-rt
# 关键词搜索接口(用于 rerank 二次过滤)
ONEBOUND_KEYWORD_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/keyword-search
# -----------------------------------------------------------------------------
# Auth由 auth-rt 自动处理,配置见 ~/.openclaw/.env
# 只需在 ~/.openclaw/.env 中设置 CLIENT_KEY=sk_xxx
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# 遥测(可选)— 上报 skill 执行结果到服务端 Loki(用于调试)
# -----------------------------------------------------------------------------
# TELEMETRY_ENDPOINT=http://localhost:3202/api/v1/tasks/telemetry
# 遥测上报(可选)
# TELEMETRY_ENDPOINT=https://api-gw-test.yuanwei-lnc.com/ecom/tasks/telemetry

View File

@ -79,32 +79,23 @@ All commands return JSON to stdout.
## Environment variables
Copy `.env.example` to `.env` and fill in the values.
The only required configuration is `CLIENT_KEY` in `~/.openclaw/.env`:
### Vision (required for `detect`)
```
CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx
```
| Variable | Required | Description |
|----------|----------|-------------|
| `VISION_API_KEY` | Yes | API key for the vision model |
| `VISION_API_BASE` | No | OpenAI-compatible base URL (default: OpenAI) |
| `VISION_MODEL` | No | Model name (default: `gpt-4o-mini`) |
Everything else — vision API key, image search endpoints — is fetched automatically from the client config via `auth-rt`. No per-skill env vars needed.
### Image search (required for `search` / `detect-and-search`)
### Optional overrides
| Variable | Required | Description |
|----------|----------|-------------|
| `ONEBOUND_UPLOAD_ENDPOINT` | Yes | Endpoint to upload a local image and get a public URL |
| `ONEBOUND_SEARCH_ENDPOINT` | Yes | Reverse image search endpoint |
| `ONEBOUND_KEYWORD_SEARCH_ENDPOINT` | No | Keyword search endpoint for re-ranking results |
These proxy through a local `woo-data-scrawler` instance — no Onebound API key needed directly.
### Other
| Variable | Required | Description |
|----------|----------|-------------|
| `AUTH_RT_BIN` | No | Override path to the `auth-rt` binary |
| `TELEMETRY_ENDPOINT` | No | POST skill execution results to a Loki-compatible endpoint |
| Variable | Description |
|----------|-------------|
| `VISION_MODEL` | Override model name (default: `gpt-4o-mini`) |
| `VISION_API_BASE` | Override vision API base URL |
| `VISION_API_KEY` | Override vision API key |
| `AUTH_RT_BIN` | Override path to the `auth-rt` binary |
| `TELEMETRY_ENDPOINT` | POST execution results to a telemetry endpoint |
## Prerequisites

View File

@ -43,6 +43,20 @@ export interface SessionResponse {
hookToken?: string;
}
// Per-client configuration returned by the auth-rt `client-config` CLI
// command. Supplies vision provider credentials and search config so
// individual skills do not need their own env vars.
export interface ClientConfig {
// Unique client identifier assigned by the auth service.
clientId: string;
// Human-readable client name.
name: string;
// Account status string (e.g. 'active'); exact value set is server-defined — TODO confirm.
status: string;
// Free-form metadata bag. `provider` optionally carries OpenAI-compatible
// vision credentials (api_key / base_url / model); other keys are opaque here.
metadata: {
provider?: {
api_key?: string;
base_url?: string;
model?: string;
};
[key: string]: unknown;
};
}
export interface SkillClientOptions {
apiBase?: string;
dryRun?: boolean;
@ -79,6 +93,13 @@ export class SkillClient {
return JSON.parse(runCli('session'));
}
// Fetch this client's configuration via the `auth-rt client-config` CLI.
// In dry-run mode, returns a placeholder config with empty metadata so
// callers can proceed without invoking the CLI.
// NOTE(review): the CLI output is JSON.parse'd without schema validation
// (same pattern as session() above) — assumes auth-rt emits well-formed
// ClientConfig JSON; confirm the upstream contract.
async clientConfig(): Promise<ClientConfig> {
if (this.dryRun) {
return { clientId: '<dry-run>', name: '<dry-run>', status: 'active', metadata: {} };
}
return JSON.parse(runCli('client-config'));
}
async get(urlPath: string): Promise<ApiResponse> {
return this.request('GET', urlPath);
}

View File

@ -8,6 +8,23 @@ import { imageToBase64 } from './frame-extractor.ts';
import { generateText } from 'ai';
import { createOpenAI } from '@ai-sdk/openai';
// Resolved configuration for the OpenAI-compatible vision model.
export interface VisionConfig {
// API key for the vision endpoint (required).
apiKey: string;
// Base URL of an OpenAI-compatible endpoint; undefined = provider default.
baseURL?: string;
// Model name passed to the provider (e.g. 'gpt-4o-mini').
model: string;
}
// Resolve the vision model configuration for this client.
//
// Precedence (per .env.example / README "Optional overrides"): local env
// vars OVERRIDE values fetched from the auth client config; the client
// config is the normal source and env vars are escape hatches.
//
// @param client skill client used to fetch the auth-rt client config
// @returns resolved VisionConfig
// @throws when no API key is available from either env or client config
async function loadVisionConfig(client: ReturnType<typeof createSkillClient>): Promise<VisionConfig> {
const cfg = await client.clientConfig();
const provider = cfg.metadata?.provider;
// BUG FIX: env vars are documented as overrides, so they must win over the
// client config. Previously api_key/base_url were resolved config-first
// (env could never override them), contradicting both the docs and the
// env-first handling of VISION_MODEL below.
const apiKey = process.env.VISION_API_KEY ?? provider?.api_key;
if (!apiKey) throw new Error('Vision API key not found in client config (metadata.provider.api_key)');
return {
apiKey,
baseURL: process.env.VISION_API_BASE ?? provider?.base_url,
model: process.env.VISION_MODEL ?? provider?.model ?? 'gpt-4o-mini',
};
}
export async function run(
command: Command,
args: string[],
@ -50,8 +67,11 @@ async function runDetect(args: string[], dryRun: boolean): Promise<DetectResult>
};
}
const client = createSkillClient();
const visionConfig = await loadVisionConfig(client);
const frames = extractFrames(videoPath, opts.outputDir, opts.intervalSeconds, opts.maxFrames);
const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency);
const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency, visionConfig);
return {
status: 'success',
@ -64,28 +84,16 @@ async function runDetect(args: string[], dryRun: boolean): Promise<DetectResult>
};
}
async function uploadImage(imagePath: string): Promise<string> {
const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT;
if (!searchEndpoint) throw new Error('ONEBOUND_SEARCH_ENDPOINT not set');
const uploadEndpoint = process.env.ONEBOUND_UPLOAD_ENDPOINT;
if (!uploadEndpoint) throw new Error('ONEBOUND_UPLOAD_ENDPOINT not set');
async function uploadImage(client: ReturnType<typeof createSkillClient>, imagePath: string): Promise<string> {
const imageBuffer = fs.readFileSync(imagePath);
const filename = `video-snapshot-${Date.now()}.jpg`;
const response = await fetch(uploadEndpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
const res = await client.post('/ecom/tasks/upload-image', {
data: imageBuffer.toString('base64'),
filename,
contentType: 'image/jpeg',
}),
});
if (!response.ok) throw new Error(`Upload failed: HTTP ${response.status}`);
const json = await response.json() as { url?: string };
if (res.status >= 400) throw new Error(`Upload failed: HTTP ${res.status}`);
const json = JSON.parse(res.body) as { url?: string };
if (!json.url) throw new Error('Upload response missing url');
return json.url;
}
@ -95,29 +103,22 @@ async function runSearch(args: string[], dryRun: boolean): Promise<SearchResult>
if (!imagePath) return { status: 'failed', command: 'search', dryRun, error: 'search requires <image-path>' };
if (!fs.existsSync(imagePath)) return { status: 'failed', command: 'search', dryRun, error: `image not found: ${imagePath}` };
const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT;
if (!searchEndpoint) return { status: 'failed', command: 'search', dryRun, error: 'ONEBOUND_SEARCH_ENDPOINT not set' };
if (dryRun) {
return { status: 'success', command: 'search', dryRun, imagePath, searchHttpStatus: 0, searchBody: null };
}
// If given a local file, upload it first to get a public URL
const client = createSkillClient();
let imgid = imagePath;
if (!imagePath.startsWith('http')) {
imgid = await uploadImage(imagePath);
imgid = await uploadImage(client, imagePath);
}
const response = await fetch(searchEndpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ imgid, page: 1 }),
});
const res = await client.post('/ecom/tasks/search-by-image', { imgid, page: 1 });
const searchHttpStatus = res.status;
const body = JSON.parse(res.body);
const searchHttpStatus = response.status;
const body = await response.json();
if (!response.ok) {
if (res.status >= 400) {
return { status: 'failed', command: 'search', dryRun, imagePath, searchHttpStatus, error: JSON.stringify(body) };
}
@ -133,23 +134,19 @@ async function runDetectAndSearch(args: string[], dryRun: boolean): Promise<Outp
}
const best = detectResult.bestSnapshot;
// Use cropped image if available, otherwise full frame
const imageForSearch = best.croppedImagePath || best.imagePath;
const searchResult = await runSearch([imageForSearch], dryRun) as SearchResult;
// Auto-rerank using product description to generate Chinese keyword
let rerankResult: any = undefined;
if (!dryRun && searchResult.status === 'success' && searchResult.searchBody) {
// Save search body to temp file for rerank
const tmpFile = path.join(path.dirname(imageForSearch), `search_body_${Date.now()}.json`);
try {
fs.writeFileSync(tmpFile, JSON.stringify(searchResult.searchBody));
const rerankArgs = [
rerankResult = await runRerank([
`--image-results=${tmpFile}`,
`--description=${best.description}`,
'--top=10',
];
rerankResult = await runRerank(rerankArgs, dryRun);
], dryRun);
} catch (e: any) {
rerankResult = { error: e.message };
} finally {
@ -190,17 +187,13 @@ function getFlag(args: string[], flag: string): string | undefined {
return undefined;
}
function createVisionModel() {
const apiKey = process.env.VISION_API_KEY;
if (!apiKey) throw new Error('VISION_API_KEY not set');
const baseURL = process.env.VISION_API_BASE || undefined;
const modelName = process.env.VISION_MODEL || 'gpt-4o-mini';
const openai = createOpenAI({ apiKey, baseURL });
return openai(modelName);
// Build an AI SDK model handle for the configured OpenAI-compatible
// vision provider (key/URL/model already resolved by loadVisionConfig).
function createVisionModel(config: VisionConfig) {
const { apiKey, baseURL, model } = config;
return createOpenAI({ apiKey, baseURL })(model);
}
async function generateChineseKeyword(description: string): Promise<string> {
const model = createVisionModel();
async function generateChineseKeyword(description: string, visionConfig: VisionConfig): Promise<string> {
const model = createVisionModel(visionConfig);
const { text } = await generateText({
model,
prompt: `You are generating a 1688.com (Chinese B2B wholesale) product search keyword.
@ -217,16 +210,9 @@ Output only the search query:`,
return text.trim().replace(/[^\u4e00-\u9fff\u3400-\u4dbf]/g, '').trim();
}
async function keywordSearch(keyword: string, page = 1): Promise<SearchItem[]> {
const endpoint = process.env.ONEBOUND_KEYWORD_SEARCH_ENDPOINT;
if (!endpoint) throw new Error('ONEBOUND_KEYWORD_SEARCH_ENDPOINT not set');
const res = await fetch(endpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ keyword, page }),
});
const json = await res.json() as any;
// Keyword search on 1688 via the server-side proxy endpoint.
//
// @param client  authenticated skill client (auth-rt backed)
// @param keyword search keyword (Chinese, for 1688)
// @param page    1-based result page, defaults to 1
// @returns the item list, or [] when the response carries no items
// @throws on HTTP error responses so the caller's try/catch surfaces the
//         failure (consistent with uploadImage's status handling)
async function keywordSearch(client: ReturnType<typeof createSkillClient>, keyword: string, page = 1): Promise<SearchItem[]> {
const res = await client.post('/ecom/tasks/keyword-search', { keyword, page });
// Fail loudly on HTTP errors instead of silently returning [] —
// previously a 4xx/5xx JSON error body was treated as "no results".
if (res.status >= 400) throw new Error(`Keyword search failed: HTTP ${res.status}`);
// Minimal structural type instead of `as any`: only the path we read.
const json = JSON.parse(res.body) as { data?: { items?: { item?: SearchItem[] } } };
return json?.data?.items?.item ?? [];
}
@ -265,7 +251,9 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
if (dryRun) return { status: 'success', command: 'rerank', dryRun } as any;
// Load image search results
const client = createSkillClient();
const visionConfig = await loadVisionConfig(client);
let imageItems: SearchItem[];
try {
const raw = fs.existsSync(imageResultsArg)
@ -289,7 +277,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
// Prefer product description for accurate translation; fall back to image titles
const sourceText = description || keyword || extractKeywordsFromTitles(imageItems);
try {
autoGeneratedKeyword = await generateChineseKeyword(sourceText);
autoGeneratedKeyword = await generateChineseKeyword(sourceText, visionConfig);
} catch {
autoGeneratedKeyword = extractKeywordsFromTitles(imageItems);
}
@ -299,7 +287,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
// Keyword search on 1688
let keywordItems: SearchItem[] = [];
try {
keywordItems = await keywordSearch(keyword);
keywordItems = await keywordSearch(client, keyword);
} catch (e: any) {
return { status: 'failed', command: 'rerank', dryRun, error: `keyword search failed: ${e.message}` };
}

View File

@ -4,6 +4,7 @@ import { z } from 'zod';
import type { ExtractedFrame } from './frame-extractor.ts';
import type { ProductFrame } from './types.ts';
import { imageToBase64 } from './frame-extractor.ts';
import type { VisionConfig } from './index.ts';
// Pass 1: quick filter — discard frames that clearly have no product
const FilterSchema = z.object({
@ -43,16 +44,9 @@ Return:
- reasoning: one sentence explaining why this frame was chosen
- boundingBox: tight bounding box of the HERO PRODUCT ONLY in the chosen frame as [x1, y1, x2, y2] normalized 0.01.0 (top-left origin). Exclude hands, background, and unrelated objects. The product is assumed to be near the center.`;
function createVisionModel() {
const apiKey = process.env.VISION_API_KEY;
if (!apiKey) throw new Error('VISION_API_KEY not set');
const provider = createOpenAI({
apiKey,
baseURL: process.env.VISION_API_BASE,
});
return provider(process.env.VISION_MODEL ?? 'gpt-4o-mini');
// Construct the two-pass detector's vision model from the resolved
// VisionConfig (OpenAI-compatible provider; resolved by the caller).
function createVisionModel(config: VisionConfig) {
const { apiKey, baseURL, model } = config;
return createOpenAI({ apiKey, baseURL })(model);
}
async function filterFrame(
@ -145,8 +139,9 @@ export async function detectProductFrames(
frames: ExtractedFrame[],
minConfidence: number,
concurrency: number = 5,
visionConfig: VisionConfig,
): Promise<ProductFrame[]> {
const model = createVisionModel();
const model = createVisionModel(visionConfig);
// Pass 1: parallel filter — discard junk frames
const keepFlags: boolean[] = [];