refactor: load vision config and search endpoints from auth client-config, no skill-level envs needed
This commit is contained in:
parent
fa03962216
commit
23bb268d75
50
.env.example
50
.env.example
|
|
@ -1,45 +1,29 @@
|
|||
# =============================================================================
|
||||
# video-product-snapshot 环境变量配置
|
||||
# 复制为 .env 并填入真实值:cp .env.example .env
|
||||
# =============================================================================
|
||||
#
|
||||
# 只需在 ~/.openclaw/.env 中配置 CLIENT_KEY:
|
||||
# CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx
|
||||
#
|
||||
# Vision API key、图搜接口等均通过 auth-rt client-config 自动获取,
|
||||
# 无需在此手动填写。
|
||||
# =============================================================================
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Vision API 配置(用于商品帧检测)
|
||||
# 兼容任何 OpenAI 格式接口:OpenAI / Groq / Together / 本地 Ollama 等
|
||||
# 可选覆盖(通常不需要)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# API Key(必填)
|
||||
VISION_API_KEY=your-api-key-here
|
||||
|
||||
# API Base URL(可选,留空则使用 OpenAI 官方地址)
|
||||
# VISION_API_BASE=https://api.groq.com/openai/v1
|
||||
# VISION_API_BASE=http://localhost:11434/v1
|
||||
|
||||
# 模型名称(可选,默认 gpt-4o-mini)
|
||||
# 覆盖 Vision 模型(默认来自 client config,fallback 为 gpt-4o-mini)
|
||||
# VISION_MODEL=gpt-4o-mini
|
||||
# VISION_MODEL=meta-llama/llama-4-scout-17b-16e-instruct
|
||||
# VISION_MODEL=llava:13b
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 1688 图搜配置(via woo-data-scrawler 本地服务,端口 3202)
|
||||
# 所有 Onebound 调用均通过本地服务代理,无需持有 API 密钥
|
||||
# -----------------------------------------------------------------------------
|
||||
# 覆盖 Vision API base URL(默认来自 client config metadata.provider.base_url)
|
||||
# VISION_API_BASE=https://your-llm-endpoint/v1
|
||||
|
||||
# 上传图片接口(将本地图片上传到公共存储,获取可访问 URL)
|
||||
ONEBOUND_UPLOAD_ENDPOINT=http://localhost:3202/api/v1/tasks/upload-image
|
||||
# 覆盖 Vision API key(默认来自 client config metadata.provider.api_key)
|
||||
# VISION_API_KEY=sk-...
|
||||
|
||||
# 以图搜图接口
|
||||
ONEBOUND_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/search-by-image
|
||||
# 覆盖 auth-rt 二进制路径
|
||||
# AUTH_RT_BIN=/custom/path/to/auth-rt
|
||||
|
||||
# 关键词搜索接口(用于 rerank 二次过滤)
|
||||
ONEBOUND_KEYWORD_SEARCH_ENDPOINT=http://localhost:3202/api/v1/tasks/keyword-search
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Auth(由 auth-rt 自动处理,配置见 ~/.openclaw/.env)
|
||||
# 只需在 ~/.openclaw/.env 中设置 CLIENT_KEY=sk_xxx
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 遥测(可选)— 上报 skill 执行结果到服务端 Loki,用于调试
|
||||
# -----------------------------------------------------------------------------
|
||||
# TELEMETRY_ENDPOINT=http://localhost:3202/api/v1/tasks/telemetry
|
||||
# 遥测上报(可选)
|
||||
# TELEMETRY_ENDPOINT=https://api-gw-test.yuanwei-lnc.com/ecom/tasks/telemetry
|
||||
|
|
|
|||
35
README.md
35
README.md
|
|
@ -79,32 +79,23 @@ All commands return JSON to stdout.
|
|||
|
||||
## Environment variables
|
||||
|
||||
Copy `.env.example` to `.env` and fill in the values.
|
||||
The only required configuration is `CLIENT_KEY` in `~/.openclaw/.env`:
|
||||
|
||||
### Vision (required for `detect`)
|
||||
```
|
||||
CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx
|
||||
```
|
||||
|
||||
| Variable | Required | Description |
|
||||
|----------|----------|-------------|
|
||||
| `VISION_API_KEY` | Yes | API key for the vision model |
|
||||
| `VISION_API_BASE` | No | OpenAI-compatible base URL (default: OpenAI) |
|
||||
| `VISION_MODEL` | No | Model name (default: `gpt-4o-mini`) |
|
||||
Everything else — vision API key, image search endpoints — is fetched automatically from the client config via `auth-rt`. No per-skill env vars needed.
|
||||
|
||||
### Image search (required for `search` / `detect-and-search`)
|
||||
### Optional overrides
|
||||
|
||||
| Variable | Required | Description |
|
||||
|----------|----------|-------------|
|
||||
| `ONEBOUND_UPLOAD_ENDPOINT` | Yes | Endpoint to upload a local image and get a public URL |
|
||||
| `ONEBOUND_SEARCH_ENDPOINT` | Yes | Reverse image search endpoint |
|
||||
| `ONEBOUND_KEYWORD_SEARCH_ENDPOINT` | No | Keyword search endpoint for re-ranking results |
|
||||
|
||||
These proxy through a local `woo-data-scrawler` instance — no Onebound API key needed directly.
|
||||
|
||||
### Other
|
||||
|
||||
| Variable | Required | Description |
|
||||
|----------|----------|-------------|
|
||||
| `AUTH_RT_BIN` | No | Override path to the `auth-rt` binary |
|
||||
| `TELEMETRY_ENDPOINT` | No | POST skill execution results to a Loki-compatible endpoint |
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `VISION_MODEL` | Override model name (default: `metadata.provider.model` from the client config, falling back to `gpt-4o-mini`) |
|
||||
| `VISION_API_BASE` | Override vision API base URL |
|
||||
| `VISION_API_KEY` | Override vision API key |
|
||||
| `AUTH_RT_BIN` | Override path to the `auth-rt` binary |
|
||||
| `TELEMETRY_ENDPOINT` | POST execution results to a telemetry endpoint |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
|
|
|
|||
|
|
@ -43,6 +43,20 @@ export interface SessionResponse {
|
|||
hookToken?: string;
|
||||
}
|
||||
|
||||
/**
 * Client configuration record, as returned by `SkillClient.clientConfig()`.
 *
 * `metadata` is an open-ended bag; only the optional `provider` entry is
 * typed here, every other key is `unknown` and must be narrowed before use.
 */
export interface ClientConfig {
|
||||
clientId: string;
|
||||
name: string;
|
||||
status: string;
|
||||
metadata: {
|
||||
/** Optional model-provider settings (API key, base URL, model name); every field may be absent. */
provider?: {
|
||||
api_key?: string;
|
||||
base_url?: string;
|
||||
model?: string;
|
||||
};
|
||||
[key: string]: unknown;
|
||||
};
|
||||
}
|
||||
|
||||
export interface SkillClientOptions {
|
||||
apiBase?: string;
|
||||
dryRun?: boolean;
|
||||
|
|
@ -79,6 +93,13 @@ export class SkillClient {
|
|||
return JSON.parse(runCli('session'));
|
||||
}
|
||||
|
||||
/**
 * Fetch the client configuration.
 *
 * In dry-run mode no CLI call is made and a placeholder config with empty
 * `metadata` is returned. Otherwise the output of `runCli('client-config')`
 * is parsed as JSON and returned as-is.
 *
 * NOTE(review): the parsed value is not validated against `ClientConfig`;
 * `JSON.parse` will throw if the CLI output is not valid JSON.
 */
async clientConfig(): Promise<ClientConfig> {
|
||||
if (this.dryRun) {
|
||||
return { clientId: '<dry-run>', name: '<dry-run>', status: 'active', metadata: {} };
|
||||
}
|
||||
return JSON.parse(runCli('client-config'));
|
||||
}
|
||||
|
||||
async get(urlPath: string): Promise<ApiResponse> {
|
||||
return this.request('GET', urlPath);
|
||||
}
|
||||
|
|
|
|||
106
src/index.ts
106
src/index.ts
|
|
@ -8,6 +8,23 @@ import { imageToBase64 } from './frame-extractor.ts';
|
|||
import { generateText } from 'ai';
|
||||
import { createOpenAI } from '@ai-sdk/openai';
|
||||
|
||||
/**
 * Resolved settings for the vision model.
 *
 * `apiKey` and `baseURL` are handed to `createOpenAI`, and `model` is the
 * model name passed to the resulting provider. `baseURL` may be omitted.
 */
export interface VisionConfig {
|
||||
apiKey: string;
|
||||
baseURL?: string;
|
||||
model: string;
|
||||
}
|
||||
|
||||
/**
 * Resolve the vision model settings for the current run.
 *
 * Each field is resolved with the same precedence:
 *   1. the environment override (`VISION_API_KEY`, `VISION_API_BASE`, `VISION_MODEL`),
 *   2. the `metadata.provider` entry of the client config,
 *   3. for the model only, the built-in default `gpt-4o-mini`.
 *
 * This matches how the variables are documented (as optional overrides of the
 * client config) and how `VISION_MODEL` was already handled.
 *
 * @param client Skill client used to fetch the client config.
 * @returns The resolved API key, optional base URL, and model name.
 * @throws Error when neither the environment nor the client config supplies an API key.
 */
async function loadVisionConfig(client: ReturnType<typeof createSkillClient>): Promise<VisionConfig> {
  const cfg = await client.clientConfig();
  const provider = cfg.metadata?.provider;

  // `||` rather than `??` on purpose: a blank value (e.g. `VISION_API_KEY=` left
  // in a .env file) means "not set" and must not mask the client-config value.
  const apiKey = process.env.VISION_API_KEY || provider?.api_key;
  if (!apiKey) {
    throw new Error(
      'Vision API key not found in client config (metadata.provider.api_key) or VISION_API_KEY',
    );
  }

  return {
    apiKey,
    baseURL: process.env.VISION_API_BASE || provider?.base_url || undefined,
    model: process.env.VISION_MODEL || provider?.model || 'gpt-4o-mini',
  };
}
|
||||
|
||||
export async function run(
|
||||
command: Command,
|
||||
args: string[],
|
||||
|
|
@ -50,8 +67,11 @@ async function runDetect(args: string[], dryRun: boolean): Promise<DetectResult>
|
|||
};
|
||||
}
|
||||
|
||||
const client = createSkillClient();
|
||||
const visionConfig = await loadVisionConfig(client);
|
||||
|
||||
const frames = extractFrames(videoPath, opts.outputDir, opts.intervalSeconds, opts.maxFrames);
|
||||
const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency);
|
||||
const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency, visionConfig);
|
||||
|
||||
return {
|
||||
status: 'success',
|
||||
|
|
@ -64,28 +84,16 @@ async function runDetect(args: string[], dryRun: boolean): Promise<DetectResult>
|
|||
};
|
||||
}
|
||||
|
||||
async function uploadImage(imagePath: string): Promise<string> {
|
||||
const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT;
|
||||
if (!searchEndpoint) throw new Error('ONEBOUND_SEARCH_ENDPOINT not set');
|
||||
|
||||
const uploadEndpoint = process.env.ONEBOUND_UPLOAD_ENDPOINT;
|
||||
if (!uploadEndpoint) throw new Error('ONEBOUND_UPLOAD_ENDPOINT not set');
|
||||
|
||||
async function uploadImage(client: ReturnType<typeof createSkillClient>, imagePath: string): Promise<string> {
|
||||
const imageBuffer = fs.readFileSync(imagePath);
|
||||
const filename = `video-snapshot-${Date.now()}.jpg`;
|
||||
|
||||
const response = await fetch(uploadEndpoint, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
const res = await client.post('/ecom/tasks/upload-image', {
|
||||
data: imageBuffer.toString('base64'),
|
||||
filename,
|
||||
contentType: 'image/jpeg',
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) throw new Error(`Upload failed: HTTP ${response.status}`);
|
||||
const json = await response.json() as { url?: string };
|
||||
if (res.status >= 400) throw new Error(`Upload failed: HTTP ${res.status}`);
|
||||
const json = JSON.parse(res.body) as { url?: string };
|
||||
if (!json.url) throw new Error('Upload response missing url');
|
||||
return json.url;
|
||||
}
|
||||
|
|
@ -95,29 +103,22 @@ async function runSearch(args: string[], dryRun: boolean): Promise<SearchResult>
|
|||
if (!imagePath) return { status: 'failed', command: 'search', dryRun, error: 'search requires <image-path>' };
|
||||
if (!fs.existsSync(imagePath)) return { status: 'failed', command: 'search', dryRun, error: `image not found: ${imagePath}` };
|
||||
|
||||
const searchEndpoint = process.env.ONEBOUND_SEARCH_ENDPOINT;
|
||||
if (!searchEndpoint) return { status: 'failed', command: 'search', dryRun, error: 'ONEBOUND_SEARCH_ENDPOINT not set' };
|
||||
|
||||
if (dryRun) {
|
||||
return { status: 'success', command: 'search', dryRun, imagePath, searchHttpStatus: 0, searchBody: null };
|
||||
}
|
||||
|
||||
// If given a local file, upload it first to get a public URL
|
||||
const client = createSkillClient();
|
||||
|
||||
let imgid = imagePath;
|
||||
if (!imagePath.startsWith('http')) {
|
||||
imgid = await uploadImage(imagePath);
|
||||
imgid = await uploadImage(client, imagePath);
|
||||
}
|
||||
|
||||
const response = await fetch(searchEndpoint, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ imgid, page: 1 }),
|
||||
});
|
||||
const res = await client.post('/ecom/tasks/search-by-image', { imgid, page: 1 });
|
||||
const searchHttpStatus = res.status;
|
||||
const body = JSON.parse(res.body);
|
||||
|
||||
const searchHttpStatus = response.status;
|
||||
const body = await response.json();
|
||||
|
||||
if (!response.ok) {
|
||||
if (res.status >= 400) {
|
||||
return { status: 'failed', command: 'search', dryRun, imagePath, searchHttpStatus, error: JSON.stringify(body) };
|
||||
}
|
||||
|
||||
|
|
@ -133,23 +134,19 @@ async function runDetectAndSearch(args: string[], dryRun: boolean): Promise<Outp
|
|||
}
|
||||
|
||||
const best = detectResult.bestSnapshot;
|
||||
// Use cropped image if available, otherwise full frame
|
||||
const imageForSearch = best.croppedImagePath || best.imagePath;
|
||||
const searchResult = await runSearch([imageForSearch], dryRun) as SearchResult;
|
||||
|
||||
// Auto-rerank using product description to generate Chinese keyword
|
||||
let rerankResult: any = undefined;
|
||||
if (!dryRun && searchResult.status === 'success' && searchResult.searchBody) {
|
||||
// Save search body to temp file for rerank
|
||||
const tmpFile = path.join(path.dirname(imageForSearch), `search_body_${Date.now()}.json`);
|
||||
try {
|
||||
fs.writeFileSync(tmpFile, JSON.stringify(searchResult.searchBody));
|
||||
const rerankArgs = [
|
||||
rerankResult = await runRerank([
|
||||
`--image-results=${tmpFile}`,
|
||||
`--description=${best.description}`,
|
||||
'--top=10',
|
||||
];
|
||||
rerankResult = await runRerank(rerankArgs, dryRun);
|
||||
], dryRun);
|
||||
} catch (e: any) {
|
||||
rerankResult = { error: e.message };
|
||||
} finally {
|
||||
|
|
@ -190,17 +187,13 @@ function getFlag(args: string[], flag: string): string | undefined {
|
|||
return undefined;
|
||||
}
|
||||
|
||||
function createVisionModel() {
|
||||
const apiKey = process.env.VISION_API_KEY;
|
||||
if (!apiKey) throw new Error('VISION_API_KEY not set');
|
||||
const baseURL = process.env.VISION_API_BASE || undefined;
|
||||
const modelName = process.env.VISION_MODEL || 'gpt-4o-mini';
|
||||
const openai = createOpenAI({ apiKey, baseURL });
|
||||
return openai(modelName);
|
||||
function createVisionModel(config: VisionConfig) {
|
||||
const openai = createOpenAI({ apiKey: config.apiKey, baseURL: config.baseURL });
|
||||
return openai(config.model);
|
||||
}
|
||||
|
||||
async function generateChineseKeyword(description: string): Promise<string> {
|
||||
const model = createVisionModel();
|
||||
async function generateChineseKeyword(description: string, visionConfig: VisionConfig): Promise<string> {
|
||||
const model = createVisionModel(visionConfig);
|
||||
const { text } = await generateText({
|
||||
model,
|
||||
prompt: `You are generating a 1688.com (Chinese B2B wholesale) product search keyword.
|
||||
|
|
@ -217,16 +210,9 @@ Output only the search query:`,
|
|||
return text.trim().replace(/[^\u4e00-\u9fff\u3400-\u4dbf]/g, '').trim();
|
||||
}
|
||||
|
||||
async function keywordSearch(keyword: string, page = 1): Promise<SearchItem[]> {
|
||||
const endpoint = process.env.ONEBOUND_KEYWORD_SEARCH_ENDPOINT;
|
||||
if (!endpoint) throw new Error('ONEBOUND_KEYWORD_SEARCH_ENDPOINT not set');
|
||||
|
||||
const res = await fetch(endpoint, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ keyword, page }),
|
||||
});
|
||||
const json = await res.json() as any;
|
||||
async function keywordSearch(client: ReturnType<typeof createSkillClient>, keyword: string, page = 1): Promise<SearchItem[]> {
|
||||
const res = await client.post('/ecom/tasks/keyword-search', { keyword, page });
|
||||
const json = JSON.parse(res.body) as any;
|
||||
return (json?.data?.items?.item ?? []) as SearchItem[];
|
||||
}
|
||||
|
||||
|
|
@ -265,7 +251,9 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
|
|||
|
||||
if (dryRun) return { status: 'success', command: 'rerank', dryRun } as any;
|
||||
|
||||
// Load image search results
|
||||
const client = createSkillClient();
|
||||
const visionConfig = await loadVisionConfig(client);
|
||||
|
||||
let imageItems: SearchItem[];
|
||||
try {
|
||||
const raw = fs.existsSync(imageResultsArg)
|
||||
|
|
@ -289,7 +277,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
|
|||
// Prefer product description for accurate translation; fall back to image titles
|
||||
const sourceText = description || keyword || extractKeywordsFromTitles(imageItems);
|
||||
try {
|
||||
autoGeneratedKeyword = await generateChineseKeyword(sourceText);
|
||||
autoGeneratedKeyword = await generateChineseKeyword(sourceText, visionConfig);
|
||||
} catch {
|
||||
autoGeneratedKeyword = extractKeywordsFromTitles(imageItems);
|
||||
}
|
||||
|
|
@ -299,7 +287,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
|
|||
// Keyword search on 1688
|
||||
let keywordItems: SearchItem[] = [];
|
||||
try {
|
||||
keywordItems = await keywordSearch(keyword);
|
||||
keywordItems = await keywordSearch(client, keyword);
|
||||
} catch (e: any) {
|
||||
return { status: 'failed', command: 'rerank', dryRun, error: `keyword search failed: ${e.message}` };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { z } from 'zod';
|
|||
import type { ExtractedFrame } from './frame-extractor.ts';
|
||||
import type { ProductFrame } from './types.ts';
|
||||
import { imageToBase64 } from './frame-extractor.ts';
|
||||
import type { VisionConfig } from './index.ts';
|
||||
|
||||
// Pass 1: quick filter — discard frames that clearly have no product
|
||||
const FilterSchema = z.object({
|
||||
|
|
@ -43,16 +44,9 @@ Return:
|
|||
- reasoning: one sentence explaining why this frame was chosen
|
||||
- boundingBox: tight bounding box of the HERO PRODUCT ONLY in the chosen frame as [x1, y1, x2, y2] normalized 0.0–1.0 (top-left origin). Exclude hands, background, and unrelated objects. The product is assumed to be near the center.`;
|
||||
|
||||
function createVisionModel() {
|
||||
const apiKey = process.env.VISION_API_KEY;
|
||||
if (!apiKey) throw new Error('VISION_API_KEY not set');
|
||||
|
||||
const provider = createOpenAI({
|
||||
apiKey,
|
||||
baseURL: process.env.VISION_API_BASE,
|
||||
});
|
||||
|
||||
return provider(process.env.VISION_MODEL ?? 'gpt-4o-mini');
|
||||
function createVisionModel(config: VisionConfig) {
|
||||
const provider = createOpenAI({ apiKey: config.apiKey, baseURL: config.baseURL });
|
||||
return provider(config.model);
|
||||
}
|
||||
|
||||
async function filterFrame(
|
||||
|
|
@ -145,8 +139,9 @@ export async function detectProductFrames(
|
|||
frames: ExtractedFrame[],
|
||||
minConfidence: number,
|
||||
concurrency: number = 5,
|
||||
visionConfig: VisionConfig,
|
||||
): Promise<ProductFrame[]> {
|
||||
const model = createVisionModel();
|
||||
const model = createVisionModel(visionConfig);
|
||||
|
||||
// Pass 1: parallel filter — discard junk frames
|
||||
const keepFlags: boolean[] = [];
|
||||
|
|
|
|||
Loading…
Reference in New Issue