import * as fs from 'fs';
import * as path from 'path';
import type { Command, DetectOptions, DetectResult, SearchResult, OutputResult, SearchItem } from './types.ts';
import { createSkillClient } from './auth-cli.ts';
import { extractFrames } from './frame-extractor.ts';
import { detectProductFrames } from './product-detector.ts';
import { imageToBase64 } from './frame-extractor.ts';
import { generateText } from 'ai';
import { createOpenAI } from '@ai-sdk/openai';

/** Connection settings for the OpenAI-compatible vision model. */
export interface VisionConfig {
  apiKey: string;
  baseURL?: string;
  model: string;
}

/**
 * Builds the vision-model configuration from the client config, with
 * environment variables as a secondary source.
 *
 * Precedence as written: the API key and base URL prefer the client config
 * (`metadata.provider.*`) over `VISION_API_KEY` / `VISION_API_BASE`, whereas
 * the model name prefers `VISION_MODEL` over the client config.
 * NOTE(review): the differing precedence is kept as-is; confirm it is intended.
 *
 * @throws Error when no API key is available from either source.
 */
async function loadVisionConfig(client: ReturnType<typeof createSkillClient>): Promise<VisionConfig> {
  const cfg = await client.clientConfig();
  const provider = cfg.metadata?.provider;
  const apiKey = provider?.api_key ?? process.env.VISION_API_KEY;
  if (!apiKey) throw new Error('Vision API key not found in client config (metadata.provider.api_key)');
  return {
    apiKey,
    baseURL: provider?.base_url ?? process.env.VISION_API_BASE,
    model: process.env.VISION_MODEL ?? provider?.model ?? 'aliyun-cp-multimodal',
  };
}

/**
 * Dispatches a CLI command to its handler.
 *
 * @param command Command name to execute.
 * @param args    Remaining command-line arguments (positional values and `--flag=value` pairs).
 * @param dryRun  When true, handlers validate input but skip remote work.
 * @returns The handler's result, or a `failed` result for an unrecognised command.
 */
export async function run(
  command: Command,
  args: string[],
  dryRun: boolean,
): Promise<OutputResult> {
  switch (command) {
    case 'session':
      return runSession(dryRun);
    case 'detect':
      return runDetect(args, dryRun);
    case 'search':
      return runSearch(args, dryRun);
    case 'detect-and-search':
      return runDetectAndSearch(args, dryRun);
    case 'rerank':
      return runRerank(args, dryRun);
    default:
      return { status: 'failed', command, dryRun, error: `unknown command: ${command}` };
  }
}

/** Fetches the current client session and merges its fields into a success result. */
async function runSession(dryRun: boolean): Promise<OutputResult> {
  const client = createSkillClient({ dryRun });
  const session = await client.session();
  // The session fields are spread last, exactly as before, so they may override the envelope keys.
  return { status: 'success', command: 'session', dryRun, ...session } as any;
}

/**
 * Extracts frames from a video and runs product detection on them.
 *
 * @param args `args[0]` is the video path; optional `--interval=`, `--max-frames=`,
 *             `--output-dir=`, `--min-confidence=` and `--concurrency=` flags tune the run.
 * @returns A `failed` result when the path is missing or does not exist. In dry-run mode
 *          an empty success result is returned without touching the video.
 */
async function runDetect(args: string[], dryRun: boolean): Promise<OutputResult> {
  const videoPath = args[0];
  if (!videoPath) {
    return { status: 'failed', command: 'detect', dryRun, error: 'detect requires <video-path>' };
  }
  if (!fs.existsSync(videoPath)) {
    return { status: 'failed', command: 'detect', dryRun, error: `video not found: ${videoPath}` };
  }

  const opts = parseDetectOptions(videoPath, args);

  if (dryRun) {
    return {
      status: 'success',
      command: 'detect',
      dryRun,
      videoPath,
      totalFramesExtracted: 0,
      productFrames: [],
      bestSnapshot: undefined,
    };
  }

  const client = createSkillClient();
  const visionConfig = await loadVisionConfig(client);
  const frames = extractFrames(videoPath, opts.outputDir, opts.intervalSeconds, opts.maxFrames);
  const productFrames = await detectProductFrames(frames, opts.minConfidence, opts.concurrency, visionConfig);

  return {
    status: 'success',
    command: 'detect',
    dryRun,
    videoPath,
    totalFramesExtracted: frames.length,
    productFrames,
    // NOTE(review): presumes detectProductFrames returns its best match first - confirm.
    bestSnapshot: productFrames[0],
  };
}

/**
 * Uploads a local image as base64 and returns the URL reported by the service.
 *
 * The upload is always labelled `image/jpeg` with a `.jpg` filename, regardless of
 * the source file's extension.
 *
 * @throws Error on an HTTP status >= 400 or when the response has no `url` field.
 */
async function uploadImage(client: ReturnType<typeof createSkillClient>, imagePath: string): Promise<string> {
  const imageBuffer = fs.readFileSync(imagePath);
  const filename = `video-snapshot-${Date.now()}.jpg`;
  const res = await client.post('/ecom/tasks/upload-image', {
    data: imageBuffer.toString('base64'),
    filename,
    contentType: 'image/jpeg',
  });
  if (res.status >= 400) throw new Error(`Upload failed: HTTP ${res.status}`);
  const json = JSON.parse(res.body) as { url?: string };
  if (!json.url) throw new Error('Upload response missing url');
  return json.url;
}

/**
 * Runs an image search for a local image file or an already-hosted image URL.
 *
 * @param args `args[0]` is a local image path or an `http(s)://` URL.
 * @returns A `failed` result for a missing argument, a missing local file, an HTTP
 *          error, or a response body that is not JSON; otherwise the parsed search body.
 */
async function runSearch(args: string[], dryRun: boolean): Promise<OutputResult> {
  const imagePath = args[0];
  if (!imagePath) {
    return { status: 'failed', command: 'search', dryRun, error: 'search requires <image-path>' };
  }

  // Remote URLs must bypass the filesystem check; otherwise the URL branch below is unreachable.
  const isRemote = /^https?:\/\//i.test(imagePath);
  if (!isRemote && !fs.existsSync(imagePath)) {
    return { status: 'failed', command: 'search', dryRun, error: `image not found: ${imagePath}` };
  }

  if (dryRun) {
    return { status: 'success', command: 'search', dryRun, imagePath, searchHttpStatus: 0, searchBody: null };
  }

  const client = createSkillClient();
  const imgid = isRemote ? imagePath : await uploadImage(client, imagePath);

  const res = await client.post('/ecom/tasks/search-by-image', { imgid, page: 1 });
  const searchHttpStatus = res.status;
  // Error responses are not guaranteed to be JSON, so parse defensively instead of throwing.
  const body = parseJsonSafe(res.body);

  if (res.status >= 400) {
    const error = typeof body === 'string' ? body : JSON.stringify(body);
    return { status: 'failed', command: 'search', dryRun, imagePath, searchHttpStatus, error };
  }
  if (typeof body === 'string') {
    return {
      status: 'failed',
      command: 'search',
      dryRun,
      imagePath,
      searchHttpStatus,
      error: 'search response was not valid JSON',
    };
  }
  return { status: 'success', command: 'search', dryRun, imagePath, searchHttpStatus, searchBody: body };
}

/**
 * Detects the best product snapshot in a video, searches by that image, then reranks
 * the hits using the snapshot's description.
 *
 * Rerank errors are captured in the `rerank` field rather than failing the command.
 * In dry-run mode detection produces no snapshot, so the search and rerank steps are
 * reported as skipped instead of as a detection failure.
 */
async function runDetectAndSearch(args: string[], dryRun: boolean): Promise<OutputResult> {
  const detectResult = (await runDetect(args, dryRun)) as DetectResult;
  if (detectResult.status === 'failed') return detectResult;

  if (dryRun) {
    return {
      ...detectResult,
      command: 'detect-and-search',
      searchHttpStatus: 0,
      searchBody: null,
      searchError: undefined,
      rerank: undefined,
    } as any;
  }

  if (!detectResult.bestSnapshot) {
    return { ...detectResult, status: 'failed', error: 'no product detected in video' };
  }

  const best = detectResult.bestSnapshot;
  const imageForSearch = best.croppedImagePath || best.imagePath;
  const searchResult = (await runSearch([imageForSearch], dryRun)) as SearchResult;

  let rerankResult: unknown = undefined;
  if (searchResult.status === 'success' && searchResult.searchBody) {
    // runRerank reads its input from a file, so stage the search body next to the image.
    const tmpFile = path.join(path.dirname(imageForSearch), `search_body_${Date.now()}.json`);
    try {
      fs.writeFileSync(tmpFile, JSON.stringify(searchResult.searchBody));
      rerankResult = await runRerank(
        [
          `--image-results=${tmpFile}`,
          // Guard against interpolating the literal text "undefined" as a description.
          `--description=${best.description ?? ''}`,
          '--top=10',
        ],
        dryRun,
      );
    } catch (e: unknown) {
      rerankResult = { error: e instanceof Error ? e.message : String(e) };
    } finally {
      try {
        fs.unlinkSync(tmpFile);
      } catch {
        // Best-effort cleanup: the temp file may never have been created.
      }
    }
  }

  return {
    ...detectResult,
    command: 'detect-and-search',
    searchHttpStatus: searchResult.searchHttpStatus,
    searchBody: searchResult.searchBody,
    searchError: searchResult.error,
    rerank: rerankResult,
  } as any;
}

/** Reads a `--flag=N` integer, falling back when it is absent, non-numeric or not positive. */
function positiveIntFlag(args: string[], flag: string, fallback: number): number {
  const parsed = parseInt(getFlag(args, flag) ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Builds detection options from CLI flags.
 *
 * Defaults: interval 1, max frames 60, min confidence 0.7, concurrency 5. The output
 * directory defaults to a timestamped `snapshots_<video-name>_<ms>` folder beside the video.
 * Invalid numeric flag values fall back to these defaults instead of yielding `NaN`.
 */
function parseDetectOptions(videoPath: string, args: string[]): DetectOptions {
  const stem = path.basename(videoPath, path.extname(videoPath));
  const outputDir =
    getFlag(args, '--output-dir') ||
    path.join(path.dirname(videoPath), `snapshots_${stem}_${Date.now()}`);
  const minConfidence = parseFloat(getFlag(args, '--min-confidence') || '0.7');

  return {
    videoPath,
    intervalSeconds: positiveIntFlag(args, '--interval', 1),
    maxFrames: positiveIntFlag(args, '--max-frames', 60),
    outputDir,
    minConfidence: Number.isNaN(minConfidence) ? 0.7 : minConfidence,
    concurrency: positiveIntFlag(args, '--concurrency', 5),
  };
}
/**
 * Returns the value of the first `--flag=value` argument matching `flag`,
 * or `undefined` when no such argument is present.
 */
function getFlag(args: string[], flag: string): string | undefined {
  const prefix = `${flag}=`;
  const match = args.find((arg) => arg.startsWith(prefix));
  return match === undefined ? undefined : match.slice(prefix.length);
}

/** Creates a model handle from the OpenAI-compatible provider settings. */
function createVisionModel(config: VisionConfig) {
  const openai = createOpenAI({ apiKey: config.apiKey, baseURL: config.baseURL });
  return openai(config.model);
}

/**
 * Asks the model for a short Chinese search keyword describing the product.
 *
 * @returns The model output with everything except CJK ideographs removed.
 *          The result may be empty when the model returns no Chinese text.
 */
async function generateChineseKeyword(description: string, visionConfig: VisionConfig): Promise<string> {
  const model = createVisionModel(visionConfig);
  // Built from an array so each rule sits on its own line without indentation leaking into the prompt.
  const prompt = [
    'You are generating a 1688.com (Chinese B2B wholesale) product search keyword.',
    'Rules:',
    '- Output ONLY 2-4 Chinese words — the product category + 1-2 key material/feature words',
    '- Use common Chinese commerce terms, NOT a literal translation',
    '- No English, no punctuation, no explanation',
    '- Short broad terms work better than long specific phrases (e.g. "金属鞋架" not "黑色Z型金属网格鞋架")',
    `Product description: ${description}`,
    'Output only the search query:',
  ].join('\n');
  const { text } = await generateText({ model, prompt });
  return text.trim().replace(/[^\u4e00-\u9fff\u3400-\u4dbf]/g, '').trim();
}

/**
 * Runs a keyword search and returns the items found at `data.items.item`.
 *
 * @throws Error on an HTTP status >= 400, and SyntaxError when the body is not JSON.
 */
async function keywordSearch(
  client: ReturnType<typeof createSkillClient>,
  keyword: string,
  page = 1,
): Promise<SearchItem[]> {
  const res = await client.post('/ecom/tasks/keyword-search', { keyword, page });
  // Surface HTTP failures instead of silently reporting "no results".
  if (res.status >= 400) throw new Error(`HTTP ${res.status}`);
  const json = JSON.parse(res.body) as any;
  const items = json?.data?.items?.item;
  return Array.isArray(items) ? (items as SearchItem[]) : [];
}

/** True when the string contains at least one character in U+4E00..U+9FFF. */
function hasChinese(str: string): boolean {
  return /[\u4e00-\u9fff]/.test(str);
}

/**
 * Fallback keyword builder: concatenates the three most frequent two-character
 * Chinese bigrams found in the titles of the first `topN` items.
 * Ties keep first-seen order. Returns an empty string when no bigram is found.
 */
function extractKeywordsFromTitles(items: SearchItem[], topN = 5): string {
  const chinesePair = /[\u4e00-\u9fff]{2}/;
  const counts = new Map<string, number>();
  for (const item of items.slice(0, topN)) {
    const title = item.title || '';
    for (let i = 0; i < title.length - 1; i++) {
      const bigram = title.slice(i, i + 2);
      if (chinesePair.test(bigram)) {
        counts.set(bigram, (counts.get(bigram) ?? 0) + 1);
      }
    }
  }
  return [...counts.entries()]
    .sort((a, b) => b[1] - a[1])
    .slice(0, 3)
    .map(([bigram]) => bigram)
    .join('');
}

/** Parsed form of the `rerank` command-line arguments. */
interface RerankArgs {
  imageResultsArg: string | undefined;
  keywordArg: string | undefined;
  topN: number;
  description: string;
}

/**
 * Parses `rerank` arguments. `--image-results=` and `--keyword=` may instead be given
 * positionally as `args[0]` and `args[1]`. A positional slot that holds another
 * `--flag` is ignored, so `--description=...` is never mistaken for the keyword.
 */
function parseRerankArgs(args: string[]): RerankArgs {
  const positionalAt = (index: number): string | undefined => {
    const value = args[index];
    return value !== undefined && !value.startsWith('--') ? value : undefined;
  };
  const top = parseInt(getFlag(args, '--top') ?? '', 10);
  return {
    imageResultsArg: getFlag(args, '--image-results') || positionalAt(0),
    keywordArg: getFlag(args, '--keyword') || positionalAt(1),
    // An unparsable or non-positive --top would otherwise slice the results down to nothing.
    topN: Number.isFinite(top) && top > 0 ? top : 10,
    description: getFlag(args, '--description') || '',
  };
}

/** Numeric `turn_head` of an item, treating missing or non-numeric values as 0. */
function turnHeadScore(item: SearchItem): number {
  const score = parseFloat(String(item.turn_head ?? '0'));
  return Number.isNaN(score) ? 0 : score;
}

/**
 * Reranks image-search results by intersecting them with a keyword search.
 *
 * Arguments: `--image-results=` (a JSON file path or inline JSON), `--keyword=`,
 * `--description=` and `--top=` (default 10).
 *
 * Flow:
 *  1. Load the image-search items from the file or inline JSON.
 *  2. Use the supplied keyword if it contains Chinese. Otherwise generate one from the
 *     description via the vision model, falling back to frequent title bigrams when
 *     generation fails or yields nothing.
 *  3. Run the keyword search and keep the image items whose `num_iid` also appears there.
 *  4. If the intersection is empty, use the keyword items; if those are empty too,
 *     use the image items.
 *  5. Sort by `turn_head` descending and keep the first `topN`.
 *
 * @returns A `failed` result for missing or unparsable input or a failed keyword
 *          search; otherwise a success result carrying the ranked items and counts.
 */
async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult> {
  const { imageResultsArg, keywordArg, topN, description } = parseRerankArgs(args);

  if (!imageResultsArg) {
    return {
      status: 'failed',
      command: 'rerank',
      dryRun,
      error: 'rerank requires --image-results=<path-or-json>',
    };
  }
  if (dryRun) return { status: 'success', command: 'rerank', dryRun } as any;

  let imageItems: SearchItem[];
  try {
    const raw = fs.existsSync(imageResultsArg)
      ? fs.readFileSync(imageResultsArg, 'utf-8')
      : imageResultsArg;
    const parsed = JSON.parse(raw);
    const candidate = parsed?.data?.items?.item ?? parsed?.items?.item ?? parsed;
    imageItems = Array.isArray(candidate) ? candidate : [];
  } catch {
    return { status: 'failed', command: 'rerank', dryRun, error: 'failed to parse image-results JSON' };
  }
  if (!imageItems.length) {
    return { status: 'failed', command: 'rerank', dryRun, error: 'no items found in image-results JSON' };
  }

  const client = createSkillClient();

  // Determine the Chinese keyword to use.
  let keyword = keywordArg ?? '';
  let autoGeneratedKeyword = '';
  if (!hasChinese(keyword)) {
    // Prefer the product description for an accurate translation; fall back to image titles.
    const sourceText = description || keyword || extractKeywordsFromTitles(imageItems);
    if (sourceText) {
      try {
        // The vision config is loaded only here, so a caller-supplied Chinese keyword needs no API key.
        const visionConfig = await loadVisionConfig(client);
        autoGeneratedKeyword = await generateChineseKeyword(sourceText, visionConfig);
      } catch {
        autoGeneratedKeyword = '';
      }
    }
    // The model may fail or answer without any Chinese text; use title bigrams in both cases.
    if (!autoGeneratedKeyword) autoGeneratedKeyword = extractKeywordsFromTitles(imageItems);
    keyword = autoGeneratedKeyword;
  }

  // Keyword search on 1688. Skipped when no keyword could be derived at all.
  let keywordItems: SearchItem[] = [];
  if (keyword) {
    try {
      keywordItems = await keywordSearch(client, keyword);
    } catch (e: unknown) {
      const message = e instanceof Error ? e.message : String(e);
      return { status: 'failed', command: 'rerank', dryRun, error: `keyword search failed: ${message}` };
    }
  }

  // Intersect by num_iid.
  const keywordIds = new Set(keywordItems.map((item) => String(item.num_iid)));
  const intersected = imageItems.filter((item) => keywordIds.has(String(item.num_iid)));

  // With no intersection, fall back to keyword results (at least they match the category);
  // with no keyword results either, the image results are all that is left to rank.
  const usedFallback = intersected.length === 0;
  let results = intersected;
  if (usedFallback) results = keywordItems.length > 0 ? keywordItems : imageItems;

  // Sort a copy by turn_head descending (comment in the original: click-through rate signal).
  const sorted = [...results]
    .sort((a, b) => turnHeadScore(b) - turnHeadScore(a))
    .slice(0, topN);

  return {
    status: 'success',
    command: 'rerank',
    dryRun,
    keyword,
    autoGeneratedKeyword: autoGeneratedKeyword || undefined,
    imageResultsCount: imageItems.length,
    keywordResultsCount: keywordItems.length,
    intersectedCount: intersected.length,
    usedFallback,
    results: sorted,
  } as any;
}

/** Parses JSON text, returning the original text unchanged when it is not valid JSON. */
function parseJsonSafe(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    return text;
  }
}