Compare commits


No commits in common. "main" and "v0.1.2" have entirely different histories.
main ... v0.1.2

7 changed files with 69 additions and 139 deletions

README.md
View File

@@ -1,22 +1,19 @@
# video-product-snapshot — Video Product Search-by-Image
# video-product-snapshot — Video Products
Extract the best product frame from a video and reverse-image-search it on 1688 to find the same item.
Detect e-commerce products in a video, extract the best product shot, and find the same item on 1688 via image search.
## How It Works
1. `ffmpeg` extracts frames at 0.5 s intervals (up to 60 frames)
2. Visual-quality pre-filter (brightness/variance checks drop blurry frames)
3. Container/rack-style product detection → automatically prefer empty (unloaded) frames
4. The vision model compares and ranks frames to pick the best product frame
5. Crop the product region → upload → 1688 image search
6. Post-filter (vision model checks whether each result is the same item) → rerank
1. Use `ffmpeg` to extract frames from the video at the configured interval
2. Send each frame to the vision model to detect products and score them
3. Pick the highest-confidence frame as the best product snapshot
4. Optional: use that snapshot to call the image-search API and find matching products
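The frame-selection step above (keep frames over a confidence threshold, take the highest-scoring one) can be sketched as follows. This is a minimal illustration: the `Frame` shape and `pickBestFrame` helper are hypothetical simplifications, not the project's real types.

```typescript
// Sketch of step 3: pick the highest-confidence frame.
// Frame is a hypothetical simplification of the real detection output.
interface Frame {
  frameIndex: number;
  timestampSeconds: number;
  confidence: number; // 0-1 score from the vision model
}

function pickBestFrame(frames: Frame[], minConfidence = 0.7): Frame | undefined {
  return frames
    .filter((f) => f.confidence >= minConfidence) // drop low-confidence frames
    .sort((a, b) => b.confidence - a.confidence)[0]; // best first
}
```

Frames below the threshold never win even if they are the only candidates, which matches the `--min-confidence` flag described below.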
## Installation
```bash
./install.sh   # install auth-rt + dependencies
bun install
bun run build  # output to dist/run.js
```
## Usage
@@ -29,74 +26,77 @@ bun dist/run.js <command> [options]
| Command | Description |
|------|------|
| `detect-best-and-search <video>` | **Recommended.** Best frame → image search → rerank |
| `detect-best <video>` | Extract the best product frame only, no image search |
| `detect-and-search <video>` | Image search after two-stage filtering (slower) |
| `detect <video>` | Extract frames and detect products frame by frame |
| `search <image>` | Search for matching items with an existing image |
| `rerank` | Cross-filter image-search results against keywords |
| `session` | Get the current auth session token |
| `detect <video>` | Extract frames and detect product shots |
| `search <image>` | Search for matching items by image |
| `detect-and-search <video>` | Full pipeline: detect the best shot → image search |
| `session` | Print the current auth session token |
### Options (`detect-best` / `detect-best-and-search`)
### Options (`detect` / `detect-and-search`)
| Flag | Default | Description |
|------|--------|------|
| `--interval=<seconds>` | `0.5` | Frame sampling interval |
| `--max-frames=<n>` | `60` | Maximum number of frames to analyze |
| `--output-dir=<dir>` | same directory as the video | Directory for saved snapshots |
| `--session-id=<id>` | auto-generated | Langfuse session ID |
| `--dry-run` | — | Parse arguments without executing |
| `--interval=<seconds>` | `1` | Frame extraction interval (seconds) |
| `--max-frames=<n>` | `60` | Maximum number of frames to analyze |
| `--output-dir=<dir>` | directory of the video | Directory for extracted frame images |
| `--min-confidence=<0-1>` | `0.7` | Minimum detection confidence |
| `--dry-run` | — | Parse arguments and print the config without executing |
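All of these flags are plain `--name=value` pairs. A parser in the style of the repo's own `getFlag` helper (which appears later in this diff) might look like the sketch below; the `args` values are illustrative only.

```typescript
// Sketch of a --name=value flag parser, modeled on the getFlag helper
// shown later in this diff.
function getFlag(args: string[], flag: string): string | undefined {
  const prefix = `${flag}=`;
  const hit = args.find((a) => a.startsWith(prefix));
  return hit?.slice(prefix.length).trim();
}

// Illustrative usage: missing flags fall back to a default.
const args = ["./demo.mp4", "--interval=3", "--max-frames=60"];
const interval = Number(getFlag(args, "--interval") ?? "1");
```

Positional arguments (the video path) are simply the entries that do not start with `--`.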
### 示例
```bash
# Detect products, sampling one frame every 3 seconds
bun dist/run.js detect ./demo.mp4 --interval=3
# Full pipeline with a higher confidence threshold
bun dist/run.js detect-and-search ./demo.mp4 --interval=5 --min-confidence=0.85
# Search for matching items with an existing snapshot
bun dist/run.js search ./snapshot.jpg
```
## Output
All commands print JSON to stdout, including a `sessionId` field for Langfuse tracing.
All commands print JSON to stdout.
```json
{
"sessionId": "skill-20260426-184345-lb06",
"status": "success",
"command": "detect-best-and-search",
"bestSnapshot": {
"frameIndex": 7,
"timestampSeconds": 3,
"imagePath": "/path/to/frame_0007.jpg",
"croppedImagePath": "/path/to/frame_0007_cropped.jpg",
"description": "黑色金属床底鞋架 可折叠移动"
"frameIndex": 4,
"timestampSeconds": 9,
"imagePath": "/path/to/frame_0004.jpg",
"confidence": 0.92,
"description": "White sneaker with blue logo, left side view",
"boundingHint": "centered"
},
"rerank": {
"keyword": "床底鞋架",
"results": [
{ "num_iid": 123, "title": "...", "price": "44.00", "sales": 87, "detail_url": "..." }
]
}
"productFrames": [...],
"searchBody": { ... }
}
```
## Auth Architecture
```
~/.openclaw/.env
CLIENT_KEY ──→ auth-rt ──→ backend service
├── /session → access_token
└── /client-config → provider.api_key
provider.base_url
provider.model
```
Only `CLIENT_KEY` needs to be configured; LLM credentials and endpoints are issued by the backend service.
- `productFrames` — all detected shots, sorted by confidence (highest first)
- `bestSnapshot` — the top-ranked shot
- `searchBody` — raw response from the image-search API (`search` / `detect-and-search` only)
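Since each command prints a single JSON object to stdout, downstream tooling can consume it directly. A minimal sketch, assuming the field names shown in the example output above:

```typescript
// Sketch: parse a command's stdout JSON and read bestSnapshot.
// Field names are assumed from the example output above.
interface Output {
  status: string;
  bestSnapshot?: { imagePath: string; confidence?: number };
}

function parseOutput(stdout: string): Output {
  const out = JSON.parse(stdout) as Output;
  if (out.status !== "success") {
    throw new Error(`command failed: ${out.status}`);
  }
  return out;
}
```

A wrapper script would pipe `bun dist/run.js … | …` into this and then act on `bestSnapshot.imagePath`.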
## Environment Variables
The only required setting is `CLIENT_KEY` in `~/.openclaw/.env`:
```
CLIENT_KEY=sk_xxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx
```
All credentials and endpoints are fetched automatically from the client config via `auth-rt`; no extra configuration is needed.
### Optional Overrides
| Variable | Description |
|------|------|
| `CLIENT_KEY` | **Required.** Set in `~/.openclaw/.env` |
| `VISION_MODEL` | Override the model name (default comes from the client config) |
| `SKILL_SESSION_ID` | Langfuse session ID (auto-generated, format `skill-YYYYMMDD-HHMMSS-xxxx`) |
| `VISION_MODEL` | Override the model name (default: `aliyun-cp-multimodal`) |
| `AUTH_RT_BIN` | Override the path to the `auth-rt` binary |
| `TELEMETRY_ENDPOINT` | Telemetry reporting endpoint |
| `TELEMETRY_ENDPOINT` | Endpoint for reporting execution results |
## Prerequisites
- [Bun](https://bun.sh) runtime
- `ffmpeg` / `ffprobe` on the system PATH (frame extraction)
- `auth-rt` CLI (auth / API calls; installed automatically by `install.sh`)
- `ffmpeg` and `ffprobe` on the system PATH
- `auth-rt` CLI on the system PATH (required by `search` / `detect-and-search`)

View File

@@ -67,7 +67,7 @@ bun dist/run.js <command> [args] [--dry-run]
## Result Display Format
Format `rerank.results` (preferred) or `searchBody.data.items.item` as a markdown table, **at most 5 rows**:
Format `rerank.results` (preferred) or `searchBody.data.items.item` as a markdown table, **5 rows per page**:
| # | Product | Price | Sales | Link |
|---|----------|------|------|------|
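That formatting step can be sketched as below. The `Item` shape is assumed from the JSON example earlier in this diff (`title`, `price`, `sales`, `detail_url`); the helper itself is hypothetical.

```typescript
// Sketch: render search items as the markdown table above, capped at 5 rows.
// Item fields are assumed from the JSON example earlier in this diff.
interface Item {
  title: string;
  price: string;
  sales?: number;
  detail_url: string;
}

function toMarkdownTable(items: Item[], limit = 5): string {
  const header = "| # | Product | Price | Sales | Link |\n|---|---------|-------|-------|------|";
  const rows = items
    .slice(0, limit) // enforce the row cap
    .map((it, i) => `| ${i + 1} | ${it.title} | ${it.price} | ${it.sales ?? "-"} | ${it.detail_url} |`);
  return [header, ...rows].join("\n");
}
```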

View File

@@ -81,8 +81,6 @@ async function main(): Promise<void> {
dryRun = true;
} else if (arg.startsWith('--api-base=')) {
process.env.API_BASE = arg.slice('--api-base='.length).trim();
} else if (arg.startsWith('--session-id=')) {
process.env.SKILL_SESSION_ID = arg.slice('--session-id='.length).trim();
} else if (arg === '-h' || arg === '--help') {
printUsage(); process.exit(0);
} else {
@@ -93,7 +91,6 @@ async function main(): Promise<void> {
if (positionals.length < 1) { printUsage(); process.exit(1); }
const command = positionals[0] as Command;
const sessionId = process.env.SKILL_SESSION_ID!; // set by auth-cli.ts at module load
const startMs = Date.now();
let result: Awaited<ReturnType<typeof run>>;
@@ -101,14 +98,13 @@ async function main(): Promise<void> {
result = await run(command, positionals.slice(1), dryRun);
} catch (err) {
const error = err instanceof Error ? err.message : String(err);
console.log(JSON.stringify({ status: 'failed', command, dryRun, sessionId, error }, null, 2));
if (!dryRun) reportTelemetry({ skill: SKILL_NAME, command, sessionId, status: 'failed', durationMs: Date.now() - startMs, error });
console.log(JSON.stringify({ status: 'failed', command, dryRun, error }, null, 2));
if (!dryRun) reportTelemetry({ skill: SKILL_NAME, command, status: 'failed', durationMs: Date.now() - startMs, error });
process.exit(1);
}
const output = { ...result, sessionId } as Record<string, unknown>;
console.log(JSON.stringify(output, null, 2));
if (!dryRun) reportTelemetry({ skill: SKILL_NAME, command, sessionId, status: result.status, durationMs: Date.now() - startMs, error: (result as any).error });
console.log(JSON.stringify(result, null, 2));
if (!dryRun) reportTelemetry({ skill: SKILL_NAME, command, status: result.status, durationMs: Date.now() - startMs, error: (result as any).error });
}
main().catch((err) => {

View File

@@ -20,18 +20,6 @@ import * as path from 'path';
import * as os from 'os';
const home = process.env.HOME || os.homedir();
// ── session ID (Langfuse tracing) ──
// Priority: SKILL_SESSION_ID env > auto-generate
const SESSION_ID = process.env.SKILL_SESSION_ID || (() => {
const ts = new Date();
const pad = (n: number) => String(n).padStart(2, '0');
const tsPart = `${ts.getFullYear()}${pad(ts.getMonth()+1)}${pad(ts.getDate())}-${pad(ts.getHours())}${pad(ts.getMinutes())}${pad(ts.getSeconds())}`;
const rand = Math.random().toString(36).slice(2, 6);
return `skill-${tsPart}-${rand}`;
})();
process.env.SKILL_SESSION_ID = SESSION_ID;
const AUTH_RT_BIN = process.env.AUTH_RT_BIN
|| (() => {
// Check if auth-rt is in PATH

View File

@@ -12,7 +12,6 @@ export interface VisionConfig {
apiKey: string;
baseURL?: string;
model: string;
sessionId?: string;
}
async function loadVisionConfig(client: ReturnType<typeof createSkillClient>): Promise<VisionConfig> {
@@ -23,7 +22,6 @@ async function loadVisionConfig(client: ReturnType<typeof createSkillClient>): P
apiKey,
baseURL: cfg.metadata?.provider?.base_url,
model: process.env.VISION_MODEL ?? cfg.metadata?.provider?.model ?? 'aliyun-cp-multimodal',
sessionId: process.env.SKILL_SESSION_ID || `skill_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
};
}
@@ -212,7 +210,7 @@ async function runDetectBestAndSearch(args: string[], dryRun: boolean): Promise<
// Otherwise fall back to the keyword-intersection rerank.
if (!dryRun && postFilter && !postFilter.error && postFilter.keptCount > 0) {
const items: SearchItem[] = (searchResult.searchBody as any)?.data?.items?.item ?? [];
const sorted = [...items].sort((a, b) => (b.sales ?? 0) - (a.sales ?? 0)).slice(0, 5);
const sorted = [...items].sort((a, b) => (b.sales ?? 0) - (a.sales ?? 0)).slice(0, 10);
rerankResult = {
source: 'post-filter',
results: sorted,
@@ -225,7 +223,7 @@ async function runDetectBestAndSearch(args: string[], dryRun: boolean): Promise<
rerankResult = await runRerank([
`--image-results=${tmpFile}`,
`--description=${best.description}`,
'--top=5',
'--top=10',
], dryRun);
} catch (e: any) {
rerankResult = { error: e.message };
@@ -360,7 +358,7 @@ async function runDetectAndSearch(args: string[], dryRun: boolean): Promise<Outp
// Otherwise fall back to the keyword-intersection rerank.
if (!dryRun && postFilter && !postFilter.error && postFilter.keptCount > 0) {
const items: SearchItem[] = (searchResult.searchBody as any)?.data?.items?.item ?? [];
const sorted = [...items].sort((a, b) => (b.sales ?? 0) - (a.sales ?? 0)).slice(0, 5);
const sorted = [...items].sort((a, b) => (b.sales ?? 0) - (a.sales ?? 0)).slice(0, 10);
rerankResult = {
source: 'post-filter',
results: sorted,
@@ -373,7 +371,7 @@ async function runDetectAndSearch(args: string[], dryRun: boolean): Promise<Outp
rerankResult = await runRerank([
`--image-results=${tmpFile}`,
`--description=${best.description}`,
'--top=5',
'--top=10',
], dryRun);
} catch (e: any) {
rerankResult = { error: e.message };
@@ -417,25 +415,7 @@ function getFlag(args: string[], flag: string): string | undefined {
}
function createVisionModel(config: VisionConfig) {
const sessionId = config.sessionId || '';
const originFetch = globalThis.fetch;
// Inject metadata.session_id into request body so LiteLLM → Langfuse creates sessions
const wrapped = async (input: RequestInfo | URL, init?: RequestInit) => {
if (init?.body && typeof init.body === 'string') {
try {
const body = JSON.parse(init.body);
if (!body.metadata) body.metadata = {};
if (!body.metadata.session_id) body.metadata.session_id = sessionId;
body.metadata.tags = ['skill:video-product-snapshot'];
init = { ...init, body: JSON.stringify(body) };
} catch {}
}
return originFetch(input, init);
};
const openai = createOpenAI({
apiKey: config.apiKey, baseURL: config.baseURL,
fetch: wrapped as typeof globalThis.fetch,
});
const openai = createOpenAI({ apiKey: config.apiKey, baseURL: config.baseURL });
return openai(config.model);
}
@@ -493,7 +473,7 @@ async function runRerank(args: string[], dryRun: boolean): Promise<OutputResult>
const positionals = args.filter((a) => !a.startsWith('--'));
const imageResultsArg = getFlag(args, '--image-results') || positionals[0];
const keywordArg = getFlag(args, '--keyword') || positionals[1];
const topN = parseInt(getFlag(args, '--top') || '5', 10);
const topN = parseInt(getFlag(args, '--top') || '10', 10);
const description = getFlag(args, '--description') || '';
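The hunks above raise the rerank cap from 5 to 10 and keep the post-filter fallback that sorts surviving items by sales. That fallback is essentially a pure function, sketched here with a simplified `SearchItem` (only the fields the sort actually touches):

```typescript
// Sketch of the post-filter fallback above: sort kept items by sales
// (descending, missing sales counted as 0) and keep the top N.
interface SearchItem {
  num_iid: number;
  sales?: number;
}

function topBySales(items: SearchItem[], topN = 10): SearchItem[] {
  // Copy before sorting so the caller's array is left untouched,
  // mirroring the [...items].sort(...) pattern in the diff.
  return [...items].sort((a, b) => (b.sales ?? 0) - (a.sales ?? 0)).slice(0, topN);
}
```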

View File

@@ -34,24 +34,7 @@ const FILTER_PROMPT = (count: number, description?: string) => {
};
function createModel(config: VisionConfig) {
const sessionId = config.sessionId || '';
const originFetch = globalThis.fetch;
const wrapped = async (input: RequestInfo | URL, init?: RequestInit) => {
if (init?.body && typeof init.body === 'string') {
try {
const body = JSON.parse(init.body);
if (!body.metadata) body.metadata = {};
if (!body.metadata.session_id) body.metadata.session_id = sessionId;
body.metadata.tags = ['skill:video-product-snapshot'];
init = { ...init, body: JSON.stringify(body) };
} catch {}
}
return originFetch(input, init);
};
const provider = createOpenAI({
apiKey: config.apiKey, baseURL: config.baseURL,
fetch: wrapped as typeof globalThis.fetch,
});
const provider = createOpenAI({ apiKey: config.apiKey, baseURL: config.baseURL });
return provider(config.model);
}

View File

@@ -78,24 +78,7 @@ Return:
- boundingBox: tight box of the PRODUCT ONLY as [x1, y1, x2, y2] normalized 0.0-1.0, top-left origin. Exclude hands, background, and unrelated objects. The product is near the center of the frame.`;
function createVisionModel(config: VisionConfig) {
const sessionId = config.sessionId || '';
const originFetch = globalThis.fetch;
const wrapped = async (input: RequestInfo | URL, init?: RequestInit) => {
if (init?.body && typeof init.body === 'string') {
try {
const body = JSON.parse(init.body);
if (!body.metadata) body.metadata = {};
if (!body.metadata.session_id) body.metadata.session_id = sessionId;
body.metadata.tags = ['skill:video-product-snapshot'];
init = { ...init, body: JSON.stringify(body) };
} catch {}
}
return originFetch(input, init);
};
const provider = createOpenAI({
apiKey: config.apiKey, baseURL: config.baseURL,
fetch: wrapped as typeof globalThis.fetch,
});
const provider = createOpenAI({ apiKey: config.apiKey, baseURL: config.baseURL });
return provider(config.model);
}