Compare commits

..

No commits in common. "main" and "v0.0.1" have entirely different histories.
main ... v0.0.1

5 changed files with 276 additions and 379 deletions

118
SKILL.md
View File

@ -1,97 +1,75 @@
---
name: 1688-logistics-scraper
description: "Scrape 1688 product pages via Chrome, capture full-page screenshots and detail images for vision-based extraction of weight/size data. Use when the user provides a 1688 product URL and needs logistics specs."
description: "Extract product weight/size/logistics data from 1688 product pages via Chrome browser, output structured JSON. Use when the user provides a 1688 product URL and needs logistics specs."
---
# 1688 Logistics Scraper
Capture 1688 product pages and extract weight/size data via vision.
Extract product weight, size, and logistics data from 1688 product pages.
## Run
```bash
bun scripts/run.ts scrape <url> [--dry-run] [--port=9222]
bun scripts/run.ts scrape <url> [--dry-run]
```
### Examples
```bash
bun scripts/run.ts scrape 'https://detail.1688.com/offer/852504650877.html'
bun scripts/run.ts scrape 'https://detail.1688.com/offer/852504650877.html' --dry-run
```
## What It Does
1. Opens the 1688 product URL in the browser (default port 18800)
2. Scrolls through the entire page, capturing full-page screenshots
3. Downloads all product detail images
4. Saves to `/tmp/1688-logistics/<offer-id>/`
1. Opens the 1688 product URL in the browser
2. Extracts weight/size data from wherever it appears on the page — product attributes, variant specs, logistics section
3. Downloads detail images (商品详情图片) for analysis — weight/size is often only in images
4. Outputs structured JSON
## After Running — MUST follow
## Where To Look For Data
Read ALL screenshots and detail images, then output the following JSON structure. This is the final output for API consumption.
Weight/size data on 1688 pages hides in multiple places. Check all before giving up:
1. **Product attributes** (商品属性 / 商品参数) — key-value table, most reliable
2. **Variant/SKU specs** — per-variant weight or size
3. **Logistics section** — shipping weight, volume, freight info
4. **Detail images** — downloaded to `/tmp/1688-logistics/<offer-id>/`, read them to find weight/size text baked into images
## Output
```json
{
"offerId": "966107271425",
"url": "https://detail.1688.com/offer/966107271425.html",
"title": "商品标题",
"weight": {
"value": 0.15,
"unit": "kg",
"source": "商品属性"
"status": "success",
"url": "https://detail.1688.com/offer/...",
"product": {
"title": "产品标题",
"logistics": {
"weight": { "value": 0.5, "unit": "kg", "source": "attributes" },
"dimensions": { "length": 30, "width": 20, "height": 10, "unit": "cm", "source": "attributes" },
"grossWeight": null,
"netWeight": null,
"packageWeight": null,
"volume": null,
"shippingMethod": null,
"shippingCost": null,
"origin": null
},
"variants": [
{ "name": "颜色: 红色", "weight": null, "dimensions": null }
]
},
"grossWeight": {
"value": 0.2,
"unit": "kg",
"source": "商品件重尺"
},
"netWeight": {
"value": 0.15,
"unit": "kg",
"source": "商品属性"
},
"dimensions": {
"length": 10,
"width": 8,
"height": 1.8,
"unit": "cm",
"source": "商品属性"
},
"volume": {
"value": 0.000144,
"unit": "m³",
"source": "商品件重尺"
},
"packageWeight": {
"value": 5.0,
"unit": "kg",
"source": "包装信息"
},
"packageDimensions": {
"length": 40,
"width": 30,
"height": 20,
"unit": "cm",
"source": "包装信息"
},
"unitsPerPackage": 50,
"variants": [
{
"name": "12支装",
"weight": { "value": 0.12, "unit": "kg" },
"dimensions": { "length": 9.5, "width": 6, "height": 2.2, "unit": "cm" }
}
]
"detailImages": ["/tmp/1688-logistics/852504650877/img_001.jpg"],
"rawAttributes": { "重量": "0.5kg", "尺寸": "30*20*10cm" }
}
```
### Field rules
- **All weight values normalized to kg** (克÷1000, 斤×0.5)
- **All dimension values normalized to cm** (mm÷10)
- **`source`**: where on the page the data was found (商品属性 / 商品件重尺 / 包装信息 / 详情图片)
- **`variants`**: only include if weight/size differs per SKU. Omit if all variants share the same specs.
- **Omit fields that are `null`** — do not include fields where no data was found
- **Do not guess.** Only include values actually visible on the page or in images.
A `null` field means the value was not found in the page text. Check `detailImages` before concluding it is missing — the data may appear only in the images.
## Rules
1. If the browser is not running, report the error. Do not try to launch it.
2. No retries. If it fails, report as-is.
3. Read ALL screenshots — logistics data can appear anywhere on the page.
4. Read detail images too — weight/size is often baked into product photos.
5. Output ONLY the structured JSON above. No extra commentary.
2. Check all data sources before reporting `null`.
3. Normalize units: 克→kg, 毫米→cm. Keep raw values in `rawAttributes`.
4. No retries. If it fails, report as-is.
5. Trust page content. Do not guess values.

View File

@ -2,26 +2,8 @@
# Abort on any command failure, on use of an unset variable, and on a failing pipeline stage.
set -euo pipefail
# Run from the directory that contains this script so relative paths resolve.
cd "$(dirname "$0")"
# Auto-install auth-rt if not found
# (skipped when auth-rt is on PATH or an executable already exists at ~/.local/bin/auth-rt)
if ! command -v auth-rt &>/dev/null && [ ! -x "$HOME/.local/bin/auth-rt" ]; then
echo "auth-rt not found, installing..."
# Base URL of the Forgejo instance that hosts the auth-runtime releases and repository.
_FORGEJO="http://192.168.0.108:3030"
# Lower-cased kernel name from uname -s, used in the release asset name.
_OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
# Map uname -m values x86_64/aarch64 to amd64/arm64; any other value is used unchanged.
_ARCH="$(uname -m)"; case "$_ARCH" in x86_64) _ARCH="amd64";; aarch64) _ARCH="arm64";; esac
_URL="$_FORGEJO/agent-skills/auth-runtime/releases/download/latest/auth-rt-${_OS}-${_ARCH}"
mkdir -p "$HOME/.local/bin"
# Try the prebuilt binary first; -f makes curl exit non-zero on HTTP errors, and stderr is discarded.
if curl -fsSL "$_URL" -o "$HOME/.local/bin/auth-rt" 2>/dev/null; then
chmod +x "$HOME/.local/bin/auth-rt"
echo "auth-rt installed (downloaded)"
else
echo "Download failed, building from source..."
_SRC="$HOME/.local/share/auth-runtime"
# Reuse an existing checkout (fast-forward only) or make a shallow clone, then run the repository's own installer.
if [ -d "$_SRC/.git" ]; then git -C "$_SRC" pull --ff-only
else git clone --depth 1 "$_FORGEJO/agent-skills/auth-runtime.git" "$_SRC"
fi
bash "$_SRC/install.sh"
fi
fi
# Install the skill's JavaScript dependencies.
bun install
echo "1688-logistics-scraper installed."
echo ""
# Remind the user of the runtime prerequisite; this script does not start Chrome itself.
echo "Prerequisites: Chrome must be running with remote debugging:"
echo " /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222"

View File

@ -12,14 +12,17 @@ Commands:
Examples:
bun scripts/run.ts scrape 'https://detail.1688.com/offer/852504650877.html'
bun scripts/run.ts scrape 'https://detail.1688.com/offer/852504650877.html' --dry-run
bun scripts/run.ts --port=18801 scrape 'https://detail.1688.com/offer/852504650877.html'
bun scripts/run.ts --port=9223 scrape 'https://detail.1688.com/offer/852504650877.html'
Prerequisites:
Chrome must be running with --remote-debugging-port=9222
`);
}
async function main(): Promise<void> {
const positionals: string[] = [];
let dryRun = false;
let port = 18800;
let port = 9222;
for (const arg of process.argv.slice(2)) {
if (arg === '--dry-run') {

View File

@ -1,119 +0,0 @@
/**
* Thin CLI wrapper for auth-runtime.
*
* Copy this file into your skill's src/ directory. It calls the
* `auth-rt` binary (a standalone Go executable), so the skill has
* zero npm/runtime dependency on auth-runtime.
*
* Prerequisites:
* `auth-rt` must be in PATH or at ~/.local/bin/auth-rt
* (install.sh handles this automatically)
*
* Usage:
* import { createSkillClient } from './auth-cli.ts';
* const client = createSkillClient();
* const res = await client.post('/ecom/tasks/scrape', { url: '...' });
*/
import { spawnSync } from 'child_process';
import * as path from 'path';
import * as os from 'os';
// Home directory used for the fallback install location of auth-rt.
const home = process.env.HOME || os.homedir();

/**
 * Looks up auth-rt with `which`; when that fails or prints nothing,
 * falls back to ~/.local/bin/auth-rt.
 */
function locateAuthRt() {
  const lookup = spawnSync('which', ['auth-rt'], { encoding: 'utf-8' });
  const found = lookup.status === 0 ? lookup.stdout.trim() : '';
  return found || path.join(home, '.local', 'bin', 'auth-rt');
}

// An explicit AUTH_RT_BIN environment variable always wins over discovery.
const AUTH_RT_BIN = process.env.AUTH_RT_BIN || locateAuthRt();
/** Result of a SkillClient request, as parsed from the JSON that auth-rt prints. */
export interface ApiResponse {
// Status code of the response; dry-run requests always report 200. Presumably the HTTP status — confirm against auth-rt.
status: number;
// Response body as text; in dry-run mode this is a JSON string echoing the method and path.
body: string;
}
/** Shape of the JSON returned by `auth-rt session` (see SkillClient.session). */
export interface SessionResponse {
// Access token; the dry-run placeholder is '<dry-run-token>'.
accessToken: string;
// Token lifetime; dry-run uses 900. Presumably seconds — TODO confirm against auth-rt.
expiresIn: number;
// NOTE(review): the optional fields below are not read anywhere in this file; their meaning is defined by auth-rt.
ownerSessionToken?: string;
hookUrl?: string;
hookToken?: string;
}
/** Construction options for SkillClient. */
export interface SkillClientOptions {
// When set, forwarded to auth-rt as `--api-base <value>` on every request.
apiBase?: string;
// When true, no auth-rt process is spawned and canned responses are returned. Defaults to false.
dryRun?: boolean;
}
/**
 * Runs the `auth-rt` binary synchronously and returns its trimmed stdout.
 *
 * @param args Command-line arguments passed to `auth-rt` verbatim.
 * @returns Standard output with surrounding whitespace removed.
 * @throws Error when the process cannot be spawned or exceeds the 60 s timeout,
 *   when it is terminated by a signal, or when it exits with a non-zero code.
 */
function runCli(...args: string[]): string {
  const result = spawnSync(AUTH_RT_BIN, args, {
    encoding: 'utf-8',
    timeout: 60_000,
  });
  if (result.error) {
    throw new Error(`auth-rt spawn failed: ${result.error.message}`);
  }
  // spawnSync reports a null status when the child was terminated by a signal;
  // name the signal instead of printing a meaningless "exit null".
  if (result.status === null) {
    throw new Error(
      `auth-rt terminated by signal ${result.signal ?? 'unknown'}: ${(result.stderr || '').trim()}`,
    );
  }
  if (result.status !== 0) {
    throw new Error(`auth-rt failed (exit ${result.status}): ${(result.stderr || '').trim()}`);
  }
  return (result.stdout || '').trim();
}
/**
 * Minimal client that delegates authenticated API calls to the `auth-rt` CLI.
 *
 * In dry-run mode no process is spawned; every call returns a canned response.
 */
export class SkillClient {
  private readonly apiBase?: string;
  private readonly dryRun: boolean;

  /** @param options Optional API base override and dry-run switch. */
  constructor(options: SkillClientOptions = {}) {
    const { apiBase, dryRun } = options;
    this.apiBase = apiBase;
    this.dryRun = dryRun ?? false;
  }

  /** Fetches a session via `auth-rt session`, or a placeholder when dry-running. */
  async session(): Promise<SessionResponse> {
    if (!this.dryRun) {
      const output = runCli('session');
      return JSON.parse(output);
    }
    return { accessToken: '<dry-run-token>', expiresIn: 900 };
  }

  /** Issues a GET request for `urlPath`. */
  async get(urlPath: string): Promise<ApiResponse> {
    return this.request('GET', urlPath);
  }

  /** Issues a POST request for `urlPath` with an optional JSON body. */
  async post(urlPath: string, body?: unknown): Promise<ApiResponse> {
    return this.request('POST', urlPath, body);
  }

  /** Issues a PUT request for `urlPath` with an optional JSON body. */
  async put(urlPath: string, body?: unknown): Promise<ApiResponse> {
    return this.request('PUT', urlPath, body);
  }

  /** Issues a PATCH request for `urlPath` with an optional JSON body. */
  async patch(urlPath: string, body?: unknown): Promise<ApiResponse> {
    return this.request('PATCH', urlPath, body);
  }

  /** Issues a DELETE request for `urlPath` with an optional JSON body. */
  async delete(urlPath: string, body?: unknown): Promise<ApiResponse> {
    return this.request('DELETE', urlPath, body);
  }

  /**
   * Shared implementation behind the verb helpers.
   *
   * Builds `auth-rt request <method> <path> [--body <json>] [--api-base <url>]`
   * and parses the JSON the CLI prints. A null/undefined body is omitted.
   */
  private async request(method: string, urlPath: string, body?: unknown): Promise<ApiResponse> {
    if (!this.dryRun) {
      const bodyArgs = body != null ? ['--body', JSON.stringify(body)] : [];
      const baseArgs = this.apiBase ? ['--api-base', this.apiBase] : [];
      const output = runCli('request', method, urlPath, ...bodyArgs, ...baseArgs);
      return JSON.parse(output);
    }
    // Dry-run: echo what would have been requested without spawning anything.
    const echo = { dryRun: true, method, path: urlPath };
    return { status: 200, body: JSON.stringify(echo) };
  }
}
/**
 * Convenience factory for SkillClient.
 *
 * @param options Forwarded unchanged to the SkillClient constructor.
 * @returns A new client instance.
 */
export function createSkillClient(options?: SkillClientOptions): SkillClient {
  const client = new SkillClient(options);
  return client;
}

View File

@ -3,16 +3,50 @@ import * as path from 'path';
/** Commands accepted by run(); 'scrape' is the only one it handles. */
export type Command = 'scrape';
/** A single parsed quantity (weight or volume) plus a label for where it was found. */
export interface LogisticsValue {
// Numeric magnitude; parseWeight() reports weights converted to kilograms.
value: number | null;
// Unit label for `value`: parseWeight() always yields 'kg', parseVolume() keeps the matched unit text.
unit: string | null;
// Origin of the value; run() sets 'attributes' or 'variant', while the parsers leave it as ''.
source: string;
}
/** A length × width × height triple; parseDimensions() converts all three sides to centimetres. */
export interface Dimensions {
// First number of the matched triple.
length: number | null;
// Second number of the matched triple.
width: number | null;
// Third number of the matched triple.
height: number | null;
// Unit label for the three sides ('cm' when produced by parseDimensions()).
unit: string | null;
// Origin of the value; run() sets 'attributes' or 'variant', while the parser leaves it as ''.
source: string;
}
/** Logistics fields that run() derives from the page's attribute table; null means no matching attribute was parsed. */
export interface LogisticsData {
// Weight from an attribute whose key matches WEIGHT_KEYS but not one of the more specific keys below.
weight: LogisticsValue | null;
// Dimensions from an attribute whose key matches DIMENSION_KEYS.
dimensions: Dimensions | null;
// Weight from an attribute whose key contains '毛重'.
grossWeight: LogisticsValue | null;
// Weight from an attribute whose key contains '净重'.
netWeight: LogisticsValue | null;
// Weight from an attribute whose key contains '包装重量'.
packageWeight: LogisticsValue | null;
// Volume from an attribute whose key matches VOLUME_KEYS.
volume: LogisticsValue | null;
// NOTE(review): shippingMethod and shippingCost are never assigned in the code visible here; they stay null.
shippingMethod: string | null;
shippingCost: string | null;
// Raw attribute text for keys containing '产地', '发货地' or '所在地'.
origin: string | null;
}
/** One SKU/variant element found on the page, with anything parseable from its text. */
export interface VariantInfo {
// Variant label: the element's text with whitespace runs collapsed to single spaces.
name: string;
// Weight parsed from the variant's text, or null when none was recognised.
weight: LogisticsValue | null;
// Dimensions parsed from the variant's text, or null when none were recognised.
dimensions: Dimensions | null;
}
/** Value returned by run(); on failure only the identifying fields and `error` are populated. */
export interface ScrapeResult {
// 'failed' for unknown commands, a missing URL, or any error caught during scraping.
status: 'success' | 'failed';
// Product URL that was requested ('' when none was supplied).
url: string;
// Command that was executed.
command: Command;
// Whether the run was a dry run (no browser connection is made).
dryRun: boolean;
// Numeric id taken from the `offer/<digits>` part of the URL by extractOfferId().
offerId: string;
// `productPackInfo` read from the page's window.context.result.data, when present.
productPackInfo?: unknown;
// Subset of window.context.result.data captured from the page, when present.
windowContext?: unknown;
// File paths of the viewport screenshots written while scrolling the page.
screenshots?: string[];
// Parsed product data: title, logistics fields and per-variant specs.
product?: {
title: string;
logistics: LogisticsData;
variants: VariantInfo[];
};
// File paths of the downloaded detail images.
detailImages?: string[];
// Raw key/value text scraped from the attribute elements, before any parsing.
rawAttributes?: Record<string, string>;
// Error message when status is 'failed'.
error?: string;
}
@ -28,10 +62,9 @@ class CdpSession {
private ws!: WebSocket;
private msgId = 0;
private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void }>();
private eventListeners = new Map<string, Array<(params: any) => void>>();
static async connect(port: number): Promise<CdpSession> {
const resp = await fetch(`http://localhost:${port}/json`);
const resp = await fetch(`http://127.0.0.1:${port}/json`);
const targets = (await resp.json()) as Array<{ webSocketDebuggerUrl: string; type: string }>;
const page = targets.find(t => t.type === 'page');
if (!page) throw new Error('No Chrome page tab found. Open a tab first.');
@ -46,20 +79,13 @@ class CdpSession {
this.ws.onopen = () => resolve();
this.ws.onerror = (e: any) => reject(new Error(`WebSocket error: ${e.message || e}`));
this.ws.onmessage = (ev: MessageEvent) => {
const msg = JSON.parse(typeof ev.data === 'string' ? ev.data : ev.data.toString());
// Handle command responses
const msg: CdpResult = JSON.parse(typeof ev.data === 'string' ? ev.data : ev.data.toString());
if (msg.id != null && this.pending.has(msg.id)) {
const p = this.pending.get(msg.id)!;
this.pending.delete(msg.id);
if (msg.error) p.reject(new Error(msg.error.message));
else p.resolve(msg.result);
}
// Handle events
if (msg.method && this.eventListeners.has(msg.method)) {
for (const fn of this.eventListeners.get(msg.method)!) {
fn(msg.params);
}
}
};
});
}
@ -72,117 +98,145 @@ class CdpSession {
});
}
/**
 * Resolves with the parameters of the next `event` received, or rejects on timeout.
 *
 * @param event Name of the event to wait for.
 * @param timeoutMs Maximum wait in milliseconds (default 30000).
 * @returns Promise of the event's params.
 */
waitForEvent(event: string, timeoutMs: number = 30000): Promise<any> {
  return new Promise((resolve, reject) => {
    // Stops the timeout and unregisters this waiter's handler if it is still queued.
    const detach = () => {
      clearTimeout(timeoutId);
      const registered = this.eventListeners.get(event);
      const position = registered ? registered.indexOf(onEvent) : -1;
      if (position >= 0) registered!.splice(position, 1);
    };

    const onEvent = (params: any) => {
      detach();
      resolve(params);
    };

    const timeoutId = setTimeout(() => {
      detach();
      reject(new Error(`Timeout waiting for ${event}`));
    }, timeoutMs);

    // Create the listener list for this event on first use, then enqueue.
    let queue = this.eventListeners.get(event);
    if (!queue) {
      queue = [];
      this.eventListeners.set(event, queue);
    }
    queue.push(onEvent);
  });
}
/**
 * Evaluates a JavaScript expression in the page and returns its value.
 *
 * @param expression Source text sent to `Runtime.evaluate`.
 * @returns `result.value` from the response, or undefined when the response has none.
 */
async evaluate(expression: string): Promise<any> {
  const res = await this.send('Runtime.evaluate', {
    expression,
    returnByValue: true,
    // Without awaitPromise a Promise-valued expression returns at once (serialised by value),
    // so callers that evaluate `new Promise(...)` to wait for page state never actually wait.
    // Non-Promise results are unaffected by this flag.
    awaitPromise: true,
  });
  return res?.result?.value;
}
/**
 * Captures the current viewport via `Page.captureScreenshot`.
 *
 * @param format Image format passed to the protocol call (default 'png').
 * @returns The image bytes decoded from the base64 payload.
 */
async captureScreenshot(format: string = 'png'): Promise<Buffer> {
  const params = { format, captureBeyondViewport: false };
  const { data } = await this.send('Page.captureScreenshot', params);
  return Buffer.from(data, 'base64');
}
/** Closes the WebSocket; any error raised while closing is deliberately ignored. */
close() {
  try {
    this.ws.close();
  } catch {
    // Best-effort shutdown: nothing useful can be done if closing fails.
  }
}
}
// --- Helpers ---
// --- Parsers ---
// Attribute-name fragments (matched case-insensitively as substrings) that mark a weight value.
const WEIGHT_KEYS = [
  '重量',
  '毛重',
  '净重',
  '单件重量',
  '包装重量',
  '产品重量',
  '单品重量',
  'weight',
];

// Attribute-name fragments that mark a size / dimensions value.
const DIMENSION_KEYS = [
  '尺寸',
  '规格',
  '长宽高',
  '外箱尺寸',
  '包装尺寸',
  '产品尺寸',
  '大小',
  'size',
  'dimensions',
];

// Attribute-name fragments that mark a volume value.
const VOLUME_KEYS = [
  '体积',
  '容积',
  'volume',
];
/**
 * Pulls the numeric offer id out of a URL containing `offer/<digits>`.
 *
 * @param url Product page URL.
 * @returns The digit string, or 'unknown' when the URL has no such segment.
 */
function extractOfferId(url: string): string {
  const found = url.match(/offer\/(\d+)/);
  return found ? found[1] : 'unknown';
}
async function scrollAndCapture(
cdp: CdpSession,
outputDir: string,
): Promise<string[]> {
fs.mkdirSync(outputDir, { recursive: true });
const saved: string[] = [];
// Get page height
const pageHeight: number = await cdp.evaluate(
'Math.max(document.body.scrollHeight, document.documentElement.scrollHeight)'
) || 0;
const viewportHeight: number = await cdp.evaluate('window.innerHeight') || 900;
// Scroll through the page and capture viewport-sized screenshots
// Use 80% step to overlap slightly and avoid missing content at boundaries
const step = Math.floor(viewportHeight * 0.8);
let scrollY = 0;
let idx = 1;
while (scrollY < pageHeight) {
await cdp.evaluate(`window.scrollTo(0, ${scrollY})`);
await new Promise(r => setTimeout(r, 800)); // wait for lazy-load render
const buf = await cdp.captureScreenshot('png');
const filePath = path.join(outputDir, `page_${String(idx).padStart(3, '0')}.png`);
fs.writeFileSync(filePath, buf);
saved.push(filePath);
scrollY += step;
idx++;
}
return saved;
/**
 * Extracts the first weight mentioned in free-form text and converts it to kilograms.
 *
 * Recognised units: kg, g, 克, 千克, 公斤, 斤 (case-insensitive; 克/g ÷ 1000, 斤 × 0.5).
 *
 * @param raw Text that may contain a weight, e.g. "净重 150g".
 * @returns `{ value, unit: 'kg', source: '' }`, or null when no weight is found.
 *   The caller is expected to fill in `source`.
 */
function parseWeight(raw: string): LogisticsValue | null {
  // The number must contain at least one digit: the previous `[\d.]+` accepted a bare "."
  // and produced NaN. The unit may be followed by "rams"/"ram"/"s" ("5 grams", "2kgs") but
  // must not run into other Latin letters, so "16GB" or "200gsm" are not read as grams.
  const m = raw.match(/(\d+(?:\.\d*)?|\.\d+)\s*(kg|g|克|千克|公斤|斤)(?:rams?|s)?(?![a-z])/i);
  if (!m) return null;
  const amount = parseFloat(m[1]);
  switch (m[2].toLowerCase()) {
    case 'g':
    case '克':
      return { value: amount / 1000, unit: 'kg', source: '' };
    case '斤':
      return { value: amount * 0.5, unit: 'kg', source: '' };
    default:
      // kg, 千克 and 公斤 are already kilograms.
      return { value: amount, unit: 'kg', source: '' };
  }
}
async function downloadDetailImages(
cdp: CdpSession,
outputDir: string,
): Promise<string[]> {
// Get all detail image URLs from the page
const imgUrls: string[] = JSON.parse(await cdp.evaluate(`
(function() {
const imgs = [], seen = new Set();
document.querySelectorAll('img').forEach(img => {
const src = img.src || img.dataset.src || img.dataset.lazySrc || '';
if (src && !seen.has(src) && (src.startsWith('http') || src.startsWith('//'))) {
// Filter for product detail images (skip tiny icons/avatars)
if (img.naturalWidth > 200 || img.width > 200 || !img.complete) {
seen.add(src);
imgs.push(src.startsWith('//') ? 'https:' + src : src);
}
}
});
return JSON.stringify(imgs);
})()
`) || '[]');
// Conversion factors from each recognised length unit to centimetres.
const CM_PER_UNIT: Record<string, number> = {
  mm: 0.1,
  '毫米': 0.1,
  cm: 1,
  '厘米': 1,
  m: 100,
  '米': 100,
};

// Three numbers separated by * x X or ×, each optionally followed by a unit.
const DIMENSION_TRIPLE_RE = (() => {
  const num = String.raw`(\d+(?:\.\d*)?|\.\d+)`;
  const unit = String.raw`(?:\s*(cm|mm|毫米|厘米|m|米))?`;
  const sep = String.raw`\s*[*xX×]\s*`;
  return new RegExp(num + unit + sep + num + unit + sep + num + unit, 'i');
})();

/**
 * Extracts the first "L × W × H" triple from free-form text and converts it to centimetres.
 *
 * Accepts a single trailing unit ("30*20*10cm") or a unit after each number
 * ("30cm*20cm*10cm"). A number without its own unit uses the nearest unit found
 * later in the triple, then earlier, and finally defaults to cm.
 *
 * @param raw Text that may contain dimensions.
 * @returns `{ length, width, height, unit: 'cm', source: '' }`, or null when no triple is found.
 *   The caller is expected to fill in `source`.
 */
function parseDimensions(raw: string): Dimensions | null {
  const m = raw.match(DIMENSION_TRIPLE_RE);
  if (!m) return null;
  const fallbackUnit = m[6] ?? m[4] ?? m[2] ?? 'cm';
  const toCm = (num: string, unit: string | undefined): number => {
    const factor = CM_PER_UNIT[(unit ?? fallbackUnit).toLowerCase()];
    const value = parseFloat(num);
    // Round converted values to 6 decimals so 0.3 m becomes 30 rather than 30.000000000000004.
    return factor === 1 ? value : Math.round(value * factor * 1e6) / 1e6;
  };
  return {
    length: toCm(m[1], m[2]),
    width: toCm(m[3], m[4]),
    height: toCm(m[5], m[6]),
    unit: 'cm',
    source: '',
  };
}
/**
 * Extracts the first volume mentioned in free-form text.
 *
 * Recognised units: m³, cm³, L, ml, 升, 毫升, 立方米, 立方厘米 (case-insensitive).
 * No unit conversion is performed; the unit is returned exactly as written.
 *
 * @param raw Text that may contain a volume.
 * @returns `{ value, unit, source: '' }`, or null when no volume is found.
 *   The caller is expected to fill in `source`.
 */
function parseVolume(raw: string): LogisticsValue | null {
  // Require at least one digit: the previous `[\d.]+` accepted a bare "." and produced a NaN value.
  const m = raw.match(/(\d+(?:\.\d*)?|\.\d+)\s*(m³|cm³|L|ml|升|毫升|立方米|立方厘米)/i);
  if (!m) return null;
  return { value: parseFloat(m[1]), unit: m[2], source: '' };
}
/**
 * Reports whether `text` contains any of `keys`, ignoring letter case.
 *
 * @param text Attribute name to inspect.
 * @param keys Candidate substrings.
 * @returns true as soon as one key is found; false for an empty key list.
 */
function matchKey(text: string, keys: string[]): boolean {
  const haystack = text.toLowerCase();
  for (const key of keys) {
    if (haystack.includes(key.toLowerCase())) return true;
  }
  return false;
}
// --- Page extraction ---
/**
 * Script evaluated inside the page: collects attribute name/value pairs and
 * returns them as a JSON object string.
 *
 * Two passes are made. Elements matching the selector list are split on the first
 * colon of their text; both the ASCII ":" and the full-width "：" used on Chinese
 * pages are accepted (previously only ":" was, so "重量：0.5kg" was dropped).
 * Then every table row with at least two cells contributes cell 0 → cell 1.
 * Later matches overwrite earlier ones with the same name.
 */
const JS_EXTRACT_ATTRS = `
(function() {
  const attrs = {};
  const sels = [
    '.detail-attributes-list .attributes-item',
    '.obj-leading .obj-content li',
    '#mod-detail-attributes .attribute-item',
    '.detail-info table tr',
    '[class*="attribute"] li',
    '[class*="param"] li',
    '.offer-attr-list .offer-attr-item',
  ];
  for (const sel of sels) {
    document.querySelectorAll(sel).forEach(el => {
      const parts = el.textContent.trim().split(/[:：]/);
      if (parts.length >= 2) attrs[parts[0].trim()] = parts.slice(1).join(':').trim();
    });
  }
  document.querySelectorAll('table tr, .detail-attributes-list tr').forEach(tr => {
    const cells = tr.querySelectorAll('td, th');
    if (cells.length >= 2) attrs[cells[0].textContent.trim()] = cells[1].textContent.trim();
  });
  return JSON.stringify(attrs);
})()`;
/**
 * Script evaluated inside the page: collects SKU/variant elements and returns a
 * JSON array string of `{ name, text }`, where `name` is the element text with
 * whitespace collapsed and `text` is the raw text. Entries whose name is empty
 * or 200+ characters long are skipped.
 *
 * The selectors overlap (an element matching '.sku-item-wrapper .sku-item' also
 * matches '[class*="sku"] [class*="item"]'), so each element is recorded only once.
 */
const JS_EXTRACT_VARIANTS = `
(function() {
  const variants = [];
  const seen = new Set();
  const sels = [
    '.sku-item-wrapper .sku-item',
    '[class*="sku"] [class*="item"]',
    '.obj-sku .obj-content li',
    '.unit-detail-spec-operator .spec-item',
  ];
  for (const sel of sels) {
    document.querySelectorAll(sel).forEach(el => {
      if (seen.has(el)) return;
      seen.add(el);
      const name = el.textContent.trim().replace(/\\s+/g, ' ');
      if (name && name.length < 200) variants.push({ name, text: el.textContent });
    });
  }
  return JSON.stringify(variants);
})()`;
/**
 * Script evaluated inside the page: returns the trimmed text of the first title
 * selector that matches an element with non-empty text, falling back to
 * document.title (or '' when that is empty too).
 */
const JS_EXTRACT_TITLE = `
(function() {
for (const sel of ['.title-text','.detail-title-text','h1[class*="title"]','.mod-detail-title h1','.d-title']) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return document.title || '';
})()`;
/**
 * Script evaluated inside the page: returns a JSON array string of image URLs found
 * under the listed description/detail containers. For each <img> it uses `src`, then
 * `data-src`, then `data-lazy-src`; keeps only values starting with "http" or "//";
 * prefixes protocol-relative URLs with "https:"; and skips sources already seen.
 */
const JS_EXTRACT_IMAGES = `
(function() {
const imgs = [], seen = new Set();
const sels = [
'#desc-lazyload-container img',
'.detail-desc-decorate-richtext img',
'[class*="detail-desc"] img',
'.mod-detail-description img',
'.offer-attr-item img',
'.desc-img-loaded img',
];
for (const sel of sels) {
document.querySelectorAll(sel).forEach(img => {
const src = img.src || img.dataset.src || img.dataset.lazySrc || '';
if (src && !seen.has(src) && (src.startsWith('http') || src.startsWith('//'))) {
seen.add(src);
imgs.push(src.startsWith('//') ? 'https:' + src : src);
}
});
}
return JSON.stringify(imgs);
})()`;
async function downloadImages(urls: string[], outputDir: string): Promise<string[]> {
fs.mkdirSync(outputDir, { recursive: true });
const saved: string[] = [];
for (let i = 0; i < imgUrls.length; i++) {
for (let i = 0; i < urls.length; i++) {
try {
const resp = await fetch(imgUrls[i]);
const resp = await fetch(urls[i]);
if (!resp.ok) continue;
const buf = Buffer.from(await resp.arrayBuffer());
const ext = imgUrls[i].match(/\.(jpg|jpeg|png|webp|gif)/i)?.[1] || 'jpg';
const ext = urls[i].match(/\.(jpg|jpeg|png|webp|gif)/i)?.[1] || 'jpg';
const p = path.join(outputDir, `img_${String(i + 1).padStart(3, '0')}.${ext}`);
fs.writeFileSync(p, buf);
saved.push(p);
@ -197,24 +251,30 @@ export async function run(
command: Command,
args: string[],
dryRun: boolean,
cdpPort: number = 18800,
cdpPort: number = 9222,
): Promise<ScrapeResult> {
if (command !== 'scrape') {
return { status: 'failed', url: '', command, dryRun, offerId: '', error: `unknown command: ${command}` };
return { status: 'failed', url: '', command, dryRun, error: `unknown command: ${command}` };
}
const url = args[0];
if (!url) {
return { status: 'failed', url: '', command, dryRun, offerId: '', error: 'scrape requires <url>' };
return { status: 'failed', url: '', command, dryRun, error: 'scrape requires <url>' };
}
const offerId = extractOfferId(url);
if (dryRun) {
return {
status: 'success', url, command, dryRun, offerId,
screenshots: [],
status: 'success', url, command, dryRun,
product: {
title: '<dry-run>',
logistics: {
weight: null, dimensions: null, grossWeight: null, netWeight: null,
packageWeight: null, volume: null, shippingMethod: null, shippingCost: null, origin: null,
},
variants: [],
},
detailImages: [],
rawAttributes: {},
};
}
@ -224,76 +284,69 @@ export async function run(
await cdp.send('Page.enable');
await cdp.send('Runtime.enable');
await cdp.send('Page.navigate', { url });
// Set wide PC viewport to ensure tables fit without horizontal overflow
await cdp.send('Emulation.setDeviceMetricsOverride', {
width: 1920,
height: 1080,
deviceScaleFactor: 2,
mobile: false,
// Wait for load
await new Promise(r => setTimeout(r, 5000));
const title: string = await cdp.evaluate(JS_EXTRACT_TITLE) || '';
const rawAttributes: Record<string, string> = JSON.parse(await cdp.evaluate(JS_EXTRACT_ATTRS) || '{}');
const rawVariants: Array<{ name: string; text: string }> = JSON.parse(await cdp.evaluate(JS_EXTRACT_VARIANTS) || '[]');
const imgUrls: string[] = JSON.parse(await cdp.evaluate(JS_EXTRACT_IMAGES) || '[]');
const variants: VariantInfo[] = rawVariants.map(v => {
const weight = parseWeight(v.text);
const dimensions = parseDimensions(v.text);
if (weight) weight.source = 'variant';
if (dimensions) dimensions.source = 'variant';
return { name: v.name, weight, dimensions };
});
// Navigate and wait for page load event
const loadPromise = cdp.waitForEvent('Page.loadEventFired', 30000);
await cdp.send('Page.navigate', { url });
await loadPromise;
const logistics: LogisticsData = {
weight: null, dimensions: null, grossWeight: null, netWeight: null,
packageWeight: null, volume: null, shippingMethod: null, shippingCost: null, origin: null,
};
// Wait for networkIdle — poll until no pending requests for 1s
await cdp.evaluate(`
new Promise(resolve => {
let timer;
const reset = () => { clearTimeout(timer); timer = setTimeout(resolve, 1000); };
const observer = new PerformanceObserver(() => reset());
observer.observe({ entryTypes: ['resource'] });
reset();
})
`);
// Extract window.context.result.data
let productPackInfo: unknown = null;
let windowContext: unknown = null;
const ctx = await cdp.evaluate(`
(function() {
try {
const d = window.context && window.context.result && window.context.result.data;
if (d && d.productPackInfo) {
return JSON.stringify({
productPackInfo: d.productPackInfo,
productTitle: d.productTitle || null,
productAttributes: d.productAttributes || null,
skuSelection: d.skuSelection || null,
});
}
} catch(e) {}
return null;
})()
`);
if (ctx) {
const parsed = JSON.parse(ctx);
productPackInfo = parsed.productPackInfo;
windowContext = parsed;
for (const [key, val] of Object.entries(rawAttributes)) {
if (matchKey(key, ['毛重'])) {
logistics.grossWeight = parseWeight(val);
if (logistics.grossWeight) logistics.grossWeight.source = 'attributes';
} else if (matchKey(key, ['净重'])) {
logistics.netWeight = parseWeight(val);
if (logistics.netWeight) logistics.netWeight.source = 'attributes';
} else if (matchKey(key, ['包装重量'])) {
logistics.packageWeight = parseWeight(val);
if (logistics.packageWeight) logistics.packageWeight.source = 'attributes';
} else if (matchKey(key, WEIGHT_KEYS)) {
logistics.weight = parseWeight(val);
if (logistics.weight) logistics.weight.source = 'attributes';
}
if (matchKey(key, DIMENSION_KEYS)) {
logistics.dimensions = parseDimensions(val);
if (logistics.dimensions) logistics.dimensions.source = 'attributes';
}
if (matchKey(key, VOLUME_KEYS)) {
logistics.volume = parseVolume(val);
if (logistics.volume) logistics.volume.source = 'attributes';
}
if (matchKey(key, ['产地', '发货地', '所在地'])) {
logistics.origin = val;
}
}
const outputDir = path.join('/tmp', '1688-logistics', offerId);
// Capture full-page screenshots (scrolling)
const screenshotDir = path.join(outputDir, 'screenshots');
const screenshots = await scrollAndCapture(cdp, screenshotDir);
// Download detail images
const imgDir = path.join(outputDir, 'images');
const detailImages = await downloadDetailImages(cdp, imgDir);
const offerId = extractOfferId(url);
const imgDir = path.join('/tmp', '1688-logistics', offerId);
const detailImages = await downloadImages(imgUrls, imgDir);
return {
status: 'success', url, command, dryRun, offerId,
productPackInfo,
windowContext,
screenshots,
status: 'success', url, command, dryRun,
product: { title, logistics, variants },
detailImages,
rawAttributes,
};
} catch (error) {
return {
status: 'failed', url, command, dryRun, offerId,
status: 'failed', url, command, dryRun,
error: error instanceof Error ? error.message : String(error),
};
} finally {