refactor: replace DOM parsing with vision-based approach

Remove all CSS selectors, regex parsers, and structured extraction.
Instead, capture full-page screenshots (scrolling) and download detail
images. The model reads these directly with vision to extract logistics
data — no fragile DOM dependencies.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
ywkj 2026-03-30 12:11:24 +08:00
parent bff990628b
commit 87920a9503
2 changed files with 119 additions and 420 deletions

View File

@ -1,16 +1,16 @@
--- ---
name: 1688-logistics-scraper name: 1688-logistics-scraper
description: "Extract product weight/size/logistics data from 1688 product pages via Chrome browser, output structured JSON. Use when the user provides a 1688 product URL and needs logistics specs." description: "Scrape 1688 product pages via Chrome, capture full-page screenshots and detail images for vision-based extraction of weight/size/logistics data. Use when the user provides a 1688 product URL and needs logistics specs."
--- ---
# 1688 Logistics Scraper # 1688 Logistics Scraper
Extract product weight, size, and logistics data from 1688 product pages. Capture 1688 product pages for vision-based extraction of weight, size, and logistics data.
## Run ## Run
```bash ```bash
bun scripts/run.ts scrape <url> [--dry-run] bun scripts/run.ts scrape <url> [--dry-run] [--port=9222]
``` ```
### Examples ### Examples
@ -22,21 +22,12 @@ bun scripts/run.ts scrape 'https://detail.1688.com/offer/852504650877.html' --dr
## What It Does ## What It Does
1. Opens the 1688 product URL in the browser (port 18800) 1. Opens the 1688 product URL in the browser (default port 18800)
2. Extracts weight/size data from wherever it appears on the page — product attributes, variant specs, 包装信息, 商品件重尺 table, logistics section 2. Scrolls through the entire page one viewport at a time, saving a **screenshot** at each scroll position
3. Downloads detail images (商品详情图片) for analysis — weight/size is often only in images 3. Downloads the **product detail images** (any page image wider than 200px or not yet loaded; small icons are skipped)
4. Outputs structured JSON 4. Saves everything to `/tmp/1688-logistics/<offer-id>/`
## Where To Look For Data **No DOM parsing or regex.** The model reads the screenshots and images directly to extract logistics data.
Weight/size data on 1688 pages hides in multiple places. Check all before giving up:
1. **Product attributes** (商品属性 / 商品参数) — key-value table, most reliable
2. **商品件重尺 table** — dedicated weight/dimensions/volume table for logistics
3. **包装信息 section** — packaging type, box weight, box dimensions, units per box
4. **Variant/SKU specs** — per-variant weight or size
5. **Logistics section** — shipping weight, volume, freight info
6. **Detail images** — downloaded to `/tmp/1688-logistics/<offer-id>/`, read them to find weight/size text baked into images
## Output ## Output
@ -44,47 +35,36 @@ Weight/size data on 1688 pages hides in multiple places. Check all before giving
{ {
"status": "success", "status": "success",
"url": "https://detail.1688.com/offer/...", "url": "https://detail.1688.com/offer/...",
"product": { "offerId": "852504650877",
"title": "产品标题", "screenshots": [
"logistics": { "/tmp/1688-logistics/852504650877/screenshots/page_001.png",
"weight": { "value": 0.5, "unit": "kg", "source": "attributes" }, "/tmp/1688-logistics/852504650877/screenshots/page_002.png",
"dimensions": { "length": 30, "width": 20, "height": 10, "unit": "cm", "source": "attributes" }, "/tmp/1688-logistics/852504650877/screenshots/page_003.png"
"grossWeight": null,
"netWeight": null,
"packageWeight": { "value": 2.0, "unit": "kg", "source": "packageInfo" },
"volume": null,
"shippingMethod": null,
"shippingCost": null,
"origin": null
},
"variants": [
{ "name": "颜色: 红色", "weight": null, "dimensions": null }
], ],
"packageInfo": { "detailImages": [
"packagingType": "纸箱", "/tmp/1688-logistics/852504650877/images/img_001.jpg",
"packagingWeight": { "value": 2.0, "unit": "kg", "source": "packageInfo" }, "/tmp/1688-logistics/852504650877/images/img_002.jpg"
"packagingDimensions": { "length": 40, "width": 30, "height": 20, "unit": "cm", "source": "packageInfo" }, ]
"unitsPerPackage": 50,
"raw": { "包装方式": "纸箱", "箱规": "40*30*20cm", "装箱数": "50" }
},
"pieceWeightSize": {
"weight": { "value": 0.5, "unit": "kg", "source": "pieceWeightSize" },
"dimensions": { "length": 30, "width": 20, "height": 10, "unit": "cm", "source": "pieceWeightSize" },
"volume": null,
"raw": { "重量": "500g", "尺寸": "30*20*10cm" }
}
},
"detailImages": ["/tmp/1688-logistics/852504650877/img_001.jpg"],
"rawAttributes": { "重量": "0.5kg", "尺寸": "30*20*10cm" }
} }
``` ```
`null` = not found in text. Check `detailImages` — the data may be in the images. ## After Running
Read the screenshots and images to extract:
- **Weight** (重量/毛重/净重/单件重量) — normalize to kg
- **Dimensions** (尺寸/长宽高) — normalize to cm
- **Volume** (体积/容积)
- **Package info** (包装信息) — packaging type, box weight, box dimensions, units per box
- **Piece weight/size** (商品件重尺) — per-piece logistics specs
- **Variant-specific** weight/size if shown per SKU
- **Shipping info** — method, cost, origin
Output the extracted data as structured JSON.
## Rules ## Rules
1. If the browser is not running, report the error. Do not try to launch it. 1. If the browser is not running, report the error. Do not try to launch it.
2. Check all data sources before reporting `null`. 2. No retries. If it fails, report as-is.
3. Normalize units: 克→kg, 毫米→cm. Keep raw values in `rawAttributes` and `raw` fields. 3. Read ALL screenshots — logistics data can appear anywhere on the page.
4. No retries. If it fails, report as-is. 4. Read detail images too — weight/size is often baked into product photos.
5. Trust page content. Do not guess values.

View File

@ -3,67 +3,14 @@ import * as path from 'path';
export type Command = 'scrape'; export type Command = 'scrape';
export interface LogisticsValue {
value: number | null;
unit: string | null;
source: string;
}
export interface Dimensions {
length: number | null;
width: number | null;
height: number | null;
unit: string | null;
source: string;
}
export interface LogisticsData {
weight: LogisticsValue | null;
dimensions: Dimensions | null;
grossWeight: LogisticsValue | null;
netWeight: LogisticsValue | null;
packageWeight: LogisticsValue | null;
volume: LogisticsValue | null;
shippingMethod: string | null;
shippingCost: string | null;
origin: string | null;
}
export interface VariantInfo {
name: string;
weight: LogisticsValue | null;
dimensions: Dimensions | null;
}
export interface PackageInfo {
packagingType: string | null;
packagingWeight: LogisticsValue | null;
packagingDimensions: Dimensions | null;
unitsPerPackage: number | null;
raw: Record<string, string>;
}
export interface PieceWeightSize {
weight: LogisticsValue | null;
dimensions: Dimensions | null;
volume: LogisticsValue | null;
raw: Record<string, string>;
}
export interface ScrapeResult { export interface ScrapeResult {
status: 'success' | 'failed'; status: 'success' | 'failed';
url: string; url: string;
command: Command; command: Command;
dryRun: boolean; dryRun: boolean;
product?: { offerId: string;
title: string; screenshots?: string[];
logistics: LogisticsData;
variants: VariantInfo[];
packageInfo: PackageInfo;
pieceWeightSize: PieceWeightSize;
};
detailImages?: string[]; detailImages?: string[];
rawAttributes?: Record<string, string>;
error?: string; error?: string;
} }
@ -120,213 +67,87 @@ class CdpSession {
return res?.result?.value; return res?.result?.value;
} }
async captureScreenshot(format: string = 'png'): Promise<Buffer> {
const res = await this.send('Page.captureScreenshot', {
format,
captureBeyondViewport: true,
});
return Buffer.from(res.data, 'base64');
}
close() { close() {
try { this.ws.close(); } catch {} try { this.ws.close(); } catch {}
} }
} }
// --- Parsers --- // --- Helpers ---
const WEIGHT_KEYS = ['重量', '毛重', '净重', '单件重量', '包装重量', '产品重量', '单品重量', 'weight'];
const DIMENSION_KEYS = ['尺寸', '规格', '长宽高', '外箱尺寸', '包装尺寸', '产品尺寸', '大小', 'size', 'dimensions'];
const VOLUME_KEYS = ['体积', '容积', 'volume'];
function extractOfferId(url: string): string { function extractOfferId(url: string): string {
return url.match(/offer\/(\d+)/)?.[1] || 'unknown'; return url.match(/offer\/(\d+)/)?.[1] || 'unknown';
} }
function parseWeight(raw: string): LogisticsValue | null { async function scrollAndCapture(
const m = raw.match(/([\d.]+)\s*(kg|g|克|千克|公斤|斤)/i); cdp: CdpSession,
if (!m) return null; outputDir: string,
let value = parseFloat(m[1]); ): Promise<string[]> {
let unit = m[2].toLowerCase(); fs.mkdirSync(outputDir, { recursive: true });
if (unit === 'g' || unit === '克') { value /= 1000; unit = 'kg'; } const saved: string[] = [];
if (unit === '千克' || unit === '公斤') unit = 'kg';
if (unit === '斤') { value *= 0.5; unit = 'kg'; } // Get page height
return { value, unit, source: '' }; const pageHeight: number = await cdp.evaluate(
'Math.max(document.body.scrollHeight, document.documentElement.scrollHeight)'
) || 0;
const viewportHeight: number = await cdp.evaluate('window.innerHeight') || 900;
// Scroll through the page and capture screenshots
let scrollY = 0;
let idx = 1;
while (scrollY < pageHeight) {
await cdp.evaluate(`window.scrollTo(0, ${scrollY})`);
await new Promise(r => setTimeout(r, 500)); // wait for render
const buf = await cdp.captureScreenshot('png');
const filePath = path.join(outputDir, `page_${String(idx).padStart(3, '0')}.png`);
fs.writeFileSync(filePath, buf);
saved.push(filePath);
scrollY += viewportHeight;
idx++;
} }
function parseDimensions(raw: string): Dimensions | null { return saved;
const m = raw.match(/([\d.]+)\s*[*xX×]\s*([\d.]+)\s*[*xX×]\s*([\d.]+)\s*(cm|mm|毫米|厘米|m|米)?/i);
if (!m) return null;
let [l, w, h] = [parseFloat(m[1]), parseFloat(m[2]), parseFloat(m[3])];
let unit = (m[4] || 'cm').toLowerCase();
if (unit === 'mm' || unit === '毫米') { l /= 10; w /= 10; h /= 10; unit = 'cm'; }
if (unit === '厘米') unit = 'cm';
if (unit === 'm' || unit === '米') { l *= 100; w *= 100; h *= 100; unit = 'cm'; }
return { length: l, width: w, height: h, unit, source: '' };
} }
function parseVolume(raw: string): LogisticsValue | null { async function downloadDetailImages(
const m = raw.match(/([\d.]+)\s*(m³|cm³|L|ml|升|毫升|立方米|立方厘米)/i); cdp: CdpSession,
if (!m) return null; outputDir: string,
return { value: parseFloat(m[1]), unit: m[2], source: '' }; ): Promise<string[]> {
} // Get all detail image URLs from the page
const imgUrls: string[] = JSON.parse(await cdp.evaluate(`
function matchKey(text: string, keys: string[]): boolean {
const lower = text.toLowerCase();
return keys.some(k => lower.includes(k.toLowerCase()));
}
// --- Page extraction ---
const JS_EXTRACT_ATTRS = `
(function() {
const attrs = {};
const sels = [
'.detail-attributes-list .attributes-item',
'.obj-leading .obj-content li',
'#mod-detail-attributes .attribute-item',
'.detail-info table tr',
'[class*="attribute"] li',
'[class*="param"] li',
'.offer-attr-list .offer-attr-item',
];
for (const sel of sels) {
document.querySelectorAll(sel).forEach(el => {
const parts = el.textContent.trim().split(/[:]/);
if (parts.length >= 2) attrs[parts[0].trim()] = parts.slice(1).join(':').trim();
});
}
document.querySelectorAll('table tr, .detail-attributes-list tr').forEach(tr => {
const cells = tr.querySelectorAll('td, th');
if (cells.length >= 2) attrs[cells[0].textContent.trim()] = cells[1].textContent.trim();
});
return JSON.stringify(attrs);
})()`;
const JS_EXTRACT_VARIANTS = `
(function() {
const variants = [];
const sels = [
'.sku-item-wrapper .sku-item',
'[class*="sku"] [class*="item"]',
'.obj-sku .obj-content li',
'.unit-detail-spec-operator .spec-item',
];
for (const sel of sels) {
document.querySelectorAll(sel).forEach(el => {
const name = el.textContent.trim().replace(/\\s+/g, ' ');
if (name && name.length < 200) variants.push({ name, text: el.textContent });
});
}
return JSON.stringify(variants);
})()`;
const JS_EXTRACT_TITLE = `
(function() {
for (const sel of ['.title-text','.detail-title-text','h1[class*="title"]','.mod-detail-title h1','.d-title']) {
const el = document.querySelector(sel);
if (el && el.textContent.trim()) return el.textContent.trim();
}
return document.title || '';
})()`;
const JS_EXTRACT_IMAGES = `
(function() { (function() {
const imgs = [], seen = new Set(); const imgs = [], seen = new Set();
const sels = [ document.querySelectorAll('img').forEach(img => {
'#desc-lazyload-container img',
'.detail-desc-decorate-richtext img',
'[class*="detail-desc"] img',
'.mod-detail-description img',
'.offer-attr-item img',
'.desc-img-loaded img',
];
for (const sel of sels) {
document.querySelectorAll(sel).forEach(img => {
const src = img.src || img.dataset.src || img.dataset.lazySrc || ''; const src = img.src || img.dataset.src || img.dataset.lazySrc || '';
if (src && !seen.has(src) && (src.startsWith('http') || src.startsWith('//'))) { if (src && !seen.has(src) && (src.startsWith('http') || src.startsWith('//'))) {
// Filter for product detail images (skip tiny icons/avatars)
if (img.naturalWidth > 200 || img.width > 200 || !img.complete) {
seen.add(src); seen.add(src);
imgs.push(src.startsWith('//') ? 'https:' + src : src); imgs.push(src.startsWith('//') ? 'https:' + src : src);
} }
});
} }
});
return JSON.stringify(imgs); return JSON.stringify(imgs);
})()`; })()
`) || '[]');
const JS_EXTRACT_PACKAGE_INFO = `
(function() {
const data = {};
// 包装信息 section — various selector patterns on 1688
const sels = [
'[class*="package-info"] li',
'[class*="packaging"] li',
'[class*="pack-info"] li',
'[class*="baozhuang"] li',
'.detail-packing li',
];
for (const sel of sels) {
document.querySelectorAll(sel).forEach(el => {
const parts = el.textContent.trim().split(/[:]/);
if (parts.length >= 2) data[parts[0].trim()] = parts.slice(1).join(':').trim();
});
}
// Also look for table rows inside 包装 sections
document.querySelectorAll('table').forEach(table => {
const header = table.previousElementSibling;
if (header && /包装/.test(header.textContent || '')) {
table.querySelectorAll('tr').forEach(tr => {
const cells = tr.querySelectorAll('td, th');
if (cells.length >= 2) data[cells[0].textContent.trim()] = cells[1].textContent.trim();
});
}
});
// Scan all key-value pairs for 包装 related keys
document.querySelectorAll('[class*="attribute"] li, [class*="param"] li, .offer-attr-list .offer-attr-item').forEach(el => {
const text = el.textContent.trim();
if (/包装/.test(text)) {
const parts = text.split(/[:]/);
if (parts.length >= 2) data[parts[0].trim()] = parts.slice(1).join(':').trim();
}
});
return JSON.stringify(data);
})()`;
const JS_EXTRACT_PIECE_WEIGHT_SIZE = `
(function() {
const data = {};
// 商品件重尺 table — dedicated logistics spec table on 1688
const sels = [
'[class*="piece-weight"] tr',
'[class*="jianzhongchi"] tr',
'[class*="weight-size"] tr',
'[class*="logistics-info"] tr',
'[class*="freight-info"] tr',
];
for (const sel of sels) {
document.querySelectorAll(sel).forEach(tr => {
const cells = tr.querySelectorAll('td, th');
if (cells.length >= 2) data[cells[0].textContent.trim()] = cells[1].textContent.trim();
});
}
// Scan tables preceded by headers containing 件重尺
document.querySelectorAll('table').forEach(table => {
const header = table.previousElementSibling;
if (header && /件重尺|物流|运费/.test(header.textContent || '')) {
table.querySelectorAll('tr').forEach(tr => {
const cells = tr.querySelectorAll('td, th');
if (cells.length >= 2) data[cells[0].textContent.trim()] = cells[1].textContent.trim();
});
}
});
// Also check spans/divs in logistics area
document.querySelectorAll('[class*="logistics"] [class*="item"], [class*="freight"] [class*="item"]').forEach(el => {
const parts = el.textContent.trim().split(/[:]/);
if (parts.length >= 2) data[parts[0].trim()] = parts.slice(1).join(':').trim();
});
return JSON.stringify(data);
})()`;
async function downloadImages(urls: string[], outputDir: string): Promise<string[]> {
fs.mkdirSync(outputDir, { recursive: true }); fs.mkdirSync(outputDir, { recursive: true });
const saved: string[] = []; const saved: string[] = [];
for (let i = 0; i < urls.length; i++) { for (let i = 0; i < imgUrls.length; i++) {
try { try {
const resp = await fetch(urls[i]); const resp = await fetch(imgUrls[i]);
if (!resp.ok) continue; if (!resp.ok) continue;
const buf = Buffer.from(await resp.arrayBuffer()); const buf = Buffer.from(await resp.arrayBuffer());
const ext = urls[i].match(/\.(jpg|jpeg|png|webp|gif)/i)?.[1] || 'jpg'; const ext = imgUrls[i].match(/\.(jpg|jpeg|png|webp|gif)/i)?.[1] || 'jpg';
const p = path.join(outputDir, `img_${String(i + 1).padStart(3, '0')}.${ext}`); const p = path.join(outputDir, `img_${String(i + 1).padStart(3, '0')}.${ext}`);
fs.writeFileSync(p, buf); fs.writeFileSync(p, buf);
saved.push(p); saved.push(p);
@ -344,29 +165,21 @@ export async function run(
cdpPort: number = 18800, cdpPort: number = 18800,
): Promise<ScrapeResult> { ): Promise<ScrapeResult> {
if (command !== 'scrape') { if (command !== 'scrape') {
return { status: 'failed', url: '', command, dryRun, error: `unknown command: ${command}` }; return { status: 'failed', url: '', command, dryRun, offerId: '', error: `unknown command: ${command}` };
} }
const url = args[0]; const url = args[0];
if (!url) { if (!url) {
return { status: 'failed', url: '', command, dryRun, error: 'scrape requires <url>' }; return { status: 'failed', url: '', command, dryRun, offerId: '', error: 'scrape requires <url>' };
} }
const offerId = extractOfferId(url);
if (dryRun) { if (dryRun) {
return { return {
status: 'success', url, command, dryRun, status: 'success', url, command, dryRun, offerId,
product: { screenshots: [],
title: '<dry-run>',
logistics: {
weight: null, dimensions: null, grossWeight: null, netWeight: null,
packageWeight: null, volume: null, shippingMethod: null, shippingCost: null, origin: null,
},
variants: [],
packageInfo: { packagingType: null, packagingWeight: null, packagingDimensions: null, unitsPerPackage: null, raw: {} },
pieceWeightSize: { weight: null, dimensions: null, volume: null, raw: {} },
},
detailImages: [], detailImages: [],
rawAttributes: {},
}; };
} }
@ -378,121 +191,27 @@ export async function run(
await cdp.send('Runtime.enable'); await cdp.send('Runtime.enable');
await cdp.send('Page.navigate', { url }); await cdp.send('Page.navigate', { url });
// Wait for load // Wait for page load + dynamic content
await new Promise(r => setTimeout(r, 5000)); await new Promise(r => setTimeout(r, 5000));
const title: string = await cdp.evaluate(JS_EXTRACT_TITLE) || ''; const outputDir = path.join('/tmp', '1688-logistics', offerId);
const rawAttributes: Record<string, string> = JSON.parse(await cdp.evaluate(JS_EXTRACT_ATTRS) || '{}');
const rawVariants: Array<{ name: string; text: string }> = JSON.parse(await cdp.evaluate(JS_EXTRACT_VARIANTS) || '[]');
const imgUrls: string[] = JSON.parse(await cdp.evaluate(JS_EXTRACT_IMAGES) || '[]');
const variants: VariantInfo[] = rawVariants.map(v => { // Capture full-page screenshots (scrolling)
const weight = parseWeight(v.text); const screenshotDir = path.join(outputDir, 'screenshots');
const dimensions = parseDimensions(v.text); const screenshots = await scrollAndCapture(cdp, screenshotDir);
if (weight) weight.source = 'variant';
if (dimensions) dimensions.source = 'variant';
return { name: v.name, weight, dimensions };
});
const logistics: LogisticsData = { // Download detail images
weight: null, dimensions: null, grossWeight: null, netWeight: null, const imgDir = path.join(outputDir, 'images');
packageWeight: null, volume: null, shippingMethod: null, shippingCost: null, origin: null, const detailImages = await downloadDetailImages(cdp, imgDir);
};
for (const [key, val] of Object.entries(rawAttributes)) {
if (matchKey(key, ['毛重'])) {
logistics.grossWeight = parseWeight(val);
if (logistics.grossWeight) logistics.grossWeight.source = 'attributes';
} else if (matchKey(key, ['净重'])) {
logistics.netWeight = parseWeight(val);
if (logistics.netWeight) logistics.netWeight.source = 'attributes';
} else if (matchKey(key, ['包装重量'])) {
logistics.packageWeight = parseWeight(val);
if (logistics.packageWeight) logistics.packageWeight.source = 'attributes';
} else if (matchKey(key, WEIGHT_KEYS)) {
logistics.weight = parseWeight(val);
if (logistics.weight) logistics.weight.source = 'attributes';
}
if (matchKey(key, DIMENSION_KEYS)) {
const parsed = parseDimensions(val);
if (parsed) { parsed.source = 'attributes'; logistics.dimensions = parsed; }
}
if (matchKey(key, VOLUME_KEYS)) {
const parsed = parseVolume(val);
if (parsed) { parsed.source = 'attributes'; logistics.volume = parsed; }
}
if (matchKey(key, ['产地', '发货地', '所在地'])) {
logistics.origin = val;
}
}
// Extract 包装信息
const rawPkgInfo: Record<string, string> = JSON.parse(await cdp.evaluate(JS_EXTRACT_PACKAGE_INFO) || '{}');
const packageInfo: PackageInfo = {
packagingType: null,
packagingWeight: null,
packagingDimensions: null,
unitsPerPackage: null,
raw: rawPkgInfo,
};
for (const [key, val] of Object.entries(rawPkgInfo)) {
if (matchKey(key, ['包装方式', '包装类型', '包装形式'])) packageInfo.packagingType = val;
if (matchKey(key, ['包装重量', '箱重'])) {
packageInfo.packagingWeight = parseWeight(val);
if (packageInfo.packagingWeight) packageInfo.packagingWeight.source = 'packageInfo';
}
if (matchKey(key, ['包装尺寸', '外箱尺寸', '箱规'])) {
packageInfo.packagingDimensions = parseDimensions(val);
if (packageInfo.packagingDimensions) packageInfo.packagingDimensions.source = 'packageInfo';
}
if (matchKey(key, ['装箱数', '每箱数量', '入数'])) {
const n = parseInt(val, 10);
if (!isNaN(n)) packageInfo.unitsPerPackage = n;
}
}
// Extract 商品件重尺
const rawPws: Record<string, string> = JSON.parse(await cdp.evaluate(JS_EXTRACT_PIECE_WEIGHT_SIZE) || '{}');
const pieceWeightSize: PieceWeightSize = {
weight: null,
dimensions: null,
volume: null,
raw: rawPws,
};
for (const [key, val] of Object.entries(rawPws)) {
if (matchKey(key, WEIGHT_KEYS)) {
pieceWeightSize.weight = parseWeight(val);
if (pieceWeightSize.weight) pieceWeightSize.weight.source = 'pieceWeightSize';
}
if (matchKey(key, DIMENSION_KEYS)) {
pieceWeightSize.dimensions = parseDimensions(val);
if (pieceWeightSize.dimensions) pieceWeightSize.dimensions.source = 'pieceWeightSize';
}
if (matchKey(key, VOLUME_KEYS)) {
pieceWeightSize.volume = parseVolume(val);
if (pieceWeightSize.volume) pieceWeightSize.volume.source = 'pieceWeightSize';
}
}
// Backfill logistics from pieceWeightSize if not found in attributes
if (!logistics.weight && pieceWeightSize.weight) logistics.weight = pieceWeightSize.weight;
if (!logistics.dimensions && pieceWeightSize.dimensions) logistics.dimensions = pieceWeightSize.dimensions;
if (!logistics.volume && pieceWeightSize.volume) logistics.volume = pieceWeightSize.volume;
if (!logistics.packageWeight && packageInfo.packagingWeight) logistics.packageWeight = packageInfo.packagingWeight;
const offerId = extractOfferId(url);
const imgDir = path.join('/tmp', '1688-logistics', offerId);
const detailImages = await downloadImages(imgUrls, imgDir);
return { return {
status: 'success', url, command, dryRun, status: 'success', url, command, dryRun, offerId,
product: { title, logistics, variants, packageInfo, pieceWeightSize }, screenshots,
detailImages, detailImages,
rawAttributes,
}; };
} catch (error) { } catch (error) {
return { return {
status: 'failed', url, command, dryRun, status: 'failed', url, command, dryRun, offerId,
error: error instanceof Error ? error.message : String(error), error: error instanceof Error ? error.message : String(error),
}; };
} finally { } finally {