commit 8b73c50a43527a97b11b1db139db176dbaee0441 Author: ivanberry Date: Thu Mar 12 07:36:59 2026 +0800 feat: initial commit diff --git a/.forgejo/workflows/register-skill-release.yml b/.forgejo/workflows/register-skill-release.yml new file mode 100644 index 0000000..38bc37a --- /dev/null +++ b/.forgejo/workflows/register-skill-release.yml @@ -0,0 +1,125 @@ +name: register-skill-release + +on: + release: + types: [published] + workflow_dispatch: + inputs: + skill_slug: + description: Skill slug override (optional) + required: false + skill_subpath: + description: Skill folder path override (optional) + required: false + skill_doc_path: + description: Skill doc path override + required: false + default: SKILL.md + skill_version: + description: Version override (default tag name) + required: false + +jobs: + register-skill-version: + runs-on: ubuntu-latest + env: + API_BASE: ${{ vars.API_BASE || secrets.API_BASE }} + CLIENT_KEY: ${{ secrets.CLIENT_KEY }} + SKILL_VERSION: ${{ github.event.inputs.skill_version || github.ref_name }} + SKILL_SUBPATH: ${{ github.event.inputs.skill_subpath || vars.SKILL_SUBPATH || secrets.SKILL_SUBPATH }} + SKILL_DOC_PATH: ${{ github.event.inputs.skill_doc_path || vars.SKILL_DOC_PATH || secrets.SKILL_DOC_PATH || 'SKILL.md' }} + SKILL_SLUG: ${{ github.event.inputs.skill_slug || vars.SKILL_SLUG || secrets.SKILL_SLUG }} + RELEASE_NOTE: ${{ github.event.release.body }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Load skill doc content + shell: bash + run: | + set -euo pipefail + DOC_ABS_PATH="${SKILL_SUBPATH:+$SKILL_SUBPATH/}${SKILL_DOC_PATH}" + if [ ! -f "$DOC_ABS_PATH" ]; then + if [ -f "${SKILL_SUBPATH:+$SKILL_SUBPATH/}README.md" ]; then + DOC_ABS_PATH="${SKILL_SUBPATH:+$SKILL_SUBPATH/}README.md" + export SKILL_DOC_PATH="README.md" + else + echo "skill doc not found: $DOC_ABS_PATH" + exit 1 + fi + fi + + jq -Rs . 
< "$DOC_ABS_PATH" > /tmp/skill_doc.json + + - name: Register version to business system + shell: bash + run: | + set -euo pipefail + + if [ -z "${API_BASE:-}" ]; then + echo "API_BASE is required (global/repo var or secret)." + exit 1 + fi + if [ -z "${CLIENT_KEY:-}" ]; then + echo "CLIENT_KEY is required (secret)." + exit 1 + fi + + SKILL_BASE_DIR="${SKILL_SUBPATH:-.}" + + if [ -z "${SKILL_SLUG:-}" ]; then + if [ -f "${SKILL_BASE_DIR}/package.json" ]; then + PKG_NAME=$(jq -r '.name // empty' "${SKILL_BASE_DIR}/package.json") + if [ -n "$PKG_NAME" ]; then + # Strip npm scope: @scope/skill-name -> skill-name + SKILL_SLUG="${PKG_NAME##*/}" + fi + fi + fi + + if [ -z "${SKILL_SLUG:-}" ]; then + if [ -f "${SKILL_BASE_DIR}/pyproject.toml" ]; then + PYPROJECT_NAME=$(python3 -c "import sys,tomllib; p=sys.argv[1]; d=tomllib.load(open(p,'rb')); print((d.get('project',{}).get('name') or d.get('tool',{}).get('poetry',{}).get('name') or ''))" "${SKILL_BASE_DIR}/pyproject.toml" 2>/dev/null || true) + if [ -n "$PYPROJECT_NAME" ]; then + SKILL_SLUG="${PYPROJECT_NAME##*/}" + fi + fi + fi + + if [ -z "${SKILL_SLUG:-}" ]; then + SKILL_SLUG="${GITHUB_REPOSITORY##*/}" + fi + + SESSION_RES=$(curl -sS -X POST "${API_BASE}/auth/skill-credit/session" \ + -H "Content-Type: application/json" \ + -d "{\"clientKey\":\"${CLIENT_KEY}\"}") + ACCESS_TOKEN=$(printf '%s' "$SESSION_RES" | jq -r '.accessToken // empty') + if [ -z "$ACCESS_TOKEN" ]; then + echo "failed to exchange access token from client key" + echo "$SESSION_RES" + exit 1 + fi + + RUNTIME_META=$(jq -nc --arg entry "${SKILL_SUBPATH:+$SKILL_SUBPATH/}scripts" '{entry_hint:$entry, provider:"forgejo"}') + + cat > /tmp/register_payload.json < [args] [--dry-run] +``` + +### Commands + +| Command | Description | +|---------|-------------| +| `session` | Get session token | +| `scrape-url [translate]` | Scrape a 1688 URL | +| `scrape-payload ` | Scrape with custom payload | + +### Examples + +```bash +# Scrape a product URL +bun dist/run.js 
scrape-url 'https://detail.1688.com/offer/852504650877.html' + +# With translation +bun dist/run.js scrape-url 'https://detail.1688.com/offer/852504650877.html' true + +# Dry run +bun dist/run.js scrape-url 'https://detail.1688.com/offer/852504650877.html' --dry-run +``` + +## Output + +Returns structured JSON with product data: +- Product info (title, price, description) +- Images (optimized) +- Variants/SKUs +- Supplier info + +## Reference + +See [references/1688-product-master.md](references/1688-product-master.md). diff --git a/agents/openai.yaml b/agents/openai.yaml new file mode 100644 index 0000000..4fca8d0 --- /dev/null +++ b/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "1688 Product Master" + short_description: "Run 1688 scrape via client key" + default_prompt: "[skill:1688-product-master] For agents that already have CLIENT_KEY: exchange token via /auth/skill-credit/session, then call /ecom/tasks/scrape with scrape payload." diff --git a/bun.lock b/bun.lock new file mode 100644 index 0000000..75eec14 --- /dev/null +++ b/bun.lock @@ -0,0 +1,15 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "1688-product-master", + "dependencies": { + "@clawd/auth-runtime": "git+http://192.168.0.108:3030/agent-skills/auth-runtime.git", + }, + }, + }, + "packages": { + "@clawd/auth-runtime": ["@clawd/auth-runtime@git+http://192.168.0.108:3030/agent-skills/auth-runtime.git#70cf86889eecbe9c4649bb072cd971c3a560e889", {}, "70cf86889eecbe9c4649bb072cd971c3a560e889"], + } +} diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..316c7d9 --- /dev/null +++ b/install.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Install 1688-product-master to a target directory. +# Bundles the skill + auth-runtime into a single self-contained file. 
+# +# Usage: +# ./install.sh # installs to ~/.openclaw/skills/ +# ./install.sh /custom/path/ + +set -euo pipefail + +SKILL_NAME="1688-product-master" +DEST="${1:-$HOME/.openclaw/skills}" + +cd "$(dirname "$0")" + +echo "Building $SKILL_NAME..." +bun install --frozen-lockfile +bun build scripts/run.ts --outfile dist/run.js --target bun + +mkdir -p "$DEST/$SKILL_NAME" +cp dist/run.js "$DEST/$SKILL_NAME/run.js" + +echo "Installed: $DEST/$SKILL_NAME/run.js" +echo "Run with: bun $DEST/$SKILL_NAME/run.js [args...]" diff --git a/package.json b/package.json new file mode 100644 index 0000000..73c8c22 --- /dev/null +++ b/package.json @@ -0,0 +1,13 @@ +{ + "name": "1688-product-master", + "version": "1.0.0", + "type": "module", + "scripts": { + "run": "bun run scripts/run.ts", + "build": "bun build scripts/run.ts --outfile dist/run.js --target bun", + "package": "bun run build && cd .. && zip -r 1688-product-master.skill 1688-product-master/SKILL.md 1688-product-master/dist/run.js && echo 'Created: 1688-product-master.skill'" + }, + "dependencies": { + "@clawd/auth-runtime": "git+http://192.168.0.108:3030/agent-skills/auth-runtime.git" + } +} diff --git a/references/1688-product-master.md b/references/1688-product-master.md new file mode 100644 index 0000000..835a7fe --- /dev/null +++ b/references/1688-product-master.md @@ -0,0 +1,33 @@ +# 1688 Product Master Reference + +## 1. Runtime scrape mapping from original curl + +Original browser call target: + +- `POST /ecom/tasks/scrape` + +Runtime script behavior: + +1. Exchange client key: + - `POST /auth/skill-credit/session` + - body: `{ "clientKey": "" }` +2. Use returned `accessToken`: + - `Authorization: Bearer ` +3. Call scrape: + - `POST /ecom/tasks/scrape` + - `Content-Type: application/json` + - payload fields: + - `url` + - `optimizeImages` + - `optimizeTitles` + - `optimizeVariants` + - `needTranslate` +4. If runtime session is expired (`401/403`), `@clawd/auth-runtime` will refresh token and retry once automatically. 
+
+The extra browser headers in the original curl (`sec-*`, `origin`, cookies, etc.) are not required by this skill flow.
+
+## 2. Notes
+
+- `clientKey` plaintext is only returned at key creation time.
+- Store the returned `clientKey` securely and inject it as `CLIENT_KEY`.
+- `/auth/skill-credit/clients*` endpoints are owner management APIs and are out of this runtime skill scope.
diff --git a/scripts/1688-product-master.sh b/scripts/1688-product-master.sh
new file mode 100755
index 0000000..5182a47
--- /dev/null
+++ b/scripts/1688-product-master.sh
@@ -0,0 +1,309 @@
+#!/usr/bin/env bash
+# 1688 Product Master CLI: exchanges CLIENT_KEY for an access token and calls
+# the ecom scrape endpoint. Needs bash, curl and python3 on PATH.
+set -euo pipefail
+
+# Print CLI usage to stdout.
+usage() {
+  cat <<'EOF'
+Usage:
+  1688-product-master.sh <command> [args...] [--dry-run]
+
+Commands:
+  session
+  scrape-url <1688-url> [need-translate:true|false]
+  scrape-payload <json-payload>
+
+Examples:
+  CLIENT_KEY=<client-key> 1688-product-master.sh scrape-url 'https://detail.1688.com/offer/852504650877.html'
+  CLIENT_KEY=<client-key> 1688-product-master.sh scrape-payload '{"url":"https://detail.1688.com/offer/852504650877.html","optimizeImages":true,"optimizeTitles":true,"optimizeVariants":true,"needTranslate":false}'
+EOF
+}
+
+# Base URLs; one trailing slash is stripped so paths can be appended with "/".
+AUTH_BASE="${AUTH_BASE:-https://api-gw-test.yuanwei-lnc.com}"
+AUTH_BASE="${AUTH_BASE%/}"
+ECOM_BASE="${ECOM_BASE:-$AUTH_BASE}"
+ECOM_BASE="${ECOM_BASE%/}"
+CLIENT_KEY="${CLIENT_KEY:-}"
+
+# Defaults for the payload flags built by scrape-url.
+DEFAULT_OPTIMIZE_IMAGES="${DEFAULT_OPTIMIZE_IMAGES:-true}"
+DEFAULT_OPTIMIZE_TITLES="${DEFAULT_OPTIMIZE_TITLES:-true}"
+DEFAULT_OPTIMIZE_VARIANTS="${DEFAULT_OPTIMIZE_VARIANTS:-true}"
+DEFAULT_NEED_TRANSLATE="${DEFAULT_NEED_TRANSLATE:-false}"
+
+# Split argv into flags (--dry-run, -h/--help) and positional arguments.
+DRY_RUN=0
+POSITIONALS=()
+for arg in "$@"; do
+  case "$arg" in
+    --dry-run)
+      DRY_RUN=1
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      POSITIONALS+=("$arg")
+      ;;
+  esac
+done
+
+if [ "${#POSITIONALS[@]}" -lt 1 ]; then
+  usage
+  exit 1
+fi
+
+COMMAND="${POSITIONALS[0]}"
+
+if [ -z "$CLIENT_KEY" ]; then
+  echo "Missing CLIENT_KEY." >&2
+  exit 1
+fi
+
+# request_api METHOD URL [BEARER_TOKEN] [JSON_BODY]
+# Prints "<http-status><TAB><response-body>" on stdout; the body is buffered
+# in a temp file that is removed before returning.
+request_api() {
+  local method="$1"
+  local url="$2"
+  local auth_header="${3:-}"
+  local body="${4:-}"
+  local tmp_body status
+  tmp_body="$(mktemp)"
+  local curl_args=(-sS -o "$tmp_body" -w "%{http_code}" -X "$method" "$url")
+  if [ -n "$auth_header" ]; then
+    curl_args+=(-H "Authorization: Bearer $auth_header")
+  fi
+  if [ -n "$body" ]; then
+    curl_args+=(-H "Content-Type: application/json" --data "$body")
+  fi
+  status="$(curl "${curl_args[@]}")"
+  local response
+  response="$(cat "$tmp_body")"
+  rm -f "$tmp_body"
+  printf '%s\t%s\n' "$status" "$response"
+}
+
+# Split the "<status><TAB><body>" record printed by request_api.
+extract_status() { printf '%s' "${1%%$'\t'*}"; }
+extract_body() { printf '%s' "${1#*$'\t'}"; }
+
+# require_2xx STATUS BODY CONTEXT
+# Exits 1 with a diagnostic on stderr unless STATUS is a 2xx HTTP code.
+require_2xx() {
+  local status="$1"
+  local body="$2"
+  local context="$3"
+  # A non-numeric status (e.g. empty) would make the numeric tests error out
+  # and read as success, so it is rejected explicitly.
+  if ! [[ "$status" =~ ^[0-9]+$ ]] || [ "$status" -lt 200 ] || [ "$status" -ge 300 ]; then
+    echo "Request failed at $context: HTTP $status" >&2
+    echo "$body" >&2
+    exit 1
+  fi
+}
+
+# to_bool_json RAW: prints "true" for 1/true/yes/y (case-insensitive), else "false".
+# Not called by any command in this script.
+to_bool_json() {
+  local raw="$1"
+  python3 - "$raw" <<'PY'
+import sys
+v = (sys.argv[1] or "").strip().lower()
+print("true" if v in ("1", "true", "yes", "y") else "false")
+PY
+}
+
+# build_payload_from_url URL [NEED_TRANSLATE_OVERRIDE]
+# Prints the scrape payload JSON built from URL and the DEFAULT_* flags; a
+# non-empty override replaces needTranslate.
+build_payload_from_url() {
+  local url="$1"
+  local need_translate_override="${2:-}"
+  python3 - "$url" "$DEFAULT_OPTIMIZE_IMAGES" "$DEFAULT_OPTIMIZE_TITLES" "$DEFAULT_OPTIMIZE_VARIANTS" "$DEFAULT_NEED_TRANSLATE" "$need_translate_override" <<'PY'
+import json
+import sys
+url = (sys.argv[1] or "").strip()
+if not url:
+    raise SystemExit("url is required")
+def as_bool(raw):
+    return str(raw).strip().lower() in ("1", "true", "yes", "y")
+payload = {
+    "url": url,
+    "optimizeImages": as_bool(sys.argv[2]),
+    "optimizeTitles": as_bool(sys.argv[3]),
+    "optimizeVariants": as_bool(sys.argv[4]),
+    "needTranslate": as_bool(sys.argv[5]),
+}
+override = (sys.argv[6] or "").strip()
+if override:
+    payload["needTranslate"] = as_bool(override)
+print(json.dumps(payload, ensure_ascii=False))
+PY
+}
+
+# validate_payload_json RAW
+# Prints RAW re-serialized when it is a JSON object with a non-empty "url";
+# otherwise exits non-zero with a message on stderr.
+validate_payload_json() {
+  local raw="$1"
+  python3 - "$raw" <<'PY'
+import json
+import sys
+raw = sys.argv[1]
+try:
+    data = json.loads(raw)
+except Exception as exc:
+    raise SystemExit(f"invalid payload json: {exc}")
+if not isinstance(data, dict):
+    raise SystemExit("payload must be a JSON object")
+if not data.get("url"):
+    raise SystemExit("payload.url is required")
+print(json.dumps(data, ensure_ascii=False))
+PY
+}
+
+# Prints the session JSON for CLIENT_KEY. With --dry-run no request is sent and
+# a placeholder session is printed; its token must be non-empty so the dry-run
+# scrape path gets past the accessToken check in run_scrape_with_payload.
+get_access_token() {
+  local session_payload
+  session_payload="$(python3 - "$CLIENT_KEY" <<'PY'
+import json,sys
+print(json.dumps({"clientKey": sys.argv[1]}, ensure_ascii=False))
+PY
+)"
+
+  if [ "$DRY_RUN" -eq 1 ]; then
+    echo '{"accessToken":"<dry-run-access-token>","ownerSessionToken":"<dry-run-owner-session-token>","expiresAt":"2099-01-01T00:00:00.000Z"}'
+    return
+  fi
+
+  local session_result session_status session_body
+  session_result="$(request_api "POST" "$AUTH_BASE/auth/skill-credit/session" "" "$session_payload")"
+  session_status="$(extract_status "$session_result")"
+  session_body="$(extract_body "$session_result")"
+  require_2xx "$session_status" "$session_body" "skill session"
+  echo "$session_body"
+}
+
+# json_get RAW KEY: prints the top-level KEY of the JSON object RAW, or an
+# empty line when RAW is not a JSON object or the value is missing/null.
+json_get() {
+  local raw="$1"
+  local key="$2"
+  python3 - "$raw" "$key" <<'PY'
+import json,sys
+raw = sys.argv[1]
+key = sys.argv[2]
+try:
+    data = json.loads(raw)
+except Exception:
+    print("")
+    raise SystemExit(0)
+if not isinstance(data, dict):
+    print("")
+    raise SystemExit(0)
+value = data.get(key, "")
+if value is None:
+    value = ""
+print(value)
+PY
+}
+
+# session: print the session JSON.
+cmd_session() {
+  local session_json
+  session_json="$(get_access_token)"
+  echo "$session_json"
+}
+
+# scrape-url <1688-url> [need-translate]: build the payload, then scrape.
+cmd_scrape_url() {
+  local url="${POSITIONALS[1]:-}"
+  local need_translate="${POSITIONALS[2]:-}"
+  if [ -z "$url" ]; then
+    echo "scrape-url requires <1688-url>" >&2
+    exit 1
+  fi
+  local payload
+  payload="$(build_payload_from_url "$url" "$need_translate")"
+  run_scrape_with_payload "$payload"
+}
+
+# scrape-payload <json-payload>: validate the caller's JSON, then scrape.
+cmd_scrape_payload() {
+  local raw_payload="${POSITIONALS[1]:-}"
+  if [ -z "$raw_payload" ]; then
+    echo "scrape-payload requires <json-payload>" >&2
+    exit 1
+  fi
+  local payload
+  payload="$(validate_payload_json "$raw_payload")"
+  run_scrape_with_payload "$payload"
+}
+
+# run_scrape_with_payload PAYLOAD_JSON
+# Gets a session, POSTs the payload to /ecom/tasks/scrape and prints one JSON
+# document holding the request payload, the session and the scrape response.
+# With --dry-run it prints the equivalent curl command instead of calling the API.
+run_scrape_with_payload() {
+  local payload="$1"
+  local session_json access_token
+  session_json="$(get_access_token)"
+  access_token="$(json_get "$session_json" "accessToken")"
+  if [ -z "$access_token" ]; then
+    echo "missing accessToken from /auth/skill-credit/session response" >&2
+    echo "$session_json" >&2
+    exit 1
+  fi
+
+  if [ "$DRY_RUN" -eq 1 ]; then
+    echo "curl -sS -X POST \"$ECOM_BASE/ecom/tasks/scrape\" -H \"Authorization: Bearer $access_token\" -H \"Content-Type: application/json\" --data '$payload'"
+    return
+  fi
+
+  local scrape_result scrape_status scrape_body
+  scrape_result="$(request_api "POST" "$ECOM_BASE/ecom/tasks/scrape" "$access_token" "$payload")"
+  scrape_status="$(extract_status "$scrape_result")"
+  scrape_body="$(extract_body "$scrape_result")"
+  require_2xx "$scrape_status" "$scrape_body" "ecom scrape"
+
+  python3 - "$session_json" "$scrape_status" "$scrape_body" "$payload" <<'PY'
+import json
+import sys
+session_raw, scrape_status, scrape_body_raw, payload_raw = sys.argv[1:]
+
+def parse_json(raw):
+    try:
+        return json.loads(raw)
+    except Exception:
+        return {"raw": raw}
+
+result = {
+    "status": "SUCCESS",
+    "requestPayload": parse_json(payload_raw),
+    "session": parse_json(session_raw),
+    "scrape": {
+        "httpStatus": int(scrape_status),
+        "body": parse_json(scrape_body_raw),
+    }
+}
+print(json.dumps(result, ensure_ascii=False))
+PY
+}
+
+# Dispatch the requested command.
+case "$COMMAND" in
+  session) cmd_session ;;
+  scrape-url) cmd_scrape_url ;;
+  scrape-payload) cmd_scrape_payload ;;
+  *)
+    echo "Unknown command: $COMMAND" >&2
+    usage
+    exit 1
+    ;;
+esac
diff --git a/scripts/1688-product-master.sh.bak b/scripts/1688-product-master.sh.bak
new file mode 100755
index 0000000..5182a47
--- /dev/null
+++ b/scripts/1688-product-master.sh.bak
@@ -0,0 +1,271 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+usage() {
+  cat <<'EOF'
+Usage:
+  1688-product-master.sh [args...] [--dry-run]
+
+Commands:
+  session
+  scrape-url <1688-url> [need-translate:true|false]
+  scrape-payload
+
+Examples:
+  CLIENT_KEY= 1688-product-master.sh scrape-url 'https://detail.1688.com/offer/852504650877.html'
+  CLIENT_KEY= 1688-product-master.sh scrape-payload '{"url":"https://detail.1688.com/offer/852504650877.html","optimizeImages":true,"optimizeTitles":true,"optimizeVariants":true,"needTranslate":false}'
+EOF
+}
+
+AUTH_BASE="${AUTH_BASE:-https://api-gw-test.yuanwei-lnc.com}"
+AUTH_BASE="${AUTH_BASE%/}"
+ECOM_BASE="${ECOM_BASE:-$AUTH_BASE}"
+ECOM_BASE="${ECOM_BASE%/}"
+CLIENT_KEY="${CLIENT_KEY:-}"
+
+DEFAULT_OPTIMIZE_IMAGES="${DEFAULT_OPTIMIZE_IMAGES:-true}"
+DEFAULT_OPTIMIZE_TITLES="${DEFAULT_OPTIMIZE_TITLES:-true}"
+DEFAULT_OPTIMIZE_VARIANTS="${DEFAULT_OPTIMIZE_VARIANTS:-true}"
+DEFAULT_NEED_TRANSLATE="${DEFAULT_NEED_TRANSLATE:-false}"
+
+DRY_RUN=0
+POSITIONALS=()
+for arg in "$@"; do
+  case "$arg" in
+    --dry-run)
+      DRY_RUN=1
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      POSITIONALS+=("$arg")
+      ;;
+  esac
+done
+
+if [ "${#POSITIONALS[@]}" -lt 1 ]; then
+  usage
+  exit 1
+fi
+
+COMMAND="${POSITIONALS[0]}"
+
+if [ -z "$CLIENT_KEY" ]; then
+  echo "Missing CLIENT_KEY." >&2
+  exit 1
+fi
+
+request_api() {
+  local method="$1"
+  local url="$2"
+  local auth_header="${3:-}"
+  local body="${4:-}"
+  local tmp_body status
+  tmp_body="$(mktemp)"
+  local curl_args=(-sS -o "$tmp_body" -w "%{http_code}" -X "$method" "$url")
+  if [ -n "$auth_header" ]; then
+    curl_args+=(-H "Authorization: Bearer $auth_header")
+  fi
+  if [ -n "$body" ]; then
+    curl_args+=(-H "Content-Type: application/json" --data "$body")
+  fi
+  status="$(curl "${curl_args[@]}")"
+  local response
+  response="$(cat "$tmp_body")"
+  rm -f "$tmp_body"
+  printf '%s\t%s\n' "$status" "$response"
+}
+
+extract_status() { printf '%s' "${1%%$'\t'*}"; }
+extract_body() { printf '%s' "${1#*$'\t'}"; }
+
+require_2xx() {
+  local status="$1"
+  local body="$2"
+  local context="$3"
+  if [ "$status" -lt 200 ] || [ "$status" -ge 300 ]; then
+    echo "Request failed at $context: HTTP $status" >&2
+    echo "$body" >&2
+    exit 1
+  fi
+}
+
+to_bool_json() {
+  local raw="$1"
+  python3 - "$raw" <<'PY'
+import sys
+v = (sys.argv[1] or "").strip().lower()
+print("true" if v in ("1", "true", "yes", "y") else "false")
+PY
+}
+
+build_payload_from_url() {
+  local url="$1"
+  local need_translate_override="${2:-}"
+  python3 - "$url" "$DEFAULT_OPTIMIZE_IMAGES" "$DEFAULT_OPTIMIZE_TITLES" "$DEFAULT_OPTIMIZE_VARIANTS" "$DEFAULT_NEED_TRANSLATE" "$need_translate_override" <<'PY'
+import json
+import sys
+url = (sys.argv[1] or "").strip()
+if not url:
+    raise SystemExit("url is required")
+def as_bool(raw):
+    return str(raw).strip().lower() in ("1", "true", "yes", "y")
+payload = {
+    "url": url,
+    "optimizeImages": as_bool(sys.argv[2]),
+    "optimizeTitles": as_bool(sys.argv[3]),
+    "optimizeVariants": as_bool(sys.argv[4]),
+    "needTranslate": as_bool(sys.argv[5]),
+}
+override = (sys.argv[6] or "").strip()
+if override:
+    payload["needTranslate"] = as_bool(override)
+print(json.dumps(payload, ensure_ascii=False))
+PY
+}
+
+validate_payload_json() {
+  local raw="$1"
+  python3 - "$raw" <<'PY'
+import json
+import sys
+raw = sys.argv[1]
+try:
+    data = json.loads(raw)
+except Exception as exc:
+    raise SystemExit(f"invalid payload json: {exc}")
+if not isinstance(data, dict):
+    raise SystemExit("payload must be a JSON object")
+if not data.get("url"):
+    raise SystemExit("payload.url is required")
+print(json.dumps(data, ensure_ascii=False))
+PY
+}
+
+get_access_token() {
+  local session_payload
+  session_payload="$(python3 - "$CLIENT_KEY" <<'PY'
+import json,sys
+print(json.dumps({"clientKey": sys.argv[1]}, ensure_ascii=False))
+PY
+)"
+
+  if [ "$DRY_RUN" -eq 1 ]; then
+    echo '{"accessToken":"","ownerSessionToken":"","expiresAt":"2099-01-01T00:00:00.000Z"}'
+    return
+  fi
+
+  local session_result session_status session_body
+  session_result="$(request_api "POST" "$AUTH_BASE/auth/skill-credit/session" "" "$session_payload")"
+  session_status="$(extract_status "$session_result")"
+  session_body="$(extract_body "$session_result")"
+  require_2xx "$session_status" "$session_body" "skill session"
+  echo "$session_body"
+}
+
+json_get() {
+  local raw="$1"
+  local key="$2"
+  python3 - "$raw" "$key" <<'PY'
+import json,sys
+raw = sys.argv[1]
+key = sys.argv[2]
+try:
+    data = json.loads(raw)
+except Exception:
+    print("")
+    raise SystemExit(0)
+value = data.get(key, "")
+if value is None:
+    value = ""
+print(value)
+PY
+}
+
+cmd_session() {
+  local session_json
+  session_json="$(get_access_token)"
+  echo "$session_json"
+}
+
+cmd_scrape_url() {
+  local url="${POSITIONALS[1]:-}"
+  local need_translate="${POSITIONALS[2]:-}"
+  if [ -z "$url" ]; then
+    echo "scrape-url requires <1688-url>" >&2
+    exit 1
+  fi
+  local payload
+  payload="$(build_payload_from_url "$url" "$need_translate")"
+  run_scrape_with_payload "$payload"
+}
+
+cmd_scrape_payload() {
+  local raw_payload="${POSITIONALS[1]:-}"
+  if [ -z "$raw_payload" ]; then
+    echo "scrape-payload requires " >&2
+    exit 1
+  fi
+  local payload
+  payload="$(validate_payload_json "$raw_payload")"
+  run_scrape_with_payload "$payload"
+}
+
+run_scrape_with_payload() {
+  local payload="$1"
+  local session_json access_token
+  session_json="$(get_access_token)"
+  access_token="$(json_get "$session_json" "accessToken")"
+  if [ -z "$access_token" ]; then
+    echo "missing accessToken from /auth/skill-credit/session response" >&2
+    echo "$session_json" >&2
+    exit 1
+  fi
+
+  if [ "$DRY_RUN" -eq 1 ]; then
+    echo "curl -sS -X POST \"$ECOM_BASE/ecom/tasks/scrape\" -H \"Authorization: Bearer \" -H \"Content-Type: application/json\" --data '$payload'"
+    return
+  fi
+
+  local scrape_result scrape_status scrape_body
+  scrape_result="$(request_api "POST" "$ECOM_BASE/ecom/tasks/scrape" "$access_token" "$payload")"
+  scrape_status="$(extract_status "$scrape_result")"
+  scrape_body="$(extract_body "$scrape_result")"
+  require_2xx "$scrape_status" "$scrape_body" "ecom scrape"
+
+  python3 - "$session_json" "$scrape_status" "$scrape_body" "$payload" <<'PY'
+import json
+import sys
+session_raw, scrape_status, scrape_body_raw, payload_raw = sys.argv[1:]
+
+def parse_json(raw):
+    try:
+        return json.loads(raw)
+    except Exception:
+        return {"raw": raw}
+
+result = {
+    "status": "SUCCESS",
+    "requestPayload": parse_json(payload_raw),
+    "session": parse_json(session_raw),
+    "scrape": {
+        "httpStatus": int(scrape_status),
+        "body": parse_json(scrape_body_raw),
+    }
+}
+print(json.dumps(result, ensure_ascii=False))
+PY
+}
+
+case "$COMMAND" in
+  session) cmd_session ;;
+  scrape-url) cmd_scrape_url ;;
+  scrape-payload) cmd_scrape_payload ;;
+  *)
+    echo "Unknown command: $COMMAND" >&2
+    usage
+    exit 1
+    ;;
+esac
diff --git a/scripts/run.ts b/scripts/run.ts
new file mode 100755
index 0000000..de08db8
--- /dev/null
+++ b/scripts/run.ts
@@ -0,0 +1,127 @@
+#!/usr/bin/env bun
+import type { Command } from '../src/types.js';
+import { run1688 } from '../src/index.js';
+
+/**
+ * CLI entry point for the 1688 Product Master skill.
+ *
+ * Note: since v2.0 a per-skill .env.local file is no longer needed.
+ * Configuration has moved to the global file ~/.openclaw/.env, which all
+ * skills share, so it does not have to be repeated in every skill.
+ *
+ * Create the global configuration:
+ *   cp ~/.openclaw/.env.example ~/.openclaw/.env
+ *   vi ~/.openclaw/.env   # fill in CLIENT_KEY
+ */
+
+/** Prints CLI usage to stderr. */
+function printUsage(): void {
+  console.error(`Usage:
+  bun run scripts/run.ts [--client-key=<key>] [--auth-base=<url>] [--ecom-base=<url>] <command> [args...] [--dry-run]
+
+Commands:
+  session
+  scrape-url <1688-url> [translate]
+  scrape-payload <json-payload>
+
+Examples:
+  bun run scripts/run.ts scrape-url 'https://detail.1688.com/offer/852504650877.html'
+  bun run scripts/run.ts scrape-url 'https://detail.1688.com/offer/852504650877.html' true
+  bun run scripts/run.ts scrape-url 'https://detail.1688.com/offer/852504650877.html' --dry-run
+  bun run scripts/run.ts scrape-payload '{"url":"https://detail.1688.com/offer/852504650877.html"}'
+
+配置:
+  全局配置文件:~/.openclaw/.env
+  命令行参数优先级高于全局配置
+`);
+}
+
+/** Parsed command line: the command, its positional arguments and option overrides. */
+type CliArgs = {
+  command: Command;
+  args: string[];
+  dryRun: boolean;
+  clientKey?: string;
+  authBase?: string;
+  ecomBase?: string;
+};
+
+/**
+ * Splits argv into the command, its positional arguments and the recognised
+ * `--dry-run`, `--client-key=`, `--auth-base=` and `--ecom-base=` options.
+ * `-h`/`--help` prints usage and exits 0.
+ *
+ * @param argv - Arguments after the script path.
+ * @returns The parsed arguments, or `null` when no command was given.
+ */
+function parseArgs(argv: string[]): CliArgs | null {
+  const positionals: string[] = [];
+  let dryRun = false;
+  let clientKey: string | undefined;
+  let authBase: string | undefined;
+  let ecomBase: string | undefined;
+
+  for (const arg of argv) {
+    if (arg === '--dry-run') {
+      dryRun = true;
+    } else if (arg.startsWith('--client-key=')) {
+      clientKey = arg.slice('--client-key='.length).trim();
+    } else if (arg.startsWith('--auth-base=')) {
+      authBase = arg.slice('--auth-base='.length).trim().replace(/\/$/, '');
+    } else if (arg.startsWith('--ecom-base=')) {
+      ecomBase = arg.slice('--ecom-base='.length).trim().replace(/\/$/, '');
+    } else if (arg === '-h' || arg === '--help') {
+      printUsage();
+      process.exit(0);
+    } else {
+      positionals.push(arg);
+    }
+  }
+
+  if (positionals.length < 1) {
+    return null;
+  }
+
+  // The command string is not validated here; run1688 reports unknown commands.
+  const command = positionals[0] as Command;
+  const args = positionals.slice(1);
+  return { command, args, dryRun, clientKey, authBase, ecomBase };
+}
+
+/**
+ * Runs the requested command and prints the result as JSON on stdout.
+ * The process exit code is 1 when the result status is `failed`.
+ */
+async function main(): Promise<void> {
+  // .env.local is no longer loaded; the global ~/.openclaw/.env is used instead.
+  // auth-runtime loads that global configuration automatically.
+
+  const parsed = parseArgs(process.argv.slice(2));
+  if (!parsed) {
+    printUsage();
+    process.exit(1);
+  }
+
+  // Command-line options override the global configuration.
+  if (parsed.clientKey) process.env.CLIENT_KEY = parsed.clientKey;
+  if (parsed.authBase) process.env.AUTH_BASE = parsed.authBase;
+  if (parsed.ecomBase) process.env.ECOM_BASE = parsed.ecomBase;
+
+  const result = await run1688(parsed.command, parsed.args, parsed.dryRun);
+
+  console.log(JSON.stringify(result, null, 2));
+  if (result.status === 'failed') {
+    process.exitCode = 1;
+  }
+}
+
+// Errors thrown (rather than returned) by run1688 are reported in the same JSON shape.
+main().catch((error: unknown) => {
+  console.error(JSON.stringify({
+    status: 'failed',
+    error: error instanceof Error ? error.message : String(error),
+    command: '',
+    dryRun: false,
+  }, null, 2));
+  process.exit(1);
+});
diff --git a/src/index.ts b/src/index.ts
new file mode 100644
index 0000000..cce9543
--- /dev/null
+++ b/src/index.ts
@@ -0,0 +1,181 @@
+import type { ApiResponse, Command, OutputResult, ScrapePayload } from './types.js';
+import { createEnvConfig, getAccessToken, fetchSessionJson } from '@clawd/auth-runtime';
+import { buildPayloadFromUrl, parseBoolean, validatePayloadJson, scrapeProduct } from './scrape.js';
+
+/** Configuration object produced by `createEnvConfig()`. */
+type RuntimeConfig = ReturnType<typeof createEnvConfig>;
+
+/**
+ * Runs one CLI command and returns its outcome.
+ *
+ * Argument, token and scrape failures are returned with `status: 'failed'`;
+ * an error thrown while fetching the session propagates to the caller.
+ *
+ * @param command - Command to run.
+ * @param args - Positional arguments that follow the command.
+ * @param dryRun - When true, scrape commands return the payload without calling the scrape API.
+ * @returns The result object that the CLI prints as JSON.
+ */
+export async function run1688(
+  command: Command,
+  args: string[],
+  dryRun: boolean = false,
+): Promise<OutputResult> {
+  const config = createEnvConfig();
+  // ECOM_BASE falls back to the auth base; `||` so an empty string also falls back.
+  const ecomBase = (process.env.ECOM_BASE || config.authBase).replace(/\/$/, '');
+
+  if (!config.clientKey) {
+    return failed(command, dryRun, 'missing required env: CLIENT_KEY');
+  }
+
+  switch (command) {
+    case 'session':
+      return runSession(command, dryRun, config);
+    case 'scrape-url':
+      return runScrapeUrl(command, dryRun, config, ecomBase, args);
+    case 'scrape-payload':
+      return runScrapePayload(command, dryRun, config, ecomBase, args);
+    default:
+      return failed(command, dryRun, `unknown command: ${command}`);
+  }
+}
+
+/** Handles `session`: returns the session JSON obtained from the auth runtime. */
+async function runSession(
+  command: string,
+  dryRun: boolean,
+  config: RuntimeConfig,
+): Promise<OutputResult> {
+  const session = await fetchSessionJson(dryRun, config);
+  return { status: 'success', error: null, command, dryRun, session };
+}
+
+/** Handles `scrape-url`: builds the payload from `args[0]` (URL) and `args[1]` (translate override). */
+async function runScrapeUrl(
+  command: string,
+  dryRun: boolean,
+  config: RuntimeConfig,
+  ecomBase: string,
+  args: string[],
+): Promise<OutputResult> {
+  const url = args[0];
+  if (!url) {
+    return failed(command, dryRun, 'scrape-url requires <1688-url>');
+  }
+
+  let payload: ScrapePayload;
+  try {
+    payload = buildPayloadFromUrl(url, args[1] || '', readDefaults());
+  } catch (error) {
+    // e.g. a whitespace-only URL; report it like any other argument error.
+    return failed(command, dryRun, error instanceof Error ? error.message : String(error));
+  }
+
+  return runScrape(command, dryRun, config, ecomBase, payload);
+}
+
+/** Handles `scrape-payload`: validates the JSON in `args[0]` and scrapes with it. */
+async function runScrapePayload(
+  command: string,
+  dryRun: boolean,
+  config: RuntimeConfig,
+  ecomBase: string,
+  args: string[],
+): Promise<OutputResult> {
+  const rawPayload = args[0];
+  if (!rawPayload) {
+    return failed(command, dryRun, 'scrape-payload requires <json-payload>');
+  }
+
+  let payload: ScrapePayload;
+  try {
+    payload = validatePayloadJson(rawPayload);
+  } catch (error) {
+    return failed(command, dryRun, error instanceof Error ? error.message : String(error));
+  }
+
+  return runScrape(command, dryRun, config, ecomBase, payload);
+}
+
+/**
+ * Sends the scrape request, or in dry-run mode returns the payload untouched
+ * (HTTP status 0, no token exchange).
+ */
+async function runScrape(
+  command: string,
+  dryRun: boolean,
+  config: RuntimeConfig,
+  ecomBase: string,
+  payload: ScrapePayload,
+): Promise<OutputResult> {
+  if (dryRun) {
+    return { status: 'success', error: null, command, dryRun, requestPayload: payload, scrapeHttpStatus: 0, scrapeBody: null };
+  }
+
+  let accessToken: string;
+  try {
+    accessToken = await getAccessToken(dryRun, config);
+  } catch (error) {
+    return failed(command, dryRun, error instanceof Error ? error.message : 'failed to get access token');
+  }
+
+  let result: ApiResponse;
+  try {
+    result = await scrapeProduct(config, ecomBase, payload, dryRun, accessToken);
+  } catch (error) {
+    // Errors thrown by the request are reported with the payload, like HTTP failures.
+    return failed(command, dryRun, `scrape failed: ${error instanceof Error ? error.message : String(error)}`, payload);
+  }
+
+  if (result.status < 200 || result.status >= 300) {
+    return failed(command, dryRun, `scrape failed: HTTP ${result.status}: ${result.body}`, payload, result.status);
+  }
+
+  return {
+    status: 'success',
+    error: null,
+    command,
+    dryRun,
+    requestPayload: payload,
+    scrapeHttpStatus: result.status,
+    scrapeBody: parseJsonSafe(result.body),
+  };
+}
+
+/** Reads the DEFAULT_* environment flags used by `scrape-url`. */
+function readDefaults() {
+  return {
+    optimizeImages: parseBoolean(process.env.DEFAULT_OPTIMIZE_IMAGES ?? 'true'),
+    optimizeTitles: parseBoolean(process.env.DEFAULT_OPTIMIZE_TITLES ?? 'true'),
+    optimizeVariants: parseBoolean(process.env.DEFAULT_OPTIMIZE_VARIANTS ?? 'true'),
+    needTranslate: parseBoolean(process.env.DEFAULT_NEED_TRANSLATE ?? 'false'),
+  };
+}
+
+/** Parses a response body as JSON; text that is not JSON is wrapped as `{ raw }`. */
+function parseJsonSafe(raw: string): unknown {
+  try {
+    return JSON.parse(raw);
+  } catch {
+    return { raw };
+  }
+}
+
+/** Builds a `failed` result; the optional fields are included only when provided. */
+function failed(
+  command: string,
+  dryRun: boolean,
+  error: string,
+  requestPayload?: ScrapePayload,
+  scrapeHttpStatus?: number,
+): OutputResult {
+  return {
+    status: 'failed',
+    error,
+    command,
+    dryRun,
+    ...(requestPayload && { requestPayload }),
+    ...(scrapeHttpStatus !== undefined && { scrapeHttpStatus }),
+  };
+}
diff --git a/src/scrape.ts b/src/scrape.ts
new file mode 100644
index 0000000..88519dc
--- /dev/null
+++ b/src/scrape.ts
@@ -0,0 +1,107 @@
+import type { ScrapePayload } from './types.js';
+import { requestApiWithAutoRefresh } from '@clawd/auth-runtime';
+import type { ApiResponse, EnvConfig } from '@clawd/auth-runtime';
+
+/** Default flag values applied when building a payload from a bare URL. */
+type Defaults = {
+  optimizeImages: boolean;
+  optimizeTitles: boolean;
+  optimizeVariants: boolean;
+  needTranslate: boolean;
+};
+
+/**
+ * Builds a scrape payload for a product URL.
+ *
+ * @param url - Product URL; surrounding whitespace is trimmed.
+ * @param needTranslateOverride - When non-blank, parsed as a boolean and used instead of `defaults.needTranslate`.
+ * @param defaults - Flag values for the remaining fields.
+ * @throws Error when `url` is empty or whitespace-only.
+ */
+export function buildPayloadFromUrl(
+  url: string,
+  needTranslateOverride: string,
+  defaults: Defaults,
+): ScrapePayload {
+  if (!url || url.trim() === '') {
+    throw new Error('url is required');
+  }
+
+  const payload: ScrapePayload = {
+    url: url.trim(),
+    optimizeImages: defaults.optimizeImages,
+    optimizeTitles: defaults.optimizeTitles,
+    optimizeVariants: defaults.optimizeVariants,
+    needTranslate: defaults.needTranslate,
+  };
+
+  if (needTranslateOverride && needTranslateOverride.trim() !== '') {
+    payload.needTranslate = parseBoolean(needTranslateOverride);
+  }
+
+  return payload;
+}
+
+/**
+ * Parses and validates a caller-supplied JSON payload.
+ *
+ * Missing or `null` flags default to `true` (`needTranslate`: `false`).
+ *
+ * @param raw - JSON text of the payload object.
+ * @throws Error when `raw` is not valid JSON, is not a JSON object, or has no non-empty string `url`.
+ */
+export function validatePayloadJson(raw: string): ScrapePayload {
+  let data: unknown;
+  try {
+    data = JSON.parse(raw);
+  } catch (error) {
+    throw new Error(`invalid payload json: ${error instanceof Error ? error.message : String(error)}`);
+  }
+
+  if (typeof data !== 'object' || data === null || Array.isArray(data)) {
+    throw new Error('payload must be a JSON object');
+  }
+
+  const obj = data as Record<string, unknown>;
+  const url = obj.url;
+  // `url` must be a string: a truthy non-string (e.g. a number) would otherwise be sent as-is.
+  if (typeof url !== 'string' || url.trim() === '') {
+    throw new Error('payload.url is required');
+  }
+
+  return {
+    url: url.trim(),
+    optimizeImages: parseBoolean(obj.optimizeImages ?? true),
+    optimizeTitles: parseBoolean(obj.optimizeTitles ?? true),
+    optimizeVariants: parseBoolean(obj.optimizeVariants ?? true),
+    needTranslate: parseBoolean(obj.needTranslate ?? false),
+  };
+}
+
+/**
+ * POSTs the payload to `<ecomBase>/ecom/tasks/scrape` through the auth runtime.
+ *
+ * @returns The response as returned by `requestApiWithAutoRefresh`.
+ */
+export async function scrapeProduct(
+  config: EnvConfig,
+  ecomBase: string,
+  payload: ScrapePayload,
+  dryRun: boolean,
+  accessToken?: string,
+): Promise<ApiResponse> {
+  return requestApiWithAutoRefresh(
+    'POST',
+    `${ecomBase}/ecom/tasks/scrape`,
+    dryRun,
+    config,
+    JSON.stringify(payload),
+    accessToken,
+  );
+}
+
+/** Returns true for `1`, `true`, `yes` or `y` (case-insensitive, trimmed); false for anything else. */
+export function parseBoolean(value: unknown): boolean {
+  const str = String(value).trim().toLowerCase();
+  return ['1', 'true', 'yes', 'y'].includes(str);
+}
diff --git a/src/types.ts b/src/types.ts
new file mode 100644
index 0000000..5efbe5b
--- /dev/null
+++ b/src/types.ts
@@ -0,0 +1,31 @@
+/** Request body sent to `POST /ecom/tasks/scrape`. */
+export interface ScrapePayload {
+  url: string;
+  optimizeImages: boolean;
+  optimizeTitles: boolean;
+  optimizeVariants: boolean;
+  needTranslate: boolean;
+}
+
+/** Scrape response body; its shape is not constrained here. */
+export interface ScrapeResponse {
+  [key: string]: unknown;
+}
+
+/** Re-export of the auth runtime's response type. */
+export type ApiResponse = import("@clawd/auth-runtime").ApiResponse;
+
+/** Commands accepted by the CLI. */
+export type Command = "session" | "scrape-url" | "scrape-payload";
+
+/** Result object returned by `run1688` and printed by the CLI as JSON. */
+export interface OutputResult {
+  status: 'success' | 'failed';
+  error: string | null;
+  command: string;
+  dryRun: boolean;
+  session?: unknown;
+  requestPayload?: ScrapePayload;
+  scrapeHttpStatus?: number;
+  scrapeBody?: unknown;
+}