#!/usr/bin/env bash
#
# 1688-product-master.sh - obtain a skill-credit session and submit a 1688
# product scrape task.
#
# Environment:
#   CLIENT_KEY                 (required) client key exchanged for an access token
#   AUTH_BASE                  auth gateway base URL (one trailing slash is stripped)
#   ECOM_BASE                  ecom gateway base URL (defaults to AUTH_BASE)
#   DEFAULT_OPTIMIZE_IMAGES    default for payload.optimizeImages   (default: true)
#   DEFAULT_OPTIMIZE_TITLES    default for payload.optimizeTitles   (default: true)
#   DEFAULT_OPTIMIZE_VARIANTS  default for payload.optimizeVariants (default: true)
#   DEFAULT_NEED_TRANSLATE     default for payload.needTranslate    (default: false)
#
# Requires: bash, curl, python3.
set -euo pipefail

# Print the command-line help text to stdout.
usage() {
  cat <<'EOF'
Usage:
  1688-product-master.sh <command> [args...] [--dry-run]

Commands:
  session
  scrape-url <1688-url> [need-translate:true|false]
  scrape-payload <json-payload>

Examples:
  CLIENT_KEY=<client-key> 1688-product-master.sh scrape-url 'https://detail.1688.com/offer/852504650877.html'
  CLIENT_KEY=<client-key> 1688-product-master.sh scrape-payload '{"url":"https://detail.1688.com/offer/852504650877.html","optimizeImages":true,"optimizeTitles":true,"optimizeVariants":true,"needTranslate":false}'
EOF
}

AUTH_BASE="${AUTH_BASE:-https://api-gw-test.yuanwei-lnc.com}"
AUTH_BASE="${AUTH_BASE%/}"
ECOM_BASE="${ECOM_BASE:-$AUTH_BASE}"
ECOM_BASE="${ECOM_BASE%/}"
CLIENT_KEY="${CLIENT_KEY:-}"
DEFAULT_OPTIMIZE_IMAGES="${DEFAULT_OPTIMIZE_IMAGES:-true}"
DEFAULT_OPTIMIZE_TITLES="${DEFAULT_OPTIMIZE_TITLES:-true}"
DEFAULT_OPTIMIZE_VARIANTS="${DEFAULT_OPTIMIZE_VARIANTS:-true}"
DEFAULT_NEED_TRANSLATE="${DEFAULT_NEED_TRANSLATE:-false}"

# Set by main(): 1 when --dry-run was given, and the non-flag arguments.
DRY_RUN=0
POSITIONALS=()

#######################################
# Perform one HTTP request with curl.
# Arguments:
#   $1 - HTTP method
#   $2 - full URL
#   $3 - bearer token (optional; no Authorization header when empty)
#   $4 - JSON request body (optional; no body when empty)
# Outputs:
#   "<http-status>\t<response-body>" on stdout; split it with
#   extract_status / extract_body.
# Returns:
#   non-zero only when the temp file cannot be created. Transport failures
#   surface as status "000" (or an empty status) and are rejected by
#   require_2xx.
#######################################
request_api() {
  local method="$1"
  local url="$2"
  local auth_header="${3:-}"
  local body="${4:-}"
  local tmp_body status response

  tmp_body="$(mktemp)" || {
    echo "request_api: mktemp failed" >&2
    return 1
  }

  local curl_args=(-sS -o "$tmp_body" -w "%{http_code}" -X "$method" "$url")
  if [[ -n "$auth_header" ]]; then
    # NOTE(review): the token is passed on curl's argv and is therefore
    # visible to other local users via the process list.
    curl_args+=(-H "Authorization: Bearer $auth_header")
  fi
  if [[ -n "$body" ]]; then
    curl_args+=(-H "Content-Type: application/json" --data "$body")
  fi

  # Deliberately do not abort here: the caller decides based on the status,
  # and the temp file must be removed on every path.
  status="$(curl "${curl_args[@]}")" || true
  response="$(cat "$tmp_body")" || true
  rm -f -- "$tmp_body"

  printf '%s\t%s\n' "$status" "$response"
}

# Print the status part (text before the first tab) of a request_api result.
extract_status() { printf '%s' "${1%%$'\t'*}"; }

# Print the body part (text after the first tab) of a request_api result.
extract_body() { printf '%s' "${1#*$'\t'}"; }

#######################################
# Exit the script unless the HTTP status is a number in 200..299.
# Arguments:
#   $1 - HTTP status, $2 - response body, $3 - context label for the message
# Outputs:
#   On failure, the context, status and body on stderr.
#######################################
require_2xx() {
  local status="$1"
  local body="$2"
  local context="$3"
  # An empty or non-numeric status (e.g. curl missing) is a failure too; a
  # plain `[ "$status" -lt 200 ]` errors out on it and would let it through.
  if [[ ! "$status" =~ ^[0-9]+$ ]] || ((10#$status < 200 || 10#$status >= 300)); then
    echo "Request failed at $context: HTTP ${status:-unknown}" >&2
    printf '%s\n' "$body" >&2
    exit 1
  fi
}

# Print "true" when $1 is one of 1/true/yes/y (case-insensitive, trimmed),
# otherwise "false". Not called by any command in this script.
to_bool_json() {
  local raw="$1"
  python3 - "$raw" <<'PY'
import sys

v = (sys.argv[1] or "").strip().lower()
print("true" if v in ("1", "true", "yes", "y") else "false")
PY
}

#######################################
# Build the scrape request JSON for a product URL.
# Globals:
#   DEFAULT_OPTIMIZE_IMAGES, DEFAULT_OPTIMIZE_TITLES,
#   DEFAULT_OPTIMIZE_VARIANTS, DEFAULT_NEED_TRANSLATE (read)
# Arguments:
#   $1 - product URL (required, non-blank)
#   $2 - needTranslate override (optional; a non-blank value replaces the default)
# Outputs:
#   the JSON payload on stdout
#######################################
build_payload_from_url() {
  local url="$1"
  local need_translate_override="${2:-}"
  python3 - "$url" "$DEFAULT_OPTIMIZE_IMAGES" "$DEFAULT_OPTIMIZE_TITLES" \
    "$DEFAULT_OPTIMIZE_VARIANTS" "$DEFAULT_NEED_TRANSLATE" \
    "$need_translate_override" <<'PY'
import json
import sys

url = (sys.argv[1] or "").strip()
if not url:
    raise SystemExit("url is required")


def as_bool(raw):
    return str(raw).strip().lower() in ("1", "true", "yes", "y")


payload = {
    "url": url,
    "optimizeImages": as_bool(sys.argv[2]),
    "optimizeTitles": as_bool(sys.argv[3]),
    "optimizeVariants": as_bool(sys.argv[4]),
    "needTranslate": as_bool(sys.argv[5]),
}
override = (sys.argv[6] or "").strip()
if override:
    payload["needTranslate"] = as_bool(override)
print(json.dumps(payload, ensure_ascii=False))
PY
}

#######################################
# Validate a user-supplied scrape payload.
# Arguments:
#   $1 - raw JSON text; must be an object with a truthy "url"
# Outputs:
#   the re-serialized JSON on stdout; python3 exits non-zero with a message
#   on stderr when validation fails.
#######################################
validate_payload_json() {
  local raw="$1"
  python3 - "$raw" <<'PY'
import json
import sys

raw = sys.argv[1]
try:
    data = json.loads(raw)
except Exception as exc:
    raise SystemExit(f"invalid payload json: {exc}")
if not isinstance(data, dict):
    raise SystemExit("payload must be a JSON object")
if not data.get("url"):
    raise SystemExit("payload.url is required")
print(json.dumps(data, ensure_ascii=False))
PY
}

#######################################
# Exchange CLIENT_KEY for a session.
# Globals:
#   CLIENT_KEY, AUTH_BASE, DRY_RUN (read)
# Outputs:
#   the session JSON on stdout. In dry-run mode no request is made and a
#   placeholder session with empty tokens is printed instead.
#######################################
get_access_token() {
  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf '%s\n' '{"accessToken":"","ownerSessionToken":"","expiresAt":"2099-01-01T00:00:00.000Z"}'
    return
  fi

  # JSON-encode the key with python so special characters are escaped.
  local session_payload
  session_payload="$(
    python3 - "$CLIENT_KEY" <<'PY'
import json
import sys

print(json.dumps({"clientKey": sys.argv[1]}, ensure_ascii=False))
PY
  )"

  local session_result session_status session_body
  session_result="$(request_api "POST" "$AUTH_BASE/auth/skill-credit/session" "" "$session_payload")"
  session_status="$(extract_status "$session_result")"
  session_body="$(extract_body "$session_result")"
  require_2xx "$session_status" "$session_body" "skill session"
  printf '%s\n' "$session_body"
}

#######################################
# Print the value of a top-level key of a JSON object.
# Arguments:
#   $1 - raw JSON text, $2 - key name
# Outputs:
#   the value on stdout; an empty line when the text is not valid JSON, is
#   not a JSON object, or the key is missing or null.
#######################################
json_get() {
  local raw="$1"
  local key="$2"
  python3 - "$raw" "$key" <<'PY'
import json
import sys

raw = sys.argv[1]
key = sys.argv[2]
try:
    data = json.loads(raw)
except Exception:
    print("")
    raise SystemExit(0)
# A list/string/number has no keys; treat it like a missing key.
if not isinstance(data, dict):
    print("")
    raise SystemExit(0)
value = data.get(key, "")
if value is None:
    value = ""
print(value)
PY
}

# Command "session": print the session JSON.
cmd_session() {
  local session_json
  session_json="$(get_access_token)"
  printf '%s\n' "$session_json"
}

# Command "scrape-url": build a payload from <1688-url> [need-translate] and
# submit it.
cmd_scrape_url() {
  local url="${POSITIONALS[1]:-}"
  local need_translate="${POSITIONALS[2]:-}"
  if [[ -z "$url" ]]; then
    echo "scrape-url requires <1688-url>" >&2
    exit 1
  fi
  local payload
  payload="$(build_payload_from_url "$url" "$need_translate")"
  run_scrape_with_payload "$payload"
}

# Command "scrape-payload": validate the given JSON payload and submit it.
cmd_scrape_payload() {
  local raw_payload="${POSITIONALS[1]:-}"
  if [[ -z "$raw_payload" ]]; then
    echo "scrape-payload requires <json-payload>" >&2
    exit 1
  fi
  local payload
  payload="$(validate_payload_json "$raw_payload")"
  run_scrape_with_payload "$payload"
}

#######################################
# Submit a scrape task and print a combined JSON result.
# Globals:
#   ECOM_BASE, DRY_RUN (read)
# Arguments:
#   $1 - JSON request payload
# Outputs:
#   Dry run: the equivalent curl command on stdout, with no network access.
#   Otherwise: one JSON object with the request payload, the session and the
#   scrape response (HTTP status and body).
#######################################
run_scrape_with_payload() {
  local payload="$1"

  # Handle dry-run before fetching a token: the dry-run session carries an
  # empty accessToken, which would trip the missing-token check below.
  if [[ "$DRY_RUN" -eq 1 ]]; then
    # Escape single quotes so the printed command can be pasted into a shell.
    local sq="'"
    local shown_payload="${payload//$sq/$sq\\$sq$sq}"
    printf '%s\n' "curl -sS -X POST \"$ECOM_BASE/ecom/tasks/scrape\" -H \"Authorization: Bearer <accessToken>\" -H \"Content-Type: application/json\" --data '$shown_payload'"
    return
  fi

  local session_json access_token
  session_json="$(get_access_token)"
  access_token="$(json_get "$session_json" "accessToken")"
  if [[ -z "$access_token" ]]; then
    echo "missing accessToken from /auth/skill-credit/session response" >&2
    printf '%s\n' "$session_json" >&2
    exit 1
  fi

  local scrape_result scrape_status scrape_body
  scrape_result="$(request_api "POST" "$ECOM_BASE/ecom/tasks/scrape" "$access_token" "$payload")"
  scrape_status="$(extract_status "$scrape_result")"
  scrape_body="$(extract_body "$scrape_result")"
  require_2xx "$scrape_status" "$scrape_body" "ecom scrape"

  python3 - "$session_json" "$scrape_status" "$scrape_body" "$payload" <<'PY'
import json
import sys

session_raw, scrape_status, scrape_body_raw, payload_raw = sys.argv[1:]


def parse_json(raw):
    # Fall back to wrapping the raw text when it is not valid JSON.
    try:
        return json.loads(raw)
    except Exception:
        return {"raw": raw}


result = {
    "status": "SUCCESS",
    "requestPayload": parse_json(payload_raw),
    "session": parse_json(session_raw),
    "scrape": {
        "httpStatus": int(scrape_status),
        "body": parse_json(scrape_body_raw),
    },
}
print(json.dumps(result, ensure_ascii=False))
PY
}

#######################################
# Entry point: parse flags, check prerequisites, dispatch the command.
# --dry-run and -h/--help are recognized at any argument position.
#######################################
main() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=1
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        POSITIONALS+=("$arg")
        ;;
    esac
  done

  if [[ "${#POSITIONALS[@]}" -lt 1 ]]; then
    usage
    exit 1
  fi

  local command="${POSITIONALS[0]}"

  if [[ -z "$CLIENT_KEY" ]]; then
    echo "Missing CLIENT_KEY." >&2
    exit 1
  fi

  case "$command" in
    session)
      cmd_session
      ;;
    scrape-url)
      cmd_scrape_url
      ;;
    scrape-payload)
      cmd_scrape_payload
      ;;
    *)
      echo "Unknown command: $command" >&2
      usage
      exit 1
      ;;
  esac
}

main "$@"