1688-product-master/scripts/1688-product-master.sh

272 lines
6.8 KiB
Bash
Raw Permalink Normal View History

2026-03-11 23:36:59 +00:00
#!/usr/bin/env bash
#
# Command-line client that requests a session from
# $AUTH_BASE/auth/skill-credit/session using CLIENT_KEY and submits scrape
# tasks to $ECOM_BASE/ecom/tasks/scrape.
#
# Environment read by this script:
#   CLIENT_KEY                 required; sent as "clientKey" in the session request
#   AUTH_BASE, ECOM_BASE       optional base URLs (ECOM_BASE defaults to AUTH_BASE)
#   DEFAULT_OPTIMIZE_IMAGES, DEFAULT_OPTIMIZE_TITLES,
#   DEFAULT_OPTIMIZE_VARIANTS, DEFAULT_NEED_TRANSLATE
#                              optional defaults for payloads built by scrape-url
#
# External programs invoked: curl, python3, mktemp.
#
# Strict mode: exit on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail
# Print the help text (commands and examples) to stdout.
# The heredoc delimiter is quoted, so nothing inside the text is expanded.
usage() {
  cat <<'USAGE_TEXT'
Usage:
1688-product-master.sh <command> [args...] [--dry-run]
Commands:
session
scrape-url <1688-url> [need-translate:true|false]
scrape-payload <payload-json>
Examples:
CLIENT_KEY=<sk_xxx.yyy> 1688-product-master.sh scrape-url 'https://detail.1688.com/offer/852504650877.html'
CLIENT_KEY=<sk_xxx.yyy> 1688-product-master.sh scrape-payload '{"url":"https://detail.1688.com/offer/852504650877.html","optimizeImages":true,"optimizeTitles":true,"optimizeVariants":true,"needTranslate":false}'
USAGE_TEXT
}
# ---- Configuration: every value below may be overridden from the environment.

# Base URLs; a single trailing slash is removed so paths can be appended
# with a leading "/". ECOM_BASE falls back to the (already trimmed) AUTH_BASE.
: "${AUTH_BASE:=https://api-gw-test.yuanwei-lnc.com}"
AUTH_BASE="${AUTH_BASE%/}"
: "${ECOM_BASE:=$AUTH_BASE}"
ECOM_BASE="${ECOM_BASE%/}"

# Client key used for the session request; validated further down.
: "${CLIENT_KEY:=}"

# Defaults applied to payloads built by the scrape-url command.
: "${DEFAULT_OPTIMIZE_IMAGES:=true}"
: "${DEFAULT_OPTIMIZE_TITLES:=true}"
: "${DEFAULT_OPTIMIZE_VARIANTS:=true}"
: "${DEFAULT_NEED_TRANSLATE:=false}"

# ---- Argument scan: flags may appear anywhere; everything else is kept, in
# order, in POSITIONALS (element 0 becomes the command).
DRY_RUN=0
POSITIONALS=()
for arg in "$@"; do
  if [[ "$arg" == "--dry-run" ]]; then
    DRY_RUN=1
  elif [[ "$arg" == "-h" || "$arg" == "--help" ]]; then
    usage
    exit 0
  else
    POSITIONALS+=("$arg")
  fi
done

# A command is mandatory.
if ((${#POSITIONALS[@]} < 1)); then
  usage
  exit 1
fi
COMMAND="${POSITIONALS[0]}"

# The client key is required for every command (help has already exited above).
if [[ -z "$CLIENT_KEY" ]]; then
  echo "Missing CLIENT_KEY." >&2
  exit 1
fi
#######################################
# Perform one HTTP request with curl and report its status and body.
# Arguments:
#   $1 - HTTP method
#   $2 - request URL
#   $3 - optional bearer token; when non-empty an Authorization header is sent
#   $4 - optional request body; when non-empty it is sent as application/json
# Outputs:
#   One record on stdout: "<http status><TAB><response body>" plus a newline.
#   If curl itself fails, the record is still written with whatever status
#   curl printed (falling back to 000), so callers can reject it by status.
# Returns:
#   0 once a record was written; 1 if the temporary file could not be created.
#######################################
request_api() {
  local method="$1"
  local url="$2"
  local auth_header="${3:-}"
  local body="${4:-}"
  local tmp_body status response

  tmp_body="$(mktemp)" || {
    echo "request_api: unable to create a temporary file" >&2
    return 1
  }

  local curl_args=(-sS -o "$tmp_body" -w "%{http_code}" -X "$method" "$url")
  if [ -n "$auth_header" ]; then
    curl_args+=(-H "Authorization: Bearer $auth_header")
  fi
  if [ -n "$body" ]; then
    curl_args+=(-H "Content-Type: application/json" --data "$body")
  fi

  # Guard the call: with errexit active an unguarded curl failure would abort
  # the script right here and leave the temporary file behind.
  status="$(curl "${curl_args[@]}")" || status="${status:-000}"
  response="$(cat "$tmp_body")" || response=""
  rm -f -- "$tmp_body"

  printf '%s\t%s\n' "$status" "$response"
}
# Print the part of a "<status><TAB><body>" record that precedes the first
# tab, without a trailing newline. A record with no tab is printed whole.
extract_status() {
  local record="$1"
  printf '%s' "${record%%$'\t'*}"
}
# Print the part of a "<status><TAB><body>" record that follows the first
# tab, without a trailing newline. Later tabs belong to the body; a record
# with no tab is printed whole.
extract_body() {
  local record="$1"
  printf '%s' "${record#*$'\t'}"
}
#######################################
# Abort the script unless an HTTP status is in the 2xx range.
# Arguments:
#   $1 - HTTP status as text
#   $2 - response body, echoed to stderr on failure
#   $3 - short description of the request, used in the error message
# Outputs:
#   On failure, writes the error line and the body to stderr.
# Returns:
#   0 for a status in 200..299; otherwise exits the shell with status 1.
#######################################
require_2xx() {
  local status="$1"
  local body="$2"
  local context="$3"

  # Check the shape first: with an empty or non-numeric status the numeric
  # tests below would themselves error, and a failed request would slip
  # through as if it had succeeded.
  if [[ "$status" =~ ^[0-9]{3}$ ]] && [ "$status" -ge 200 ] && [ "$status" -lt 300 ]; then
    return 0
  fi

  echo "Request failed at $context: HTTP $status" >&2
  printf '%s\n' "$body" >&2
  exit 1
}
# Print the JSON literal "true" when $1, trimmed and lower-cased, is one of
# 1 / true / yes / y; print "false" for anything else.
to_bool_json() {
  local candidate="$1"
  python3 - "$candidate" <<'PY'
import sys

TRUTHY = {"1", "true", "yes", "y"}

text = (sys.argv[1] or "").strip().lower()
if text in TRUTHY:
    print("true")
else:
    print("false")
PY
}
#######################################
# Build the scrape request payload for a URL.
# Globals:
#   DEFAULT_OPTIMIZE_IMAGES, DEFAULT_OPTIMIZE_TITLES,
#   DEFAULT_OPTIMIZE_VARIANTS, DEFAULT_NEED_TRANSLATE (read)
# Arguments:
#   $1 - URL to scrape; must be non-blank
#   $2 - optional needTranslate override; when non-blank it replaces
#        DEFAULT_NEED_TRANSLATE
# Outputs:
#   A one-line JSON object with url, optimizeImages, optimizeTitles,
#   optimizeVariants and needTranslate on stdout.
# Returns:
#   python3's exit status; non-zero with "url is required" on stderr when
#   the URL is blank.
#######################################
build_payload_from_url() {
  local url="$1"
  local need_translate_override="${2:-}"
  # The Python body carries its own block indentation; the interpreter
  # rejects `if`/`def` bodies that are not indented.
  python3 - "$url" "$DEFAULT_OPTIMIZE_IMAGES" "$DEFAULT_OPTIMIZE_TITLES" "$DEFAULT_OPTIMIZE_VARIANTS" "$DEFAULT_NEED_TRANSLATE" "$need_translate_override" <<'PY'
import json
import sys

url = (sys.argv[1] or "").strip()
if not url:
    raise SystemExit("url is required")


def as_bool(raw):
    # Only 1/true/yes/y (any case, surrounding whitespace ignored) are true.
    return str(raw).strip().lower() in ("1", "true", "yes", "y")


payload = {
    "url": url,
    "optimizeImages": as_bool(sys.argv[2]),
    "optimizeTitles": as_bool(sys.argv[3]),
    "optimizeVariants": as_bool(sys.argv[4]),
    "needTranslate": as_bool(sys.argv[5]),
}
override = (sys.argv[6] or "").strip()
if override:
    payload["needTranslate"] = as_bool(override)
print(json.dumps(payload, ensure_ascii=False))
PY
}
#######################################
# Validate a caller-supplied scrape payload and re-emit it normalized.
# Arguments:
#   $1 - payload as JSON text
# Outputs:
#   The payload re-serialized as one line of JSON on stdout.
# Returns:
#   python3's exit status; non-zero with a message on stderr when the text
#   is not valid JSON, is not a JSON object, or has no truthy "url" entry.
#######################################
validate_payload_json() {
  local raw="$1"
  # The Python body carries its own block indentation; the interpreter
  # rejects `try`/`if` bodies that are not indented.
  python3 - "$raw" <<'PY'
import json
import sys

raw = sys.argv[1]
try:
    data = json.loads(raw)
except Exception as exc:
    raise SystemExit(f"invalid payload json: {exc}")
if not isinstance(data, dict):
    raise SystemExit("payload must be a JSON object")
if not data.get("url"):
    raise SystemExit("payload.url is required")
print(json.dumps(data, ensure_ascii=False))
PY
}
#######################################
# Obtain a session from the auth endpoint and print its JSON body.
# Globals:
#   CLIENT_KEY, AUTH_BASE, DRY_RUN (read)
# Outputs:
#   The session response body on stdout. When DRY_RUN is 1 a fixed
#   placeholder session is printed and no request is sent.
# Returns:
#   0 on success; exits via require_2xx when the status is not accepted.
#######################################
get_access_token() {
  # Let Python serialize the key so it is escaped correctly as JSON.
  local login_body
  login_body="$(
    python3 - "$CLIENT_KEY" <<'PY'
import json
import sys

print(json.dumps({"clientKey": sys.argv[1]}, ensure_ascii=False))
PY
  )"

  if [ "$DRY_RUN" -eq 1 ]; then
    echo '{"accessToken":"<dry-run-token>","ownerSessionToken":"<dry-run-owner-token>","expiresAt":"2099-01-01T00:00:00.000Z"}'
    return
  fi

  local reply http_code reply_body
  reply="$(request_api "POST" "$AUTH_BASE/auth/skill-credit/session" "" "$login_body")"
  http_code="$(extract_status "$reply")"
  reply_body="$(extract_body "$reply")"
  require_2xx "$http_code" "$reply_body" "skill session"
  echo "$reply_body"
}
#######################################
# Print the value stored under a top-level key of a JSON object.
# Arguments:
#   $1 - JSON text
#   $2 - key to look up
# Outputs:
#   The value as formatted by Python's print(), or an empty line when the
#   text is not valid JSON, is not a JSON object, the key is missing, or the
#   value is null.
# Returns:
#   0 in all of the cases above.
#######################################
json_get() {
  local raw="$1"
  local key="$2"
  # The Python body carries its own block indentation; the interpreter
  # rejects `try`/`if` bodies that are not indented.
  python3 - "$raw" "$key" <<'PY'
import json
import sys

raw = sys.argv[1]
key = sys.argv[2]
try:
    data = json.loads(raw)
except Exception:
    print("")
    raise SystemExit(0)
# Valid JSON that is not an object (list, string, number) has no keys;
# report "not found" instead of crashing on .get().
value = data.get(key, "") if isinstance(data, dict) else ""
if value is None:
    value = ""
print(value)
PY
}
# "session" command: fetch a session through get_access_token and print the
# JSON it returned on stdout.
cmd_session() {
  local token_response
  # Captured first (rather than called directly) so a failing
  # get_access_token stops the command before anything is printed.
  token_response="$(get_access_token)"
  echo "$token_response"
}
#######################################
# "scrape-url" command: build a payload from a URL and submit it.
# Globals:
#   POSITIONALS (read): [1] is the URL, [2] an optional needTranslate value
# Outputs:
#   Whatever run_scrape_with_payload prints; an error on stderr when the
#   URL is missing.
# Returns:
#   Exits the shell with status 1 when no URL was given.
#######################################
cmd_scrape_url() {
  local target_url="${POSITIONALS[1]:-}"
  local translate_flag="${POSITIONALS[2]:-}"

  [ -n "$target_url" ] || {
    echo "scrape-url requires <1688-url>" >&2
    exit 1
  }

  local request_json
  request_json="$(build_payload_from_url "$target_url" "$translate_flag")"
  run_scrape_with_payload "$request_json"
}
#######################################
# "scrape-payload" command: validate a caller-supplied payload and submit it.
# Globals:
#   POSITIONALS (read): [1] is the payload as JSON text
# Outputs:
#   Whatever run_scrape_with_payload prints; an error on stderr when the
#   payload is missing.
# Returns:
#   Exits the shell with status 1 when no payload was given.
#######################################
cmd_scrape_payload() {
  local supplied_json="${POSITIONALS[1]:-}"

  [ -n "$supplied_json" ] || {
    echo "scrape-payload requires <payload-json>" >&2
    exit 1
  }

  local checked_json
  checked_json="$(validate_payload_json "$supplied_json")"
  run_scrape_with_payload "$checked_json"
}
#######################################
# Obtain a session, submit a scrape payload, and print a combined result.
# Globals:
#   DRY_RUN, ECOM_BASE (read)
# Arguments:
#   $1 - request payload as JSON text
# Outputs:
#   Normal run: one JSON object on stdout with "status", "requestPayload",
#   "session" and "scrape" (httpStatus + body). Any part that is not valid
#   JSON is wrapped as {"raw": "<text>"}.
#   Dry run: the equivalent curl command line, with the token replaced by
#   a placeholder; no scrape request is sent.
# Returns:
#   Exits the shell with status 1 when the session has no accessToken;
#   require_2xx ends the script on a rejected status.
#######################################
run_scrape_with_payload() {
  local payload="$1"
  local session_json access_token
  session_json="$(get_access_token)"
  access_token="$(json_get "$session_json" "accessToken")"
  if [ -z "$access_token" ]; then
    echo "missing accessToken from /auth/skill-credit/session response" >&2
    echo "$session_json" >&2
    exit 1
  fi

  if [ "$DRY_RUN" -eq 1 ]; then
    # The payload is shown inside single quotes, so any single quote it
    # contains is rewritten as '\'' to keep the printed command pasteable.
    local sq="'" escaped_sq="'\\''" shown_payload
    shown_payload=${payload//"$sq"/"$escaped_sq"}
    echo "curl -sS -X POST \"$ECOM_BASE/ecom/tasks/scrape\" -H \"Authorization: Bearer <accessToken>\" -H \"Content-Type: application/json\" --data '$shown_payload'"
    return
  fi

  local scrape_result scrape_status scrape_body
  scrape_result="$(request_api "POST" "$ECOM_BASE/ecom/tasks/scrape" "$access_token" "$payload")"
  scrape_status="$(extract_status "$scrape_result")"
  scrape_body="$(extract_body "$scrape_result")"
  require_2xx "$scrape_status" "$scrape_body" "ecom scrape"

  # The Python body carries its own block indentation; the interpreter
  # rejects `def`/`try` bodies that are not indented.
  python3 - "$session_json" "$scrape_status" "$scrape_body" "$payload" <<'PY'
import json
import sys

session_raw, scrape_status, scrape_body_raw, payload_raw = sys.argv[1:]


def parse_json(raw):
    # Keep unparseable text instead of failing the whole report.
    try:
        return json.loads(raw)
    except Exception:
        return {"raw": raw}


result = {
    "status": "SUCCESS",
    "requestPayload": parse_json(payload_raw),
    "session": parse_json(session_raw),
    "scrape": {
        "httpStatus": int(scrape_status),
        "body": parse_json(scrape_body_raw),
    },
}
print(json.dumps(result, ensure_ascii=False))
PY
}
# Route the selected command to its handler; anything else prints an error
# and the help text, then exits with status 1.
if [[ "$COMMAND" == "session" ]]; then
  cmd_session
elif [[ "$COMMAND" == "scrape-url" ]]; then
  cmd_scrape_url
elif [[ "$COMMAND" == "scrape-payload" ]]; then
  cmd_scrape_payload
else
  echo "Unknown command: $COMMAND" >&2
  usage
  exit 1
fi