272 lines
6.8 KiB
Bash
Executable File
272 lines
6.8 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Command-line client for submitting 1688 product scrape tasks.
#
# Flow: exchange CLIENT_KEY for a session at the auth endpoint, then POST a
# scrape payload to the ecom endpoint with the returned access token.
#
# Environment:
#   CLIENT_KEY   (required) key sent as "clientKey" when opening a session
#   AUTH_BASE    (optional) base URL of the auth service
#   ECOM_BASE    (optional) base URL of the ecom service; defaults to AUTH_BASE
#   DEFAULT_*    (optional) default flags used by the scrape-url command
#
# External tools used: curl, python3.

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -o errexit -o nounset -o pipefail
|
|
|
|
#######################################
# Print the command-line help text.
# Outputs: the help text on stdout.
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'USAGE_TEXT'
Usage:
1688-product-master.sh <command> [args...] [--dry-run]

Commands:
session
scrape-url <1688-url> [need-translate:true|false]
scrape-payload <payload-json>

Examples:
CLIENT_KEY=<sk_xxx.yyy> 1688-product-master.sh scrape-url 'https://detail.1688.com/offer/852504650877.html'
CLIENT_KEY=<sk_xxx.yyy> 1688-product-master.sh scrape-payload '{"url":"https://detail.1688.com/offer/852504650877.html","optimizeImages":true,"optimizeTitles":true,"optimizeVariants":true,"needTranslate":false}'
USAGE_TEXT
}
|
|
|
|
# Service endpoints. Each may be overridden from the environment; a single
# trailing slash is dropped so paths can be appended with "/...".
: "${AUTH_BASE:=https://api-gw-test.yuanwei-lnc.com}"
AUTH_BASE="${AUTH_BASE%/}"
# The ecom service falls back to the auth base when not configured separately.
: "${ECOM_BASE:=$AUTH_BASE}"
ECOM_BASE="${ECOM_BASE%/}"
# Ensure CLIENT_KEY is defined (possibly empty) so later reads are safe
# under `set -u`.
: "${CLIENT_KEY:=}"

# Default flag values used when a payload is built from a bare URL.
: "${DEFAULT_OPTIMIZE_IMAGES:=true}"
: "${DEFAULT_OPTIMIZE_TITLES:=true}"
: "${DEFAULT_OPTIMIZE_VARIANTS:=true}"
: "${DEFAULT_NEED_TRANSLATE:=false}"
|
|
|
|
# Scan the command line once: flags may appear anywhere, everything else is
# collected in order as a positional word.
DRY_RUN=0
POSITIONALS=()
for arg in "$@"; do
  if [[ "$arg" == "--dry-run" ]]; then
    DRY_RUN=1
  elif [[ "$arg" == "-h" || "$arg" == "--help" ]]; then
    # Help wins immediately, regardless of any other arguments.
    usage
    exit 0
  else
    POSITIONALS+=("$arg")
  fi
done
|
|
|
|
# A sub-command is mandatory: without any positional word, show help and fail.
if (( ${#POSITIONALS[@]} == 0 )); then
  usage
  exit 1
fi

# The first positional word selects the sub-command.
COMMAND="${POSITIONALS[0]}"

# Every sub-command needs the client key, so reject a missing one up front.
if [[ -z "$CLIENT_KEY" ]]; then
  echo "Missing CLIENT_KEY." >&2
  exit 1
fi
|
|
|
|
#######################################
# Perform one HTTP request with curl and report status and body together.
# Arguments:
#   $1 - HTTP method
#   $2 - request URL
#   $3 - (optional) bearer token; adds an Authorization header when non-empty
#   $4 - (optional) JSON body; adds a Content-Type header when non-empty
# Outputs:
#   "<http status>\t<response body>" followed by a newline, on stdout.
# Returns:
#   0 when curl completed (whatever the HTTP status), otherwise curl's exit
#   code. The temporary response file is removed on every path.
#######################################
request_api() {
  local method="$1"
  local url="$2"
  local auth_header="${3:-}"
  local body="${4:-}"
  local tmp_body status response
  local rc=0

  tmp_body="$(mktemp)" || return 1

  # The body goes to the temp file while -w prints only the status code, so
  # the two values never get mixed on stdout.
  local curl_args=(-sS -o "$tmp_body" -w '%{http_code}' -X "$method" "$url")
  if [[ -n "$auth_header" ]]; then
    curl_args+=(-H "Authorization: Bearer $auth_header")
  fi
  if [[ -n "$body" ]]; then
    # --data-raw sends the text verbatim; plain --data would treat a body
    # starting with "@" as a file name to read.
    curl_args+=(-H 'Content-Type: application/json' --data-raw "$body")
  fi

  # Capture curl's status explicitly so the temp file is cleaned up even when
  # the transfer fails (otherwise `set -e` would abort before the rm).
  status="$(curl "${curl_args[@]}")" || rc=$?
  if (( rc != 0 )); then
    rm -f -- "$tmp_body"
    printf 'curl failed (exit %s) for %s %s\n' "$rc" "$method" "$url" >&2
    return "$rc"
  fi

  response="$(<"$tmp_body")"
  rm -f -- "$tmp_body"
  printf '%s\t%s\n' "$status" "$response"
}
|
|
|
|
# Print the part of "<status>\t<body>" before the first tab (the status).
# Input without a tab is printed unchanged. No trailing newline is added.
extract_status() {
  local record="$1"
  printf '%s' "${record%%$'\t'*}"
}
|
|
# Print the part of "<status>\t<body>" after the first tab (the body); later
# tabs are kept. Input without a tab is printed unchanged. No trailing newline
# is added.
extract_body() {
  local record="$1"
  printf '%s' "${record#*$'\t'}"
}
|
|
|
|
#######################################
# Abort the script unless an HTTP status is in the 2xx range.
# Arguments:
#   $1 - HTTP status code
#   $2 - response body, echoed to stderr on failure
#   $3 - short label naming the request, used in the error message
# Outputs:
#   On failure, an error line and the body on stderr.
# Returns:
#   0 for a 2xx status; otherwise exits the shell with status 1.
#######################################
require_2xx() {
  local status="$1"
  local body="$2"
  local context="$3"

  # Match the status as text rather than comparing integers: an empty or
  # non-numeric value makes `[ ... -lt ... ]` error out as "false", which
  # would let a failed request pass as a success.
  if [[ ! "$status" =~ ^2[0-9][0-9]$ ]]; then
    printf 'Request failed at %s: HTTP %s\n' "$context" "$status" >&2
    printf '%s\n' "$body" >&2
    exit 1
  fi
}
|
|
|
|
#######################################
# Convert a loosely written flag into a JSON boolean literal.
# Arguments:
#   $1 - text to interpret; surrounding whitespace and letter case are ignored
# Outputs:
#   "true" when the text is one of 1/true/yes/y, otherwise "false".
#######################################
to_bool_json() {
  local raw="$1"
  python3 - "$raw" <<'PY'
import sys

TRUTHY = ("1", "true", "yes", "y")

text = (sys.argv[1] or "").strip().lower()
if text in TRUTHY:
    print("true")
else:
    print("false")
PY
}
|
|
|
|
#######################################
# Build the scrape request JSON for a single product URL.
# Globals:
#   DEFAULT_OPTIMIZE_IMAGES, DEFAULT_OPTIMIZE_TITLES,
#   DEFAULT_OPTIMIZE_VARIANTS, DEFAULT_NEED_TRANSLATE (read)
# Arguments:
#   $1 - product URL (required; whitespace-only is rejected)
#   $2 - (optional) override for needTranslate; ignored when blank
# Outputs:
#   One-line JSON object with url, optimizeImages, optimizeTitles,
#   optimizeVariants and needTranslate on stdout.
# Returns:
#   Non-zero, with "url is required" on stderr, when the URL is blank.
#######################################
build_payload_from_url() {
  local url="$1"
  local need_translate_override="${2:-}"
  local py_args=(
    "$url"
    "$DEFAULT_OPTIMIZE_IMAGES"
    "$DEFAULT_OPTIMIZE_TITLES"
    "$DEFAULT_OPTIMIZE_VARIANTS"
    "$DEFAULT_NEED_TRANSLATE"
    "$need_translate_override"
  )
  python3 - "${py_args[@]}" <<'PY'
import json
import sys

TRUTHY = ("1", "true", "yes", "y")


def as_bool(raw):
    return str(raw).strip().lower() in TRUTHY


target, images, titles, variants, translate, override = sys.argv[1:7]

target = (target or "").strip()
if not target:
    raise SystemExit("url is required")

# A non-blank override replaces the default translate flag.
override = (override or "").strip()
payload = {
    "url": target,
    "optimizeImages": as_bool(images),
    "optimizeTitles": as_bool(titles),
    "optimizeVariants": as_bool(variants),
    "needTranslate": as_bool(override if override else translate),
}
print(json.dumps(payload, ensure_ascii=False))
PY
}
|
|
|
|
#######################################
# Check a caller-supplied scrape payload and re-emit it as JSON.
# Arguments:
#   $1 - payload text; must parse as a JSON object with a truthy "url"
# Outputs:
#   The parsed payload re-serialized as one line of JSON on stdout.
# Returns:
#   Non-zero, with a message on stderr, when the text is not valid JSON, is
#   not an object, or has no usable "url".
#######################################
validate_payload_json() {
  local raw="$1"
  python3 - "$raw" <<'PY'
import json
import sys


def load_payload(text):
    try:
        doc = json.loads(text)
    except Exception as exc:
        raise SystemExit(f"invalid payload json: {exc}")
    if not isinstance(doc, dict):
        raise SystemExit("payload must be a JSON object")
    if not doc.get("url"):
        raise SystemExit("payload.url is required")
    return doc


print(json.dumps(load_payload(sys.argv[1]), ensure_ascii=False))
PY
}
|
|
|
|
#######################################
# Open a session with the auth service and print its JSON response.
# Globals:
#   CLIENT_KEY, AUTH_BASE, DRY_RUN (read)
# Outputs:
#   The session response body on stdout. In dry-run mode, a fixed placeholder
#   session object is printed and no request is made.
# Returns:
#   Exits the shell (via require_2xx) when the service answers with a
#   non-2xx status.
#######################################
get_access_token() {
  # Let python build the request body so the key is JSON-escaped correctly.
  local request_json
  request_json="$(
    python3 - "$CLIENT_KEY" <<'PY'
import json
import sys

print(json.dumps({"clientKey": sys.argv[1]}, ensure_ascii=False))
PY
  )"

  if [[ "$DRY_RUN" -eq 1 ]]; then
    printf '%s\n' '{"accessToken":"<dry-run-token>","ownerSessionToken":"<dry-run-owner-token>","expiresAt":"2099-01-01T00:00:00.000Z"}'
    return
  fi

  local reply http_code reply_body
  reply="$(request_api "POST" "$AUTH_BASE/auth/skill-credit/session" "" "$request_json")"
  http_code="$(extract_status "$reply")"
  reply_body="$(extract_body "$reply")"
  require_2xx "$http_code" "$reply_body" "skill session"
  echo "$reply_body"
}
|
|
|
|
#######################################
# Read one top-level key from a JSON object.
# Arguments:
#   $1 - JSON text
#   $2 - key to look up
# Outputs:
#   The value as printed by Python's print(), or an empty line when the text
#   is not valid JSON, is not a JSON object, lacks the key, or holds null.
# Returns:
#   0 in all of the cases above, so callers can test for an empty result.
#######################################
json_get() {
  local raw="$1"
  local key="$2"
  python3 - "$raw" "$key" <<'PY'
import json
import sys

raw, key = sys.argv[1], sys.argv[2]

try:
    data = json.loads(raw)
except Exception:
    data = None

# Only objects support key lookup. Lists, strings and numbers are valid JSON
# too, and must yield an empty value rather than an AttributeError traceback.
value = data.get(key) if isinstance(data, dict) else None
print("" if value is None else value)
PY
}
|
|
|
|
#######################################
# "session" sub-command: print the session JSON from get_access_token.
# Outputs: the session JSON on stdout.
#######################################
cmd_session() {
  # Assign separately from `local` so a failure in get_access_token is not
  # masked by the declaration's own exit status.
  local token_response
  token_response="$(get_access_token)"
  echo "$token_response"
}
|
|
|
|
#######################################
# "scrape-url" sub-command: build a payload from a URL and submit it.
# Globals:
#   POSITIONALS (read) - [1] is the product URL, [2] an optional
#                        need-translate override
# Outputs:
#   Whatever run_scrape_with_payload prints; an error on stderr when the URL
#   is missing.
# Returns:
#   Exits the shell with status 1 when no URL was given.
#######################################
cmd_scrape_url() {
  local target_url="${POSITIONALS[1]:-}"
  local translate_flag="${POSITIONALS[2]:-}"

  [[ -n "$target_url" ]] || {
    echo "scrape-url requires <1688-url>" >&2
    exit 1
  }

  local request_json
  request_json="$(build_payload_from_url "$target_url" "$translate_flag")"
  run_scrape_with_payload "$request_json"
}
|
|
|
|
#######################################
# "scrape-payload" sub-command: validate a caller-supplied payload and
# submit it.
# Globals:
#   POSITIONALS (read) - [1] is the payload JSON text
# Outputs:
#   Whatever run_scrape_with_payload prints; an error on stderr when the
#   payload is missing.
# Returns:
#   Exits the shell with status 1 when no payload was given.
#######################################
cmd_scrape_payload() {
  local payload_text="${POSITIONALS[1]:-}"

  [[ -n "$payload_text" ]] || {
    echo "scrape-payload requires <payload-json>" >&2
    exit 1
  }

  local request_json
  request_json="$(validate_payload_json "$payload_text")"
  run_scrape_with_payload "$request_json"
}
|
|
|
|
#######################################
# Obtain a session, submit a scrape payload, and print a combined report.
# Globals:
#   ECOM_BASE, DRY_RUN (read)
# Arguments:
#   $1 - request payload as JSON text
# Outputs:
#   Normal run: one-line JSON with status, requestPayload, session and
#   scrape {httpStatus, body}. Dry run: the equivalent curl command line.
# Returns:
#   Exits the shell with status 1 when the session has no accessToken, or
#   (via require_2xx) when the scrape request gets a non-2xx status.
#######################################
run_scrape_with_payload() {
  local payload="$1"
  local session_json access_token

  session_json="$(get_access_token)"
  access_token="$(json_get "$session_json" "accessToken")"
  if [[ -z "$access_token" ]]; then
    echo "missing accessToken from /auth/skill-credit/session response" >&2
    echo "$session_json" >&2
    exit 1
  fi

  # Dry run: show the request that would be sent, with the token masked.
  if [[ "$DRY_RUN" -eq 1 ]]; then
    echo "curl -sS -X POST \"$ECOM_BASE/ecom/tasks/scrape\" -H \"Authorization: Bearer <accessToken>\" -H \"Content-Type: application/json\" --data '$payload'"
    return
  fi

  local reply http_code reply_body
  reply="$(request_api "POST" "$ECOM_BASE/ecom/tasks/scrape" "$access_token" "$payload")"
  http_code="$(extract_status "$reply")"
  reply_body="$(extract_body "$reply")"
  require_2xx "$http_code" "$reply_body" "ecom scrape"

  # Assemble the final report; any piece that is not valid JSON is wrapped
  # as {"raw": <text>} instead of failing.
  python3 - "$session_json" "$http_code" "$reply_body" "$payload" <<'PY'
import json
import sys


def load_or_wrap(text):
    try:
        return json.loads(text)
    except Exception:
        return {"raw": text}


session_text, status_text, body_text, payload_text = sys.argv[1:]

report = {
    "status": "SUCCESS",
    "requestPayload": load_or_wrap(payload_text),
    "session": load_or_wrap(session_text),
    "scrape": {
        "httpStatus": int(status_text),
        "body": load_or_wrap(body_text),
    },
}
print(json.dumps(report, ensure_ascii=False))
PY
}
|
|
|
|
# Route the selected sub-command to its handler; anything else is reported
# on stderr, followed by the help text, and fails.
if [[ "$COMMAND" == "session" ]]; then
  cmd_session
elif [[ "$COMMAND" == "scrape-url" ]]; then
  cmd_scrape_url
elif [[ "$COMMAND" == "scrape-payload" ]]; then
  cmd_scrape_payload
else
  echo "Unknown command: $COMMAND" >&2
  usage
  exit 1
fi
|