From 1c8df130a719a02117a5c217f1c1943b1e7e5964 Mon Sep 17 00:00:00 2001 From: Nice Guy Date: Thu, 19 Mar 2026 18:59:59 +0800 Subject: [PATCH] feat: init --- README.md | 71 +++++++++++++++++++ app/__pycache__/main.cpython-314.pyc | Bin 0 -> 5603 bytes app/main.py | 102 +++++++++++++++++++++++++++ requirements.txt | 6 ++ 4 files changed, 179 insertions(+) create mode 100644 README.md create mode 100644 app/__pycache__/main.cpython-314.pyc create mode 100644 app/main.py create mode 100644 requirements.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..f21e420 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +# fastapi-vision-ocr + +一个更轻的 macOS OCR API 示例: + +- `FastAPI` 提供 HTTP 接口 +- `pyobjc-framework-Vision` 直接调用 `VNRecognizeTextRequest` +- 接收 `multipart/form-data` 图片上传 + +## 要求 + +- macOS +- Python 3.11+ +- Xcode Command Line Tools + +## 安装 + +```bash +cd /Users/amy/Documents/workspace/ecommer-codebase/fastapi-vision-ocr +python3 -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +``` + +## 启动 + +```bash +cd /Users/amy/Documents/workspace/ecommer-codebase/fastapi-vision-ocr +source .venv/bin/activate +uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload +``` + +## 测试 + +健康检查: + +```bash +curl http://127.0.0.1:8000/health +``` + +OCR: + +```bash +curl -X POST http://127.0.0.1:8000/ocr \ + -F "image=@/absolute/path/to/test.png" \ + -F "recognition_level=accurate" \ + -F "languages=zh-Hans" \ + -F "languages=en-US" +``` + +返回: + +```json +{ + "text": "hello\nworld", + "lines": [ + { + "text": "hello", + "confidence": 0.99 + }, + { + "text": "world", + "confidence": 0.98 + } + ] +} +``` + +## 说明 + +- 这个服务只适合跑在 macOS,因为底层依赖 Apple Vision.framework。 +- 这是 MVP 方案,目标是先把“上传图片返回文本”快速跑通。 diff --git a/app/__pycache__/main.cpython-314.pyc b/app/__pycache__/main.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff58e5440dabae7e54e40bb9a184276cc962dca7 GIT binary patch literal 5603 
zcmbVQU2IcF7M|<->%VI!vEu}i*pMFxq)vcDgwj$$AS6pj-Ai!0YQQ!2P2$$E-MQB^ zM163(D{W9$BGq=)^hYYvzOYZrs{4SnFD$U{HV(3Tn+m!`TJR?IRA=G2&F6Bcdw$(K*`= zBykci2?s64jH*j&kZe9A*?lPKPExA-oGa9Bs#Pl`M?YvrlH>$wkx2I&45ZdWpIw#8 zhC*_gea_ioUU#+TfSPqv&0^LL&{7?YUvFqroxvkn+EKUXTQYh#rP-sYR9cOyiF8UC zhdL9Asxwg1M^l%&S&bi#D(cbDsKyK@k}~9JJgYfJCMH5B=3;V&bkl5SGs$!`P6X!b zEAoMGLV>oxi$l^p+ZWsy?CZ8@teQ|^5dNZ!Nuwr2W3eobs&b4Nq+%2#08K-6Z8s1( z6h`MQgTF>74!joh5;a3flw^@;iIG@i19}-)ko8-#4Vo=68=aOT8CV2n2%`?mvGjB* zK{Smd<%@DsqZJiv)?_p_odsFtdv`LSsILO^T;T1%cseC(9G2BAPJJ)N6EVG|CjjO+ z_eWwdZVF5gRTZM{PB334(>ohYq=K1Cnj;daSW}Kf@HP+-;ji>TmP3!xL5h1sva;K= zN<;a;w{4Y$a+O2wz$#BD0dby3R-o3P4~oH(tBIU=-7K%cx^if;+7POozbXR=I2CA; zH928qo{11|@@l0-nXQvnll!#3rj%sqCzgejp*Luu-$;XR<~vZBmQa61zYG&*rr@@= zph>4GA<^cjGt`E$h}pIz7>j}Xi25rw$*^JeiP4x1qq)riLiHw9D?{EIZBph93HeM; ztu)&*+yobDuJKJOOiAqED{x5b^!3%|CWl~4I_;XQwYO|(gsQm)KMHdp|C8+!w?V>U zI*T~0vW4{6@nny1)hGDThPih>DfyFj>3mW?%x>6UKFmMOcZIniRg-!PP6NIi7EHaT zL0{0@YS!b%38|gks&_k0NqojO*==eOwwZRAp8V^xj^boUxCTwrkCHyj109}lSMKpfm`TsGQ+8Z!l9 zV`9F(76k%Rx+pZM@|>#KV(HXWA}*(5vPRi-|8MF>E;uZmTH6oAPUHt?H-U!1WxQD@ zZ8G*8%CUZQ%2in^)s6<}1l4W9urX*HlWPn?#}e$*m}DX)E5r|V1u(h3n)SJ9IVI0! 
"""FastAPI service exposing Apple Vision text recognition (OCR) over HTTP.

The module imports the PyObjC ``Quartz`` and ``Vision`` bindings at load time,
so it can only be imported where those frameworks are installed.  The packages
it relies on are fastapi, uvicorn, python-multipart, pyobjc-core,
pyobjc-framework-Vision and pyobjc-framework-Quartz (see requirements.txt).
"""

from __future__ import annotations

import asyncio
import logging
import os
import tempfile
from pathlib import Path
from typing import Any

import Quartz
import Vision
from fastapi import FastAPI, File, Form, HTTPException, UploadFile

logger = logging.getLogger(__name__)

app = FastAPI(title="FastAPI Vision OCR", version="0.1.0")

# Uploads are copied to disk in pieces of this size instead of being read
# into memory in a single call.
_UPLOAD_CHUNK_BYTES = 1024 * 1024

# Longest file extension (without the dot) that is reused for the temp file.
_MAX_SUFFIX_LENGTH = 16


def recognize_text(
    image_path: Path,
    recognition_level: str = "accurate",
    languages: list[str] | None = None,
) -> dict[str, Any]:
    """Run a ``VNRecognizeTextRequest`` on the image stored at *image_path*.

    Args:
        image_path: Location of the image file on disk.
        recognition_level: ``"fast"`` selects the fast recognition level; any
            other value selects the accurate level.
        languages: Recognition languages passed to the request.  ``None`` or
            an empty list leaves the request's languages untouched.

    Returns:
        A dict with two keys: ``"lines"``, a list of
        ``{"text": str, "confidence": float}`` entries (one per observation
        that has a top candidate), and ``"text"``, those line texts joined
        with ``"\\n"``.

    Raises:
        ValueError: If no image source can be created for the path or the
            first image in the source cannot be created.
        RuntimeError: If performing the Vision request reports failure.
    """
    # The length argument must be the size of the *encoded* buffer.  Using the
    # character count of the path truncates any path that contains non-ASCII
    # characters, because those encode to more than one byte each.
    path_bytes = os.fsencode(image_path)
    image_url = Quartz.CFURLCreateFromFileSystemRepresentation(
        None,
        path_bytes,
        len(path_bytes),
        False,
    )
    image_source = Quartz.CGImageSourceCreateWithURL(image_url, None)
    if image_source is None:
        raise ValueError("Unsupported image format.")

    cg_image = Quartz.CGImageSourceCreateImageAtIndex(image_source, 0, None)
    if cg_image is None:
        raise ValueError("Failed to decode image.")

    request = Vision.VNRecognizeTextRequest.alloc().init()
    request.setRecognitionLevel_(
        Vision.VNRequestTextRecognitionLevelFast
        if recognition_level == "fast"
        else Vision.VNRequestTextRecognitionLevelAccurate
    )
    request.setUsesLanguageCorrection_(True)

    if languages:
        request.setRecognitionLanguages_(languages)

    handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(
        cg_image, None
    )
    success, error = handler.performRequests_error_([request], None)
    if not success:
        message = str(error) if error else "Vision OCR failed."
        raise RuntimeError(message)

    lines: list[dict[str, Any]] = []
    for observation in request.results() or []:
        candidates = observation.topCandidates_(1)
        if not candidates:
            # Nothing recognised for this observation; skip it.
            continue
        candidate = candidates[0]
        lines.append(
            {
                "text": str(candidate.string()),
                "confidence": float(candidate.confidence()),
            }
        )

    return {
        "text": "\n".join(line["text"] for line in lines),
        "lines": lines,
    }


def _upload_suffix(filename: str | None) -> str:
    """Return the temp-file suffix to use for a client-supplied *filename*.

    The extension is reused only when it is short, ASCII and alphanumeric;
    anything else (missing name, no extension, unusual characters) falls back
    to ``".bin"`` so untrusted text never reaches the temp-file name.
    """
    suffix = Path(filename or "upload.bin").suffix
    extension = suffix[1:]
    if (
        extension
        and len(extension) <= _MAX_SUFFIX_LENGTH
        and extension.isascii()
        and extension.isalnum()
    ):
        return suffix
    return ".bin"


@app.get("/health")
def health() -> dict[str, str]:
    """Return a static payload used as a liveness check."""
    return {"status": "ok"}


@app.post("/ocr")
async def ocr(
    image: UploadFile = File(...),
    recognition_level: str = Form("accurate"),
    languages: list[str] | None = Form(None),
) -> dict[str, Any]:
    """Recognise text in an uploaded image and return it as JSON.

    The upload is copied to a temporary file, handed to ``recognize_text`` and
    the temporary file is removed afterwards, whether or not OCR succeeded.

    Args:
        image: The uploaded image (multipart form field ``image``).
        recognition_level: Must be ``"fast"`` or ``"accurate"``.
        languages: Optional repeated form field with recognition languages.

    Returns:
        The dict produced by ``recognize_text``.

    Raises:
        HTTPException: 400 for an invalid ``recognition_level``, an empty
            upload, or a ``ValueError`` from ``recognize_text``; 500 for any
            other failure during recognition.
    """
    if recognition_level not in {"fast", "accurate"}:
        raise HTTPException(
            status_code=400,
            detail="recognition_level must be fast or accurate",
        )

    tmp = tempfile.NamedTemporaryFile(
        delete=False, suffix=_upload_suffix(image.filename)
    )
    # Record the path before any write so the ``finally`` below can always
    # remove the file, even when reading the upload or writing it fails.
    tmp_path = Path(tmp.name)
    try:
        received = 0
        with tmp:
            while chunk := await image.read(_UPLOAD_CHUNK_BYTES):
                tmp.write(chunk)
                received += len(chunk)

        if not received:
            raise HTTPException(status_code=400, detail="Uploaded image is empty.")

        try:
            # recognize_text is synchronous; run it in a worker thread so the
            # event loop keeps serving other requests in the meantime.
            return await asyncio.to_thread(
                recognize_text,
                image_path=tmp_path,
                recognition_level=recognition_level,
                languages=languages,
            )
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=str(exc)) from exc
        except Exception as exc:
            logger.exception("OCR request failed")
            raise HTTPException(status_code=500, detail=str(exc)) from exc
    finally:
        tmp_path.unlink(missing_ok=True)