commit 1c8df130a719a02117a5c217f1c1943b1e7e5964
Author: Nice Guy
Date:   Thu Mar 19 18:59:59 2026 +0800

    feat: init

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f21e420
--- /dev/null
+++ b/README.md
@@ -0,0 +1,71 @@
+# fastapi-vision-ocr
+
+一个更轻的 macOS OCR API 示例:
+
+- `FastAPI` 提供 HTTP 接口
+- `pyobjc-framework-Vision` 直接调用 `VNRecognizeTextRequest`
+- 接收 `multipart/form-data` 图片上传
+
+## 要求
+
+- macOS
+- Python 3.11+
+- Xcode Command Line Tools
+
+## 安装
+
+```bash
+cd /Users/amy/Documents/workspace/ecommer-codebase/fastapi-vision-ocr
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+## 启动
+
+```bash
+cd /Users/amy/Documents/workspace/ecommer-codebase/fastapi-vision-ocr
+source .venv/bin/activate
+uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
+```
+
+## 测试
+
+健康检查:
+
+```bash
+curl http://127.0.0.1:8000/health
+```
+
+OCR:
+
+```bash
+curl -X POST http://127.0.0.1:8000/ocr \
+  -F "image=@/absolute/path/to/test.png" \
+  -F "recognition_level=accurate" \
+  -F "languages=zh-Hans" \
+  -F "languages=en-US"
+```
+
+返回:
+
+```json
+{
+  "text": "hello\nworld",
+  "lines": [
+    {
+      "text": "hello",
+      "confidence": 0.99
+    },
+    {
+      "text": "world",
+      "confidence": 0.98
+    }
+  ]
+}
+```
+
+## 说明
+
+- 这个服务只适合跑在 macOS,因为底层依赖 Apple Vision.framework。
+- 这是 MVP 方案,目标是先把“上传图片返回文本”快速跑通。
diff --git a/app/__pycache__/main.cpython-314.pyc b/app/__pycache__/main.cpython-314.pyc
new file mode 100644
index 0000000..ff58e54
Binary files /dev/null and b/app/__pycache__/main.cpython-314.pyc differ
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..8a396c2
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+from typing import Any
+
+import Quartz
+import Vision
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile
+
+app = FastAPI(title="FastAPI Vision OCR", version="0.1.0")
+
+
+def recognize_text(
+    image_path: Path,
+    recognition_level: str = "accurate",
+    languages: list[str] | None = None,
+) -> dict[str, Any]:
+    """Run Apple Vision OCR on the image at *image_path*.
+
+    Returns {"text": <lines joined by newline>, "lines": [{text, confidence}]}.
+    Raises ValueError for unreadable images, RuntimeError when Vision fails.
+    """
+    # CFURLCreateFromFileSystemRepresentation takes a byte buffer plus its
+    # length in bytes; len(str(...)) counts characters, which understates
+    # the UTF-8 buffer size for non-ASCII paths and truncates the URL.
+    path_bytes = str(image_path).encode("utf-8")
+    image_url = Quartz.CFURLCreateFromFileSystemRepresentation(
+        None,
+        path_bytes,
+        len(path_bytes),
+        False,
+    )
+    image_source = Quartz.CGImageSourceCreateWithURL(image_url, None)
+    if image_source is None:
+        raise ValueError("Unsupported image format.")
+
+    cg_image = Quartz.CGImageSourceCreateImageAtIndex(image_source, 0, None)
+    if cg_image is None:
+        raise ValueError("Failed to decode image.")
+
+    request = Vision.VNRecognizeTextRequest.alloc().init()
+    request.setRecognitionLevel_(
+        Vision.VNRequestTextRecognitionLevelFast
+        if recognition_level == "fast"
+        else Vision.VNRequestTextRecognitionLevelAccurate
+    )
+    request.setUsesLanguageCorrection_(True)
+
+    if languages:
+        request.setRecognitionLanguages_(languages)
+
+    handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(
+        cg_image, None
+    )
+    success, error = handler.performRequests_error_([request], None)
+    if not success:
+        message = str(error) if error else "Vision OCR failed."
+        raise RuntimeError(message)
+
+    results = request.results() or []
+    lines: list[dict[str, Any]] = []
+    for observation in results:
+        candidates = observation.topCandidates_(1)
+        if not candidates:
+            continue
+        candidate = candidates[0]
+        lines.append(
+            {
+                "text": str(candidate.string()),
+                "confidence": float(candidate.confidence()),
+            }
+        )
+
+    return {
+        "text": "\n".join(line["text"] for line in lines),
+        "lines": lines,
+    }
+
+
+@app.get("/health")
+def health() -> dict[str, str]:
+    return {"status": "ok"}
+
+
+@app.post("/ocr")
+async def ocr(
+    image: UploadFile = File(...),
+    recognition_level: str = Form("accurate"),
+    languages: list[str] | None = Form(None),
+) -> dict[str, Any]:
+    if recognition_level not in {"fast", "accurate"}:
+        raise HTTPException(status_code=400, detail="recognition_level must be fast or accurate")
+
+    suffix = Path(image.filename or "upload.bin").suffix or ".bin"
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+        tmp.write(await image.read())
+        tmp_path = Path(tmp.name)
+
+    try:
+        return recognize_text(
+            image_path=tmp_path,
+            recognition_level=recognition_level,
+            languages=languages,
+        )
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+    finally:
+        tmp_path.unlink(missing_ok=True)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5a3d04f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+fastapi>=0.115,<1.0
+uvicorn>=0.35,<1.0
+python-multipart>=0.0.20,<1.0
+pyobjc-core>=11.1
+pyobjc-framework-Vision>=11.1
+pyobjc-framework-Quartz>=11.1