feat: init

2026-03-19 18:59:59 +08:00 · 2026-03-19 18:59:59 +08:00 · 1c8df130a7
commit 1c8df130a7
4 changed files with 179 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,71 @@
+# fastapi-vision-ocr
+
+一个更轻的 macOS OCR API 示例：
+
+- `FastAPI` 提供 HTTP 接口
+- `pyobjc-framework-Vision` 直接调用 `VNRecognizeTextRequest`
+- 接收 `multipart/form-data` 图片上传
+
+## 要求
+
+- macOS
+- Python 3.11+
+- Xcode Command Line Tools
+
+## 安装
+
+```bash
+cd /path/to/fastapi-vision-ocr
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+## 启动
+
+```bash
+cd /path/to/fastapi-vision-ocr
+source .venv/bin/activate
+uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
+```
+
+## 测试
+
+健康检查：
+
+```bash
+curl http://127.0.0.1:8000/health
+```
+
+OCR：
+
+```bash
+curl -X POST http://127.0.0.1:8000/ocr \
+  -F "image=@/absolute/path/to/test.png" \
+  -F "recognition_level=accurate" \
+  -F "languages=zh-Hans" \
+  -F "languages=en-US"
+```
+
+返回：
+
+```json
+{
+  "text": "hello\nworld",
+  "lines": [
+    {
+      "text": "hello",
+      "confidence": 0.99
+    },
+    {
+      "text": "world",
+      "confidence": 0.98
+    }
+  ]
+}
+```
+
+## 说明
+
+- 这个服务只适合跑在 macOS，因为底层依赖 Apple Vision.framework。
+- 这是 MVP 方案，目标是先把“上传图片返回文本”快速跑通。
--- a/app/pycache/main.cpython-314.pyc
+++ b/app/pycache/main.cpython-314.pyc
--- a/app/main.py
+++ b/app/main.py
@ -0,0 +1,102 @@
+from __future__ import annotations
+
+import tempfile
+from pathlib import Path
+from typing import Any
+
+import Quartz
+import Vision
+from fastapi import FastAPI, File, Form, HTTPException, UploadFile
+
+app = FastAPI(title="FastAPI Vision OCR", version="0.1.0")
+
+
+def recognize_text(
+    image_path: Path,
+    recognition_level: str = "accurate",
+    languages: list[str] | None = None,
+) -> dict[str, Any]:
+    image_url = Quartz.CFURLCreateFromFileSystemRepresentation(
+        None,
+        str(image_path).encode("utf-8"),
+        len(str(image_path)),
+        False,
+    )
+    image_source = Quartz.CGImageSourceCreateWithURL(image_url, None)
+    if image_source is None:
+        raise ValueError("Unsupported image format.")
+
+    cg_image = Quartz.CGImageSourceCreateImageAtIndex(image_source, 0, None)
+    if cg_image is None:
+        raise ValueError("Failed to decode image.")
+
+    request = Vision.VNRecognizeTextRequest.alloc().init()
+    request.setRecognitionLevel_(
+        Vision.VNRequestTextRecognitionLevelFast
+        if recognition_level == "fast"
+        else Vision.VNRequestTextRecognitionLevelAccurate
+    )
+    request.setUsesLanguageCorrection_(True)
+
+    if languages:
+        request.setRecognitionLanguages_(languages)
+
+    handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(
+        cg_image, None
+    )
+    success, error = handler.performRequests_error_([request], None)
+    if not success:
+        message = str(error) if error else "Vision OCR failed."
+        raise RuntimeError(message)
+
+    results = request.results() or []
+    lines: list[dict[str, Any]] = []
+    for observation in results:
+        candidates = observation.topCandidates_(1)
+        if not candidates:
+            continue
+        candidate = candidates[0]
+        lines.append(
+            {
+                "text": str(candidate.string()),
+                "confidence": float(candidate.confidence()),
+            }
+        )
+
+    return {
+        "text": "\n".join(line["text"] for line in lines),
+        "lines": lines,
+    }
+
+
+@app.get("/health")
+def health() -> dict[str, str]:
+    return {"status": "ok"}
+
+
+@app.post("/ocr")
+async def ocr(
+    image: UploadFile = File(...),
+    recognition_level: str = Form("accurate"),
+    languages: list[str] | None = Form(None),
+) -> dict[str, Any]:
+    if recognition_level not in {"fast", "accurate"}:
+        raise HTTPException(status_code=400, detail="recognition_level must be fast or accurate")
+
+    suffix = Path(image.filename or "upload.bin").suffix or ".bin"
+    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
+        tmp.write(await image.read())
+        tmp_path = Path(tmp.name)
+
+    try:
+        return recognize_text(
+            image_path=tmp_path,
+            recognition_level=recognition_level,
+            languages=languages,
+        )
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+    finally:
+        tmp_path.unlink(missing_ok=True)
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,6 @@
+fastapi>=0.115,<1.0
+uvicorn>=0.35,<1.0
+python-multipart>=0.0.20,<1.0
+pyobjc-core>=11.1
+pyobjc-framework-Vision>=11.1
+pyobjc-framework-Quartz>=11.1