feat: init
This commit is contained in:
commit
1c8df130a7
|
|
@ -0,0 +1,71 @@
|
|||
# fastapi-vision-ocr
|
||||
|
||||
一个更轻的 macOS OCR API 示例:
|
||||
|
||||
- `FastAPI` 提供 HTTP 接口
|
||||
- `pyobjc-framework-Vision` 直接调用 `VNRecognizeTextRequest`
|
||||
- 接收 `multipart/form-data` 图片上传
|
||||
|
||||
## 要求
|
||||
|
||||
- macOS
|
||||
- Python 3.11+
|
||||
- Xcode Command Line Tools
|
||||
|
||||
## 安装
|
||||
|
||||
```bash
|
||||
cd /Users/amy/Documents/workspace/ecommer-codebase/fastapi-vision-ocr
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## 启动
|
||||
|
||||
```bash
|
||||
cd /Users/amy/Documents/workspace/ecommer-codebase/fastapi-vision-ocr
|
||||
source .venv/bin/activate
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||
```
|
||||
|
||||
## 测试
|
||||
|
||||
健康检查:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:8000/health
|
||||
```
|
||||
|
||||
OCR:
|
||||
|
||||
```bash
|
||||
curl -X POST http://127.0.0.1:8000/ocr \
|
||||
-F "image=@/absolute/path/to/test.png" \
|
||||
-F "recognition_level=accurate" \
|
||||
-F "languages=zh-Hans" \
|
||||
-F "languages=en-US"
|
||||
```
|
||||
|
||||
返回:
|
||||
|
||||
```json
|
||||
{
|
||||
"text": "hello\nworld",
|
||||
"lines": [
|
||||
{
|
||||
"text": "hello",
|
||||
"confidence": 0.99
|
||||
},
|
||||
{
|
||||
"text": "world",
|
||||
"confidence": 0.98
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## 说明
|
||||
|
||||
- 这个服务只适合跑在 macOS,因为底层依赖 Apple Vision.framework。
|
||||
- 这是 MVP 方案,目标是先把“上传图片返回文本”快速跑通。
|
||||
Binary file not shown.
|
|
@ -0,0 +1,102 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import Quartz
|
||||
import Vision
|
||||
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
|
||||
|
||||
app = FastAPI(title="FastAPI Vision OCR", version="0.1.0")
|
||||
|
||||
|
||||
def recognize_text(
    image_path: Path,
    recognition_level: str = "accurate",
    languages: list[str] | None = None,
) -> dict[str, Any]:
    """Run Apple Vision text recognition on an image file.

    Args:
        image_path: Filesystem path of the image to decode.
        recognition_level: ``"fast"`` selects the fast recognition level;
            any other value selects the accurate level.
        languages: Recognition languages to set on the request. When empty
            or ``None`` the request's languages are left untouched.

    Returns:
        A dict with ``"text"`` (the recognized lines joined by ``"\\n"``) and
        ``"lines"`` (one ``{"text", "confidence"}`` dict per observation that
        produced a candidate, in the order Vision returned them).

    Raises:
        ValueError: If no image source can be created for the path, or the
            first image in the source cannot be decoded.
        RuntimeError: If Vision reports that performing the request failed.
    """
    # The buffer length must be the number of BYTES in the encoded path, not
    # the number of characters: the two differ for non-ASCII paths, and a
    # short length would truncate the URL.
    path_bytes = str(image_path).encode("utf-8")
    image_url = Quartz.CFURLCreateFromFileSystemRepresentation(
        None,
        path_bytes,
        len(path_bytes),
        False,
    )
    image_source = Quartz.CGImageSourceCreateWithURL(image_url, None)
    if image_source is None:
        raise ValueError("Unsupported image format.")

    cg_image = Quartz.CGImageSourceCreateImageAtIndex(image_source, 0, None)
    if cg_image is None:
        raise ValueError("Failed to decode image.")

    request = Vision.VNRecognizeTextRequest.alloc().init()
    request.setRecognitionLevel_(
        Vision.VNRequestTextRecognitionLevelFast
        if recognition_level == "fast"
        else Vision.VNRequestTextRecognitionLevelAccurate
    )
    request.setUsesLanguageCorrection_(True)

    if languages:
        request.setRecognitionLanguages_(languages)

    handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(
        cg_image, None
    )
    success, error = handler.performRequests_error_([request], None)
    if not success:
        message = str(error) if error else "Vision OCR failed."
        raise RuntimeError(message)

    lines: list[dict[str, Any]] = []
    for observation in request.results() or []:
        # Only the single best candidate of each observation is reported.
        candidates = observation.topCandidates_(1)
        if not candidates:
            continue
        candidate = candidates[0]
        lines.append(
            {
                "text": str(candidate.string()),
                "confidence": float(candidate.confidence()),
            }
        )

    return {
        "text": "\n".join(line["text"] for line in lines),
        "lines": lines,
    }
|
||||
|
||||
|
||||
@app.get("/health")
def health() -> dict[str, str]:
    """Liveness probe: always reports a static ``ok`` status."""
    payload: dict[str, str] = dict(status="ok")
    return payload
|
||||
|
||||
|
||||
@app.post("/ocr")
async def ocr(
    image: UploadFile = File(...),
    recognition_level: str = Form("accurate"),
    languages: list[str] | None = Form(None),
) -> dict[str, Any]:
    """Recognize text in an uploaded image.

    The upload is spooled to a temporary file (keeping the original file
    suffix, or ``.bin`` when there is none), passed to ``recognize_text``,
    and the temporary file is removed afterwards.

    Args:
        image: The uploaded image file.
        recognition_level: Either ``"fast"`` or ``"accurate"``.
        languages: Optional recognition languages forwarded to
            ``recognize_text``.

    Returns:
        The dict produced by ``recognize_text``.

    Raises:
        HTTPException: 400 when ``recognition_level`` is invalid or
            ``recognize_text`` raises ``ValueError``; 500 for any other
            error raised by ``recognize_text``.
    """
    if recognition_level not in {"fast", "accurate"}:
        raise HTTPException(status_code=400, detail="recognition_level must be fast or accurate")

    suffix = Path(image.filename or "upload.bin").suffix or ".bin"

    tmp_path: Path | None = None
    try:
        # The file is created with delete=False, so it must be unlinked by
        # hand. Creating it inside the try guarantees cleanup even when
        # reading the upload or writing it to disk fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(await image.read())

        try:
            return recognize_text(
                image_path=tmp_path,
                recognition_level=recognition_level,
                languages=languages,
            )
        except ValueError as exc:
            raise HTTPException(status_code=400, detail=str(exc)) from exc
        except Exception as exc:
            raise HTTPException(status_code=500, detail=str(exc)) from exc
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
fastapi>=0.115,<1.0
|
||||
uvicorn>=0.35,<1.0
|
||||
python-multipart>=0.0.20,<1.0
|
||||
pyobjc-core>=11.1
|
||||
pyobjc-framework-Vision>=11.1
|
||||
pyobjc-framework-Quartz>=11.1
|
||||
Loading…
Reference in New Issue