AllerLens/clova_ocr.py at main · bulee5328/AllerLens · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""
CLOVA OCR API 클라이언트
네이버 클라우드의 CLOVA OCR을 사용하여 이미지에서 텍스트를 추출합니다.
"""

import base64
import json
import time
import uuid
from typing import Dict, List, Optional

import requests

from config import CLOVA_OCR_URL, CLOVA_OCR_SECRET


class ClovaOCR:
    """Client for the NAVER Cloud CLOVA OCR API.

    Sends one image (a local file or a remote URL) to the configured OCR
    endpoint and flattens the response into plain text plus per-field details.
    """

    def __init__(self, api_url: Optional[str] = None, secret_key: Optional[str] = None):
        """Store the endpoint and secret, falling back to the ``config`` module.

        Args:
            api_url: CLOVA OCR API URL. ``CLOVA_OCR_URL`` is used when this is
                omitted or empty.
            secret_key: CLOVA OCR secret key. ``CLOVA_OCR_SECRET`` is used when
                this is omitted or empty.

        Raises:
            ValueError: If the URL or the secret is still empty after the
                fallback.
        """
        self.api_url = api_url or CLOVA_OCR_URL
        self.secret_key = secret_key or CLOVA_OCR_SECRET

        if not self.api_url or not self.secret_key:
            raise ValueError("CLOVA OCR API URL과 Secret Key가 필요합니다.")

    def extract_text(self, image_path: str) -> Dict:
        """Run OCR on a single image and return the parsed result.

        Args:
            image_path: Path of a local image file, or an ``http://`` /
                ``https://`` URL of a remote image.

        Returns:
            A dict with the keys:
                ``raw_text``: all recognized texts joined with single spaces.
                ``structured_data``: the ``images`` list of the API response.
                ``confidence``: mean field confidence rounded to 3 decimals
                    (``0`` when no field was returned).
                ``fields``: one ``{"text", "confidence", "bounding_box"}``
                    dict per recognized field.

        Raises:
            OSError: If a local ``image_path`` cannot be opened or read.
            Exception: If the HTTP request fails, the server answers with an
                error status, or the body is not valid JSON. The original
                error is attached as ``__cause__``.
        """
        request_json = {
            'images': [self._build_image_entry(image_path)],
            'requestId': str(uuid.uuid4()),
            'version': 'V2',
            # Epoch time in milliseconds.
            'timestamp': int(round(time.time() * 1000)),
        }

        headers = {
            'X-OCR-SECRET': self.secret_key,
            'Content-Type': 'application/json',
        }

        try:
            response = requests.post(
                self.api_url,
                headers=headers,
                data=json.dumps(request_json),
                timeout=30,
            )
            response.raise_for_status()
            result = response.json()
        # ValueError covers older requests releases, where a non-JSON body
        # raises a plain JSON decode error instead of a RequestException.
        except (requests.exceptions.RequestException, ValueError) as e:
            raise Exception(f"CLOVA OCR API 호출 실패: {str(e)}") from e

        return self._parse_response(result)

    @staticmethod
    def _is_url(image_path: str) -> bool:
        """Return True when ``image_path`` is an http(s) URL."""
        return image_path.startswith(('http://', 'https://'))

    def _build_image_entry(self, image_path: str) -> Dict:
        """Build the ``images[0]`` entry of the request body.

        Only the key that applies is included: ``url`` for a remote image,
        ``data`` (base64 text of the file bytes) for a local file. The unused
        key is omitted rather than sent as JSON ``null``.
        """
        entry = {
            'format': self._get_image_format(image_path),
            'name': 'food_label',
        }
        if self._is_url(image_path):
            entry['url'] = image_path
        else:
            with open(image_path, 'rb') as f:
                entry['data'] = base64.b64encode(f.read()).decode('utf-8')
        return entry

    def _get_image_format(self, image_path: str) -> str:
        """Derive the image format name from the path or URL suffix.

        For URLs the fragment and query string are dropped first, so
        ``https://host/a.png?size=1`` is detected as ``png``. Local paths are
        used verbatim because ``?`` and ``#`` are legal file-name characters.

        Returns:
            ``'png'`` or ``'pdf'`` for those suffixes (case-insensitive),
            otherwise ``'jpg'``.
        """
        path = image_path
        if self._is_url(path):
            # The fragment follows the query, so cut it first.
            path = path.partition('#')[0].partition('?')[0]
        path = path.lower()

        if path.endswith('.png'):
            return 'png'
        if path.endswith('.pdf'):
            return 'pdf'
        return 'jpg'

    def _parse_response(self, response: Dict) -> Dict:
        """Flatten a CLOVA OCR response into text, confidence and fields.

        A missing or ``null`` ``images`` / ``fields`` entry is treated as an
        empty list instead of raising.

        Args:
            response: Decoded JSON body of the OCR API response.

        Returns:
            The result dict described in :meth:`extract_text`.
        """
        images = response.get('images') or []

        texts = []
        confidence_scores = []
        fields_info = []

        for image in images:
            for field in image.get('fields') or []:
                text = field.get('inferText', '')
                confidence = field.get('inferConfidence', 0)

                texts.append(text)
                confidence_scores.append(confidence)
                fields_info.append({
                    "text": text,
                    "confidence": confidence,
                    "bounding_box": field.get('boundingPoly', {}),
                })

        # Average confidence over every field; 0 when nothing was recognized.
        if confidence_scores:
            avg_confidence = sum(confidence_scores) / len(confidence_scores)
        else:
            avg_confidence = 0

        return {
            "raw_text": ' '.join(texts),
            "structured_data": images,
            "confidence": round(avg_confidence, 3),
            "fields": fields_info,
        }


# ============================================
# 테스트 코드
# ============================================
if __name__ == "__main__":
    # Manual smoke test: check the configuration, then run OCR on one image.
    from config import validate_config

    validate_config()

    # Placeholder input; replace with a real local file path or image URL.
    sample_image = "test_food_label.jpg"
    client = ClovaOCR()

    try:
        result = client.extract_text(sample_image)

        # Summary: average confidence, the joined text, and the field count.
        print("=== CLOVA OCR 결과 ===")
        print(f"신뢰도: {result['confidence']}")
        print(f"\n추출된 텍스트:\n{result['raw_text']}")
        print(f"\n필드 개수: {len(result['fields'])}")
    except Exception as e:
        # Report any failure on stdout instead of showing a traceback.
        print(f"❌ 에러: {str(e)}")