This repository was archived by the owner on Sep 9, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTextProcessor.py
23 lines (19 loc) · 2.77 KB
/
TextProcessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#importing modules
import urllib
import urllib.request

import numpy as np
import pytesseract as pyTesseract
from PIL import Image
data = 'data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/4gIoSUNDX1BST0ZJTEUAAQEAAAIYAAAAAAQwAABtbnRyUkdCIFhZWiAAAAAAAAAAAAAAAABhY3NwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAA9tYAAQAAAADTLQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAlkZXNjAAAA8AAAAHRyWFlaAAABZAAAABRnWFlaAAABeAAAABRiWFlaAAABjAAAABR3dHB0AAABoAAAABRyVFJDAAABtAAAAChnVFJDAAABtAAAAChiVFJDAAABtAAAAChjcHJ0AAAB3AAAADxtbHVjAAAAAAAAAAEAAAAMZW5VUwAAAFgAAAAcAEcAbwBvAGcAbABlAC8AUwBrAGkAYQAvADIANwBCAEQAQwAzADUANwA1ADYAOQBDAEIAMAAxAEQAOQA4AEMAQwBGADUAMgAyAEEANgA2AEYAQgAzAEUAMlhZWiAAAAAAAABuQAAAOEgAAAONWFlaIAAAAAAAAGI2AAC3pgAAEMZYWVogAAAAAAAAJmAAABASAAC+2VhZWiAAAAAAAAD21gABAAAAANMtcGFyYQAAAAAABAAAAAJlQgAA8xUAAAyqAAAT5QAAC4MAAAAXAAAAAG1sdWMAAAAAAAAAAQAAAAxlblVTAAAAIAAAABwARwBvAG8AZwBsAGUAIABJAG4AYwAuACAAMgAwADEANv/bAEMAAwICAwICAwMDAwQDAwQFCAUFBAQFCgcHBggMCgwMCwoLCw0OEhANDhEOCwsQFhARExQVFRUMDxcYFhQYEhQVFP/bAEMBAwQEBQQFCQUFCRQNCw0UFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFP/AABEIBZALQAMBIgACEQEDEQH/...39p+dvA61VvhleFlFddYwU0sMrEud3gpGrFFJDTxRxMYjVVVw5yQtcmc4c5UVD3L4faKvgo605djqH/rsB42/uZl0gt/hEV0EzkbJXWCpp4UVe16TQSKif82Ny/sA9geFl4QWpeBVpsdo0FpRb1eK1jnbkopZqaigZhqJsjx5TuaNTKIiMXKLyJfgpccNQ8ftIX6j13pNbPdaBzIZ2vopIqStglR6JhkuVymxyOblU5tX87CdPhT+FbcfBnrrGq6FdqO0XSN+24pc1pmxzNXyolb0L+e1WuRcpnysJ5Kqac01/dML3rK4uoLDwXrr1XNifOtNQXl80iRtTLnbW0irhPT9aekDWPBjhvTcI/7o/R6UoXOdQUFZWupUdlVZDJbZpY2qqquVa2Rrc9+M956u8MvwpKzwbNO2JtmtUFyv17kmSmfXI5aaCOHZ0jno1zXOcvSsRqIqJ2qq8kR3kzgVxbTjl/dB9O60S0JYluKTtWi8Y8Y2LHa5Ys79jM52Ivmpjs7i+f3Vvz+F/wCi6f8AygG7fAp8KK8+EfZNSx6ht1FR3eySwbpre1zIZopkk2+S5zlRyLE/PPCoreXaeLOMnDuw0nh7y6Vlp2RWC4amty1ECKqMVlV0Esrf8FFWZycuSZ9CG5f7lJ/G8UP0Wz/5s1D4XGib9xF8N/VOntMUbq++1jqPxWnZMyJXKy3QSLh73NamGscvNU7AP0B8JPVmvuGnC2Ks4W6Xhvlzhnjp30kdM6bxal2OTfHCxWq5UVI2o1M4Rc7VRFxofgX/AHQWgitN1tXGt/5OaioZ2sifFbJ0Wojci5SSJrXbHtVOfmoqObhOSquDh8LXjT4Mdqsdt4z6FiusFYjoqO5R3GFtXK2PbvWRYnSMe5EeztRir2qrlypvfh5xH4U+G7pC6U82n0uTLcrY6u33ukak9L0qORj45Gqu3dsfhzHI5Nq5xyA8F8P6rSdf4dNhqtDLnSlRqSKegRIHQ
ta12HOa1'
# Decode the inline ``data:`` URL held in ``data`` and save the embedded JPEG
# bytes to 'image.jpg'. Both the response object and the output file are
# context-managed so they are closed even if the write fails.
with urllib.request.urlopen(data) as response, open('image.jpg', 'wb') as f:
    # read() on the response yields the decoded payload; no need to reach
    # through the wrapper's underlying ``.file`` attribute.
    f.write(response.read())
# If the tesseract executable is not on your PATH, point pytesseract at it
# explicitly (the path below is a Windows location; adjust for your system).
pyTesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
filename = r'TestImages\1_python-ocr.jpg'
# Open the image inside a context manager so the file handle is released as
# soon as the pixel data has been copied into the NumPy array.
with Image.open(filename) as source_image:
    img1 = np.array(source_image)
# Converting image to text.
text = pyTesseract.image_to_string(img1)
# Optional OpenCV preprocessing, currently disabled:
# import cv2
# norm_img = np.zeros((img.shape[0], img.shape[1]))
# img = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX)
# img = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY)[1]
# img = cv2.GaussianBlur(img, (1, 1), 0)
# Alternative (disabled): OCR the 'image.jpg' file instead of ``filename``.
# print(pyTesseract.image_to_string(Image.open('image.jpg')))
print(text)