Skip to content

Commit

Permalink
Setup for rotation
Browse files Browse the repository at this point in the history
  • Loading branch information
aryaminus committed Feb 17, 2018
1 parent c0d86e9 commit 1b07e18
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 32 deletions.
71 changes: 39 additions & 32 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import io
import os
import subprocess
import sys
import time
from subprocess import call

import PIL.Image as Im
import pyocr
import pyocr.builders
from PIL import Image as PI
from PIL import Image as Im
from wand.image import Image

VALIDITY = [".jpg",".gif",".png",".tga",".tif",".bmp", ".pdf"]
Expand Down Expand Up @@ -73,11 +75,6 @@ def pdf_run(self, image_file_name, filename):

page_start = time.time()

#img_ori = self.orientation_check(img_per_page.make_blob("png"))
#self.img_run(img_ori, text_file_path)

#call(["tesseract", image_file_name, text_file_path], stdout=FNULL) #Fetch tesseract with FNULL in write mode

page_elaboration = time.time() - page_start

print("page %s - size %s - process %2d sec." % (page, img_per_page.size, page_elaboration))
Expand All @@ -88,27 +85,21 @@ def pdf_run(self, image_file_name, filename):
process_end = time.time() - process_start
print("Total elaboration time: %s" % process_end)

def orientation_check(self, image):
    """Open an encoded image and rotate it by the orientation pyocr detects.

    Args:
        image: Encoded image bytes (wrapped in ``io.BytesIO`` and opened
            with PIL).

    Returns:
        The PIL image, rotated by the detected angle when that angle is
        non-zero; otherwise the image exactly as opened. Detection failures
        are reported on stdout and the unrotated image is returned.
    """
    orientation = ""
    ori_check = PI.open(io.BytesIO(image))

    try:
        if self.tool.can_detect_orientation():
            orientation = self.tool.detect_orientation(ori_check, lang=self.lang)
            angle = orientation["angle"]

            if angle != 0:
                # Image.rotate() returns a NEW image; the result has to be
                # kept or the rotation is silently lost. expand=True grows
                # the canvas so quarter-turn rotations are not cropped.
                # NOTE(review): confirm the sign convention of pyocr's
                # "angle" matches PIL's counter-clockwise rotate().
                ori_check = ori_check.rotate(angle, expand=True)

    except pyocr.PyocrException as exc:
        print("Orientation detection failed: {}".format(exc))

    print("Orientation: {}".format(orientation))

    return ori_check

def img_run(self, image_file_name, text_file_path):
    """Run the tesseract CLI on one image.

    Args:
        image_file_name: Path of the image handed to tesseract.
        text_file_path: Output base path handed to tesseract.
    """
    # stdout of the child process is redirected to FNULL (defined elsewhere
    # in this module).
    tesseract_cmd = ["tesseract", image_file_name, text_file_path]
    call(tesseract_cmd, stdout=FNULL)
def get_rotation_info(self, filename):
    """Ask the tesseract CLI for the orientation of an image.

    Runs ``tesseract <filename> - -psm 0`` and scans its combined
    stdout/stderr for the ``Orientation in degrees:`` line.

    Args:
        filename: Path of the image to inspect.

    Returns:
        The negated orientation as a float, or ``None`` when tesseract
        could not be started or printed no orientation line.
    """
    # Pass the arguments as a list: the previous string concatenation
    # produced "tesseract<filename> %s - -psm 0" (no space, unformatted
    # placeholder), and a list is also safe for names with spaces.
    # NOTE(review): newer tesseract releases spell this flag "--psm".
    try:
        completed = subprocess.run(
            ["tesseract", filename, "-", "-psm", "0"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # scan both streams, as getoutput() did
            universal_newlines=True,
        )
    except OSError:
        # tesseract missing or not executable: report "no rotation known",
        # the same result the shell-based call gave in that situation.
        return None

    info = 'Orientation in degrees: '
    degrees = None
    for line in completed.stdout.splitlines():
        if info in line:
            degrees = -float(line.replace(info, '').strip())
    return degrees

def fix_dpi_and_rotation(self, filename, degrees, ext):
    """Rotate an image and re-save it with 300 DPI metadata.

    Args:
        filename: Path of the image to open.
        degrees: Rotation angle passed to PIL's ``rotate``.
        ext: File extension of the image (e.g. ``".png"``); used to pick
            the output format.
    """
    # Pillow wants a format name ("PNG", "JPEG"), not a dotted extension;
    # passing ".png" straight through is rejected by save().
    fmt = ext.lstrip('.').upper()
    fmt = {'JPG': 'JPEG', 'TIF': 'TIFF'}.get(fmt, fmt)

    # The context manager closes the source file once the copy is written.
    with Im.open(filename) as im1:
        print('Fixing rotation %.2f in %s...' % (degrees, filename))
        # expand=True enlarges the canvas so 90/270 degree rotations of
        # non-square pages are not cropped.
        # NOTE(review): the result is written to '../<filename>', not over
        # the input file -- confirm that is the intended destination.
        im1.rotate(degrees, expand=True).save(
            '../%s' % filename, fmt, quality=97, dpi=(300, 300))

def main(self, path):
if bool(os.path.exists(path)):
Expand All @@ -125,6 +116,20 @@ def main(self, path):
filename = ''.join(e for e in filename if e.isalnum() or e == '-') #Join string of filename if it contains alphanumeric characters or -
self.pdf_run(image_file_name, filename)

for f in os.listdir(path):
ext = os.path.splitext(f)[1] #Split the pathname path into a pair i.e take .png/ .jpg etc

if ext.lower() not in VALIDITY: #Convert to lowercase and check in validity list
other_files += 1 #Increment if other than validity extension found
continue

else:
image_file_name = path + '/' + f #Full /dir/path/filename.extension

degrees = self.get_rotation_info(image_file_name)
if degrees:
self.fix_dpi_and_rotation(image_file_name, degrees, ext)

for f in os.listdir(path): #Return list of files in path directory

ext = os.path.splitext(f)[1] #Split the pathname path into a pair i.e take .png/ .jpg etc
Expand All @@ -134,25 +139,27 @@ def main(self, path):
filename = ''.join(e for e in filename if e.isalnum() or e == '-') #Join string of filename if it contains alphanumeric characters or -
text_file_path = directory_path + filename #Join dir_path with file_name

if ext.lower() not in VALIDITY: #Convert to lowercase and check in validity list
other_files += 1 #Increment if other than validity extension found
continue

if count == 0: #No directory created
self.create_directory(directory_path) #function to create directory
count += 1

#self.img_run(image_file_name, text_file_path)
if ext.lower() == ".pdf": #For PDF
continue

else:
degrees = self.get_rotation_info(image_file_name)

if degrees:
self.fix_dpi_and_rotation(image_file_name, degrees, ext)

call(["tesseract", image_file_name, text_file_path], stdout=FNULL) #Fetch tesseract with FNULL in write mode

print(str(count) + (" file" if count == 1 else " files") + " processed")

for f in os.listdir(path):
if f.startswith("saram_"):
os.remove(os.path.join(path, f))
os.remove(os.path.join(path, f))

if count + other_files == 0:
print("No files found") #No files found
Expand Down
34 changes: 34 additions & 0 deletions rotation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
import subprocess
import PIL.Image as Image

from glob import glob

# Path of the tesseract executable to run.
command = 'c:\\Share\\tesseract.exe'
# Sample image name; not referenced anywhere else in this script.
image = '337.jpg'
# Resolution (both axes) written into the re-saved images.
DPI = 300
# Argument template: image path, '-' as output base, then page
# segmentation mode 0 (presumably orientation detection -- verify against
# the installed tesseract version).
arguments = ' %s - -psm 0'


def get_rotation_info(filename):
    """Ask tesseract for the orientation of an image.

    Runs ``<command> <filename> - -psm 0`` and scans the combined
    stdout/stderr for the ``Orientation in degrees:`` line.

    Args:
        filename: Path of the image to inspect.

    Returns:
        The negated orientation as a float, or ``None`` when tesseract
        could not be started or printed no orientation line.
    """
    # An argument list (instead of one shell string) keeps file names that
    # contain spaces or shell metacharacters intact.
    try:
        completed = subprocess.run(
            [command, filename, '-', '-psm', '0'],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # scan both streams, as getoutput() did
            universal_newlines=True,
        )
    except OSError:
        # Executable missing: same "no rotation known" result as before.
        return None

    info = 'Orientation in degrees: '
    degrees = None
    for line in completed.stdout.splitlines():
        if info in line:
            degrees = -float(line.replace(info, '').strip())
    return degrees

def fix_dpi_and_rotation(filename, degrees, dpi_info):
    """Rotate an image and save it as JPEG with the given DPI metadata.

    Args:
        filename: Path of the image to open; the result is written to
            ``'../' + filename``.
        degrees: Rotation angle passed to PIL's ``rotate``.
        dpi_info: Resolution stored for both axes of the saved file.
    """
    # The context manager closes the source file once the copy is written.
    with Image.open(filename) as im1:
        print('Fixing rotation %.2f in %s...' % (degrees, filename))
        # expand=True enlarges the canvas so 90/270 degree rotations of
        # non-square images are not cropped.
        im1.rotate(degrees, expand=True).save(
            '../%s' % filename, 'JPEG', quality=97, dpi=(dpi_info, dpi_info))

# Walk every *.jpg in the current directory in sorted order and re-save the
# ones for which a non-zero rotation was reported.
filenames = sorted(glob('*.jpg'))
for filename in filenames:
    print('Checking %s...' % filename)
    degrees = get_rotation_info(filename)
    if not degrees:
        # None (nothing detected) or 0 degrees: leave the file alone.
        continue
    fix_dpi_and_rotation(filename, degrees, DPI)

0 comments on commit 1b07e18

Please sign in to comment.