From 04cd175390b3264c6c1f6200653adeb3e607cb13 Mon Sep 17 00:00:00 2001 From: Sunim Acharya Date: Sun, 18 Feb 2018 21:28:26 +0545 Subject: [PATCH] Update for beta state --- README.md | 9 ++++++--- README.rst | 2 +- setup.py | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index de7d004..87e055c 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # Saram - Image/PDF OCR conversion Get OCR in txt form from an image or pdf extension supporting multiple files from directory using `pytesseract` with support for rotation in case of wrong orientation along. -**Currently in alpha state** +**Currently in beta state** [![Saram features](https://i.imgur.com/M9dAwPq.gif)](https://i.imgur.com/M9dAwPq.gif) **Note:** -Make sure you have a OCR tool like `tesseract` and certain data value for comparing OCR, eg `tesseract-data-eng` along with `Pillow` and `Wand` for image conversion and loading which will be fetched during pip install +Make sure you have an OCR tool like `tesseract` and certain data value for comparing OCR, e.g. `tesseract-data-eng` along with `Pillow` and `Wand` for image conversion and loading which will be fetched during pip install. 
+ +**For use in Python**: +Refer to the py-module branch ## Installation @@ -26,7 +29,7 @@ $ python main.py ``` ## Todo -- [x] Add support for PDF by PDF -> image -> txt with converted image deletion after processing +- [x] Add support for PDF by PDF -> Image -> Txt with converted image deletion after processing - [x] Double check for orientation in case of image and PDF - [x] Make a PIP package - [ ] Add NLP to process the most repeated frequent characters to filer content diff --git a/README.rst b/README.rst index a1b1414..e23c584 100644 --- a/README.rst +++ b/README.rst @@ -3,4 +3,4 @@ To use (with caution), simply do:: $ pip install saram $ saram -Mkae sure you have a OCR tool like tesseract and certain data value for comparing OCR \ No newline at end of file +Make sure you have an OCR tool like `tesseract` and certain data value for comparing OCR, e.g. `tesseract-data-eng` along with `Pillow` and `Wand` for image conversion and loading which will be fetched during pip install. \ No newline at end of file diff --git a/setup.py b/setup.py index 84497d7..789323f 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ def readme(): setup( name = 'saram', packages = ['saram'], # this must be the same as the name above - version = '0.8.2', + version = '1.0.1', description = 'A library to fetch images from a directory and get OCR and store in txt with orientation rotation check and pdf support.', long_description = readme(), author = 'Sunim Acharya',