From 130f7cda001f65b34656f8a87ec446e96a870ea8 Mon Sep 17 00:00:00 2001
From: RaphaelK
Date: Mon, 2 Sep 2019 16:28:19 +0200
Subject: [PATCH] add docx support

---
Review notes (text in this section is not part of the commit):
- The converted .txt file is now written as UTF-8, because homer_cmd reads
  it back with encoding='utf-8'.
- The .docx extension check is now case-insensitive.
- homer_cmd now closes the input file via a `with` block.
- NOTE(review): line breaks, indentation and blank context lines were
  reconstructed from a copy whose whitespace had been collapsed, and the
  index hashes predate the edits above. Verify the context against the
  tree (or regenerate with `git format-patch`) before applying.

 homer/constants.py |  2 ++
 homer/homer_cmd.py | 27 +++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/homer/constants.py b/homer/constants.py
index 92abcd5..3950d41 100644
--- a/homer/constants.py
+++ b/homer/constants.py
@@ -26,5 +26,7 @@
     'a touch', 'sometimes', 'mostly', 'possibly', 'might', 'a tad', 'hardly', 'seem'
 ]
 
+DOCX_EXTENSION = ".docx"
+TXT_EXTENSION = ".txt"
 
 
diff --git a/homer/homer_cmd.py b/homer/homer_cmd.py
index 31092a8..71322c5 100644
--- a/homer/homer_cmd.py
+++ b/homer/homer_cmd.py
@@ -10,16 +10,39 @@
 """
 import os
 import click
+import docx2txt
 from homer.analyzer import Article
 from homer.cmdline_printer import ArticlePrinter
+from homer.constants import DOCX_EXTENSION, TXT_EXTENSION
+
+
+def create_txt_from_docx(file_path):
+    """Return an absolute path to a plain-text version of ``file_path``.
+
+    A ``.docx`` input is converted with ``docx2txt`` and written next to the
+    original as ``<name>.txt``, overwriting any existing file of that name.
+    Any other input is returned unchanged, as an absolute path.
+    """
+    filename, file_extension = os.path.splitext(file_path)
+    # Compare case-insensitively so that e.g. "REPORT.DOCX" is converted too.
+    if file_extension.lower() != DOCX_EXTENSION:
+        return os.path.abspath(file_path)
+    txt_path = os.path.abspath(filename + TXT_EXTENSION)
+    text = docx2txt.process(file_path)
+    # homer_cmd reads the file back as UTF-8, so write it as UTF-8 as well
+    # instead of relying on the platform's default encoding.
+    with open(txt_path, "w", encoding="utf-8") as text_file:
+        print(text, file=text_file)
+    return txt_path
 
 
 @click.command()
 @click.option('--name', help='Article name, can be an empty string.')
 @click.option('--author', help='Author name, can be an empty string.')
 @click.option('--file_path', required=True, type=click.Path(exists=True))
 def homer_cmd(name, author, file_path):
-    file_path = os.path.abspath(file_path)
-    printer = ArticlePrinter(Article(name, author, open(file_path, mode='r', encoding='utf-8').read()))
+    new_path = create_txt_from_docx(file_path)
+    with open(new_path, mode='r', encoding='utf-8') as text_file:
+        printer = ArticlePrinter(Article(name, author, text_file.read()))
     printer.print_article_stats()
     printer.print_paragraph_stats()