IN PROGRESS - Testing additional ingest functionality (User definable output formats & behaviour)
crocodilestick · Nov 18, 2024 · 06a5e0f · 06a5e0f
1 parent fc18887
commit 06a5e0f
Show file tree

Hide file tree

Showing 5 changed files with 73 additions and 63 deletions.
diff --git a/root/app/calibre-web/cps/cwa_functions.py b/root/app/calibre-web/cps/cwa_functions.py
@@ -81,7 +81,7 @@ def set_cwa_settings():
                         'fbz', 'html', 'htmlz', 'lit',
                         'lrf', 'mobi', 'odt', 'pdf',
                         'prc', 'pdb', 'pml', 'rb',
-                        'rtf', 'snb', 'tcr', 'txtz']
+                        'rtf', 'snb', 'tcr', 'txtz', 'txt']
     target_formats = ['epub', 'azw3', 'kepub', 'mobi', 'pdf']
     boolean_settings = ["auto_backup_imports",
                         "auto_backup_conversions",
@@ -90,13 +90,13 @@ def set_cwa_settings():
                         "auto_convert"]
     string_settings = ["auto_convert_target_format"]
     for format in ignorable_formats:
-        string_settings.append(f"ignore_import_{format}")
+        string_settings.append(f"ignore_ingest_{format}")
         string_settings.append(f"ignore_convert_{format}")
 
     if request.method == 'POST':
         cwa_db = CWA_DB()
         if request.form['submit_button'] == "Submit":
-            result = {"auto_convert_ignored_formats":[], "auto_import_ignored_formats":[]}
+            result = {"auto_convert_ignored_formats":[], "auto_ingest_ignored_formats":[]}
             # set boolean_settings
             for setting in boolean_settings:
                 value = request.form.get(setting)
@@ -114,27 +114,27 @@ def set_cwa_settings():
                     else:
                         result["auto_convert_ignored_formats"].append(value)
                         continue
-                elif setting[:13] == "ignore_import":
+                elif setting[:13] == "ignore_ingest":
                     if value == None:
                         continue
                     else:
-                        result["auto_import_ignored_formats"].append(value)
+                        result["auto_ingest_ignored_formats"].append(value)
                         continue
-                elif setting == "auto_import_target_format" and value == None:
-                    value = cwa_db.cwa_settings['auto_import_target_format']
+                elif setting == "auto_convert_target_format" and value == None:
+                    value = cwa_db.cwa_settings['auto_convert_target_format']
 
                 result |= {setting:value}
 
             # Prevent ignoring of target format
             if result['auto_convert_target_format'] in result['auto_convert_ignored_formats']:
                 result['auto_convert_ignored_formats'].remove(result['auto_convert_target_format'])
-            if result['auto_convert_target_format'] in result['auto_import_ignored_formats']:
-                result['auto_import_ignored_formats'].remove(result['auto_convert_target_format'])
+            if result['auto_convert_target_format'] in result['auto_ingest_ignored_formats']:
+                result['auto_ingest_ignored_formats'].remove(result['auto_convert_target_format'])
 
             # DEBUGGING
             with open("/config/post_request" ,"w") as f:
                 for key in result.keys():
-                    if key == "auto_convert_ignored_formats" or key == "auto_import_ignored_formats":
+                    if key == "auto_convert_ignored_formats" or key == "auto_ingest_ignored_formats":
                         f.write(f"{key} - {', '.join(result[key])}\n")
                     else:
                         f.write(f"{key} - {result[key]}\n")

diff --git a/root/app/calibre-web/cps/templates/cwa_settings.html b/root/app/calibre-web/cps/templates/cwa_settings.html
@@ -98,27 +98,27 @@ <h3>CWA Auto-Convert - Ignored Formats</h3>
           {% endfor %}
         </div>
 
-        <h3>CWA Auto-Import - Ignored Formats</h3>
+        <h3>CWA Auto-Ingest - Ignored Formats</h3>
         <p style="color: whitesmoke;
         font-style: italic;
         font-size: inherit;
         line-height: normal;
         padding-left: 10px;
-        max-width: 90rem;">The formats selected here will be ignored by CWA's Auto-Import feature</p>
+        max-width: 90rem;">The formats selected here will be ignored by CWA's Auto-Ingest feature</p>
         <div style="max-width: 90rem; padding-left: 30px;">
           {% for format in ignorable_formats -%}
-            <label for="ignore_import_{{ format }}" style="width: 75px; padding-right: 6px;">
-              {% if format in cwa_settings['auto_import_ignored_formats'] %}
+            <label for="ignore_ingest_{{ format }}" style="width: 75px; padding-right: 6px;">
+              {% if format in cwa_settings['auto_ingest_ignored_formats'] %}
                 {% if format == cwa_settings['auto_convert_target_format'] %}
-                  <input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
+                  <input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
                 {% else %}
-                  <input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" checked style="vertical-align: middle;">
+                  <input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" checked style="vertical-align: middle;">
                 {% endif %}
               {% else %}
                 {% if format == cwa_settings['auto_convert_target_format'] %}
-                  <input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
+                  <input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
                 {% else %}
-                  <input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" style="vertical-align: middle;">
+                  <input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" style="vertical-align: middle;">
                 {% endif %}
               {% endif %}
               <span style="padding-left: 4px; vertical-align: middle;">{{ format }}</span>

diff --git a/scripts/cwa_db.py b/scripts/cwa_db.py
@@ -27,7 +27,7 @@ def __init__(self, verbose=False):
                                     "auto_convert": 1,
                                     "auto_convert_target_format": "epub",
                                     "auto_convert_ignored_formats":"",
-                                    "auto_import_ignored_formats":""}
+                                    "auto_ingest_ignored_formats":""}
 
         self.tables, self.schema = self.make_tables()
         self.ensure_schema_match()
@@ -168,15 +168,15 @@ def get_cwa_settings(self) -> dict[str:bool|str]:
             if type(cwa_settings[header]) == int:
                 cwa_settings[header] = bool(cwa_settings[header])
         cwa_settings['auto_convert_ignored_formats'] = cwa_settings['auto_convert_ignored_formats'].split(',')
-        cwa_settings['auto_import_ignored_formats'] = cwa_settings['auto_import_ignored_formats'].split(',')
+        cwa_settings['auto_ingest_ignored_formats'] = cwa_settings['auto_ingest_ignored_formats'].split(',')
 
         return cwa_settings
 
 
     def update_cwa_settings(self, result) -> None:
         """Sets settings using POST request from set_cwa_settings()"""
         for setting in result.keys():
-            if setting == "auto_convert_ignored_formats" or setting == "auto_import_ignored_formats":
+            if setting == "auto_convert_ignored_formats" or setting == "auto_ingest_ignored_formats":
                 result[setting] = ','.join(result[setting])
 
             if type(result[setting]) == int:
@@ -287,7 +287,7 @@ def import_add_entry(self, filename, original_backed_up):
         self.con.commit()
 
 
-    def conversion_add_entry(self, filename, original_format, original_backed_up):
+    def conversion_add_entry(self, filename, original_format, original_backed_up): # TODO Add end_format
         timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         self.cur.execute("INSERT INTO cwa_conversions(timestamp, filename, original_format, original_backed_up) VALUES (?, ?, ?, ?);", (timestamp, filename, original_format, original_backed_up))
         self.con.commit()

diff --git a/scripts/cwa_schema.sql b/scripts/cwa_schema.sql
@@ -29,5 +29,5 @@ CREATE TABLE IF NOT EXISTS cwa_settings(
     auto_convert SMALLINT DEFAULT 1 NOT NULL,
     auto_convert_target_format TEXT DEFAULT "epub" NOT NULL,
     auto_convert_ignored_formats TEXT DEFAULT "" NOT NULL,
-    auto_import_ignored_formats TEXT DEFAULT "" NOT NULL
+    auto_ingest_ignored_formats TEXT DEFAULT "" NOT NULL
 );
diff --git a/scripts/ingest-processor.py b/scripts/ingest-processor.py
@@ -46,13 +46,18 @@ def __init__(self, filepath: str):
         self.db = CWA_DB()
         self.cwa_settings = self.db.cwa_settings
 
-        self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz']
-        self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4',  'prc', 'odt', 'lrf', 'pdb',  'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz']
+        self.auto_convert_on = self.db.cwa_settings['auto_convert']
+        self.target_format = self.db.cwa_settings['auto_convert_target_format']
+        self.ingest_ignored_formats = self.db.cwa_settings['auto_ingest_ignored_formats']
+        self.convert_ignored_formats = self.db.cwa_settings['auto_convert_ignored_formats']
+
+        self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz', 'txt']
+        self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4',  'prc', 'odt', 'lrf', 'pdb',  'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
         self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs("/app/calibre-web-automated/dirs.json")
 
         self.filepath = filepath # path of the book we're targeting
         self.filename = os.path.basename(filepath)
-        self.is_epub: bool = bool(self.filepath.endswith('.epub'))
+        self.is_target_format = bool(self.filepath.endswith(f'.{self.target_format}')) # Leading dot so e.g. 'book.kepub' isn't mistaken for the 'epub' target format
 
 
     def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
@@ -68,10 +73,10 @@ def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
 
 
     def convert_book(self, import_format: str) -> tuple[bool, str]:
-        """Uses the following terminal command to convert the books provided using the calibre converter tool:\n\n--- ebook-convert myfile.input_format myfile.output_format\n\nAnd then saves the resulting epubs to the calibre-web import folder."""
+        """Uses the following terminal command to convert the books provided using the calibre converter tool:\n\n--- ebook-convert myfile.input_format myfile.output_format\n\nAnd then saves the resulting files to the calibre-web import folder."""
         print(f"[ingest-processor]: START_CON: Converting {self.filename}...\n")
         original_filepath = Path(self.filepath)
-        target_filepath = f"{self.tmp_conversion_dir}{original_filepath.stem}.epub"
+        target_filepath = f"{self.tmp_conversion_dir}{original_filepath.stem}.{self.target_format}"
         try:
             t_convert_book_start = time.time()
             subprocess.run(['ebook-convert', self.filepath, target_filepath], check=True)
@@ -83,8 +88,8 @@ def convert_book(self, import_format: str) -> tuple[bool, str]:
                 shutil.copyfile(self.filepath, f"/config/processed_books/converted/{os.path.basename(original_filepath)}")
 
             self.db.conversion_add_entry(original_filepath.stem,
-                                         import_format,
-                                         str(self.cwa_settings["auto_backup_conversions"]))
+                                        import_format,
+                                        str(self.cwa_settings["auto_backup_conversions"]))
 
             return True, target_filepath
 
@@ -94,20 +99,18 @@ def convert_book(self, import_format: str) -> tuple[bool, str]:
             return False, ""
 
 
-    def can_convert_check(self):
-        """When no epubs are detected in the download, this function will go through the list of new files 
-        and check for the format the are in that has the highest chance of successful conversion according to the input format hierarchy list 
-        provided by calibre"""
+    def can_convert_check(self, bool_only=False) -> tuple[bool, str] | bool:
+        """Checks whether the current file's extension appears in self.supported_book_formats, i.e. whether it can be converted to the target format.
+        Returns only the can_convert bool when bool_only is True, otherwise a (can_convert, import_format) tuple where import_format is the extension without its leading dot"""
         can_convert = False
-        import_format = ''
-        for format in self.hierarchy_of_success:
-            can_be_converted = bool(self.filepath.endswith(f'.{format}'))
-            if can_be_converted:
-                can_convert = True
-                import_format = format
-                break
+        import_format = Path(self.filepath).suffix.lstrip('.') # Path.suffix keeps the leading dot, the entries in supported_book_formats don't
+        if import_format in self.supported_book_formats:
+            can_convert = True
 
-        return can_convert, import_format
+        if bool_only:
+            return can_convert
+        else:
+            return can_convert, import_format
 
 
     def delete_current_file(self) -> None:
@@ -117,7 +120,7 @@ def delete_current_file(self) -> None:
 
 
     def add_book_to_library(self, book_path) -> None:
-        print("[ingest-processor]: Importing new epub to CWA...")
+        print("[ingest-processor]: Importing new book to CWA...")
         import_path = Path(book_path)
         import_filename = os.path.basename(book_path)
         try:
@@ -128,7 +131,7 @@ def add_book_to_library(self, book_path) -> None:
                 shutil.copyfile(book_path, f"/config/processed_books/imported/{import_filename}")
 
             self.db.import_add_entry(import_path.stem,
-                                     str(self.cwa_settings["auto_backup_imports"]))
+                                    str(self.cwa_settings["auto_backup_imports"]))
 
         except subprocess.CalledProcessError as e:
             print(f"[ingest-processor] {import_path.stem} was not able to be added to the Calibre Library due to the following error:\nCALIBREDB EXIT/ERROR CODE: {e.returncode}\n{e.stderr}")
@@ -165,26 +168,35 @@ def main(filepath=sys.argv[1]):
 
     nbp = NewBookProcessor(filepath)
 
-    if not nbp.is_epub: # Books require conversion
-        print(f"\n[ingest-processor]: Starting conversion process for {nbp.filename}...")
-        can_convert, import_format = nbp.can_convert_check()
-        print(f"[ingest-processor]: Converting file from {import_format} to epub format...\n")
-
-        if can_convert:
-            result, epub_filepath = nbp.convert_book(import_format)
-            if result:
-                nbp.add_book_to_library(epub_filepath)
-                nbp.empty_tmp_con_dir()
+    # Check if the user has chosen to exclude files of this type from the ingest process
+    # (Path.suffix keeps the leading dot while the stored format names don't, so it's stripped before comparing)
+    file_format = Path(nbp.filename).suffix.lstrip('.')
+    if file_format and file_format in nbp.ingest_ignored_formats:
+        print(f"[ingest-processor]: {nbp.filename} is of a format the user has chosen to ignore, skipping...")
+        return # main() isn't a loop body, so the previous `continue` here was a SyntaxError
+
+    if nbp.is_target_format: # File can just be imported
+        print(f"\n[ingest-processor]: No conversion needed for {nbp.filename}, importing now...")
+        nbp.add_book_to_library(filepath)
+    else:
+        can_convert, import_format = nbp.can_convert_check()
+        if nbp.auto_convert_on and can_convert: # File can be converted to target format and Auto-Converter is on
+            print(f"\n[ingest-processor]: Starting conversion process for {nbp.filename}...")
+            print(f"[ingest-processor]: Converting file from {import_format} to {nbp.target_format} format...\n")
+            result, converted_filepath = nbp.convert_book(import_format)
+            if result:
+                nbp.add_book_to_library(converted_filepath)
+                nbp.empty_tmp_con_dir()
+        elif can_convert: # Books not in target format but Auto-Converter is off so files are imported anyway
+            print(f"\n[ingest-processor]: {nbp.filename} not in target format but CWA Auto-Convert is deactivated so importing file anyway...")
+            nbp.add_book_to_library(filepath)
         else:
-            print(f"[ingest-processor]: Cannot convert {nbp.filepath}. {import_format} is currently unsupported.")
+            print(f"[ingest-processor]: Cannot convert {nbp.filepath}. {import_format} is currently unsupported / is not a known ebook format.")
 
-    else: # Books need imported
-        print(f"\n[ingest-processor]: No conversion needed for {nbp.filename}, importing now...")
-        nbp.add_book_to_library(filepath)
-
+    # Clean-up runs for every processed file, including those that were already in the target format
     nbp.set_library_permissions()
     nbp.delete_current_file()
     del nbp # New in Version 2.0.0, should drastically reduce memory usage with large ingests
 
 if __name__ == "__main__":
     main()