Skip to content

Commit

Permalink
IN PROGRESS - Testing additional ingest functionality (User definable…
Browse files Browse the repository at this point in the history
… output formats & behaviour)
  • Loading branch information
crocodilestick committed Nov 18, 2024
1 parent fc18887 commit 06a5e0f
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 63 deletions.
20 changes: 10 additions & 10 deletions root/app/calibre-web/cps/cwa_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def set_cwa_settings():
'fbz', 'html', 'htmlz', 'lit',
'lrf', 'mobi', 'odt', 'pdf',
'prc', 'pdb', 'pml', 'rb',
'rtf', 'snb', 'tcr', 'txtz']
'rtf', 'snb', 'tcr', 'txtz', 'txt']
target_formats = ['epub', 'azw3', 'kepub', 'mobi', 'pdf']
boolean_settings = ["auto_backup_imports",
"auto_backup_conversions",
Expand All @@ -90,13 +90,13 @@ def set_cwa_settings():
"auto_convert"]
string_settings = ["auto_convert_target_format"]
for format in ignorable_formats:
string_settings.append(f"ignore_import_{format}")
string_settings.append(f"ignore_ingest_{format}")
string_settings.append(f"ignore_convert_{format}")

if request.method == 'POST':
cwa_db = CWA_DB()
if request.form['submit_button'] == "Submit":
result = {"auto_convert_ignored_formats":[], "auto_import_ignored_formats":[]}
result = {"auto_convert_ignored_formats":[], "auto_ingest_ignored_formats":[]}
# set boolean_settings
for setting in boolean_settings:
value = request.form.get(setting)
Expand All @@ -114,27 +114,27 @@ def set_cwa_settings():
else:
result["auto_convert_ignored_formats"].append(value)
continue
elif setting[:13] == "ignore_import":
elif setting[:13] == "ignore_ingest":
if value == None:
continue
else:
result["auto_import_ignored_formats"].append(value)
result["auto_ingest_ignored_formats"].append(value)
continue
elif setting == "auto_import_target_format" and value == None:
value = cwa_db.cwa_settings['auto_import_target_format']
elif setting == "auto_convert_target_format" and value == None:
value = cwa_db.cwa_settings['auto_convert_target_format']

result |= {setting:value}

# Prevent ignoring of target format
if result['auto_convert_target_format'] in result['auto_convert_ignored_formats']:
result['auto_convert_ignored_formats'].remove(result['auto_convert_target_format'])
if result['auto_convert_target_format'] in result['auto_import_ignored_formats']:
result['auto_import_ignored_formats'].remove(result['auto_convert_target_format'])
if result['auto_convert_target_format'] in result['auto_ingest_ignored_formats']:
result['auto_ingest_ignored_formats'].remove(result['auto_convert_target_format'])

# DEBUGGING
with open("/config/post_request" ,"w") as f:
for key in result.keys():
if key == "auto_convert_ignored_formats" or key == "auto_import_ignored_formats":
if key == "auto_convert_ignored_formats" or key == "auto_ingest_ignored_formats":
f.write(f"{key} - {', '.join(result[key])}\n")
else:
f.write(f"{key} - {result[key]}\n")
Expand Down
16 changes: 8 additions & 8 deletions root/app/calibre-web/cps/templates/cwa_settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,27 @@ <h3>CWA Auto-Convert - Ignored Formats</h3>
{% endfor %}
</div>

<h3>CWA Auto-Import - Ignored Formats</h3>
<h3>CWA Auto-Ingest - Ignored Formats</h3>
<p style="color: whitesmoke;
font-style: italic;
font-size: inherit;
line-height: normal;
padding-left: 10px;
max-width: 90rem;">The formats selected here will be ignored by CWA's Auto-Import feature</p>
max-width: 90rem;">The formats selected here will be ignored by CWA's Auto-Ingest feature</p>
<div style="max-width: 90rem; padding-left: 30px;">
{% for format in ignorable_formats -%}
<label for="ignore_import_{{ format }}" style="width: 75px; padding-right: 6px;">
{% if format in cwa_settings['auto_import_ignored_formats'] %}
<label for="ignore_ingest_{{ format }}" style="width: 75px; padding-right: 6px;">
{% if format in cwa_settings['auto_ingest_ignored_formats'] %}
{% if format == cwa_settings['auto_convert_target_format'] %}
<input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
<input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
{% else %}
<input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" checked style="vertical-align: middle;">
<input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" checked style="vertical-align: middle;">
{% endif %}
{% else %}
{% if format == cwa_settings['auto_convert_target_format'] %}
<input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
<input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" disabled style="vertical-align: middle;">
{% else %}
<input type="checkbox" id="ignore_import_{{ format }}" name="ignore_import_{{ format }}" value="{{ format }}" style="vertical-align: middle;">
<input type="checkbox" id="ignore_ingest_{{ format }}" name="ignore_ingest_{{ format }}" value="{{ format }}" style="vertical-align: middle;">
{% endif %}
{% endif %}
<span style="padding-left: 4px; vertical-align: middle;">{{ format }}</span>
Expand Down
8 changes: 4 additions & 4 deletions scripts/cwa_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self, verbose=False):
"auto_convert": 1,
"auto_convert_target_format": "epub",
"auto_convert_ignored_formats":"",
"auto_import_ignored_formats":""}
"auto_ingest_ignored_formats":""}

self.tables, self.schema = self.make_tables()
self.ensure_schema_match()
Expand Down Expand Up @@ -168,15 +168,15 @@ def get_cwa_settings(self) -> dict[str:bool|str]:
if type(cwa_settings[header]) == int:
cwa_settings[header] = bool(cwa_settings[header])
cwa_settings['auto_convert_ignored_formats'] = cwa_settings['auto_convert_ignored_formats'].split(',')
cwa_settings['auto_import_ignored_formats'] = cwa_settings['auto_import_ignored_formats'].split(',')
cwa_settings['auto_ingest_ignored_formats'] = cwa_settings['auto_ingest_ignored_formats'].split(',')

return cwa_settings


def update_cwa_settings(self, result) -> None:
"""Sets settings using POST request from set_cwa_settings()"""
for setting in result.keys():
if setting == "auto_convert_ignored_formats" or setting == "auto_import_ignored_formats":
if setting == "auto_convert_ignored_formats" or setting == "auto_ingest_ignored_formats":
result[setting] = ','.join(result[setting])

if type(result[setting]) == int:
Expand Down Expand Up @@ -287,7 +287,7 @@ def import_add_entry(self, filename, original_backed_up):
self.con.commit()


def conversion_add_entry(self, filename, original_format, original_backed_up):
def conversion_add_entry(self, filename, original_format, original_backed_up): # TODO Add end_format
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
self.cur.execute("INSERT INTO cwa_conversions(timestamp, filename, original_format, original_backed_up) VALUES (?, ?, ?, ?);", (timestamp, filename, original_format, original_backed_up))
self.con.commit()
Expand Down
2 changes: 1 addition & 1 deletion scripts/cwa_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ CREATE TABLE IF NOT EXISTS cwa_settings(
auto_convert SMALLINT DEFAULT 1 NOT NULL,
auto_convert_target_format TEXT DEFAULT "epub" NOT NULL,
auto_convert_ignored_formats TEXT DEFAULT "" NOT NULL,
auto_import_ignored_formats TEXT DEFAULT "" NOT NULL
auto_ingest_ignored_formats TEXT DEFAULT "" NOT NULL
);
90 changes: 50 additions & 40 deletions scripts/ingest-processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,18 @@ def __init__(self, filepath: str):
self.db = CWA_DB()
self.cwa_settings = self.db.cwa_settings

self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz']
self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz']
self.auto_convert_on = self.db.cwa_settings['auto_convert']
self.target_format = self.db.cwa_settings['auto_convert_target_format']
self.ingest_ignored_formats = self.db.cwa_settings['auto_ingest_ignored_formats']
self.convert_ignored_formats = self.db.cwa_settings['auto_convert_ignored_formats']

self.supported_book_formats = ['azw', 'azw3', 'azw4', 'cbz', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'docx', 'epub', 'fb2', 'fbz', 'html', 'htmlz', 'lit', 'lrf', 'mobi', 'odt', 'pdf', 'prc', 'pdb', 'pml', 'rb', 'rtf', 'snb', 'tcr', 'txtz', 'txt']
self.hierarchy_of_success = ['lit', 'mobi', 'azw', 'epub', 'azw3', 'fb2', 'fbz', 'azw4', 'prc', 'odt', 'lrf', 'pdb', 'cbz', 'pml', 'rb', 'cbr', 'cb7', 'cbc', 'chm', 'djvu', 'snb', 'tcr', 'pdf', 'docx', 'rtf', 'html', 'htmlz', 'txtz', 'txt']
self.ingest_folder, self.library_dir, self.tmp_conversion_dir = self.get_dirs("/app/calibre-web-automated/dirs.json")

self.filepath = filepath # path of the book we're targeting
self.filename = os.path.basename(filepath)
self.is_epub: bool = bool(self.filepath.endswith('.epub'))
self.is_target_format = bool(self.filepath.endswith(self.target_format))


def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:
Expand All @@ -68,10 +73,10 @@ def get_dirs(self, dirs_json_path: str) -> tuple[str, str, str]:


def convert_book(self, import_format: str) -> tuple[bool, str]:
"""Uses the following terminal command to convert the books provided using the calibre converter tool:\n\n--- ebook-convert myfile.input_format myfile.output_format\n\nAnd then saves the resulting epubs to the calibre-web import folder."""
"""Uses the following terminal command to convert the books provided using the calibre converter tool:\n\n--- ebook-convert myfile.input_format myfile.output_format\n\nAnd then saves the resulting files to the calibre-web import folder."""
print(f"[ingest-processor]: START_CON: Converting {self.filename}...\n")
original_filepath = Path(self.filepath)
target_filepath = f"{self.tmp_conversion_dir}{original_filepath.stem}.epub"
target_filepath = f"{self.tmp_conversion_dir}{original_filepath.stem}.{self.target_format}"
try:
t_convert_book_start = time.time()
subprocess.run(['ebook-convert', self.filepath, target_filepath], check=True)
Expand All @@ -83,8 +88,8 @@ def convert_book(self, import_format: str) -> tuple[bool, str]:
shutil.copyfile(self.filepath, f"/config/processed_books/converted/{os.path.basename(original_filepath)}")

self.db.conversion_add_entry(original_filepath.stem,
import_format,
str(self.cwa_settings["auto_backup_conversions"]))
import_format,
str(self.cwa_settings["auto_backup_conversions"]))

return True, target_filepath

Expand All @@ -94,20 +99,18 @@ def convert_book(self, import_format: str) -> tuple[bool, str]:
return False, ""


def can_convert_check(self):
"""When no epubs are detected in the download, this function will go through the list of new files
and check for the format the are in that has the highest chance of successful conversion according to the input format hierarchy list
provided by calibre"""
def can_convert_check(self, bool_only=False) -> tuple[bool, str]:
"""When the current filepath isn't of the target format, this function will check if the file is able to be converted to the target format,
returning a can_convert bool with the answer"""
can_convert = False
import_format = ''
for format in self.hierarchy_of_success:
can_be_converted = bool(self.filepath.endswith(f'.{format}'))
if can_be_converted:
can_convert = True
import_format = format
break
import_format = Path(self.filepath).suffix
if import_format in self.supported_book_formats:
can_convert = True

return can_convert, import_format
if bool_only:
return can_convert, import_format
else:
return can_convert


def delete_current_file(self) -> None:
Expand All @@ -117,7 +120,7 @@ def delete_current_file(self) -> None:


def add_book_to_library(self, book_path) -> None:
print("[ingest-processor]: Importing new epub to CWA...")
print("[ingest-processor]: Importing new book to CWA...")
import_path = Path(book_path)
import_filename = os.path.basename(book_path)
try:
Expand All @@ -128,7 +131,7 @@ def add_book_to_library(self, book_path) -> None:
shutil.copyfile(book_path, f"/config/processed_books/imported/{import_filename}")

self.db.import_add_entry(import_path.stem,
str(self.cwa_settings["auto_backup_imports"]))
str(self.cwa_settings["auto_backup_imports"]))

except subprocess.CalledProcessError as e:
print(f"[ingest-processor] {import_path.stem} was not able to be added to the Calibre Library due to the following error:\nCALIBREDB EXIT/ERROR CODE: {e.returncode}\n{e.stderr}")
Expand Down Expand Up @@ -165,26 +168,33 @@ def main(filepath=sys.argv[1]):

nbp = NewBookProcessor(filepath)

if not nbp.is_epub: # Books require conversion
print(f"\n[ingest-processor]: Starting conversion process for {nbp.filename}...")
can_convert, import_format = nbp.can_convert_check()
print(f"[ingest-processor]: Converting file from {import_format} to epub format...\n")

if can_convert:
result, epub_filepath = nbp.convert_book(import_format)
if result:
nbp.add_book_to_library(epub_filepath)
nbp.empty_tmp_con_dir()
# Check if the user has chosen to exclude files of this type from the ingest process
if Path(nbp.filename).suffix in nbp.ingest_ignored_formats:
continue
else:
if nbp.is_target_format: # File can just be imported
print(f"\n[ingest-processor]: No conversion needed for {nbp.filename}, importing now...")
nbp.add_book_to_library(filepath)
else:
print(f"[ingest-processor]: Cannot convert {nbp.filepath}. {import_format} is currently unsupported.")

else: # Books need imported
print(f"\n[ingest-processor]: No conversion needed for {nbp.filename}, importing now...")
nbp.add_book_to_library(filepath)

nbp.set_library_permissions()
nbp.delete_current_file()
del nbp # New in Version 2.0.0, should drastically reduce memory usage with large ingests
if nbp.auto_convert_on and nbp.can_convert_check(bool_only=True): # File can be converted to target format and Auto-Converter is on
print(f"\n[ingest-processor]: Starting conversion process for {nbp.filename}...")
can_convert, import_format = nbp.can_convert_check()
print(f"[ingest-processor]: Converting file from {import_format} to epub format...\n")

if can_convert:
result, converted_filepath = nbp.convert_book(import_format)
if result:
nbp.add_book_to_library(converted_filepath)
nbp.empty_tmp_con_dir()
elif not nbp.auto_convert_on and nbp.can_convert_check(bool_only=True): # Books not in target format but Auto-Converter is off so files are imported anyway
print(f"\n[ingest-processor]: {nbp.filename} not in target format but CWA Auto-Convert is deactivated so importing file anyway...")
nbp.add_book_to_library(filepath)
else:
print(f"[ingest-processor]: Cannot convert {nbp.filepath}. {import_format} is currently unsupported / is not a known ebook format.")

nbp.set_library_permissions()
nbp.delete_current_file()
del nbp # New in Version 2.0.0, should drastically reduce memory usage with large ingests

if __name__ == "__main__":
main()

0 comments on commit 06a5e0f

Please sign in to comment.