diff --git a/minet/cli/extract/__init__.py b/minet/cli/extract/__init__.py index 0c92a7f8fa..e29d2e4f64 100644 --- a/minet/cli/extract/__init__.py +++ b/minet/cli/extract/__init__.py @@ -1,4 +1,4 @@ -from casanova import ThreadSafeResumer +from casanova import IndexedResumer from minet.cli.argparse import command @@ -91,7 +91,7 @@ def resolve_arguments(cli_args): """, resolve=resolve_arguments, variadic_input={"dummy_column": "path", "optional": True, "no_help": True}, - resumer=ThreadSafeResumer, + resumer=IndexedResumer, arguments=[ { "flags": ["-g", "--glob"], diff --git a/minet/cli/extract/extract.py b/minet/cli/extract/extract.py index 3bdcd136de..06b73c7e61 100644 --- a/minet/cli/extract/extract.py +++ b/minet/cli/extract/extract.py @@ -9,7 +9,7 @@ from os.path import isdir from dataclasses import dataclass from itertools import count -from casanova import ThreadSafeEnricher +from casanova import IndexedEnricher from threading import Lock from minet.exceptions import TrafilaturaError @@ -72,13 +72,13 @@ def worker(payload: ExtractWorkerPayload) -> ExtractResult: @with_enricher_and_loading_bar( headers=HEADERS, - enricher_type="threadsafe", + enricher_type="indexed", index_column="extract_original_index", title="Extracting text", unit="docs", total_from_enricher=False, ) -def action(cli_args, enricher: ThreadSafeEnricher, loading_bar): +def action(cli_args, enricher: IndexedEnricher, loading_bar): if cli_args.input_dir is not None and not isdir(cli_args.input_dir): raise FatalError( 'Could not find the [cyan]-I/--input-dir "{}"[/cyan] directory!'.format( diff --git a/minet/cli/fetch/__init__.py b/minet/cli/fetch/__init__.py index f10d0a1de4..adab91d168 100644 --- a/minet/cli/fetch/__init__.py +++ b/minet/cli/fetch/__init__.py @@ -1,6 +1,6 @@ -from casanova import ThreadSafeResumer +from casanova import IndexedResumer -from minet.cli.argparse import command, BooleanAction, FolderStrategyType +from minet.cli.argparse import command, FolderStrategyType from minet.cli.constants import DEFAULT_CONTENT_FOLDER, DEFAULT_SCREENSHOT_FOLDER from minet.cli.exceptions import InvalidArgumentsError @@ -161,7 +161,7 @@ def resolve_fetch_arguments(cli_args): $ minet fetch url -i file.csv -O html > report.csv """, resolve=resolve_fetch_arguments, - resumer=ThreadSafeResumer, + resumer=IndexedResumer, variadic_input={"dummy_column": "url"}, arguments=[ *FETCH_RESOLVE_COMMON_ARGUMENTS, @@ -260,7 +260,7 @@ def resolve_fetch_arguments(cli_args): . Resolving a single url: $ minet resolve https://lemonde.fr """, - resumer=ThreadSafeResumer, + resumer=IndexedResumer, variadic_input={"dummy_column": "url"}, arguments=[ *FETCH_RESOLVE_COMMON_ARGUMENTS, @@ -332,7 +332,7 @@ def resolve_fetch_arguments(cli_args): . Screenshot a single url: $ minet screenshot https://lemonde.fr """, - resumer=ThreadSafeResumer, + resumer=IndexedResumer, variadic_input={"dummy_column": "url"}, arguments=[ *COMMON_ARGUMENTS, diff --git a/minet/cli/fetch/fetch.py b/minet/cli/fetch/fetch.py index c16bd05ceb..b0239ac0c8 100644 --- a/minet/cli/fetch/fetch.py +++ b/minet/cli/fetch/fetch.py @@ -152,14 +152,14 @@ def get_title(cli_args): @with_enricher_and_loading_bar( headers=get_headers, - enricher_type="threadsafe", + enricher_type="indexed", index_column="original_index", title=get_title, unit="urls", stats_sort_key=loading_bar_stats_sort_key, ) @with_ctrl_c_warning -def action(cli_args, enricher: casanova.ThreadSafeEnricher, loading_bar: LoadingBar): +def action(cli_args, enricher: casanova.IndexedEnricher, loading_bar: LoadingBar): # Resolving or fetching? resolve = cli_args.action == "resolve" diff --git a/minet/cli/utils.py b/minet/cli/utils.py index 66ac7e9fa6..960a6ea5fe 100644 --- a/minet/cli/utils.py +++ b/minet/cli/utils.py @@ -12,7 +12,7 @@ import yaml import platform import casanova -from casanova.namedrecord import is_tabular_record_class +from casanova import is_tabular_record_class from dataclasses import dataclass from glob import iglob from copy import copy @@ -386,7 +386,7 @@ def wrapper(cli_args, *args, **kwargs): and cli_args.resume and isinstance( cli_args.output, - (casanova.RowCountResumer, casanova.ThreadSafeResumer), + (casanova.RowCountResumer, casanova.IndexedResumer), ) and cli_args.output.can_resume() ): @@ -407,8 +407,8 @@ def listener(event, _): enricher_fn = casanova.enricher - if enricher_type == "threadsafe": - enricher_fn = casanova.threadsafe_enricher + if enricher_type == "indexed": + enricher_fn = casanova.indexed_enricher elif enricher_type == "batch": enricher_fn = casanova.batch_enricher diff --git a/requirements.txt b/requirements.txt index 3b2612a5df..11b726b264 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,7 @@ wheel about-time==4.2.1 beautifulsoup4==4.11.1 browser-cookie3==0.19.1 -casanova==1.16.1 +casanova==2.0.0 charset-normalizer==3.1.0 dateparser==1.1.6 ebbe==1.13.2 diff --git a/setup.py b/setup.py index b166d35a5a..406f2ee5ed 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ "about-time>=4,<5", "beautifulsoup4>=4.7.1,<5", "browser-cookie3==0.19.1", - "casanova>=1.16.1,<1.17", + "casanova>=2.0.0,<3", "charset-normalizer>=3,<4", "dateparser>=1.1.1", "ebbe>=1.13,<2",