Skip to content

Commit

Permalink
Upgrading casanova to v2
Browse files Browse the repository at this point in the history
  • Loading branch information
Yomguithereal committed Dec 15, 2023
1 parent e410d80 commit 8b80fef
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 18 deletions.
4 changes: 2 additions & 2 deletions minet/cli/extract/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-from casanova import ThreadSafeResumer
+from casanova import IndexedResumer

from minet.cli.argparse import command

Expand Down Expand Up @@ -91,7 +91,7 @@ def resolve_arguments(cli_args):
""",
resolve=resolve_arguments,
variadic_input={"dummy_column": "path", "optional": True, "no_help": True},
-resumer=ThreadSafeResumer,
+resumer=IndexedResumer,
arguments=[
{
"flags": ["-g", "--glob"],
Expand Down
6 changes: 3 additions & 3 deletions minet/cli/extract/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from os.path import isdir
from dataclasses import dataclass
from itertools import count
-from casanova import ThreadSafeEnricher
+from casanova import IndexedEnricher
from threading import Lock

from minet.exceptions import TrafilaturaError
Expand Down Expand Up @@ -72,13 +72,13 @@ def worker(payload: ExtractWorkerPayload) -> ExtractResult:

@with_enricher_and_loading_bar(
headers=HEADERS,
-enricher_type="threadsafe",
+enricher_type="indexed",
index_column="extract_original_index",
title="Extracting text",
unit="docs",
total_from_enricher=False,
)
-def action(cli_args, enricher: ThreadSafeEnricher, loading_bar):
+def action(cli_args, enricher: IndexedEnricher, loading_bar):
if cli_args.input_dir is not None and not isdir(cli_args.input_dir):
raise FatalError(
'Could not find the [cyan]-I/--input-dir "{}"[/cyan] directory!'.format(
Expand Down
10 changes: 5 additions & 5 deletions minet/cli/fetch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
-from casanova import ThreadSafeResumer
+from casanova import IndexedResumer

-from minet.cli.argparse import command, BooleanAction, FolderStrategyType
+from minet.cli.argparse import command, FolderStrategyType
from minet.cli.constants import DEFAULT_CONTENT_FOLDER, DEFAULT_SCREENSHOT_FOLDER
from minet.cli.exceptions import InvalidArgumentsError

Expand Down Expand Up @@ -161,7 +161,7 @@ def resolve_fetch_arguments(cli_args):
$ minet fetch url -i file.csv -O html > report.csv
""",
resolve=resolve_fetch_arguments,
-resumer=ThreadSafeResumer,
+resumer=IndexedResumer,
variadic_input={"dummy_column": "url"},
arguments=[
*FETCH_RESOLVE_COMMON_ARGUMENTS,
Expand Down Expand Up @@ -260,7 +260,7 @@ def resolve_fetch_arguments(cli_args):
. Resolving a single url:
$ minet resolve https://lemonde.fr
""",
-resumer=ThreadSafeResumer,
+resumer=IndexedResumer,
variadic_input={"dummy_column": "url"},
arguments=[
*FETCH_RESOLVE_COMMON_ARGUMENTS,
Expand Down Expand Up @@ -332,7 +332,7 @@ def resolve_fetch_arguments(cli_args):
. Screenshot a single url:
$ minet screenshot https://lemonde.fr
""",
-resumer=ThreadSafeResumer,
+resumer=IndexedResumer,
variadic_input={"dummy_column": "url"},
arguments=[
*COMMON_ARGUMENTS,
Expand Down
4 changes: 2 additions & 2 deletions minet/cli/fetch/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,14 @@ def get_title(cli_args):

@with_enricher_and_loading_bar(
headers=get_headers,
-enricher_type="threadsafe",
+enricher_type="indexed",
index_column="original_index",
title=get_title,
unit="urls",
stats_sort_key=loading_bar_stats_sort_key,
)
@with_ctrl_c_warning
-def action(cli_args, enricher: casanova.ThreadSafeEnricher, loading_bar: LoadingBar):
+def action(cli_args, enricher: casanova.IndexedEnricher, loading_bar: LoadingBar):
# Resolving or fetching?
resolve = cli_args.action == "resolve"

Expand Down
8 changes: 4 additions & 4 deletions minet/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import yaml
import platform
import casanova
-from casanova.namedrecord import is_tabular_record_class
+from casanova import is_tabular_record_class
from dataclasses import dataclass
from glob import iglob
from copy import copy
Expand Down Expand Up @@ -386,7 +386,7 @@ def wrapper(cli_args, *args, **kwargs):
and cli_args.resume
and isinstance(
cli_args.output,
-(casanova.RowCountResumer, casanova.ThreadSafeResumer),
+(casanova.RowCountResumer, casanova.IndexedResumer),
)
and cli_args.output.can_resume()
):
Expand All @@ -407,8 +407,8 @@ def listener(event, _):

enricher_fn = casanova.enricher

-if enricher_type == "threadsafe":
-enricher_fn = casanova.threadsafe_enricher
+if enricher_type == "indexed":
+enricher_fn = casanova.indexed_enricher

elif enricher_type == "batch":
enricher_fn = casanova.batch_enricher
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ wheel
about-time==4.2.1
beautifulsoup4==4.11.1
browser-cookie3==0.19.1
-casanova==1.16.1
+casanova==2.0.0
charset-normalizer==3.1.0
dateparser==1.1.6
ebbe==1.13.2
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"about-time>=4,<5",
"beautifulsoup4>=4.7.1,<5",
"browser-cookie3==0.19.1",
-"casanova>=1.16.1,<1.17",
+"casanova>=2.0.0,<3",
"charset-normalizer>=3,<4",
"dateparser>=1.1.1",
"ebbe>=1.13,<2",
Expand Down

0 comments on commit 8b80fef

Please sign in to comment.