From d73caa31118cd88a9b2ac8e7bc37f41a8f7d2e8a Mon Sep 17 00:00:00 2001 From: Bumsoo Kim Date: Wed, 18 Sep 2024 00:25:14 +0900 Subject: [PATCH] feat: add longblack.co as a highlight source (#1) --- .gitignore | 3 + .pylintrc.toml | 2 + Makefile | 6 +- README.md | 4 +- pyproject.toml | 16 +- requirements-dev.lock | 4 + src/ridiwise/api/longblack.py | 212 ++++++++++++++++++++++++ src/ridiwise/cmd/common_option.py | 45 +---- src/ridiwise/cmd/context.py | 9 +- src/ridiwise/cmd/main.py | 2 +- src/ridiwise/cmd/sync/longblack.py | 154 +++++++++++++++++ src/ridiwise/cmd/sync/main.py | 101 ++--------- src/ridiwise/cmd/sync/ridibooks.py | 159 ++++++++++++++++++ src/ridiwise/tests/__init__.py | 0 src/ridiwise/tests/api/__init__.py | 0 src/ridiwise/tests/api/test_logblack.py | 71 ++++++++ 16 files changed, 646 insertions(+), 142 deletions(-) create mode 100644 src/ridiwise/api/longblack.py create mode 100644 src/ridiwise/cmd/sync/longblack.py create mode 100644 src/ridiwise/cmd/sync/ridibooks.py create mode 100644 src/ridiwise/tests/__init__.py create mode 100644 src/ridiwise/tests/api/__init__.py create mode 100644 src/ridiwise/tests/api/test_logblack.py diff --git a/.gitignore b/.gitignore index a02fc4a..0a5d05c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,9 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +junit.xml +pytest-coverage.txt + # pyenv .python-version diff --git a/.pylintrc.toml b/.pylintrc.toml index 8f22fa5..1ce563e 100644 --- a/.pylintrc.toml +++ b/.pylintrc.toml @@ -382,6 +382,8 @@ disable = [ "use-implicit-booleaness-not-comparison-to-string", "use-implicit-booleaness-not-comparison-to-zero", "missing-docstring", + "duplicate-code", + "logging-fstring-interpolation", ] # Enable the message, report, category or checker with the given id(s). You can diff --git a/Makefile b/Makefile index 6558d2b..81df5f0 100644 --- a/Makefile +++ b/Makefile @@ -30,12 +30,14 @@ test: --cov-report term-missing:skip-covered \ --cov-report html \ --cov-report xml \ + --junitxml=junit.xml \ -vvv \ - tests + --pyargs ridiwise \ + --cov src \ | tee pytest-coverage.txt clean: - rm -rf htmlcov pytest-coverage.txt + rm -rf .coverage htmlcov coverage.xml pytest-coverage.txt junit.xml ### Docker DOCKER_REPO ?= bskim45/ridiwise diff --git a/README.md b/README.md index e8dea8b..595a363 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ridiwise -Sync Ridibooks book notes to Readwise.io +Sync book/article highlights to Readwise.io ## Installation @@ -49,7 +49,7 @@ pipx install git+https://github.com/bskim45/ridiwise.git $ ridiwise --help Usage: ridiwise [OPTIONS] COMMAND [ARGS]... - ridiwise: Sync Ridibooks book notes to Readwise.io + ridiwise: Sync book/article highlights to Readwise.io (...) ``` diff --git a/pyproject.toml b/pyproject.toml index 2aa19ad..fb9e36b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,10 +5,10 @@ authors = [ { name = "Bumsoo Kim", email = "bskim45@gmail.com" } ] dependencies = [ - "typer>=0.12.3", - "httpx>=0.27.0", - "browser-cookie3>=0.19.1", - "playwright>=1.45.1", + "typer>=0.12.3", + "httpx>=0.27.0", + "browser-cookie3>=0.19.1", + "playwright>=1.45.1", ] readme = "README.md" license = { file = "LICENSE" } @@ -31,6 +31,7 @@ dev-dependencies = [ "ruff>=0.5.4", "pylint>=3.2.6", "bump-my-version>=0.24.3", + "pytest-cov>=5.0.0", ] @@ -70,3 +71,10 @@ allow_dirty = true [[tool.bumpversion.files]] filename = 'src/ridiwise/__init__.py' + + +[tool.coverage.run] +parallel = true +omit = [ + "*/tests/*", +] diff --git a/requirements-dev.lock b/requirements-dev.lock index 11bdb09..4390257 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -30,6 +30,8 @@ click==8.1.7 # via bump-my-version # via rich-click # via typer +coverage==7.6.1 + # via pytest-cov dill==0.3.8 # via pylint distlib==0.3.8 @@ -90,6 +92,8 @@ pygments==2.18.0 # via rich pylint==3.2.6 pytest==8.3.1 + # via pytest-cov +pytest-cov==5.0.0 python-dotenv==1.0.1 # via pydantic-settings pyyaml==6.0.1 diff --git a/src/ridiwise/api/longblack.py b/src/ridiwise/api/longblack.py new file mode 100644 index 0000000..fcb73ea --- /dev/null +++ b/src/ridiwise/api/longblack.py @@ -0,0 +1,212 @@ +import datetime +import re +import urllib.parse +from typing import Optional, TypedDict +from zoneinfo import ZoneInfo + +from playwright.sync_api import ( + Locator, +) +from playwright.sync_api import ( + TimeoutError as PlaywrightTimeoutError, +) + +from ridiwise.api.browser_base_client import BrowserBaseClient + +DOMAIN = 'www.longblack.co' +COOKIE_DOMAIN = f'https://{DOMAIN}' + +SELECTOR_LOGIN_USER_ID = 'form.login-form input[name="email"]' +SELECTOR_LOGIN_PASSWORD = 'form.login-form input[name="password"]' +SELECTOR_LOGIN_BUTTON = 'form.login-form button[type="submit"]' + + +class Note(TypedDict): + """ + Article + """ + + note_id: str + note_url: str + title: str + author: Optional[str] + cover_image_url: Optional[str] + + +class Scrap(TypedDict): + """ + Highlight + """ + + scrap_id: str + scrap_url: str + highlighted_text: str + memo: Optional[str] + created_datetime: datetime.datetime + note: Note + + +class LongblackClient(BrowserBaseClient): + base_url = f'https://{DOMAIN}' + provider = 'longblack' + storage_state_filename = f'browser_state_{provider}.json' + + def __init__( + self, + user_id: str, + password: str, + *args, + **kwargs, + ): + self.user_id = user_id + self.password = password + + super().__init__(*args, **kwargs) + + @staticmethod + def parse_scrap_url(url) -> Optional[tuple[str, str]]: + """ + Extracts the book_id from a given URI. + """ + pattern = re.compile(r'/note/(\d+).*#memoId=([A-Za-z0-9]+)') + match = pattern.search(url) + if match: + note_id, scrap_id = match.groups() + return note_id, scrap_id + return None + + @staticmethod + def parse_scrap_date(datetime_string) -> Optional[datetime.datetime]: + """ + Parses a date string in the format 'YYYY.MM.DD HH:MM' + """ + datetime_format = '%Y.%m.%d %H:%M' + dt = datetime.datetime.strptime(datetime_string, datetime_format) + dt = dt.replace(tzinfo=ZoneInfo('Asia/Seoul')) + return dt + + @staticmethod + def get_author_from_scrap_title(title: str) -> Optional[str]: + """ + Extracts the author from the scrap title. + """ + if ':' not in title: + return None + + parts = title.split(':', 1) + author = parts[0].strip() + title = parts[1].strip() + + if not author or not title: + return None + + return author + + def login(self): + self.logger.info(f'Login: `{DOMAIN}`') + + with self.browser_context.new_page() as page: + page.goto(f'{self.base_url}/login?return_url=/membership') + page.wait_for_selector(SELECTOR_LOGIN_USER_ID) + + page.locator(SELECTOR_LOGIN_USER_ID).fill(self.user_id) + page.locator(SELECTOR_LOGIN_PASSWORD).fill(self.password) + + page.click(SELECTOR_LOGIN_BUTTON) + + try: + page.wait_for_url('**/membership') + self.cache_dir.mkdir(parents=True, exist_ok=True) + self.browser_context.storage_state( + path=self.cache_dir / self.storage_state_filename + ) + except PlaywrightTimeoutError as e: + self.logger.error('Login timeout') + raise e + + def is_authenticated(self) -> bool: + with self.browser_context.new_page() as page: + res = page.request.get(f'{self.base_url}/membership', max_redirects=0) + return res.ok + + def get_scraps(self) -> list[Scrap]: + if not self.is_authenticated(): + self.logger.info('Login required') + self.login() + + scraps = [] + + # get recent 20 pages only to avoid spamming the server + for page_num in range(1, 21): + query_params = urllib.parse.urlencode( + { + 'page': page_num, + 'view': 'note', + 'sort': 'latest', + 'search': '', + } + ) + + with self.browser_context.new_page() as page: + page.goto(f'{self.base_url}/scrap?{query_params}') + items = page.locator('.swiper-slide:has(div.scrap)').all() + + if not items: + break + + scraps.extend([self._parse_dom(item) for item in items]) + + return scraps + + def _parse_dom(self, elem: Locator) -> Scrap: + highlighted_text = elem.locator('.scrap-content').inner_text().strip() + date_str = elem.locator('.date').text_content().strip() + scrap_date = self.parse_scrap_date(date_str) + + note_info = elem.locator('a.note-info') + + scrap_url = note_info.get_attribute('href') + note_id, scrap_id = self.parse_scrap_url(scrap_url) + note_title = note_info.locator('span').text_content().strip() + note_cover_image_url = note_info.locator('img').get_attribute('src') + + memo = self._get_memo(elem) + + return { + 'scrap_id': scrap_id, + 'scrap_url': scrap_url, + 'highlighted_text': highlighted_text, + 'memo': memo, + 'created_datetime': scrap_date, + 'note': { + 'title': note_title, + 'note_url': f'{self.base_url}/note/{note_id}', + 'note_id': note_id, + 'author': self.get_author_from_scrap_title(note_title), + 'cover_image_url': note_cover_image_url, + }, + } + + @staticmethod + def _get_memo(elem: Locator) -> Optional[str]: + memo_button = elem.locator('.actions').locator('button.show-memo') + indicator = memo_button.locator('.memo-icon.dot') + + if not indicator.is_visible(): + return None + + memo_button.click() + memo_modals = elem.page.locator('.memo-modal') + memo_modal = memo_modals.locator('visible=true') + + if not memo_modal.is_visible(): + try: + memo_modal = memo_modals.last + memo_modal.wait_for(state='visible') + except PlaywrightTimeoutError: + return None + + memo = memo_modal.get_by_role('textbox').input_value() + memo_modal.locator('.actions').locator('button.negative').click() + + return memo diff --git a/src/ridiwise/cmd/common_option.py b/src/ridiwise/cmd/common_option.py index aa50871..5124934 100644 --- a/src/ridiwise/cmd/common_option.py +++ b/src/ridiwise/cmd/common_option.py @@ -1,60 +1,26 @@ -import enum -from typing import Optional +from collections import defaultdict import typer -from ridiwise.cmd.context import AuthState, ContextState - - -@enum.unique -class RidiAuthMethod(enum.StrEnum): - # BROWSER_COOKIE = 'browser_cookie' - HEADLESS_BROWSER = 'headless_browser' +from ridiwise.cmd.context import ContextState def check_common_options( ctx: typer.Context, - auth_method: RidiAuthMethod, - user_id: Optional[str], - password: Optional[str], headless_mode: bool, browser_timeout_seconds: int, ): context: ContextState = ctx.ensure_object(dict) - auth_state: AuthState = { - 'auth_method': auth_method, - } + if 'auths' not in context: + context['auths'] = defaultdict() - context['auth'] = auth_state context['headless_mode'] = headless_mode context['browser_timeout_seconds'] = browser_timeout_seconds - if auth_method == RidiAuthMethod.HEADLESS_BROWSER: - if not all([user_id, password]): - raise typer.BadParameter('`user_id` and `password` must be provided.') - - auth_state['user_id'] = user_id - auth_state['password'] = password - def common_params( ctx: typer.Context, - auth_method: RidiAuthMethod = typer.Option( - default=RidiAuthMethod.HEADLESS_BROWSER, - envvar='RIDI_AUTH_METHOD', - help='Authentication method to use with Ridibooks.', - ), - user_id: Optional[str] = typer.Option( - default=None, - envvar='RIDI_USER_ID', - help='Ridibooks user ID.', - ), - password: Optional[str] = typer.Option( - default=None, - envvar='RIDI_PASSWORD', - help='Ridibooks password.', - ), headless_mode: bool = typer.Option( True, envvar='HEADLESS_MODE', @@ -69,9 +35,6 @@ def common_params( ctx.ensure_object(dict) check_common_options( ctx=ctx, - auth_method=auth_method, - user_id=user_id, - password=password, headless_mode=headless_mode, browser_timeout_seconds=browser_timeout_seconds, ) diff --git a/src/ridiwise/cmd/context.py b/src/ridiwise/cmd/context.py index ca9f6e4..8e03704 100644 --- a/src/ridiwise/cmd/context.py +++ b/src/ridiwise/cmd/context.py @@ -1,8 +1,15 @@ +import enum import logging from pathlib import Path from typing import Optional, TypedDict +@enum.unique +class AuthMethod(enum.StrEnum): + # BROWSER_COOKIE = 'browser_cookie' + HEADLESS_BROWSER = 'headless_browser' + + class AuthState(TypedDict, total=False): auth_method: str user_id: Optional[str] @@ -11,7 +18,7 @@ class AuthState(TypedDict, total=False): class ContextState(TypedDict): logger: logging.Logger - auth: AuthState + auths: dict[str, AuthState] config_dir: Path cache_dir: Path diff --git a/src/ridiwise/cmd/main.py b/src/ridiwise/cmd/main.py index a6d9066..f8cae8f 100644 --- a/src/ridiwise/cmd/main.py +++ b/src/ridiwise/cmd/main.py @@ -53,7 +53,7 @@ def main( ] = '~/.cache/ridiwise', ): """ - ridiwise: Sync Ridibooks book notes to Readwise.io + ridiwise: Sync book/article highlights to Readwise.io """ ctx.ensure_object(dict) ctx.obj = { diff --git a/src/ridiwise/cmd/sync/longblack.py b/src/ridiwise/cmd/sync/longblack.py new file mode 100644 index 0000000..16bd915 --- /dev/null +++ b/src/ridiwise/cmd/sync/longblack.py @@ -0,0 +1,154 @@ +import itertools +from typing import Optional + +import typer +from typing_extensions import Annotated + +from ridiwise.api.longblack import LongblackClient +from ridiwise.api.readwise import ReadwiseClient +from ridiwise.cmd.common_option import common_params +from ridiwise.cmd.context import AuthMethod, AuthState, ContextState +from ridiwise.cmd.utils import with_extra_parameters + +PROVIDER = 'longblack' + +app = typer.Typer(name='longblack') + + +@app.callback() +def main(): + """ + Sync Longblack scraps(highlights) to another service. + """ + + +def check_longblack_common_options( + ctx: typer.Context, + auth_method: AuthMethod, + user_id: Optional[str], + password: Optional[str], +): + context: ContextState = ctx.ensure_object(dict) + + auth_state: AuthState = { + 'auth_method': auth_method, + } + + if auth_method == AuthMethod.HEADLESS_BROWSER: + if not all([user_id, password]): + raise typer.BadParameter('`user_id` and `password` must be provided.') + + auth_state['user_id'] = user_id + auth_state['password'] = password + + context['auths'][PROVIDER] = auth_state + + +def longblack_common_params( + ctx: typer.Context, + auth_method: AuthMethod = typer.Option( + default=AuthMethod.HEADLESS_BROWSER, + envvar='LONGBLACK_AUTH_METHOD', + help='Authentication method to use with Longblack.', + ), + user_id: Optional[str] = typer.Option( + default=None, + envvar='LONGBLACK_USER_ID', + help='Longblack user ID.', + ), + password: Optional[str] = typer.Option( + default=None, + envvar='LONGBLACK_PASSWORD', + help='Longblack password.', + ), +): + ctx.ensure_object(dict) + check_longblack_common_options( + ctx=ctx, + auth_method=auth_method, + user_id=user_id, + password=password, + ) + + +@app.command() +@with_extra_parameters(common_params) +@with_extra_parameters(longblack_common_params) +def readwise( + ctx: typer.Context, + readwise_token: Annotated[ + str, + typer.Option( + envvar='READWISE_TOKEN', + help='Readwise.io API token. https://readwise.io/access_token', + ), + ], + tags: Annotated[ + Optional[list[str]], + typer.Option( + help='Tags to attach to the highlights. Multiple tags can be provided.', + ), + ] = None, +): + """ + Sync Longblack scraps to Readwise.io. + """ + + context: ContextState = ctx.ensure_object(dict) + + with ( + LongblackClient( + user_id=context['auths'][PROVIDER]['user_id'], + password=context['auths'][PROVIDER]['password'], + cache_dir=context['cache_dir'], + headless=context['headless_mode'], + browser_timeout_seconds=context['browser_timeout_seconds'], + ) as longblack_client, + ReadwiseClient(token=readwise_token) as readwise_client, + ): + scraps = longblack_client.get_scraps() + + if not scraps: + raise typer.Abort('No scraps found.') + + result_count = { + 'articles': 0, + 'highlights': 0, + } + + highlights_response = readwise_client.create_highlights( + highlights=[ + { + 'text': scrap['highlighted_text'], + 'title': scrap['note']['title'], + 'source_type': PROVIDER, + 'category': 'articles', + 'author': scrap['note']['author'], + 'highlighted_at': scrap['created_datetime'].isoformat(), + 'note': scrap['memo'], + 'source_url': scrap['note']['note_url'], + 'highlight_url': scrap['scrap_url'], + 'image_url': scrap['note']['cover_image_url'], + } + for scrap in scraps + ] + ) + + modified_highlight_ids = itertools.chain.from_iterable( + article_result['modified_highlights'] + for article_result in highlights_response + ) + + if tags: + for highlight_id, tag in zip(modified_highlight_ids, tags): + readwise_client.create_highlight_tag( + highlight_id=highlight_id, + tag=tag, + ) + + result_count['articles'] = len({scrap['note']['note_id'] for scrap in scraps}) + result_count['highlights'] = len(scraps) + + print('Synced notes to Readwise.io:') + print('Articles: ', result_count['articles']) + print('Highlights: ', result_count['highlights']) diff --git a/src/ridiwise/cmd/sync/main.py b/src/ridiwise/cmd/sync/main.py index c53112e..3cddf95 100644 --- a/src/ridiwise/cmd/sync/main.py +++ b/src/ridiwise/cmd/sync/main.py @@ -1,13 +1,6 @@ -from typing import Optional - import typer -from typing_extensions import Annotated -from ridiwise.api.readwise import ReadwiseClient -from ridiwise.api.ridibooks import RidiClient -from ridiwise.cmd.common_option import common_params -from ridiwise.cmd.context import ContextState -from ridiwise.cmd.utils import with_extra_parameters +from ridiwise.cmd.sync import longblack, ridibooks app = typer.Typer() @@ -15,90 +8,16 @@ @app.callback() def main(): """ - Sync Ridibooks book notes to another service. - """ - - -@app.command() -@with_extra_parameters(common_params) -def readwise( - ctx: typer.Context, - readwise_token: Annotated[ - str, - typer.Option( - envvar='READWISE_TOKEN', - help='Readwise.io API token. https://readwise.io/access_token', - ), - ], - tags: Annotated[ - Optional[list[str]], - typer.Option( - help='Tags to attach to the highlights. Multiple tags can be provided.', - ), - ] = None, -): - """ - Sync Ridibooks book notes to Readwise.io. + Sync book/article highlights to another service. """ - context: ContextState = ctx.ensure_object(dict) - logger = context['logger'] - - with ( - RidiClient( - user_id=context['auth']['user_id'], - password=context['auth']['password'], - cache_dir=context['cache_dir'], - headless=context['headless_mode'], - browser_timeout_seconds=context['browser_timeout_seconds'], - ) as ridi_client, - ReadwiseClient(token=readwise_token) as readwise_client, - ): - books = ridi_client.get_books_from_shelf() - - result_count = { - 'books': 0, - 'highlights': 0, - } - - for book in books: - highlights_response = readwise_client.create_highlights( - highlights=[ - { - 'text': note['highlighted_text'], - 'title': book['book_title'], - 'source_type': 'ridibooks', - 'category': 'books', - 'author': ', '.join(book['authors']), - 'highlighted_at': note['created_date'].isoformat(), - 'note': note['memo'], - 'source_url': book['book_url'], - 'highlight_url': ( - f'{book["book_notes_url"]}#annotation_{note["id"]}' - ), - 'image_url': book['book_cover_image_url'], - } - for note in book['notes'] - ] - ) - - modified_highlight_ids = highlights_response[0]['modified_highlights'] - - if tags: - for highlight_id, tag in zip(modified_highlight_ids, tags): - readwise_client.create_highlight_tag( - highlight_id=highlight_id, - tag=tag, - ) - - result_count['books'] += 1 - result_count['highlights'] += len(book['notes']) - logger.info( - 'Created Readwise highlights: ' - f"`{book['book_title']}` / {len(book['notes'])}" - ) +app.add_typer( + ridibooks.app, + no_args_is_help=True, +) - print('Synced notes to Readwise.io:') - print('Books: ', result_count['books']) - print('Highlights: ', result_count['highlights']) +app.add_typer( + longblack.app, + no_args_is_help=True, +) diff --git a/src/ridiwise/cmd/sync/ridibooks.py b/src/ridiwise/cmd/sync/ridibooks.py new file mode 100644 index 0000000..dc88bf1 --- /dev/null +++ b/src/ridiwise/cmd/sync/ridibooks.py @@ -0,0 +1,159 @@ +from typing import Optional + +import typer +from typing_extensions import Annotated + +from ridiwise.api.readwise import ReadwiseClient +from ridiwise.api.ridibooks import RidiClient +from ridiwise.cmd.common_option import common_params +from ridiwise.cmd.context import AuthMethod, AuthState, ContextState +from ridiwise.cmd.utils import with_extra_parameters + +PROVIDER = 'ridibooks' + +app = typer.Typer(name=PROVIDER) + + +@app.callback() +def main(): + """ + Sync Ridibooks book notes to another service. + """ + + +def check_ridi_common_options( + ctx: typer.Context, + auth_method: AuthMethod, + user_id: Optional[str], + password: Optional[str], +): + context: ContextState = ctx.ensure_object(dict) + + auth_state: AuthState = { + 'auth_method': auth_method, + } + + if auth_method == AuthMethod.HEADLESS_BROWSER: + if not all([user_id, password]): + raise typer.BadParameter('`user_id` and `password` must be provided.') + + auth_state['user_id'] = user_id + auth_state['password'] = password + + context['auths'][PROVIDER] = auth_state + + +def ridi_common_params( + ctx: typer.Context, + auth_method: AuthMethod = typer.Option( + default=AuthMethod.HEADLESS_BROWSER, + envvar='RIDI_AUTH_METHOD', + help='Authentication method to use with Ridibooks.', + ), + user_id: Optional[str] = typer.Option( + default=None, + envvar='RIDI_USER_ID', + help='Ridibooks user ID.', + ), + password: Optional[str] = typer.Option( + default=None, + envvar='RIDI_PASSWORD', + help='Ridibooks password.', + ), +): + ctx.ensure_object(dict) + check_ridi_common_options( + ctx=ctx, + auth_method=auth_method, + user_id=user_id, + password=password, + ) + + +@app.command() +@with_extra_parameters(common_params) +@with_extra_parameters(ridi_common_params) +def readwise( + ctx: typer.Context, + readwise_token: Annotated[ + str, + typer.Option( + envvar='READWISE_TOKEN', + help='Readwise.io API token. https://readwise.io/access_token', + ), + ], + tags: Annotated[ + Optional[list[str]], + typer.Option( + help='Tags to attach to the highlights. Multiple tags can be provided.', + ), + ] = None, +): + """ + Sync Ridibooks book notes to Readwise.io. + """ + + context: ContextState = ctx.ensure_object(dict) + logger = context['logger'] + + with ( + RidiClient( + user_id=context['auths'][PROVIDER]['user_id'], + password=context['auths'][PROVIDER]['password'], + cache_dir=context['cache_dir'], + headless=context['headless_mode'], + browser_timeout_seconds=context['browser_timeout_seconds'], + ) as ridi_client, + ReadwiseClient(token=readwise_token) as readwise_client, + ): + books = ridi_client.get_books_from_shelf() + + if not books: + raise typer.Abort('No book notes found.') + + result_count = { + 'books': 0, + 'highlights': 0, + } + + for book in books: + highlights_response = readwise_client.create_highlights( + highlights=[ + { + 'text': note['highlighted_text'], + 'title': book['book_title'], + 'source_type': PROVIDER, + 'category': 'books', + 'author': ', '.join(book['authors']), + 'highlighted_at': note['created_date'].isoformat(), + 'note': note['memo'], + 'source_url': book['book_url'], + 'highlight_url': ( + f'{book["book_notes_url"]}#annotation_{note["id"]}' + ), + 'image_url': book['book_cover_image_url'], + } + for note in book['notes'] + ] + ) + + modified_highlight_ids = highlights_response[0]['modified_highlights'] + + if tags: + for highlight_id, tag in zip(modified_highlight_ids, tags): + readwise_client.create_highlight_tag( + highlight_id=highlight_id, + tag=tag, + ) + + result_count['books'] += 1 + result_count['highlights'] += len(book['notes']) + + logger.info( + 'Created Readwise highlights: ' + f"`{book['book_title']}` / {len(book['notes'])}" + ) + + print('Synced notes to Readwise.io:') + print('Books: ', result_count['books']) + print('Highlights: ', result_count['highlights']) diff --git a/src/ridiwise/tests/__init__.py b/src/ridiwise/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ridiwise/tests/api/__init__.py b/src/ridiwise/tests/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ridiwise/tests/api/test_logblack.py b/src/ridiwise/tests/api/test_logblack.py new file mode 100644 index 0000000..b2f5b24 --- /dev/null +++ b/src/ridiwise/tests/api/test_logblack.py @@ -0,0 +1,71 @@ +import datetime +import unittest +from zoneinfo import ZoneInfo + +from ridiwise.api.longblack import LongblackClient + + +class TestLongblackClient(unittest.TestCase): + def test_parse_scrap_url(self): + test_cases = [ + { + 'url': ( + 'https://www.longblack.co/note/12345#memoId=H172649477900012abc123' + ), + 'expected': ('12345', 'H172649477900012abc123'), + }, + { + 'url': 'https://www.longblack.co/note/invalid#memoId=invalid', + 'expected': None, + }, + {'url': 'https://www.longblack.co/invalid/url', 'expected': None}, + ] + + for case in test_cases: + with self.subTest(case=case): + result = LongblackClient.parse_scrap_url(case['url']) + self.assertEqual(result, case['expected']) + + def test_parse_scrap_date(self): + test_cases = [ + ( + '2023.10.01 12:00', + datetime.datetime(2023, 10, 1, 12, 0, tzinfo=ZoneInfo('Asia/Seoul')), + ), + ( + '2022.01.15 08:30', + datetime.datetime(2022, 1, 15, 8, 30, tzinfo=ZoneInfo('Asia/Seoul')), + ), + ( + '2021.12.31 23:59', + datetime.datetime(2021, 12, 31, 23, 59, tzinfo=ZoneInfo('Asia/Seoul')), + ), + ( + '2020.02.29 00:00', + datetime.datetime(2020, 2, 29, 0, 0, tzinfo=ZoneInfo('Asia/Seoul')), + ), + ] + + for case in test_cases: + with self.subTest(case=case): + result = LongblackClient.parse_scrap_date(case[0]) + self.assertEqual(result, case[1]) + + def test_parse_author_from_scrap_title(self): + test_cases = [ + {'title': 'Author Name: Title', 'expected': 'Author Name'}, + {'title': 'Author Name: Title: Subtitle', 'expected': 'Author Name'}, + {'title': 'Author Name : Title', 'expected': 'Author Name'}, + {'title': 'Author Name:Title', 'expected': 'Author Name'}, + {'title': 'Author Name :Title', 'expected': 'Author Name'}, + {'title': 'Title', 'expected': None}, + ] + + for case in test_cases: + with self.subTest(case=case): + result = LongblackClient.get_author_from_scrap_title(case['title']) + self.assertEqual(result, case['expected']) + + +if __name__ == '__main__': + unittest.main()