Skip to content

Commit 109d625

Browse files
Barabazs and GjjvdBurg authored
feat: add support for forcing source type (#153)
Co-authored-by: Gertjan van den Burg <[email protected]>
1 parent 3dfa2ba commit 109d625

File tree

2 files changed

+65
-14
lines changed

2 files changed

+65
-14
lines changed

paper2remarkable/ui.py

+38 -14
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,15 @@ def build_argument_parser():
127127
help="path to config file (default: ~/.paper2remarkable.yml)",
128128
default=None,
129129
)
130+
parser.add_argument(
131+
"--source",
132+
choices=["url", "file"],
133+
help=(
134+
"Force the source type (url or file) in case of detection failure."
135+
" This is useful when the input is ambiguous, but be aware that "
136+
"this does not guarantee successful processing."
137+
),
138+
)
130139
parser.add_argument(
131140
"input",
132141
help="One or more URLs to a paper or paths to local PDF files",
@@ -153,18 +162,28 @@ def exception(msg):
153162
raise SystemExit(1)
154163

155164

156-
def choose_provider(cli_input):
165+
def choose_provider(cli_input, source_type=None):
157166
"""Choose the provider to use for the given source
158167
159-
This function first tries to check if the input is a local file, by
160-
checking if the path exists. Next, it checks if the input is a "valid" url
161-
using the validators library. If it is, the registered provider classes are
162-
checked to see which provider can handle this url.
168+
This function determines the appropriate provider based on the input and the
169+
optional source_type parameter. If source_type is specified, it overrides
170+
the automatic detection. Otherwise, it first tries to check if the input is
171+
a local file by checking if the path exists. Next, it checks if the input is
172+
a "valid" url using a regex test. If it is, the registered provider classes
173+
are checked to see which provider can handle this url.
174+
175+
Parameters
176+
----------
177+
cli_input : str
178+
The input provided by the user, either a file path or a URL.
179+
source_type : str, optional
180+
The type of the source, either "file" or "url". If provided, it overrides
181+
the automatic detection.
163182
164183
Returns
165184
-------
166185
provider : class
167-
The class of the provider than can handle the source. A subclass of the
186+
The class of the provider that can handle the source. A subclass of the
168187
Provider abc.
169188
170189
new_input : str
@@ -178,19 +197,22 @@ def choose_provider(cli_input):
178197
Raises
179198
------
180199
UnidentifiedSourceError
181-
Raised when the input is neither an existing local file nor a valid url
200+
Raised when the input is neither an existing local file nor a valid url,
201+
and no valid source_type is provided.
182202
183203
InvalidURLError
184-
Raised when the input *is* a valid url, but no provider can handle it.
185-
204+
Raised when the input *is* a valid url (or source_type is "url"), but no
205+
provider can handle it.
186206
"""
187207
provider = cookiejar = None
188-
if LocalFile.validate(cli_input):
189-
# input is a local file
208+
if source_type == "file" or (
209+
source_type is None and LocalFile.validate(cli_input)
210+
):
211+
# input is a local file or user specified source type is file
190212
new_input = cli_input
191213
provider = LocalFile
192-
elif is_url(cli_input):
193-
# input is a url
214+
elif source_type == "url" or (source_type is None and is_url(cli_input)):
215+
# input is a url or user specified source type is url
194216
new_input, cookiejar = follow_redirects(cli_input)
195217
provider = next((p for p in providers if p.validate(new_input)), None)
196218
else:
@@ -292,7 +314,9 @@ def runner(inputs, filenames, options, debug=False):
292314
if not len(inputs) == len(filenames):
293315
raise ValueError("Number of inputs and filenames must be the same")
294316
for cli_input, filename in zip(inputs, filenames):
295-
provider, new_input, cookiejar = choose_provider(cli_input)
317+
provider, new_input, cookiejar = choose_provider(
318+
cli_input, options["core"].get("source")
319+
)
296320
prov = provider(
297321
verbose=options["core"]["verbose"],
298322
upload=options["core"]["upload"],

tests/test_ui.py

+27
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,33 @@ def test_choose_provider_4(self):
232232
with self.assertRaises(InvalidURLError):
233233
choose_provider(url)
234234

235+
def test_choose_provider_with_source(self):
236+
with self.subTest("Test with local file and file source type"):
237+
local_file = "/tmp/test.pdf"
238+
open(local_file, "w").close()
239+
provider, new_input, _ = choose_provider(
240+
local_file, source_type="file"
241+
)
242+
self.assertEqual(provider, LocalFile)
243+
self.assertEqual(new_input, local_file)
244+
os.remove(local_file)
245+
246+
with self.subTest("Test with local file and URL source type"):
247+
# Test with URL source
248+
url = "https://arxiv.org/abs/1234.56789"
249+
provider, new_input, _ = choose_provider(url, source_type="url")
250+
self.assertEqual(provider, Arxiv)
251+
self.assertTrue(new_input.startswith("https://arxiv.org/"))
252+
253+
with self.subTest("Test with URL and file source type"):
254+
# Test with incorrect source type for file
255+
provider, new_input, _ = choose_provider(url, source_type="file")
256+
self.assertEqual(provider, LocalFile)
257+
self.assertEqual(new_input, url)
258+
259+
# Note: we can't test incorrect source type for URL because it will
260+
# raise an exception when determining the correct provider
261+
235262
def test_merge_options_1(self):
236263
config = None
237264
source = "/tmp/local.pdf" # doesn't need to exist

0 commit comments

Comments
 (0)