Skip to content

Commit

Permalink
migrate api paths
Browse files Browse the repository at this point in the history
  • Loading branch information
mike-gee committed Oct 29, 2023
1 parent db0e7f0 commit 3c6024e
Showing 1 changed file with 27 additions and 27 deletions.
54 changes: 27 additions & 27 deletions src/webtranspose/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ async def create_crawl_api(self):
}
out_json = run_webt_api(
create_json,
"v1/crawl/create-dev",
"v1/crawl/create",
self.api_key,
)
self.crawl_id = out_json["crawl_id"]
Expand Down Expand Up @@ -261,7 +261,7 @@ async def crawl(self):
}
run_webt_api(
crawl_json,
"v1/crawl/resume-dev",
"v1/crawl/resume",
self.api_key,
)
return self
Expand Down Expand Up @@ -296,7 +296,7 @@ def get_queue(self, n=10):
}
out_json = run_webt_api(
queue_json,
"v1/crawl/get-queue-dev",
"v1/crawl/get-queue",
self.api_key,
)
return out_json["urls"]
Expand All @@ -311,8 +311,8 @@ def set_allowed_urls(self, allowed_urls):
Returns:
self: The Crawl object.
"""
self.allowed_urls = allowed_urls
if not self.created:
self.allowed_urls = allowed_urls
self.to_metadata()
else:
update_json = {
Expand All @@ -321,32 +321,32 @@ def set_allowed_urls(self, allowed_urls):
}
run_webt_api(
update_json,
"v1/crawl/set-allowed-dev",
"v1/crawl/set-allowed",
self.api_key,
)
return self

def set_banned_urls(self, banned_urls):
    """
    Set the banned URLs for the crawl.

    If the crawl has not been created yet, the change is persisted only
    to local metadata; otherwise the cloud crawl record is updated via
    the Web Transpose API.

    Args:
        banned_urls (list): A list of banned URLs.

    Returns:
        self: The Crawl object.
    """
    self.banned_urls = banned_urls
    if not self.created:
        # Crawl exists only locally; persist the setting to metadata.
        self.to_metadata()
    else:
        update_json = {
            "crawl_id": self.crawl_id,
            "banned_urls": banned_urls,
        }
        run_webt_api(
            update_json,
            "v1/crawl/set-banned",
            self.api_key,
        )
    return self
Expand Down Expand Up @@ -389,7 +389,7 @@ def set_max_pages(self, max_pages):
}
run_webt_api(
max_pages_json,
"v1/crawl/set-max-pages-dev",
"v1/crawl/set-max-pages",
self.api_key,
)
return self
Expand Down Expand Up @@ -421,7 +421,7 @@ def status(self):
}
crawl_status = run_webt_api(
status_json,
"v1/crawl/get-dev",
"v1/crawl/get",
self.api_key,
)
crawl_status["loc"] = "cloud"
Expand All @@ -442,27 +442,27 @@ def get_visited(self):
}
out_json = run_webt_api(
visited_json,
"v1/crawl/get-dev/visited",
"v1/crawl/get/visited",
self.api_key,
)
return out_json["pages"]

def get_banned(self):
    """
    Get a list of banned URLs.

    For a crawl that has not been created yet, the banned URLs are read
    from the local object; otherwise they are fetched from the Web
    Transpose API.

    Returns:
        list: A list of banned URLs.
    """
    if not self.created:
        # Local-only crawl: banned URLs are tracked in memory.
        return list(self.banned_urls)

    banned_json = {
        "crawl_id": self.crawl_id,
    }
    out_json = run_webt_api(
        banned_json,
        "v1/crawl/get/banned",
        self.api_key,
    )
    return out_json["pages"]
Expand All @@ -477,7 +477,7 @@ def download(self):
}
out_json = run_webt_api(
download_json,
"v1/crawl/download-dev",
"v1/crawl/download",
self.api_key,
)
presigned_url = out_json["url"]
Expand Down Expand Up @@ -579,7 +579,7 @@ def from_cloud(crawl_id, api_key=None):
get_json = {
"crawl_id": crawl_id,
}
out_json = run_webt_api(get_json, "v1/crawl/get-dev", api_key)
out_json = run_webt_api(get_json, "v1/crawl/get", api_key)
crawl = Crawl(
out_json["base_url"],
out_json["allowed_urls"],
Expand Down Expand Up @@ -621,7 +621,7 @@ def status(self):
}
crawl_status = run_webt_api(
status_json,
"v1/crawl/get-dev",
"v1/crawl/get",
self.api_key,
)
return crawl_status
Expand Down Expand Up @@ -695,7 +695,7 @@ def get_page(self, url):
}
out_json = run_webt_api(
get_json,
"v1/crawl/get-page-dev",
"v1/crawl/get-page",
self.api_key,
)
return out_json
Expand Down Expand Up @@ -729,7 +729,7 @@ def get_child_urls(self, url):
}
out_json = run_webt_api(
get_json,
"v1/crawl/get-child-urls-dev",
"v1/crawl/get-child-urls",
self.api_key,
)
return out_json
Expand Down Expand Up @@ -768,7 +768,7 @@ def list_crawls(loc="cloud", api_key=None):
if api_key is not None and loc == "cloud":
crawl_list_data = run_webt_api(
{},
"v1/crawl/list-dev",
"v1/crawl/list",
api_key,
)
return crawl_list_data["crawls"]
Expand Down

0 comments on commit 3c6024e

Please sign in to comment.