diff --git a/src/test.ipynb b/src/test.ipynb deleted file mode 100644 index f7d1434..0000000 --- a/src/test.ipynb +++ /dev/null @@ -1,1327 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "42e53efd", - "metadata": {}, - "outputs": [], - "source": [ - "import webtranspose as webt\n", - "import asyncio" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b411d07d", - "metadata": {}, - "outputs": [], - "source": [ - "crawl = webt.Crawl(\n", - " \"https://maxhodak.com/\",\n", - " allowed_urls=[\n", - " \"https://maxhodak.com/nonfiction/*\",\n", - " \"https://maxhodak.com/\",\n", - " \"https://maxhodak.com/writings/\",\n", - " ],\n", - " banned_urls=[\"https://maxhodak.com/*\"],\n", - " n_workers=3,\n", - " max_pages=6,\n", - " api_key=\"8c5379cb-6f08-4662-97dc-7ff1d5d06479\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "5ce95bbe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'crawl_id': None,\n", - " 'n_workers': 3,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'max_pages': 6,\n", - " 'num_visited': 0,\n", - " 'num_ignored': 0,\n", - " 'num_queued': 1,\n", - " 'banned_urls': ['https://maxhodak.com/*'],\n", - " 'allowed_urls': ['https://maxhodak.com/nonfiction/*',\n", - " 'https://maxhodak.com/',\n", - " 'https://maxhodak.com/writings/']}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.status()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "24682598", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "WebTransposeCrawl(\n", - "Crawl ID: a858329c-f141-41b1-a219-bb3377c81bb6\n", - "Number of Workers: cloud\n", - "Base URL: https://maxhodak.com/\n", - "Max Pages: 6\n", - "Number of Visited URLs: 0\n", - "Number of Ignored URLs: 0\n", - "Number of Queued URLs: 0\n", - "Banned URLs: ['https://maxhodak.com/*']\n", - "Allowed URLs: ['https://maxhodak.com/nonfiction/*', 'https://maxhodak.com/', 'https://maxhodak.com/writings/'])" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "await crawl.crawl()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "68952763", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'crawl_id': 'a858329c-f141-41b1-a219-bb3377c81bb6',\n", - " 'created_at': '2023-10-24T01:51:40.757Z',\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'max_pages': 6,\n", - " 'render_js': False,\n", - " 'num_visited': 1,\n", - " 'num_ignored': 4,\n", - " 'num_queued': 0,\n", - " 'banned_urls': ['https://maxhodak.com/*'],\n", - " 'allowed_urls': ['https://maxhodak.com/nonfiction/*',\n", - " 'https://maxhodak.com/',\n", - " 'https://maxhodak.com/writings/']}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.status()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f4551210", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://science.xyz',\n", - " 'http://www.github.com/maxhodak',\n", - " 'https://www.neuralink.com',\n", - " 'https://www.transcriptic.com']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.get_ignored()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "ae815f30", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://maxhodak.com/',\n", - " 'https://maxhodak.com/photography/',\n", - " 'https://maxhodak.com/writings/']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.get_visited()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "96054145", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6a2f7c2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "07a635d6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['https://science.xyz',\n", - " 'http://www.github.com/maxhodak',\n", - " 'https://www.neuralink.com',\n", - " 'https://www.transcriptic.com',\n", - " 'https://www.youtube.com/watch',\n", - " 'https://sciencemastodon.com/@maxh',\n", - " 'https://youtu.be/r-vbh3t7WVI',\n", - " 'https://strateos.com/',\n", - " 'http://boss.blogs.nytimes.com/2009/11/18/tune-in-start-up-drop-out',\n", - " \"http://www.theverge.com/2013/12/18/5216738/inside-transcriptic-the-secret-robot-lab-that's-shaking-up-science\",\n", - " 'https://web.archive.org/web/20120503041911/http://www.sandbox-network.com/meet-a-sandboxer/meet-a-sandboxer-max-hodak/',\n", - " 'http://www.whitehouse.gov/blog/2013/06/06/collaborating-transform-scientific-research']" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.get_ignored()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3d458745", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "WebTransposeCrawl(\n", - "Crawl ID: a858329c-f141-41b1-a219-bb3377c81bb6\n", - "Number of Workers: cloud\n", - "Base URL: https://maxhodak.com/\n", - "Max Pages: 10\n", - "Number of Visited URLs: 8\n", - "Number of Ignored URLs: 12\n", - "Number of Queued URLs: 11\n", - "Banned URLs: ['https://maxhodak.com/*']\n", - "Allowed URLs: ['https://maxhodak.com/nonfiction/*', 'https://maxhodak.com/', 'https://maxhodak.com/writings/'])" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.set_max_pages(10)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "17b906c8", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf51c619", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d0975924", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'webtranspose-out/maxhodak.com/https%3A%2F%2Fmaxhodak.com%2Fnonfiction%2F2023%2F01%2F17%2Funcertainty.json'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.get_filename(\"https://maxhodak.com/nonfiction/2023/01/17/uncertainty\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "72b61b10", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "WebTransposeCrawl(\n", - "Crawl ID: a858329c-f141-41b1-a219-bb3377c81bb6\n", - "Number of Workers: cloud\n", - "Base URL: https://maxhodak.com/\n", - "Max Pages: 10\n", - "Number of Visited URLs: 11\n", - "Number of Ignored URLs: 25\n", - "Number of Queued URLs: 48\n", - "Banned URLs: ['https://maxhodak.com/*']\n", - "Allowed URLs: [])" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "crawl.set_allowed_urls([])" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "4ce0ca7b", - "metadata": {}, - "outputs": [], - "source": [ - "crawl.download()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "faf2671e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "6686eea7", - "metadata": {}, - "source": [ - "# Create" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "6ae9eb67", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "headers = {\n", - " \"X-API-Key\": \"8c5379cb-6f08-4662-97dc-7ff1d5d06479\",\n", - "}\n", - "body = {\n", - " \"url\": \"https://www.maxhodak.com/\",\n", - " \"render_js\": False,\n", - " \"max_pages\": 5,\n", - "}\n", - "\n", - "api_endpoint = \"https://api.webtranspose.com/v1/crawl/create-dev\"\n", - "response = requests.post(api_endpoint, headers=headers, json=body)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5376ae36", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'crawl_id': '67e01d72-4802-4dcf-b29f-9172049affa3'}" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "response.json()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6fdf80ca", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "ca1911ca", - "metadata": {}, - "source": [ - "# resume" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "18d58557", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "headers = {\n", - " \"X-API-Key\": \"8c5379cb-6f08-4662-97dc-7ff1d5d06479\",\n", - "}\n", - "body = {\n", - " \"crawl_id\": \"67e01d72-4802-4dcf-b29f-9172049affa3\",\n", - "}\n", - "\n", - "api_endpoint = \"https://api.webtranspose.com/v1/crawl/resume-dev\"\n", - "response = requests.post(api_endpoint, headers=headers, json=body)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "079fc2d5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'crawl_id': '67e01d72-4802-4dcf-b29f-9172049affa3', 'resumed': True}" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "response.json()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19d24e69", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "03bb92a0", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "a6b2b619", - "metadata": {}, - "source": [ - "# List" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3186266f", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "headers = {\n", - " \"X-API-Key\": \"8c5379cb-6f08-4662-97dc-7ff1d5d06479\",\n", - "}\n", - "body = {}\n", - "\n", - "api_endpoint = \"https://api.webtranspose.com/v1/crawl/list-dev\"\n", - "response = requests.post(api_endpoint, headers=headers, json=body)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6b21eddf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'crawls': [{'id': 'a02d27de-e57e-4b70-911d-e469d677400f',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/media/99812/download',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-01T20:56:23.017Z'},\n", - " {'id': 'b61bb0a0-59f1-4c41-a08f-6cbe293b5903',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T18:07:08.989Z'},\n", - " {'id': '566331e9-ab8d-424b-ba25-3e7b0ecf2425',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/black-white-2',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-19T23:46:11.184Z'},\n", - " {'id': '2d658a0f-5a1a-4846-be30-3959ef6cc91d',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-1',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 405,\n", - " 'created_at': '2023-09-18T19:16:37.831Z'},\n", - " {'id': '56b3a128-4868-4a70-b655-5fcdb5e49c43',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.outreachsage.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T20:32:27.340Z'},\n", - " {'id': 'e1a06cb1-33ad-4399-b235-c39c8230288e',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-1',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-09-18T20:13:14.256Z'},\n", - " {'id': '59fb9c02-260c-488b-afc7-8997a20d7126',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/media/99812/download',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-01T20:53:35.231Z'},\n", - " {'id': '43100414-4c0f-4f06-bb19-9f78cc4061a5',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/arrokuda',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-09T20:54:20.905Z'},\n", - " {'id': '436952a5-9582-478a-ab17-0e51058dbda3',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-03T01:23:50.704Z'},\n", - " {'id': '7c703e9c-ddac-49ca-98db-8a466289da0c',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T18:03:23.417Z'},\n", - " {'id': '76d90093-8ef1-4995-8f37-11592e2204db',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T18:15:37.180Z'},\n", - " {'id': '1a107de5-207c-42e3-850a-bcc6ea61cfb6',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.soompi.com/latest',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-06T17:04:51.746Z'},\n", - " {'id': '1292c870-052b-43f3-a7af-eb878dc124bd',\n", - " 'force_js': True,\n", - " 'base_url': 'https://pokemondb.net/pokedex',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-14T21:45:23.997Z'},\n", - " {'id': '6aa01d79-aa9c-4108-a992-a00ebc28409b',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.mgee.me/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-09T18:54:55.001Z'},\n", - " {'id': 'd6117fa2-ff08-4819-adcc-a27f448519af',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T19:23:37.467Z'},\n", - " {'id': '56197ea7-a7d1-4f24-b5e2-2096179131cf',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.chewy.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-14T21:59:36.342Z'},\n", - " {'id': '6f39714f-bc3a-4104-8d5e-97629b86b439',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.corusent.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 11,\n", - " 'created_at': '2023-09-21T02:41:34.797Z'},\n", - " {'id': 'ffae3d1e-17bb-4bd9-a7b8-2cfd717a5354',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-3',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-09-18T20:20:49.280Z'},\n", - " {'id': 'c36007b7-d1f6-4588-ab0a-d58f2f39315e',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-03T01:18:45.374Z'},\n", - " {'id': '5a20d946-6616-443d-8758-dad8c0521288',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.soompi.com/latest',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-06T17:42:50.900Z'},\n", - " {'id': 'bb0265e0-f100-4d1c-aed5-3b8159f80238',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-10-12T23:31:34.311Z'},\n", - " {'id': 'fb271e4b-558d-4903-9f40-14eeffe44f82',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 30,\n", - " 'created_at': '2023-09-17T03:10:47.224Z'},\n", - " {'id': 'd9a05b0e-4fe2-4c3f-bf6d-72f1954f66b9',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.corusent.com/media-centre/press-releases',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-09-21T03:54:31.166Z'},\n", - " {'id': 'efe179f2-304c-4a03-b302-da06a849da29',\n", - " 'force_js': False,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 25,\n", - " 'created_at': '2023-10-13T18:13:08.361Z'},\n", - " {'id': '153f5e25-642b-4f7b-bf0b-2d0c3e12e197',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/writings',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T20:09:08.804Z'},\n", - " {'id': 'ed969070-b6a9-45bf-8876-638b3306d81e',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-3',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-09-18T20:32:12.722Z'},\n", - " {'id': '4f501f87-96e3-4d71-8978-6b7470415225',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-01T03:28:33.192Z'},\n", - " {'id': 'fc2034a5-dc98-4da1-b8eb-27bb676a6071',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.soompi.com/latest',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-06T17:29:53.658Z'},\n", - " {'id': 'b4a055a3-e4c2-43cb-897b-c84b0fc56c22',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.sec.gov/rules/2022/05/private-fund-advisers-documentation-registered-investment-adviser-compliance-reviews#IA-6383',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-06T02:43:27.896Z'},\n", - " {'id': '619bfa77-529f-45ab-99d2-c16b5c0f6249',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/media/99812/download',\n", - " 'status': 'loading',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-01T20:52:09.610Z'},\n", - " {'id': 'b90acb08-61fc-4f73-ba7e-33ad49ce469f',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T18:01:23.080Z'},\n", - " {'id': 'b0d01f79-7194-4d9b-bff9-7f167d3bc340',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T18:02:02.130Z'},\n", - " {'id': '4e2e7062-9d6d-4e13-88e0-11f34042ddd4',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-08-31T16:14:25.166Z'},\n", - " {'id': '32b9cb87-8c17-4904-bf09-0a9dc7d0da70',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.mikegee.dev/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-10-12T23:38:58.638Z'},\n", - " {'id': 'ce4e97c6-a4ef-4bd1-a12f-38526eba977c',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-01T03:34:04.744Z'},\n", - " {'id': '503c08c2-bf14-4ba6-944b-86b41f370ab0',\n", - " 'force_js': True,\n", - " 'base_url': 'https://mikegee.dev',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-06T14:26:05.585Z'},\n", - " {'id': 'e74f033e-0c4b-4767-b1bf-499151f2aeed',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.soompi.com/latest',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-06T17:14:59.852Z'},\n", - " {'id': '6b6b952e-0237-4481-a772-c8ea78ce6a1b',\n", - " 'force_js': True,\n", - " 'base_url': 'http://fda.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-01T03:20:20.448Z'},\n", - " {'id': 'b80775d2-f84e-4667-9f61-038492165a19',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-3',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-19T23:40:49.969Z'},\n", - " {'id': 'd5d8c910-af37-40f2-9f82-1d4be2fbbea8',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-3',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 20,\n", - " 'created_at': '2023-09-18T20:35:50.061Z'},\n", - " {'id': '18885139-87d4-4128-89f7-40699371d58b',\n", - " 'force_js': True,\n", - " 'base_url': 'https://news.ycombinator.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-14T21:45:00.372Z'},\n", - " {'id': '0271c9bb-be8e-408c-9d84-1343d3f268b8',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-01T03:37:56.614Z'},\n", - " {'id': '1d397d88-b5dc-431b-be9d-92a50ac65f52',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-2',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-09-18T20:20:46.386Z'},\n", - " {'id': '74120a6e-33ca-4520-93d2-a64997717167',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 9,\n", - " 'created_at': '2023-09-04T03:20:53.898Z'},\n", - " {'id': '67c88ae5-4e4b-48dd-b2a1-c946fe8037b7',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/game/scarlet-violet',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 21,\n", - " 'created_at': '2023-09-09T20:35:05.231Z'},\n", - " {'id': '1c99ab37-8758-4b2f-a7b4-df632aefbef1',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 7,\n", - " 'created_at': '2023-09-15T07:13:58.989Z'},\n", - " {'id': 'ba32515c-e1a2-4200-8aa7-8adfadafece7',\n", - " 'force_js': False,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 20,\n", - " 'created_at': '2023-09-10T00:23:57.656Z'},\n", - " {'id': 'df0fd9dc-929e-4985-94d8-74d2840e44ca',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 74,\n", - " 'created_at': '2023-09-01T16:42:40.432Z'},\n", - " {'id': '2e6ee76f-79a5-4fa3-a886-7feea5af92dd',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb23423423.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T18:15:45.048Z'},\n", - " {'id': '9a64b0c3-076e-42db-a5c2-09537a5e59b3',\n", - " 'force_js': True,\n", - " 'base_url': 'https://news.mit.edu/2023/mit-pillar-ai-collective-first-seed-grant-recipients-0622',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-05T15:40:02.354Z'},\n", - " {'id': '83f4174d-1bcb-4f33-b4c0-2e638aaab256',\n", - " 'force_js': True,\n", - " 'base_url': 'https://mikegee.dev',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 7,\n", - " 'created_at': '2023-09-05T16:51:17.766Z'},\n", - " {'id': 'd039288b-0aec-4825-a071-fb9acff4ad85',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 14,\n", - " 'created_at': '2023-09-03T00:35:05.565Z'},\n", - " {'id': 'f7ac61ed-75ae-416c-ab75-94bb563bf03c',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.chewy.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 30,\n", - " 'created_at': '2023-10-10T00:02:00.041Z'},\n", - " {'id': '668903ac-7c99-4e7b-ad67-418158831129',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T18:15:33.319Z'},\n", - " {'id': '855bcf24-cbc7-4892-8315-f4298e631fc8',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'invalid_credits',\n", - " 'total_pages_requested': 4,\n", - " 'created_at': '2023-09-03T00:25:00.815Z'},\n", - " {'id': '1a660687-51c6-4946-a8cd-269c9cfec17d',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/media/99812/download',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-01T20:54:23.692Z'},\n", - " {'id': '6ccacac9-eb79-4b8a-9b15-ee737ad18dd6',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'loading',\n", - " 'total_pages_requested': 10,\n", - " 'created_at': '2023-09-03T01:48:57.008Z'},\n", - " {'id': 'a0bf63d6-4d8e-4e6c-8c60-974a9e7b0067',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-03T01:42:39.470Z'},\n", - " {'id': '77ca979d-65ec-4a70-ba8b-ace582bc4f08',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.nih.gov/grants-funding',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 30,\n", - " 'created_at': '2023-09-01T17:10:57.100Z'},\n", - " {'id': '969d6735-3334-433f-8fc3-c242b4789df2',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 10,\n", - " 'created_at': '2023-10-22T18:15:06.006Z'},\n", - " {'id': '30da1efe-e5c1-4502-9c59-389945f7327b',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/writings',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T20:22:43.284Z'},\n", - " {'id': 'a5dffb62-6421-4618-ae3a-064432a54b30',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'loading',\n", - " 'total_pages_requested': 10,\n", - " 'created_at': '2023-09-04T03:11:49.674Z'},\n", - " {'id': 'fcc314a5-9349-4231-affb-12cc5ea2c120',\n", - " 'force_js': True,\n", - " 'base_url': 'https://news.mit.edu/2023/mit-pillar-ai-collective-first-seed-grant-recipients-0622',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-05T01:40:45.945Z'},\n", - " {'id': 'a6c81720-c6fe-48e6-af0e-e562d0c293fe',\n", - " 'force_js': False,\n", - " 'base_url': 'https://mikegee.dev',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-06T17:53:56.533Z'},\n", - " {'id': '05a4e055-2fa6-4497-9f31-fc0948e72f10',\n", - " 'force_js': False,\n", - " 'base_url': 'http://paulgraham.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-10-13T18:07:53.102Z'},\n", - " {'id': '0a5a2cdd-81ff-4ac3-9333-fedd5cf734f6',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.soompi.com/latest',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-06T15:59:58.194Z'},\n", - " {'id': 'cdaf86b8-4d6f-41c6-8dac-636012e129ba',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 20,\n", - " 'created_at': '2023-09-01T03:42:39.884Z'},\n", - " {'id': '53cce0d8-a92c-4d4b-aae6-eea083ff7091',\n", - " 'force_js': True,\n", - " 'base_url': 'http://fda.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-01T03:24:15.411Z'},\n", - " {'id': 'c0c71dfb-a1c2-45bd-af58-781d3088a4ad',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/writings',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T19:52:24.575Z'},\n", - " {'id': '9a79522d-9f2d-4f01-b474-649fad04e163',\n", - " 'force_js': False,\n", - " 'base_url': 'https://news.ycombinator.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-10-13T18:10:48.677Z'},\n", - " {'id': '7b45a975-e215-4db7-9ac9-c19aee853681',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/game/scarlet-violet',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 12,\n", - " 'created_at': '2023-09-06T18:39:56.444Z'},\n", - " {'id': 'bd572bef-1161-4bcb-bb95-0acdb8a0e2cc',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-03T01:38:29.269Z'},\n", - " {'id': '56bdf5e5-ad9c-4652-b3c1-fba3bb9d41ec',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-1',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-18T19:19:29.414Z'},\n", - " {'id': '2a4d9994-658c-4b36-be4c-d98c8f76ef0d',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T20:31:14.301Z'},\n", - " {'id': '2acdec6a-f252-443b-8044-7bfd6c8baf11',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.corusent.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 47,\n", - " 'created_at': '2023-09-21T02:15:48.714Z'},\n", - " {'id': 'd4e4860c-064a-4eb5-ba8e-2f9cb9406d58',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.sec.gov/news/pressreleases',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 16,\n", - " 'created_at': '2023-09-06T12:57:15.352Z'},\n", - " {'id': '48b6bb1b-cf85-4611-9dbc-d9e9ce3eb19f',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/game/scarlet-violet',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 12,\n", - " 'created_at': '2023-09-06T18:52:36.867Z'},\n", - " {'id': '25a74460-6fc5-41fb-9155-29ff52e7cccb',\n", - " 'force_js': False,\n", - " 'base_url': 'https://darrelllea.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 2,\n", - " 'created_at': '2023-09-18T20:36:12.415Z'},\n", - " {'id': 'bfd76506-6d9e-4110-951e-e8c4e98f1e83',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.soompi.com/latest',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-06T02:38:29.603Z'},\n", - " {'id': '9e8d9142-4ca7-4ba0-9e63-fa8b9bf71610',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex/national#gen-1',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-09-18T20:19:55.635Z'},\n", - " {'id': 'daa946e8-102a-430e-ad52-e0c7387b34c3',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/writings',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T20:01:19.660Z'},\n", - " {'id': 'e32db457-9dee-4fbe-85d5-d4faf05267d8',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.cs.cmu.edu/~crary/317-f18/recitations/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-10T21:11:02.474Z'},\n", - " {'id': '0a2089e7-2e68-42a1-96ba-a09c13a219f8',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.mikegee.dev/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-10-12T23:40:29.081Z'},\n", - " {'id': '19cede6c-0ac7-461a-9aa2-b21cea03b942',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-03T01:31:10.578Z'},\n", - " {'id': 'ac67ffc6-bd91-43f3-aa40-0626b0a5c91f',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 10,\n", - " 'created_at': '2023-08-31T16:11:31.876Z'},\n", - " {'id': 'ab62bdef-885b-4d54-aa60-8dde9cfe9eea',\n", - " 'force_js': True,\n", - " 'base_url': 'https://blog.samaltman.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 10,\n", - " 'created_at': '2023-08-31T17:05:29.165Z'},\n", - " {'id': '197eadd7-0d26-4fa4-837d-601f1befae0a',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/writings',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T20:16:28.063Z'},\n", - " {'id': '4b679cc0-601f-4cc6-8350-79c0db5e9bc8',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.sec.gov/files/rules/final/2023/ia-6383.pdf',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-06T02:44:08.483Z'},\n", - " {'id': '3fe859c3-9c09-4fe2-93bd-bdd5a752a4be',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 39,\n", - " 'created_at': '2023-09-04T04:24:00.411Z'},\n", - " {'id': '5cff7a2c-ff4b-4dc7-b8c4-e2b7df868063',\n", - " 'force_js': True,\n", - " 'base_url': 'https://www.cs.cmu.edu/~crary/317-f18/recitations/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-10T21:14:32.400Z'},\n", - " {'id': '7c6e3f95-6342-4fff-9137-9c85add9237a',\n", - " 'force_js': True,\n", - " 'base_url': 'http://fda.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-08-31T20:36:49.725Z'},\n", - " {'id': '76a81b6a-a08f-460a-8320-570d57fce8c2',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.pokemondb.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T17:47:22.636Z'},\n", - " {'id': '5feabe27-0599-42c5-a336-c00c8b3437fe',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.soompi.com/latest',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 3,\n", - " 'created_at': '2023-09-06T17:53:05.915Z'},\n", - " {'id': '2d5b7982-2e84-4965-8a6d-b1e53db02dc6',\n", - " 'force_js': False,\n", - " 'base_url': 'https://pokemondb.net/pokedex',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-10-22T17:45:28.912Z'},\n", - " {'id': 'b5fb1d4d-de61-461c-b17b-a6b57294b6db',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.fda.gov/medical-devices/device-approvals-denials-and-clearances/510k-clearances',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 15,\n", - " 'created_at': '2023-10-12T23:29:26.414Z'},\n", - " {'id': '779f1d46-f3ad-4f81-b53d-89be4d4d315a',\n", - " 'force_js': False,\n", - " 'base_url': 'https://www.mgee.me/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 1,\n", - " 'created_at': '2023-09-09T18:46:54.195Z'},\n", - " {'id': '833d194b-1fe2-4beb-8958-8170e3ae9782',\n", - " 'force_js': True,\n", - " 'base_url': 'http://fda.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 5,\n", - " 'created_at': '2023-09-01T03:28:05.603Z'},\n", - " {'id': '1328e0c2-100f-439b-a80a-d7e241206b79',\n", - " 'force_js': True,\n", - " 'base_url': 'https://grants.nih.gov/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 30,\n", - " 'created_at': '2023-09-01T17:11:59.511Z'},\n", - " {'id': 'aba60db0-5f72-4a2d-a726-d27b593514c9',\n", - " 'force_js': True,\n", - " 'base_url': 'https://maxhodak.com/',\n", - " 'status': 'paused',\n", - " 'total_pages_requested': 4,\n", - " 'created_at': '2023-09-03T00:30:03.147Z'},\n", - " {'id': '71f8c055-3f9e-4d53-8d8a-1bb43b1e73b1',\n", - " 'force_js': False,\n", - " 'base_url': 'https://darrelllea.com/',\n", - " 'status': 'complete',\n", - " 'total_pages_requested': 2,\n", - " 'created_at': '2023-09-18T20:36:14.688Z'}]}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "response.json()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "85584fcb", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "4645169c", - "metadata": {}, - "source": [ - "# Get" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "a9329bf7", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "headers = {\n", - " \"X-API-Key\": \"8c5379cb-6f08-4662-97dc-7ff1d5d06479\",\n", - "}\n", - "body = {\"crawl_id\": \"67e01d72-4802-4dcf-b29f-9172049affa3\"}\n", - "\n", - "api_endpoint = \"https://api.webtranspose.com/v1/crawl/get-dev\"\n", - "response = requests.post(api_endpoint, headers=headers, json=body)" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "612a1e7a", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'crawl_id': '67e01d72-4802-4dcf-b29f-9172049affa3',\n", - " 'created_at': '2023-10-23T18:28:49.994Z',\n", - " 'base_url': 'https://www.maxhodak.com/',\n", - " 'max_pages': 5,\n", - " 'render_js': False,\n", - " 'num_visited': 6,\n", - " 'num_ignored': 13,\n", - " 'num_queued': 53,\n", - " 'banned_urls': [],\n", - " 'allowed_urls': []}" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "response.json()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "90e3679c", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "4084d00e", - "metadata": {}, - "source": [ - "# Get queue" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "808a755f", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "headers = {\n", - " \"X-API-Key\": \"8c5379cb-6f08-4662-97dc-7ff1d5d06479\",\n", - "}\n", - "body = {\"crawl_id\": \"67e01d72-4802-4dcf-b29f-9172049affa3\"}\n", - "\n", - "api_endpoint = \"https://api.webtranspose.com/v1/crawl/queue-dev\"\n", - "response = requests.post(api_endpoint, headers=headers, json=body)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "aab49c44", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "637f6e12", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "68b9348c", - "metadata": {}, - "source": [ - "# Download" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "b80f2869", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "\n", - "headers = {\n", - " \"X-API-Key\": \"8c5379cb-6f08-4662-97dc-7ff1d5d06479\",\n", - "}\n", - "body = {\"crawl_id\": \"2aacbea1-09c7-42b2-bad5-1e9814403078\"}\n", - "\n", - "api_endpoint = \"https://api.webtranspose.com/v1/crawl/download-dev\"\n", - "response = requests.post(api_endpoint, headers=headers, json=body)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "c3c00d33", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'url': 'https://scrape-cache-prod.s3.amazonaws.com/private/us-east-2%3Ae409c0a5-25fb-48df-b9bd-a4417b514443/downloads/2aacbea1-09c7-42b2-bad5-1e9814403078.zip?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAVDUMC4RO7Y4MCUJK%2F20231024%2Fus-east-2%2Fs3%2Faws4_request&X-Amz-Date=20231024T011840Z&X-Amz-Expires=300&X-Amz-SignedHeaders=host&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEBEaCXVzLWVhc3QtMiJHMEUCIAjahD18B9Cd52Vrmpcumf6unzbTFEJ09xh1ix%2BCFPRSAiEAwgVe5BDyUDwg%2Bl9qZKUW0vpASKSEyFV3iA9%2FjhbUkpMq7AIIOhAAGgwzNTE0MDczNjcyNjEiDAPQ6ZVy8FRtYYviHSrJApxMvGVT5EF8xpzjO2bXOAGkr1P8F%2BShDpdgnylxusAIAa1KwPtFjOYDVMlJqS1SEtMml2CeDGN9gGKoMHYiZT5ipfShlHtBCJWzFPR2gWMwTExsXwhfgf4FmclFErrw3eQdsCpgV8VolVHp1tM3gbe7q4ypMtyyPPsVtN38vowRKgBz1DcoY4SAhas8C0h4TDuDlzJLuBuN%2B3GliNOSU0uJUaNcoRC01G61G4YbIUVV%2Fe4SSjVyS1xphGjmRTeyi6xA%2BqJ6kf8yST409nezoAk6%2Bq7NYFCTX6Wl0a98M%2Fe7roIapWS3QnO3F32%2BD%2FhYC5EDz25izzpfNYIBY%2FdiE3uvxUnqZZ58BoyB5%2FjtqHsYsS%2BGe0miyLrZSZdGFMQk%2B%2BhapCZVP5Nw7tZziJzSoRLdZSioKN15TU9LLaH5s8qJm%2F5gEuxpw0v4MKO03KkGOp4BVkpmUvUL8kstEfs1xFc53%2FcFea1pojXZWA%2BbrqBffIMZaSxM1ex%2F4PIHbRDw7KE5v4Qd08Trth%2FFOSBrA86c2jmRL2TVzR4AQyu%2FCk8o3RUbX0Jg68afSFdCYA6PIYgXpY1djYgLybL%2FC%2BZZfq3%2FLPVuAHhP6WW4OWfbTRu9w99lhGFOONl%2BGOtXympYw00dEBk8XYYmrnn4aZ1TOOw%3D&X-Amz-Signature=35c2ab7268816f67101f63a94e6f29c55ba0f6803cc440539712ae8d48c587af'}" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "response.json()" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "efc93ca8", - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "module 'webtranspose' has no attribute 'list_crawls'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/var/folders/rh/0zrsw9xd3qnbggwbk10z77380000gn/T/ipykernel_95232/3685262029.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mwebt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlist_crawls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m: module 'webtranspose' has no attribute 'list_crawls'" - ] - } - ], - "source": [ - "webt.list_crawls()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "867d1214", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "webt", - "language": "python", - "name": "webt" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.5" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}