From b6cb1a615285e0ee2da37511f2f424fd9129f42e Mon Sep 17 00:00:00 2001 From: Galen Reich <54807169+GalenReich@users.noreply.github.com> Date: Tue, 2 Jul 2024 12:22:32 +0100 Subject: [PATCH] Support exact search with quotes in the notebook (#33) * Remove sys.exit from cli.py * Convert notebook to handle exceptions rather than sys exit * Bump version --- edgar_tool/cli.py | 31 +++++++-------- notebook/Bellingcat_EDGAR_Tool.ipynb | 57 ++++++++++++++++------------ pyproject.toml | 2 +- 3 files changed, 48 insertions(+), 42 deletions(-) diff --git a/edgar_tool/cli.py b/edgar_tool/cli.py index d4be8f6..06b8ae5 100644 --- a/edgar_tool/cli.py +++ b/edgar_tool/cli.py @@ -132,23 +132,20 @@ def text_search( destination=output, ) scraper = EdgarTextSearcher() - try: - scraper.text_search( - keywords=keywords, - entity_id=entity_id, - filing_form=TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING.get(filing_form), - single_forms=single_forms, - start_date=start_date, - end_date=end_date, - min_wait_seconds=min_wait, - max_wait_seconds=max_wait, - retries=retries, - destination=output, - peo_in=peo_in, - inc_in=inc_in - ) - except NoResultsFoundError as e: - sys.exit(2) + scraper.text_search( + keywords=keywords, + entity_id=entity_id, + filing_form=TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING.get(filing_form), + single_forms=single_forms, + start_date=start_date, + end_date=end_date, + min_wait_seconds=min_wait, + max_wait_seconds=max_wait, + retries=retries, + destination=output, + peo_in=peo_in, + inc_in=inc_in + ) @staticmethod def rss( diff --git a/notebook/Bellingcat_EDGAR_Tool.ipynb b/notebook/Bellingcat_EDGAR_Tool.ipynb index eb8f57f..09766ec 100644 --- a/notebook/Bellingcat_EDGAR_Tool.ipynb +++ b/notebook/Bellingcat_EDGAR_Tool.ipynb @@ -62,6 +62,7 @@ "from ipywidgets import widgets\n", "from IPython.display import display\n", "from IPython import get_ipython\n", + "from contextlib import redirect_stdout\n", "\n", "data_table.enable_dataframe_formatter()\n", "\n", @@ -74,33 +75,41 @@ "\n", "# Install the EDGAR search tool on the first run\n", "![ ! -f \"edgar_tool_installed\" ] && echo -n \"Loading the EDGAR Tool on first search...\" && pip install edgar-tool >> {logfile} 2>&1 && pip install pandas==1.5.3 >> {logfile} 2>&1 && touch edgar_tool_installed && echo \"Loaded.\"\n", + "from edgar_tool.cli import SecEdgarScraperCli as edgar_tool\n", + "from edgar_tool.page_fetcher import NoResultsFoundError\n", "\n", "# Run the tool with the query\n", - "!echo -n \"Searching EDGAR...\"\n", - "!edgar-tool text_search {search_keywords} --start_date {start_date} --end_date {end_date} --filing_form {filing_type} --entity_id {company_cik} --output {output} --browser firefox --min-wait 0.5 --max-wait 1.5 {loc_filter} >> {logfile} 2>&1\n", - "exit_code = get_ipython().__dict__['user_ns']['_exit_code']\n", - "!echo \"Done.\"\n", - "\n", - "# Error handling\n", - "if exit_code == 2:\n", + "print(\"Searching EDGAR...\")\n", + "try:\n", + " with open(logfile, 'a') as f:\n", + " with redirect_stdout(f):\n", + " edgar_tool.text_search(\n", + " search_keywords,\n", + " start_date=start_date, \n", + " end_date=end_date,\n", + " filing_form=filing_type,\n", + " entity_id=company_cik,\n", + " output=output,\n", + " peo_in=location if filter_by_location==\"Principal executive offices in\" else None,\n", + " inc_in=location if filter_by_location==\"Incorporated in\" else None,\n", + " )\n", + " print(\"Done.\")\n", + " # Load results\n", + " results = pd.read_csv(output)\n", + "\n", + " # Show download button\n", + " btn = widgets.Button(description='Download Results')\n", + " btn.on_click(lambda x: files.download(output))\n", + " display(btn)\n", + "\n", + " # Display the results in a data table\n", + " display(results)\n", + "except NoResultsFoundError:\n", " print(\"\\x1b[33m No results were found for your query.\\x1b[0m\")\n", - "elif exit_code != 0:\n", - " print(\"\\x1b[31m Something went wrong with the EDGAR tool, check your search and try again.\\x1b[0m\")\n", - "else:\n", - " try:\n", - " # Load results\n", - " results = pd.read_csv(output)\n", - "\n", - " # Show download button\n", - " btn = widgets.Button(description='Download Results')\n", - " btn.on_click(lambda x: files.download(output))\n", - " display(btn)\n", - "\n", - " # Display the results in a data table\n", - " display(results)\n", - " except FileNotFoundError as e:\n", - " print(\"\\x1b[31m Something went wrong with the EDGAR tool, please get in touch at contact-tech@bellingcat.com and help us improve the tool for everyone. \\x1b[0m\")\n", - "\n" + "except FileNotFoundError as e:\n", + " print(\"\\x1b[31m Something went wrong with the EDGAR tool, please get in touch at contact-tech@bellingcat.com and help us improve the tool for everyone. \\x1b[0m\")\n", + "except Exception as e:\n", + " print(\"\\x1b[31m Something went wrong with the EDGAR tool, check your search and try again.\\x1b[0m\") " ] } ], diff --git a/pyproject.toml b/pyproject.toml index e17b80f..a973f33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "edgar-tool" -version = "1.3.1" +version = "1.3.2" description = "Search and retrieve corporate and financial data from the United States Securities and Exchange Commission (SEC)." authors = ["Bellingcat"] license = "GNU General Public License v3 (GPLv3)"