Lint

navarone-feekery · navarone-feekery · commit 439efa2f5401 · 2023-09-28T13:23:51.000+02:00
diff --git a/connectors/source.py b/connectors/source.py
@@ -724,7 +724,12 @@ def is_file_size_within_limit(self, file_size, filename):
         return True
 
     async def download_and_extract_file(
-        self, doc, source_filename, file_extension, download_func, return_doc_if_failed=False
+        self,
+        doc,
+        source_filename,
+        file_extension,
+        download_func,
+        return_doc_if_failed=False,
     ):
         """
         Performs all the steps required for handling binary content:
diff --git a/connectors/sources/salesforce.py b/connectors/sources/salesforce.py
@@ -4,23 +4,18 @@
 # you may not use this file except in compliance with the Elastic License 2.0.
 #
 """Salesforce source module responsible to fetch documents from Salesforce."""
-import asyncio
 import os
 from functools import cached_property, partial
 from itertools import groupby
 
-import aiofiles
 import aiohttp
-from aiofiles.os import remove
-from aiofiles.tempfile import NamedTemporaryFile
 from aiohttp.client_exceptions import ClientResponseError
 
 from connectors.logger import logger
 from connectors.source import BaseDataSource
 from connectors.utils import (
     TIKA_SUPPORTED_FILETYPES,
     CancellableSleeps,
-    convert_to_b64,
     retryable,
 )
 
@@ -293,15 +288,6 @@ async def get_case_feeds(self, case_ids):
 
         return all_case_feeds
 
-    # async def download_content_documents(self, content_documents):
-    #     for content_document in content_documents:
-    #         content_version = content_document.get("LatestPublishedVersion", {}) or {}
-    #         download_url = content_version.get("VersionDataUrl")
-    #         if download_url:
-    #             content_document["_attachment"] = await self._download(download_url)
-    #
-    #         yield content_document
-
     async def queryable_sobjects(self):
         """Cached async property"""
         if self._queryable_sobjects is not None:
@@ -425,34 +411,6 @@ async def _execute_non_paginated_query(self, soql_query):
         )
         return response.get("records")
 
-    async def _download(self, download_url):
-        attachment = None
-        source_file_name = ""
-
-        try:
-            async with NamedTemporaryFile(mode="wb", delete=False) as async_buffer:
-                resp = await self._get(download_url)
-                async for data in resp.content.iter_chunked(CHUNK_SIZE):
-                    await async_buffer.write(data)
-                source_file_name = async_buffer.name
-
-            await asyncio.to_thread(
-                convert_to_b64,
-                source=source_file_name,
-            )
-
-            async with aiofiles.open(file=source_file_name, mode="r") as target_file:
-                attachment = (await target_file.read()).strip()
-        except Exception as e:
-            self._logger.error(
-                f"Exception encountered when processing file: {source_file_name}. Exception: {e}"
-            )
-        finally:
-            if source_file_name:
-                await remove(str(source_file_name))
-
-        return attachment
-
     async def _auth_headers(self):
         token = await self.api_token.token()
         return {"authorization": f"Bearer {token}"}
@@ -1432,9 +1390,13 @@ async def get_docs(self, filtering=None):
         # Note: this could possibly be done on the fly if memory becomes an issue
         content_docs = self._combine_duplicate_content_docs(content_docs)
         for content_doc in content_docs:
-            download_url = (content_doc.get("LatestPublishedVersion", {}) or {}).get("VersionDataUrl")
+            download_url = (content_doc.get("LatestPublishedVersion", {}) or {}).get(
+                "VersionDataUrl"
+            )
             if not download_url:
-                self._logger.debug(f"No download URL found for {content_doc.get('title')}, skipping.")
+                self._logger.debug(
+                    f"No download URL found for {content_doc.get('title')}, skipping."
+                )
                 continue
 
             doc = self.doc_mapper.map_content_document(content_doc)
@@ -1460,7 +1422,7 @@ async def get_content(self, doc, download_url):
                     download_url,
                 ),
             ),
-            return_doc_if_failed=True, # we still ingest on download failure for Salesforce
+            return_doc_if_failed=True,  # we still ingest on download failure for Salesforce
         )
 
     def _parse_content_documents(self, record):
diff --git a/tests/sources/test_salesforce.py b/tests/sources/test_salesforce.py
@@ -422,7 +422,9 @@
 
 
 @asynccontextmanager
-async def create_salesforce_source(use_text_extraction_service=False, mock_token=True, mock_queryables=True):
+async def create_salesforce_source(
+    use_text_extraction_service=False, mock_token=True, mock_queryables=True
+):
     async with create_source(
         SalesforceDataSource,
         domain=TEST_DOMAIN,