Skip to content

Commit 439efa2

Browse files
Lint
1 parent bec7ff0 commit 439efa2

File tree

3 files changed

+16
-47
lines changed

3 files changed

+16
-47
lines changed

connectors/source.py

+6 -1
Original file line number | Diff line number | Diff line change
@@ -724,7 +724,12 @@ def is_file_size_within_limit(self, file_size, filename):
724724
return True
725725

726726
async def download_and_extract_file(
727-
self, doc, source_filename, file_extension, download_func, return_doc_if_failed=False
727+
self,
728+
doc,
729+
source_filename,
730+
file_extension,
731+
download_func,
732+
return_doc_if_failed=False,
728733
):
729734
"""
730735
Performs all the steps required for handling binary content:

connectors/sources/salesforce.py

+7 -45
Original file line number | Diff line number | Diff line change
@@ -4,23 +4,18 @@
44
# you may not use this file except in compliance with the Elastic License 2.0.
55
#
66
"""Salesforce source module responsible to fetch documents from Salesforce."""
7-
import asyncio
87
import os
98
from functools import cached_property, partial
109
from itertools import groupby
1110

12-
import aiofiles
1311
import aiohttp
14-
from aiofiles.os import remove
15-
from aiofiles.tempfile import NamedTemporaryFile
1612
from aiohttp.client_exceptions import ClientResponseError
1713

1814
from connectors.logger import logger
1915
from connectors.source import BaseDataSource
2016
from connectors.utils import (
2117
TIKA_SUPPORTED_FILETYPES,
2218
CancellableSleeps,
23-
convert_to_b64,
2419
retryable,
2520
)
2621

@@ -293,15 +288,6 @@ async def get_case_feeds(self, case_ids):
293288

294289
return all_case_feeds
295290

296-
# async def download_content_documents(self, content_documents):
297-
# for content_document in content_documents:
298-
# content_version = content_document.get("LatestPublishedVersion", {}) or {}
299-
# download_url = content_version.get("VersionDataUrl")
300-
# if download_url:
301-
# content_document["_attachment"] = await self._download(download_url)
302-
#
303-
# yield content_document
304-
305291
async def queryable_sobjects(self):
306292
"""Cached async property"""
307293
if self._queryable_sobjects is not None:
@@ -425,34 +411,6 @@ async def _execute_non_paginated_query(self, soql_query):
425411
)
426412
return response.get("records")
427413

428-
async def _download(self, download_url):
429-
attachment = None
430-
source_file_name = ""
431-
432-
try:
433-
async with NamedTemporaryFile(mode="wb", delete=False) as async_buffer:
434-
resp = await self._get(download_url)
435-
async for data in resp.content.iter_chunked(CHUNK_SIZE):
436-
await async_buffer.write(data)
437-
source_file_name = async_buffer.name
438-
439-
await asyncio.to_thread(
440-
convert_to_b64,
441-
source=source_file_name,
442-
)
443-
444-
async with aiofiles.open(file=source_file_name, mode="r") as target_file:
445-
attachment = (await target_file.read()).strip()
446-
except Exception as e:
447-
self._logger.error(
448-
f"Exception encountered when processing file: {source_file_name}. Exception: {e}"
449-
)
450-
finally:
451-
if source_file_name:
452-
await remove(str(source_file_name))
453-
454-
return attachment
455-
456414
async def _auth_headers(self):
457415
token = await self.api_token.token()
458416
return {"authorization": f"Bearer {token}"}
@@ -1432,9 +1390,13 @@ async def get_docs(self, filtering=None):
14321390
# Note: this could possibly be done on the fly if memory becomes an issue
14331391
content_docs = self._combine_duplicate_content_docs(content_docs)
14341392
for content_doc in content_docs:
1435-
download_url = (content_doc.get("LatestPublishedVersion", {}) or {}).get("VersionDataUrl")
1393+
download_url = (content_doc.get("LatestPublishedVersion", {}) or {}).get(
1394+
"VersionDataUrl"
1395+
)
14361396
if not download_url:
1437-
self._logger.debug(f"No download URL found for {content_doc.get('title')}, skipping.")
1397+
self._logger.debug(
1398+
f"No download URL found for {content_doc.get('title')}, skipping."
1399+
)
14381400
continue
14391401

14401402
doc = self.doc_mapper.map_content_document(content_doc)
@@ -1460,7 +1422,7 @@ async def get_content(self, doc, download_url):
14601422
download_url,
14611423
),
14621424
),
1463-
return_doc_if_failed=True, # we still ingest on download failure for Salesforce
1425+
return_doc_if_failed=True, # we still ingest on download failure for Salesforce
14641426
)
14651427

14661428
def _parse_content_documents(self, record):

tests/sources/test_salesforce.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -422,7 +422,9 @@
422422

423423

424424
@asynccontextmanager
425-
async def create_salesforce_source(use_text_extraction_service=False, mock_token=True, mock_queryables=True):
425+
async def create_salesforce_source(
426+
use_text_extraction_service=False, mock_token=True, mock_queryables=True
427+
):
426428
async with create_source(
427429
SalesforceDataSource,
428430
domain=TEST_DOMAIN,

0 commit comments

Comments
 (0)