Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(load): update error message in case of dataset not found locally and missing api keys #1589

Merged
merged 5 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions pandasai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,17 @@ def load(dataset_path: str) -> DataFrame:
raise ValueError("The path must be in the format 'organization/dataset'.")

dataset_full_path = os.path.join(find_project_root(), "datasets", dataset_path)
if not os.path.exists(dataset_full_path):

local_dataset_exists = os.path.exists(dataset_full_path)

if not local_dataset_exists:
api_key = os.environ.get("PANDABI_API_KEY", None)
api_url = os.environ.get("PANDABI_API_URL", DEFAULT_API_URL)

if not api_url or not api_key:
gventuri marked this conversation as resolved.
Show resolved Hide resolved
raise PandaAIApiKeyError()
raise PandaAIApiKeyError(
f'The dataset "{dataset_path}" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.'
)

request_session = get_pandaai_session()

Expand All @@ -232,7 +238,16 @@ def load(dataset_path: str) -> DataFrame:
zip_file.extractall(dataset_full_path)

loader = DatasetLoader.create_loader_from_path(dataset_path)
return loader.load()
df = loader.load()

message = (
"Dataset loaded successfully."
if local_dataset_exists
else "Dataset fetched successfully from the remote server."
)
print(message)
gventuri marked this conversation as resolved.
Show resolved Hide resolved

return df


def read_csv(filepath: str) -> DataFrame:
Expand Down
25 changes: 22 additions & 3 deletions tests/unit_tests/test_pandasai_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,20 +137,39 @@ def test_load_dataset_not_found(self, mockenviron, mock_bytes_io, mock_zip_file)
with pytest.raises(DatasetNotFound):
pandasai.load(dataset_path)

@patch("pandasai.os.path.exists")
@patch("pandasai.os.environ", {})
@patch("pandasai.get_pandaai_session")
def test_load_missing_not_found_locally_and_no_remote_key(
    self, mock_session, mock_exists
):
    """Test loading a dataset that is absent locally when no API key is set.

    ``os.path.exists`` is patched to return ``False`` and the environment
    is patched to an empty mapping, so no ``PANDABI_API_KEY`` is available.
    ``pandasai.load`` is expected to raise ``PandaAIApiKeyError`` with a
    message that names the requested dataset.
    """
    mock_exists.return_value = False

    # Safety net: should load() ever get as far as the remote request, the
    # mocked session answers with a 404 instead of touching the network.
    mock_response = MagicMock()
    mock_response.status_code = 404
    mock_session.return_value.get.return_value = mock_response

    dataset_path = "org/dataset_name"
    expected_message = (
        'The dataset "org/dataset_name" does not exist in your local '
        "datasets directory. In addition, no API Key has been provided. "
        "Set an API key with valid permits if you want to fetch the "
        "dataset from the remote server."
    )

    with pytest.raises(PandaAIApiKeyError) as exc_info:
        pandasai.load(dataset_path)

    # Compare the whole message rather than passing it to ``match=``:
    # ``match`` interprets its argument as a regular expression, so the
    # periods would match any character and a partial match would pass.
    assert str(exc_info.value) == expected_message

@patch("pandasai.os.path.exists")
@patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"})
def test_load_missing_api_url(self, mock_exists):
"""Test loading when API URL is missing."""
mock_exists.return_value = False
dataset_path = "org/dataset_name"

with pytest.raises(PandaAIApiKeyError):
with pytest.raises(DatasetNotFound):
pandasai.load(dataset_path)

@patch("pandasai.os.path.exists")
@patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"})
@patch("pandasai.get_pandaai_session")
def test_load_missing_api_url(self, mock_session, mock_exists):
def test_load_missing_not_found(self, mock_session, mock_exists):
"""Test loading when API URL is missing."""
mock_exists.return_value = False
mock_response = MagicMock()
Expand Down Expand Up @@ -202,7 +221,7 @@ def test_load_without_api_credentials(
pandasai.load("test/dataset")
assert (
str(exc_info.value)
== "PandaAI API key not found. Please set your API key using PandaAI.set_api_key() or by setting the PANDASAI_API_KEY environment variable."
== 'The dataset "test/dataset" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.'
)

def test_clear_cache(self):
Expand Down
Loading