diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..b91f6a4 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,27 @@ +name: Tests + +on: + push: + branches: + - main + - dev +jobs: + unit-tests: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Install the app + run: | + python -m pip install .[test] + - name: Testing + id: test + run: | + # Ignore the network marks from the remote test environment + python -m pytest --color=yes -m "not network" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d77e3b9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,43 @@ +*.swp +package-lock.json +__pycache__ +.pytest_cache +.venv +.env +*.egg-info +.aws +external +.idea/ + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# CDK asset staging directory +.cdk.staging +cdk.out +aws +awscliv2.zip +cdk.context.json + +# Individual development CDK apps +app*dev.py + +# Sphinx documentation +docs/build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c2c9e40 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,24 @@ +ci: + autofix_prs: false + autoupdate_schedule: 'quarterly' +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-added-large-files + - id: detect-aws-credentials + args: [ --allow-missing-credentials ] + - id: detect-private-key + - id: mixed-line-ending + - id: trailing-whitespace + - id: no-commit-to-branch + args: [--branch, main] + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: 'v0.0.276' + hooks: + - id: ruff + 
args: [--fix] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..65c4e61 --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# SDS-access-lib + +This is a simple Python script that allows a user to upload, query, and download data from a Science Data System as set up by: https://github.com/IMAP-Science-Operations-Center/sds-data-manager + +## Uploading Data Example + +``` +>>> import sds_api +>>> response = sds_api.upload(local_file_location='helloworld.txt', remote_file_name='imap_l0_sci_mag_2024_2_ThisWillMakeThingsFail.pkts') +Could not generate an upload URL with the following error: "A pre-signed URL could not be generated. Please ensure that the file name matches mission file naming conventions." +>>> response = sds_api.upload(local_file_location='helloworld.txt', remote_file_name='imap_l0_sci_mag_2024_2.pkts') +``` + +## Querying Data Example +``` +>>> results = sds_api.query(instrument='imap-lo') +>>> print(results) +[] +>>> results = sds_api.query(instrument='mag') +>>> print(results) +[{'_index': 'metadata', '_type': '_doc', '_source': {'date': '2024', 'mission': 'imap', 'extension': 'pkts', 'level': 'l0', 'instrument': 'mag', 'type': 'sci', 'version': '2'}, '_id': 's3://sds-data-harter-upload-testing/imap/l0/imap_l0_sci_mag_2024_2.pkts', '_score': 0.18232156}, {'_index': 'metadata', '_type': '_doc', '_source': {'date': '2024', 'mission': 'imap', 'extension': 'pkts', 'level': 'l0', 'instrument': 'mag', 'type': 'sci', 'version': ''}, '_id': 's3://sds-data-harter-upload-testing/imap/l0/imap_l0_sci_mag_2024_.pkts', '_score': 0.18232156}] +``` + +## Downloading Data Example +``` +>>> response = sds_api.download(results[0]['_id']) +Downloading sds-data-harter-upload-testing/imap/l0/imap_l0_sci_mag_2024_2.pkts +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f2ccff6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,62 @@ +[project] +name = "sds-access-lib" +version = "0.1.0" 
+description = "IMAP Science Operations Center AWS data access"
+authors = [{name = "IMAP SDS Developers", email = "imap.sdc@lists.lasp.colorado.edu"}]
+readme = "README.md"
+license = {text = "MIT"}
+keywords = ["IMAP", "SDC", "SOC", "SDS", "Science Operations"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: SDS Users",
+    "License :: OSI Approved :: MIT License",
+    "Natural Language :: English",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Topic :: Software Development",
+    "Topic :: Scientific/Engineering",
+    "Operating System :: Microsoft :: Windows",
+    "Operating System :: POSIX",
+    "Operating System :: Unix",
+    "Operating System :: MacOS",
+]
+dependencies = [
+    "requests",
+]
+
+[project.urls]
+homepage = "https://github.com/IMAP-Science-Operations-Center"
+repository = "https://github.com/IMAP-Science-Operations-Center/sds-access-lib"
+
+
+[project.optional-dependencies]
+test = [
+    "black==21.9b0",
+    "pre-commit==2.15.0",
+    "mypy==0.910",
+    "pytest==6.2.5",
+    "pytest-cov==3.0.0",
+    "requests-mock",
+]
+
+[tool.pytest.ini_options]
+testpaths = [
+    "tests",
+]
+addopts = "-ra"
+markers = [
+    "network: Test that requires network access",
+]
+filterwarnings = [
+    "ignore::DeprecationWarning:importlib*",
+    "ignore::DeprecationWarning:jsii*",
+]
+
+[tool.ruff]
+target-version = "py39"
+select = ["B", "E", "F", "I", "N", "W", "PL", "PT", "UP", "RUF"]
+# Ignore import sorting for now until lines_after_imports is respected
+# by ruff and we can replace isort
+ignore = ["PLW0603"]
\ No newline at end of file
diff --git a/sds_api.py b/sds_api.py
new file mode 100644
index 0000000..1b95371
--- /dev/null
+++ b/sds_api.py
@@ -0,0 +1,237 @@
+import datetime
+import getpass
+import json
+import os
+
+import requests
+
+# CONFIGURATION
+
+# Enter in the URL of the API that we're testing.
+API_URL = ""  # ex - "https://api.prod.imap-mission.com"
+
+# When an authentication system is set up, these must be set to log in to the APIs
+AWS_REGION = ""  # ex - "us-west-2"
+COGNITO_CLIENT_ID = ""  # random string of letters assigned to the Cognito client
+
+# GLOBAL VARIABLES
+
+# These variables are set when a user is logged in.
+USER_TOKEN = None
+LOGIN_TIME = None
+
+# These variables are never changed
+EXPIRE_TIME = 3600
+STATUS_OK = 200
+STATUS_NOT_FOUND = 404
+STATUS_BAD_REQUEST = 400
+
+
+def _set_user_token(t):
+    """Store the access token and record the time of the login."""
+    global LOGIN_TIME
+    global USER_TOKEN
+
+    LOGIN_TIME = datetime.datetime.now()
+    USER_TOKEN = t
+
+
+def _get_user_token():
+    """
+    Return the stored access token, asking for a new login when it is missing
+    or older than EXPIRE_TIME seconds.
+
+    :return: The access token, or None if a required new login fails.
+    """
+    if LOGIN_TIME is None:
+        print("New login needed. Login is valid for 60 minutes.")
+    elif (datetime.datetime.now() - LOGIN_TIME).total_seconds() >= EXPIRE_TIME:
+        print("Login expired. Please log in again.")
+    else:
+        return USER_TOKEN
+
+    return get_sdc_token()
+
+
+def get_sdc_token(user_name=None, password=None):
+    """
+    This function authenticates the user. An access token is automatically stored in
+    the USER_TOKEN variable in this file, and functions will attempt to find a valid
+    user token in that variable.
+
+    :param user_name: User's SDC username
+    :param password: User's SDC password
+
+    :return: A string that also gets stored in the USER_TOKEN variable in this file.
+             You don't need this string unless you plan on making your own API calls,
+             using functions outside of this file.
+             None is returned if the login fails.
+    """
+
+    if user_name is None:
+        user_name = input("Username:")
+    if password is None:
+        password = getpass.getpass("Password for " + user_name + ":")
+
+    authentication_url = f"https://cognito-idp.{AWS_REGION}.amazonaws.com/"
+    authentication_headers = {
+        "X-Amz-Target": "AWSCognitoIdentityProviderService.InitiateAuth",
+        "Content-Type": "application/x-amz-json-1.1",
+    }
+    data = json.dumps(
+        {
+            "ClientId": COGNITO_CLIENT_ID,
+            "AuthFlow": "USER_PASSWORD_AUTH",
+            "AuthParameters": {"USERNAME": user_name, "PASSWORD": password},
+        }
+    )
+
+    # Attempt to grab the SDC token.
+    try:
+        token_response = requests.post(
+            authentication_url, data=data, headers=authentication_headers
+        )
+        response_body = token_response.json()
+    except (requests.exceptions.RequestException, ValueError) as e:
+        # The service could not be reached or did not answer with JSON
+        print(f"Could not log in due to error {e!s}")
+        return None
+
+    try:
+        t = response_body["AuthenticationResult"]["AccessToken"]
+    except KeyError:
+        # The answer carries no access token, so the login was rejected
+        print("Invalid username and/or password. Please try again. ")
+        return None
+
+    _set_user_token(t)
+
+    return t
+
+
+def _execute_api_get(endpoint, login, **kwargs):
+    """
+    Send a GET request to an SDS API endpoint.
+
+    :param endpoint: The name of the endpoint, appended to API_URL
+    :param login: If True, the user's access token is sent with the request
+    :param kwargs: The query parameters of the request
+
+    :return: The requests response object, or None if the request could not be sent
+    """
+    headers = {"Authorization": _get_user_token()} if login else {}
+    url = API_URL + "/" + endpoint
+    try:
+        # Let requests build the query string so that the values are URL-encoded
+        response = requests.get(url, params=kwargs, headers=headers)
+    except requests.exceptions.RequestException as e:
+        print(f"Could not finish query due to error {e!s}")
+        return None
+    print(response.url)
+    return response
+
+
+def download(filename, download_dir=".", login=False):
+    """
+    This function is used to download files from the SDS.
+
+    :param filename: The full S3 URI to download
+    :param download_dir: The directory on the local machine to download the file to.
+    :param login: If True, the user's access token is sent with the request
+
+    :return: The path to the downloaded file, or None if the download failed
+    """
+    endpoint = "download"
+    download_url = _execute_api_get(endpoint, login, s3_uri=filename)
+
+    if download_url is None:
+        # The request never completed; the error has already been printed
+        return None
+    if download_url.status_code == STATUS_BAD_REQUEST:
+        print("Not a valid S3 URI. Example input: s3://bucket/path/file.ext")
+        return None
+    if download_url.status_code == STATUS_NOT_FOUND:
+        print("No files were found matching the given URI.")
+        return None
+    if download_url.status_code != STATUS_OK:
+        print(
+            "Could not generate a download URL with the following error: "
+            + download_url.text
+        )
+        return None
+
+    # The bucket and key of the URI become the path below the download directory
+    file_name_and_path = os.path.join(download_dir, filename.removeprefix("s3://"))
+
+    # Fetch the contents before creating the local file, so that a failed
+    # transfer does not leave an empty file behind
+    print(f"Downloading {file_name_and_path}")
+    try:
+        file_location = requests.get(download_url.json()["download_url"])
+    except requests.exceptions.RequestException as e:
+        print(f"Could not download the file due to error {e!s}")
+        return None
+    if file_location.status_code != STATUS_OK:
+        print("Could not download the file: " + file_location.text)
+        return None
+
+    target_dir = os.path.dirname(file_name_and_path)
+    if target_dir:
+        os.makedirs(target_dir, exist_ok=True)
+
+    with open(file_name_and_path, "wb") as file:
+        file.write(file_location.content)
+
+    return file_name_and_path
+
+
+def query(login=False, **kwargs):
+    """
+    This function is used to query files from the SDS.
+    There are no required arguments, the search strings will depend on the mission
+
+    :param login: If True, the user's access token is sent with the request
+    :param kwargs: The search terms, sent as query parameters
+
+    :return: This returns JSON with all information about the files,
+             or None if the request could not be sent.
+    """
+    endpoint = "query"
+    response = _execute_api_get(endpoint, login, **kwargs)
+    if response is None:
+        return None
+    return response.json()
+
+
+def upload(local_file_location, remote_file_name, login=False, **kwargs):
+    """
+    This function is used to upload files to the SDS.
+
+    :param local_file_location: The full filename and path to the file on the
+                                local machine to upload to the SDS.
+    :param remote_file_name: The name of the file you'd like the uploaded file to be
+    :param login: If True, the user's access token is sent with the request
+    :param kwargs: Any additional key word arguments passed into this function
+                   are stored as tags on the SDS.
+
+    :return: This returns a requests response object.
+             If the upload was successful, it'll be code 200.
+             None is returned if no upload URL could be generated.
+    """
+    endpoint = "upload"
+    response = _execute_api_get(endpoint, login, filename=remote_file_name, **kwargs)
+
+    if response is None:
+        # The request never completed; the error has already been printed
+        return None
+    if response.status_code != STATUS_OK:
+        print(
+            "Could not generate an upload URL with the following error: "
+            + response.text
+        )
+        return None
+
+    with open(local_file_location, "rb") as object_file:
+        object_text = object_file.read()
+
+    return requests.put(response.json(), data=object_text)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 0000000..5a50066
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,68 @@
+import requests
+
+import sds_api
+
+
+def test_query(requests_mock, monkeypatch):
+    # Set up the fake API endpoint
+    monkeypatch.setattr(sds_api, "API_URL", "https://imap_sds_api.com")
+    requests_mock.get("https://imap_sds_api.com/query", json={"file": "imap.csv"})
+
+    # Query and ensure a match
+    assert {"file": "imap.csv"} == sds_api.query(login=False)
+
+
+def test_query_connection_error(requests_mock, monkeypatch):
+    # Set up a fake API endpoint that cannot be reached
+    monkeypatch.setattr(sds_api, "API_URL", "https://imap_sds_api.com")
+    requests_mock.get(
+        "https://imap_sds_api.com/query", exc=requests.exceptions.ConnectionError
+    )
+
+    # The failure is reported to the user instead of raising
+    assert sds_api.query(login=False) is None
+
+
+def test_download_success(requests_mock, monkeypatch, tmp_path):
+    # Set up the fake API endpoints
+    monkeypatch.setattr(sds_api, "API_URL", "https://imap_sds_api.com")
+    requests_mock.get(
+        "https://imap_sds_api.com/download",
+        json={"download_url": "https://big-s3-signed-url.com"},
+        status_code=200,
+    )
+    requests_mock.get("https://big-s3-signed-url.com", content=b"Hello World!")
+
+    # Download a file with the name "imap.txt", and contents of "Hello World!"
+    file_location = sds_api.download(
+        "s3://imap.txt", download_dir=str(tmp_path), login=False
+    )
+
+    assert file_location == str(tmp_path / "imap.txt")
+
+    # Read the downloaded file
+    with open(file_location) as f:
+        contents = f.read()
+
+    # assert the contents of the file are "Hello World!"
+    assert contents == "Hello World!"
+
+
+def test_download_fail(requests_mock, monkeypatch, tmp_path):
+    # Set up the fake API endpoints
+    monkeypatch.setattr(sds_api, "API_URL", "https://imap_sds_api.com")
+    requests_mock.get(
+        "https://imap_sds_api.com/download",
+        json={"download_url": "https://big-s3-signed-url.com"},
+        status_code=400,
+    )
+    requests_mock.get("https://big-s3-signed-url.com", content=b"Hello World!")
+
+    # Attempt to download a file that the API rejects
+    file_location = sds_api.download(
+        "s3://imap.txt", download_dir=str(tmp_path), login=False
+    )
+
+    # assert no file was created
+    assert file_location is None
+    assert not (tmp_path / "imap.txt").exists()