This repository has been archived by the owner on Jan 30, 2024. It is now read-only.

Initializing the Repository (#4)
Initial commit of the repo
bryan-harter committed Aug 10, 2023
1 parent 6be939a commit 002949e
Showing 8 changed files with 424 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,27 @@
name: Tests

on:
  push:
    branches:
      - main
      - dev
jobs:
  unit-tests:
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Install the app
        run: |
          python -m pip install .[test]
      - name: Testing
        id: test
        run: |
          # Ignore the network marks from the remote test environment
          python -m pytest --color=yes -m "not network"
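The `-m "not network"` filter in the workflow deselects any test carrying the `network` marker (registered below in `pyproject.toml`). A minimal sketch of how such tests might look — the test names and bodies here are hypothetical, not part of the repository:

```python
import pytest


def test_parse_filename():
    # A hypothetical fast unit test; always runs, including in CI.
    assert "imap_l0_sci_mag_2024_2.pkts".split("_")[0] == "imap"


@pytest.mark.network
def test_query_live_api():
    # A hypothetical test that would hit the real API;
    # deselected in CI by `pytest -m "not network"`.
    pass
```

With this layout, `pytest -m network` runs only the live-API tests, and the CI invocation above runs everything else.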
43 changes: 43 additions & 0 deletions .gitignore
@@ -0,0 +1,43 @@
*.swp
package-lock.json
__pycache__
.pytest_cache
.venv
.env
*.egg-info
.aws
external
.idea/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# CDK asset staging directory
.cdk.staging
cdk.out
aws
awscliv2.zip
cdk.context.json

# Individual development CDK apps
app*dev.py

# Sphinx documentation
docs/build/
24 changes: 24 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,24 @@
ci:
  autofix_prs: false
  autoupdate_schedule: 'quarterly'
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-added-large-files
      - id: detect-aws-credentials
        args: [--allow-missing-credentials]
      - id: detect-private-key
      - id: mixed-line-ending
      - id: trailing-whitespace
      - id: no-commit-to-branch
        args: [--branch, main]
  - repo: https://github.com/psf/black
    rev: 23.3.0
    hooks:
      - id: black
  - repo: https://github.com/charliermarsh/ruff-pre-commit
    rev: 'v0.0.276'
    hooks:
      - id: ruff
        args: [--fix]
28 changes: 28 additions & 0 deletions README.md
@@ -0,0 +1,28 @@
# SDS-access-lib

This is a simple Python script that allows a user to upload, query, and download data from a Science Data System set up via: https://github.com/IMAP-Science-Operations-Center/sds-data-manager

## Uploading Data Example

```
>>> import sds_api
>>> response = sds_api.upload(local_file_location='helloworld.txt', remote_file_name='imap_l0_sci_mag_2024_2_ThisWillMakeThingsFail.pkts')
Could not generate an upload URL with the following error: "A pre-signed URL could not be generated. Please ensure that the file name matches mission file naming conventions."
>>> response = sds_api.upload(local_file_location='helloworld.txt', remote_file_name='imap_l0_sci_mag_2024_2.pkts')
```

## Querying Data Example
```
>>> results = sds_api.query(instrument='imap-lo')
>>> print(results)
[]
>>> results = sds_api.query(instrument='mag')
>>> print(results)
[{'_index': 'metadata', '_type': '_doc', '_source': {'date': '2024', 'mission': 'imap', 'extension': 'pkts', 'level': 'l0', 'instrument': 'mag', 'type': 'sci', 'version': '2'}, '_id': 's3://sds-data-harter-upload-testing/imap/l0/imap_l0_sci_mag_2024_2.pkts', '_score': 0.18232156}, {'_index': 'metadata', '_type': '_doc', '_source': {'date': '2024', 'mission': 'imap', 'extension': 'pkts', 'level': 'l0', 'instrument': 'mag', 'type': 'sci', 'version': ''}, '_id': 's3://sds-data-harter-upload-testing/imap/l0/imap_l0_sci_mag_2024_.pkts', '_score': 0.18232156}]
```

## Downloading Data Example
```
>>> response = sds_api.download(results[0]['_id'])
Downloading sds-data-harter-upload-testing/imap/l0/imap_l0_sci_mag_2024_2.pkts
```
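Since query results are plain dictionaries, they can also be filtered client-side before downloading. A minimal sketch using the shape of the results shown above (the bucket name and values here are shortened, hypothetical stand-ins):

```python
# Results mimic the shape returned by sds_api.query (hypothetical values).
results = [
    {"_id": "s3://sds-data/imap/l0/imap_l0_sci_mag_2024_2.pkts",
     "_source": {"instrument": "mag", "version": "2"}},
    {"_id": "s3://sds-data/imap/l0/imap_l0_sci_mag_2024_.pkts",
     "_source": {"instrument": "mag", "version": ""}},
]

# Keep only entries that actually carry a version number,
# then pass each surviving _id to sds_api.download.
to_download = [r["_id"] for r in results if r["_source"]["version"]]
```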
62 changes: 62 additions & 0 deletions pyproject.toml
@@ -0,0 +1,62 @@
[project]
name = "sds-access-lib"
version = "0.1.0"
description = "IMAP Science Operations Center AWS data access"
authors = [{name = "IMAP SDS Developers", email = "[email protected]"}]
readme = "README.md"
license = {text = "MIT"}
keywords = ["IMAP", "SDC", "SOC", "SDS", "Science Operations"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: SDS Users",
    "License :: OSI Approved :: MIT License",
    "Natural Language :: English",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Topic :: Software Development",
    "Topic :: Scientific/Engineering",
    "Operating System :: Microsoft :: Windows",
    "Operating System :: POSIX",
    "Operating System :: Unix",
    "Operating System :: MacOS",
]
dependencies = [
    "requests",
]

[project.urls]
homepage = "https://github.com/IMAP-Science-Operations-Center"
repository = "https://github.com/IMAP-Science-Operations-Center/sds-access-lib"


[project.optional-dependencies]
test = [
    "black==21.9b0",
    "pre-commit==2.15.0",
    "mypy==0.910",
    "pytest==6.2.5",
    "pytest-cov==3.0.0",
    "requests-mock",
]

[tool.pytest.ini_options]
testpaths = [
    "tests",
]
addopts = "-ra"
markers = [
    "network: Test that requires network access",
]
filterwarnings = [
    "ignore::DeprecationWarning:importlib*",
    "ignore::DeprecationWarning:jsii*",
]

[tool.ruff]
target-version = "py39"
select = ["B", "E", "F", "I", "N", "W", "PL", "PT", "UP", "RUF"]
# Ignore import sorting for now until lines_after_imports is respected
# by ruff and we can replace isort
ignore = ["PLW0603"]
190 changes: 190 additions & 0 deletions sds_api.py
@@ -0,0 +1,190 @@
import datetime
import json
import os

import requests

# CONFIGURATION

# Enter the URL of the API under test.
API_URL = ""  # ex - "https://api.prod.imap-mission.com"

# When an authentication system is set up, these must be set to log in to the APIs
AWS_REGION = ""  # ex - "us-west-2"
COGNITO_CLIENT_ID = ""  # random string of letters assigned to the Cognito client

# GLOBAL VARIABLES

# These variables are set when a user is logged in.
USER_TOKEN = None
LOGIN_TIME = None

# These variables are never changed
EXPIRE_TIME = 3600
STATUS_OK = 200
STATUS_NOT_FOUND = 404
STATUS_BAD_REQUEST = 400


def _set_user_token(t):
    global LOGIN_TIME
    global USER_TOKEN

    LOGIN_TIME = datetime.datetime.now()
    USER_TOKEN = t


def _get_user_token():
    if LOGIN_TIME is None:
        print("New login needed. Login is valid for 60 minutes.")
    elif (datetime.datetime.now() - LOGIN_TIME).total_seconds() >= EXPIRE_TIME:
        print("Login expired. Please log in again.")
    else:
        return USER_TOKEN

    t = get_sdc_token()

    return t


def get_sdc_token(user_name=None, password=None):
    """
    This function authenticates the user. An access token is automatically stored in
    the USER_TOKEN variable in this file, and functions will attempt to find a valid
    user token in that variable.

    :param user_name: User's SDC username
    :param password: User's SDC password
    :return: A string that also gets stored in the USER_TOKEN variable in this file.
        You don't need this string unless you plan on making your own API calls
        using functions outside of this file.
    """
    if user_name is None:
        user_name = input("Username:")
    if password is None:
        import getpass

        password = getpass.getpass("Password for " + user_name + ":")

    authentication_url = f"https://cognito-idp.{AWS_REGION}.amazonaws.com/"
    authentication_headers = {
        "X-Amz-Target": "AWSCognitoIdentityProviderService.InitiateAuth",
        "Content-Type": "application/x-amz-json-1.1",
    }
    data = json.dumps(
        {
            "ClientId": COGNITO_CLIENT_ID,
            "AuthFlow": "USER_PASSWORD_AUTH",
            "AuthParameters": {"USERNAME": user_name, "PASSWORD": password},
        }
    )

    # Attempt to grab the SDC token.
    try:
        token_response = requests.post(
            authentication_url, data=data, headers=authentication_headers
        )
        t = token_response.json()["AuthenticationResult"]["AccessToken"]
    except KeyError:
        print("Invalid username and/or password. Please try again.")
        return None

    _set_user_token(t)

    return t


def _execute_api_get(endpoint, login, **kwargs):
    if login:
        token = _get_user_token()
        headers = {"Authorization": token}
    else:
        headers = {}
    # Build the query string from the keyword arguments: key=value pairs joined by "&".
    query_parameters = []
    for kw in kwargs:
        query_parameters.append(kw + "=" + str(kwargs[kw]))
    query_parameters = "&".join(query_parameters)
    url_with_parameters = API_URL + "/" + endpoint + "?" + query_parameters
    print(url_with_parameters)
    try:
        response = requests.get(url_with_parameters, headers=headers)
    except Exception as e:
        print(f"Could not finish query due to error {e!s}")
        return None
    return response


def download(filename, download_dir=".", login=False):
    """
    This function is used to download files from the SDS.

    :param filename: The full S3 URI to download
    :param download_dir: The directory on the local machine to download the file to.
    :param login: If True, authenticate the request with the user's SDC token.
    :return: The path to the downloaded file, or None if the download failed.
    """
    endpoint = "download"
    download_url = _execute_api_get(endpoint, login, s3_uri=filename)

    if download_url is None:
        return None
    if download_url.status_code == STATUS_BAD_REQUEST:
        print("Not a valid S3 URI. Example input: s3://bucket/path/file.ext")
        return None
    elif download_url.status_code == STATUS_NOT_FOUND:
        print("No files were found matching the given URI.")
        return None

    # Strip the "s3://" prefix and mirror the bucket/key layout locally.
    file_name_and_path = os.path.join(download_dir, filename[5:])
    download_dir = os.path.dirname(file_name_and_path)
    if not os.path.exists(download_dir):
        os.makedirs(download_dir)

    with open(file_name_and_path, "wb") as file:
        print(f"Downloading {file_name_and_path}")
        file_location = requests.get(download_url.json()["download_url"])
        file.write(file_location.content)

    return file_name_and_path


def query(login=False, **kwargs):
    """
    This function is used to query files from the SDS.
    There are no required arguments; the available search strings depend on the mission.

    :param login: If True, authenticate the request with the user's SDC token.
    :return: JSON with all information about the matching files.
    """
    endpoint = "query"
    response = _execute_api_get(endpoint, login, **kwargs)
    if response is None:
        return None
    return response.json()


def upload(local_file_location, remote_file_name, login=False, **kwargs):
    """
    This function is used to upload files to the SDS.

    :param local_file_location: The full filename and path to the file on the
        local machine to upload to the SDS.
    :param remote_file_name: The name you'd like the uploaded file to have on the SDS.
    :param login: If True, authenticate the request with the user's SDC token.
    :param kwargs: Any additional keyword arguments passed into this function
        are stored as tags on the SDS.
    :return: A requests response object.
        If the upload was successful, the status code will be 200.
    """
    endpoint = "upload"
    response = _execute_api_get(endpoint, login, filename=remote_file_name, **kwargs)

    if response is None:
        return None
    if response.status_code != STATUS_OK:
        print(
            "Could not generate an upload URL with the following error: "
            + response.text
        )
        return None

    with open(local_file_location, "rb") as object_file:
        object_text = object_file.read()

    # The API returns a pre-signed URL; PUT the file contents to it.
    response = requests.put(response.json(), data=object_text)
    return response
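Every public function above funnels through `_execute_api_get`, which turns keyword arguments into a query string. A standalone sketch of that URL assembly (the `API_URL` value and `build_url` helper here are hypothetical stand-ins, not part of the module):

```python
API_URL = "https://api.example.com"  # hypothetical placeholder, not a real endpoint


def build_url(endpoint, **kwargs):
    # Mirror _execute_api_get's join: "key=value" pairs separated by "&".
    query_parameters = "&".join(f"{kw}={value}" for kw, value in kwargs.items())
    return API_URL + "/" + endpoint + "?" + query_parameters


url = build_url("query", instrument="mag", level="l0")
# → "https://api.example.com/query?instrument=mag&level=l0"
```

Note that this simple join does not URL-encode values; callers would need to avoid characters like spaces or "&" in search strings.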
Empty file added tests/__init__.py
Empty file.