Skip to content

Commit

Permalink
Merge pull request #34 from ClimateCompatibleGrowth/development
Browse files Browse the repository at this point in the history
Improves handling of output ingest
  • Loading branch information
willu47 authored Feb 7, 2025
2 parents 5184ead + 1e81271 commit effc716
Show file tree
Hide file tree
Showing 13 changed files with 731 additions and 321 deletions.
37 changes: 23 additions & 14 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,49 @@ The package is not yet deployed to PyPI. Only an editable (development) install

1. Provide a list of DOIs in a CSV file format `list_of_dois.csv`
2. Clone the repository `git clone https://github.com/ClimateCompatibleGrowth/research_index_backend.git`
2. Change directory `cd research_index_backend`
2. Install the package `pip install -e .` as an editable package (development install)
3. Obtain an OpenAIRE Graph refresh token and create a .env file with the following parameters:
3. Change directory `cd research_index_backend`
4. Install the package `pip install -e .` as an editable package (development install)
5. Obtain an OpenAIRE Graph refresh token and create a .env file with the following parameters:
```
MG_HOST=
MG_PORT=
MG_PORT_ALT=
MG_USER=
MG_PASS=
ORCID_NAME_SIMILARITY_THRESHOLD=
NAME_SIMILARITY_THRESHOLD=
OPENAIRE_API="https://api.openaire.eu"
OPENAIRE_SERVICE="https://services.openaire.eu"
REFRESH_TOKEN=
```

4. Provision Memgraph graph database and set up environment variables
6. Provision Memgraph graph database and set up environment variables

Once the VM is up and running, SSH into the VM, download and install memgraph

$ curl -O https://download.memgraph.com/memgraph/v2.14.1/ubuntu-20.04/memgraph_2.14.1-1_amd64.deb
$ sudo dpkg -i ./memgraph_2.14.1-1_amd64.deb

5. Run the backend:

$ research_index --help
usage: research_index [-h] [--initialise INITIALISE] list_of_dois
7. Run the backend:

research_index --help
usage: research_index [-h] [-i] [-l LIMIT] [-u] [-w] list_of_dois
positional arguments:
list_of_dois Provide the path to CSV file containing a list of dois
list_of_dois Path to CSV file containing list of DOIs

options:
-h, --help show this help message and exit
--initialise INITIALISE
Deletes any existing data and creates a new database

$ research_index list_of_dois.csv --initialise
-h, --help Show this help message and exit
-i, --initialise Delete existing data and create new database
-l, --limit N Limit number of DOIs to process (default: 50)
-u, --update-metadata Update metadata for existing DOIs
-w, --write-metadata Save JSON responses to disk

Examples:
-> Process 10 DOIs from file:
$ research_index list_of_dois.csv -l 10 # Process 10 DOIs from file

-> Update metadata for existing DOIs and save metadata
$ research_index list_of_dois.csv --update-metadata --write-metadata

# Development

Expand Down
70 changes: 47 additions & 23 deletions src/research_index_backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ def __init__(self):
self.mg_host: str = os.getenv("MG_HOST", "127.0.0.1")
self.mg_port: int = int(os.getenv("MG_PORT", 7687))
self.mg_port_alt: int = int(os.getenv("MG_PORT_ALT", 7444))
self.mg_user: str = os.getenv("MG_USER")
self.mg_pass: str = os.getenv("MG_PASS")

self.orcid_name_similarity_threshold: float = float(
os.getenv("ORCID_NAME_SIMILARITY_THRESHOLD", 0.8)
Expand All @@ -30,22 +32,28 @@ def __init__(self):
)

self.openaire_token_endpoint = f"{self.openaire_service}/uoa-user-management/api/users/getAccessToken"
self.refresh_token: str = ""
self.token = None

@property
def refresh_token(self):
return os.getenv("REFRESH_TOKEN", None)
self._refresh_token: str = ""
self._token: str = ""
self._validate()

@property
def token(self):
if self.token:
return self.token
@property
def refresh_token(self):
if self._refresh_token:
return self._refresh_token
else:
self._refresh_token = os.getenv("REFRESH_TOKEN", None)
if self._refresh_token:
return self._refresh_token
else:
self.token = self._get_personal_token()
return self.token
raise ValueError("No refresh token provided")

self._validate()
@property
def token(self):
if self._token:
return self._token
else:
self._token = self._get_personal_token()
return self._token

def _validate(self):
if not 0 <= self.orcid_name_similarity_threshold <= 1:
Expand All @@ -62,17 +70,33 @@ def _get_personal_token(self) -> str:
if refresh_token := os.getenv("REFRESH_TOKEN"):
logger.info("Found refresh token. Obtaining personal token.")
query = f"?refreshToken={refresh_token}"
response = requests.get(self.openaire_token_endpoint + query)
logger.info(f"Status code: {response.status_code}")
try:
response_json = response.json()
logger.debug(response_json)
return response_json["access_token"]
except requests.JSONDecodeError as e:
logger.error(f"Error decoding JSON response: {e}")
raise ValueError(
"Failed to obtain personal token due to JSON decode error"
)
response = requests.get(self.openaire_token_endpoint + query)
logger.info(f"Status code: {response.status_code}")
response.raise_for_status()
except requests.exceptions.HTTPError:
if 400 <= response.status_code < 500:
raise ValueError(
"OpenAire refresh token is invalid or expired. Please update token and try again."
)
elif 500 <= response.status_code < 600:
raise
else:
raise
else:
try:
response_json = response.json()
logger.debug(response_json)
return response_json["access_token"]
except requests.JSONDecodeError as e:
logger.error(f"Error decoding JSON response: {e}")
raise ValueError(
"Failed to obtain personal token due to JSON decode error"
)
except Exception as e:
msg = str(e)
logger.error(f"{msg}")
raise
else:
raise ValueError(
"No refresh token found, could not obtain personal token"
Expand Down
Loading

0 comments on commit effc716

Please sign in to comment.