Skip to content

Commit

Permalink
add hra fetch script
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesamcl committed Jun 11, 2024
1 parent 8598f95 commit 0ebbb51
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 0 deletions.
2 changes: 2 additions & 0 deletions 00_fetch_data/hra_kg/blazegraph.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Location of the Blazegraph journal file. /data is the directory that fetch.sh
# bind-mounts into the container after downloading blazegraph.jnl into it.
com.bigdata.journal.AbstractJournal.file=/data/blazegraph.jnl

39 changes: 39 additions & 0 deletions 00_fetch_data/hra_kg/dump.dockerpy
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env python3

import requests
import os

def dump_named_graphs(blazegraph_url):
    """Dump every named graph of a SPARQL endpoint to stdout as N-Quads.

    The endpoint is first asked for the distinct graph names. For each graph,
    all of its triples are fetched with a CONSTRUCT query (requesting
    ``text/x-nquads``) and every returned statement is re-emitted with the
    graph IRI appended as the fourth term.

    Only N-Quads statements are written to stdout, so the output can be piped
    straight into a file or compressor. Failure messages go to stderr.

    Args:
        blazegraph_url: URL of the SPARQL endpoint to query.

    Returns:
        None. A graph whose fetch fails is reported on stderr and skipped;
        if the graph listing itself fails nothing is dumped.
    """
    # Local import: the file's top-level imports do not include sys.
    import sys

    list_graphs_query = """
    SELECT DISTINCT ?graph WHERE {
        GRAPH ?graph {?s ?p ?o}
    }
    """
    response = requests.get(
        blazegraph_url,
        params={'query': list_graphs_query},
        headers={'Accept': 'application/json'},
    )
    if response.status_code != 200:
        print("Failed to fetch named graphs", file=sys.stderr)
        return

    bindings = response.json()['results']['bindings']
    named_graphs = [binding['graph']['value'] for binding in bindings]

    for named_graph in named_graphs:
        construct_query = (
            f"CONSTRUCT {{ ?s ?p ?o }} WHERE {{ GRAPH <{named_graph}> {{ ?s ?p ?o }} }}"
        )
        response = requests.get(
            blazegraph_url,
            params={'query': construct_query},
            headers={'Accept': 'text/x-nquads'},
        )
        if response.status_code != 200:
            # stderr, not stdout: stdout is the data stream being dumped.
            print(f"Failed to fetch data for named graph '{named_graph}'", file=sys.stderr)
            continue

        # N-Quads is UTF-8 by specification. Without this, requests may fall
        # back to ISO-8859-1 for text/* responses that carry no charset.
        response.encoding = 'utf-8'

        # Split on '\n' only: str.splitlines() would also break on characters
        # that may legally appear unescaped inside a literal.
        for raw_line in response.text.split('\n'):
            statement = raw_line.strip()  # also drops a trailing '\r'
            if statement.endswith('.'):
                # Remove only the statement terminator so the graph term can
                # be inserted in front of a fresh one.
                statement = statement[:-1].rstrip()
            if not statement:
                continue
            print(f"{statement} <{named_graph}> .")

if __name__ == "__main__":
    # Defaults to the Blazegraph instance on localhost; set BLAZEGRAPH_URL to
    # dump from a different SPARQL endpoint without editing this script.
    blazegraph_url = os.environ.get(
        "BLAZEGRAPH_URL", "http://127.0.0.1:8080/blazegraph/sparql"
    )
    dump_named_graphs(blazegraph_url)

13 changes: 13 additions & 0 deletions 00_fetch_data/hra_kg/entrypoint.dockersh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#
# Container entrypoint: installs Python and requests, starts Blazegraph in the
# background, then dumps all named graphs to /data/hra.nq.gz.

# Fail fast. Without pipefail a crashed dump script would be masked by gzip's
# exit status and leave a truncated or empty archive that looks successful.
set -euo pipefail

# apt-get rather than apt: apt's CLI is not intended for use in scripts.
apt-get update
apt-get install -y python3 python3-pip
pip3 install requests

# Run the image's own entrypoint in the background so this script can query it.
/blazegraph/entrypoint.sh &
# Fixed startup grace period before querying the endpoint.
# NOTE(review): presumably long enough for the server to come up; polling the
# endpoint would be more reliable on slow hosts.
sleep 15

python3 /data/dump.dockerpy | gzip > /data/hra.nq.gz

# Flush the archive to disk before the container exits.
sync

7 changes: 7 additions & 0 deletions 00_fetch_data/hra_kg/fetch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Downloads the HRA Blazegraph journal and runs the dump inside the Blazegraph
# container. Run from the directory that contains entrypoint.dockersh: the
# current directory is mounted as /data and the container's entrypoint is
# /data/entrypoint.dockersh.

# Abort on the first failure (e.g. a failed download) instead of running the
# container against a missing or stale journal.
set -euo pipefail

# Remove the previous local copy. The old command passed the URL to rm, which
# deleted nothing, so wget saved the new download as blazegraph.jnl.1 and the
# stale journal kept being used.
rm -f blazegraph.jnl
wget -O blazegraph.jnl https://cdn.humanatlas.io/digital-objects/blazegraph.jnl

# Quote the mount source so working directories containing spaces work.
docker run --entrypoint /data/entrypoint.dockersh -v "$(pwd)":/data ghcr.io/ebispot/blazegraph-docker:2.1.5

0 comments on commit 0ebbb51

Please sign in to comment.