From 8fb5898df6264490bea4dc97405712051a16c297 Mon Sep 17 00:00:00 2001 From: fnielsen Date: Tue, 18 Dec 2018 12:17:30 +0100 Subject: [PATCH 01/47] Make queries transitive on sponsor Queries are now transitive on sponsor so the works sponsored by projects that are sponsored by other sponsors show up on the top sponsor. Also changed link prefixes to directly choose the appropriate aspect for all items in the tables. --- scholia/app/templates/sponsor.html | 41 +++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/scholia/app/templates/sponsor.html b/scholia/app/templates/sponsor.html index 84b264623..efc09c104 100644 --- a/scholia/app/templates/sponsor.html +++ b/scholia/app/templates/sponsor.html @@ -7,9 +7,10 @@ recentlyPublishedSponsoredWorkSparql = ` SELECT ?publication_date ?number_of_citations ?work ?workLabel ?quote WITH { - SELECT (MIN(?date) AS ?publication_date) (COUNT(?citing_work) as ?number_of_citations) ?work (SAMPLE(?quote_) AS ?quote) WHERE { - ?work p:P859 ?sponsor_statement . - ?sponsor_statement ps:P859 wd:{{ q }} . + SELECT (MIN(?date) AS ?publication_date) (COUNT(?citing_work) as ?number_of_citations) ?work (SAMPLE(?quote_) AS ?quote) + WHERE { + ?work p:P859+ ?sponsor_statement . + ?sponsor_statement ps:P859+ wd:{{ q }} . OPTIONAL { ?sponsor_statement prov:wasDerivedFrom/pr:P1683 ?quote_ . } OPTIONAL { ?work wdt:P577 ?datetime . @@ -32,7 +33,7 @@ WITH { SELECT ?author (count(?work) as ?publication_count) (SAMPLE(?work) AS ?example_work) WHERE { - ?work wdt:P859 wd:{{ q }} . + ?work wdt:P859+ wd:{{ q }} . ?work wdt:P50 ?author . } GROUP BY ?author @@ -48,9 +49,10 @@ # Co-founders including an example funded work SELECT ?count ?sponsor ?sponsorLabel ?example_work ?example_workLabel WITH { - SELECT (COUNT(?work) AS ?count) ?sponsor (SAMPLE(?work) AS ?example_work) WHERE { - ?work wdt:P859 wd:{{ q }} . - ?work wdt:P859 ?sponsor . 
+ SELECT (COUNT(?work) AS ?count) ?sponsor (SAMPLE(?work) AS ?example_work) + WHERE { + ?work wdt:P859+ wd:{{ q }} . + ?work wdt:P859+ ?sponsor . FILTER (wd:{{ q }} != ?sponsor) } GROUP BY ?sponsor @@ -64,10 +66,25 @@ ` $(document).ready(function() { - sparqlToDataTable(recentlyPublishedSponsoredWorkSparql, - "#recently-published-sponsored-work"); - sparqlToDataTable(authorsOnSponsoredWorkSparql, "#authors-on-sponsored-work"); - sparqlToDataTable(coSponsorsSparql, "#co-sponsors"); + sparqlToDataTable( + recentlyPublishedSponsoredWorkSparql, + "#recently-published-sponsored-work", { + linkPrefixes: { + work: "../work/", + } + }); + sparqlToDataTable(authorsOnSponsoredWorkSparql, "#authors-on-sponsored-work", { + linkPrefixes: { + author: "../author/", + example_work: "../work/", + }, + }); + sparqlToDataTable(coSponsorsSparql, "#co-sponsors", { + linkPrefixes: { + sponsor: "./", + example_work: "../work/", + }, + }); }); @@ -96,7 +113,7 @@
- +
From 4a37610dd08ec78ec98379e010f0caed0bcaa51e Mon Sep 17 00:00:00 2001 From: Egon Willighagen Date: Sun, 23 Dec 2018 15:54:22 +0100 Subject: [PATCH 02/47] No duplicate topics --- scholia/app/templates/chemical.html | 2 +- scholia/app/templates/chemical_class.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scholia/app/templates/chemical.html b/scholia/app/templates/chemical.html index 6c4cb971b..7c072815e 100644 --- a/scholia/app/templates/chemical.html +++ b/scholia/app/templates/chemical.html @@ -90,7 +90,7 @@ } } AS %works WITH { - SELECT (MAX(?dates) as ?datetime) ?work (GROUP_CONCAT(DISTINCT ?type_label; separator=", ") AS ?type) (GROUP_CONCAT(?topic_label; separator=" // ") AS ?topics) WHERE { + SELECT (MAX(?dates) as ?datetime) ?work (GROUP_CONCAT(DISTINCT ?type_label; separator=", ") AS ?type) (GROUP_CONCAT(DISTINCT ?topic_label; separator=" // ") AS ?topics) WHERE { INCLUDE %works ?work wdt:P921 ?topic . OPTIONAL { ?work wdt:P31 ?type_ . ?type_ rdfs:label ?type_label . FILTER (LANG(?type_label) = 'en') } diff --git a/scholia/app/templates/chemical_class.html b/scholia/app/templates/chemical_class.html index ee068d3ed..7cf344212 100644 --- a/scholia/app/templates/chemical_class.html +++ b/scholia/app/templates/chemical_class.html @@ -75,7 +75,7 @@ } } AS %works WITH { - SELECT (MAX(?dates) as ?datetime) ?work (GROUP_CONCAT(DISTINCT ?type_label; separator=", ") AS ?type) (GROUP_CONCAT(?topic_label; separator=" // ") AS ?topics) WHERE { + SELECT (MAX(?dates) as ?datetime) ?work (GROUP_CONCAT(DISTINCT ?type_label; separator=", ") AS ?type) (GROUP_CONCAT(DISTINCT ?topic_label; separator=" // ") AS ?topics) WHERE { INCLUDE %works ?work wdt:P921 ?topic . OPTIONAL { ?work wdt:P31 ?type_ . ?type_ rdfs:label ?type_label . 
FILTER (LANG(?type_label) = 'en') } From 32f6308e298f00ee9575e03bd0029e5ab0b54656 Mon Sep 17 00:00:00 2001 From: Egon Willighagen Date: Sun, 23 Dec 2018 17:04:21 +0100 Subject: [PATCH 03/47] Recognize chemicals too (when they have useful info for that aspect) --- scholia/app/templates/base.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scholia/app/templates/base.html b/scholia/app/templates/base.html index dbcced3a6..35f34157f 100644 --- a/scholia/app/templates/base.html +++ b/scholia/app/templates/base.html @@ -132,6 +132,10 @@ UNION { [] wdt:P1433 wd:{{ q }} . BIND("venue" AS ?aspect) } UNION + { wd:{{ q }} wdt:P235 [] . BIND("chemical" AS ?aspect) } + UNION + { [] wdt:P279 | wdt:P31 wd:{{ q }} . BIND("chemical" AS ?aspect) } + UNION { wd:{{ q }} wdt:P50 | wdt:P2093 [] . BIND("work" AS ?aspect) } } `; From 66c232297ad032a7e80af10a6d584068a4bd8ad3 Mon Sep 17 00:00:00 2001 From: Egon Willighagen Date: Sun, 23 Dec 2018 19:05:56 +0100 Subject: [PATCH 04/47] Added a redirect based on a PubMed identifier --- scholia/app/templates/work_empty.html | 6 ++++ scholia/app/views.py | 19 +++++++++++- scholia/query.py | 43 ++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/scholia/app/templates/work_empty.html b/scholia/app/templates/work_empty.html index 9b0070344..6a21b5e6c 100644 --- a/scholia/app/templates/work_empty.html +++ b/scholia/app/templates/work_empty.html @@ -62,6 +62,12 @@

Redirects

Redirect based on DOI.
+
+
+ pubmed/29029422 +
+
Redirect based on PubMed identifier.
+
diff --git a/scholia/app/views.py b/scholia/app/views.py index 784e64329..2865ab836 100644 --- a/scholia/app/views.py +++ b/scholia/app/views.py @@ -14,7 +14,7 @@ from ..query import (arxiv_to_qs, cas_to_qs, doi_to_qs, github_to_qs, inchikey_to_qs, issn_to_qs, orcid_to_qs, viaf_to_qs, q_to_class, random_author, twitter_to_qs, - cordis_to_qs, mesh_to_qs) + cordis_to_qs, mesh_to_qs, pubmed_to_qs) from ..utils import sanitize_q from ..wikipedia import q_to_bibliography_templates @@ -747,6 +747,23 @@ def redirect_orcid(orcid): return render_template('404.html') +@main.route('/pubmed/') +def redirect_pubmed(pmid): + """Detect and redirect for PubMed identifiers. + + Parameters + ---------- + pmid : str + PubMed identifier. + + """ + qs = pubmed_to_qs(pmid) + if len(qs) > 0: + q = qs[0] + return redirect(url_for('app.show_work', q=q), code=302) + return render_template('404.html') + + @main.route('/viaf/') def redirect_viaf(viaf): """Detect and redirect for VIAF identifiers. diff --git a/scholia/query.py b/scholia/query.py index 040a3950c..282cf5121 100644 --- a/scholia/query.py +++ b/scholia/query.py @@ -12,11 +12,12 @@ scholia.query issn-to-q scholia.query mesh-to-q scholia.query orcid-to-q + scholia.query pubmed-to-q scholia.query q-to-label - scholia.query viaf-to-q scholia.query q-to-class scholia.query random-author scholia.query twitter-to-q + scholia.query viaf-to-q scholia.query website-to-q Examples @@ -220,6 +221,41 @@ def doi_to_qs(doi): for item in data['results']['bindings']] +def pubmed_to_qs(pmid): + """Convert a PubMed identifier to Wikidata ID. + + Wikidata Query Service is used to resolve the PubMed identifier. + + The PubMed identifier string is converted to uppercase before any + query is made. + + Parameters + ---------- + pmid : str + PubMed identifier + + Returns + ------- + qs : list of str + List of strings with Wikidata IDs. 
+ + Examples + -------- + >>> pubmed_to_qs('29029422') == ['Q42371516'] + True + + """ + query = 'select ?work where {{ ?work wdt:P698 "{pmid}" }}'.format(pmid=pmid) + + url = 'https://query.wikidata.org/sparql' + params = {'query': query, 'format': 'json'} + response = requests.get(url, params=params, headers=HEADERS) + data = response.json() + + return [item['work']['value'][31:] + for item in data['results']['bindings']] + + def issn_to_qs(issn): """Convert ISSN to Wikidata ID. @@ -968,6 +1004,11 @@ def main(): if len(qs) > 0: print(qs[0]) + elif arguments['pubmed-to-q']: + qs = pubmed_to_qs(arguments['']) + if len(qs) > 0: + print(qs[0]) + elif arguments['viaf-to-q']: qs = viaf_to_qs(arguments['']) if len(qs) > 0: From 01295cbe26fb5df074a15561e50691ceff4e7e85 Mon Sep 17 00:00:00 2001 From: Egon Willighagen Date: Sun, 23 Dec 2018 19:41:52 +0100 Subject: [PATCH 05/47] Fixed linking out to JSmol for some InChIs (fixes #391) --- scholia/app/templates/chemical.html | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scholia/app/templates/chemical.html b/scholia/app/templates/chemical.html index 6c4cb971b..a05399652 100644 --- a/scholia/app/templates/chemical.html +++ b/scholia/app/templates/chemical.html @@ -16,8 +16,18 @@ } } LIMIT 500 } AS %RESULTS { - INCLUDE %RESULTS - BIND(IRI(REPLACE(?formatterurl, '\\\\$1', str(?id))) AS ?idUrls). + { SELECT * WHERE { + INCLUDE %RESULTS + FILTER (?IDpred = wd:P234) + BIND(IRI(REPLACE(?formatterurl, '\\$1', REPLACE(REPLACE(?id, "\\+", '%2B'), '/', '%2F'))) AS ?idUrls). + } + } UNION { + SELECT * WHERE { + INCLUDE %RESULTS + FILTER (?IDpred != wd:P234) + BIND(IRI(REPLACE(?formatterurl, '\\$1', str(?id))) AS ?idUrls). + } + } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} } GROUP BY ?IDpred ?IDpredLabel ?id ORDER BY ASC(?IDpredLabel) From 25afbec1e3cc933ae9d44adb9e007e70edbdfe60 Mon Sep 17 00:00:00 2001 From: Egon Willighagen Date: Mon, 24 Dec 2018 08:15:38 +0100 Subject: [PATCH 06/47] Implemented an RSS feed for recent papers funded by some sponsor (implements #342) --- scholia/app/templates/sponsor.html | 2 +- scholia/app/views.py | 25 +++++++++- scholia/rss.py | 78 ++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 2 deletions(-) diff --git a/scholia/app/templates/sponsor.html b/scholia/app/templates/sponsor.html index efc09c104..fb0db8551 100644 --- a/scholia/app/templates/sponsor.html +++ b/scholia/app/templates/sponsor.html @@ -100,7 +100,7 @@

Sponsor

- +
diff --git a/scholia/app/views.py b/scholia/app/views.py index 784e64329..98471fa4f 100644 --- a/scholia/app/views.py +++ b/scholia/app/views.py @@ -8,7 +8,8 @@ from ..api import entity_to_name, entity_to_smiles, search, wb_get_entities from ..rss import (wb_get_author_latest_works, wb_get_venue_latest_works, - wb_get_topic_latest_works, wb_get_organization_latest_works) + wb_get_topic_latest_works, wb_get_organization_latest_works, + wb_get_sponsor_latest_works) from ..arxiv import metadata_to_quickstatements, string_to_arxiv from ..arxiv import get_metadata as get_arxiv_metadata from ..query import (arxiv_to_qs, cas_to_qs, doi_to_qs, github_to_qs, @@ -1472,6 +1473,28 @@ def show_sponsor_empty(): return render_template('sponsor_empty.html') +@main.route('/sponsor/' + q_pattern + '/latest-works/rss') +def show_sponsor_rss(q): + """Return a RSS feed for specific sponsor. + + Parameters + ---------- + q : str + Wikidata item identifier. + + Returns + ------- + rss : str + RSS feed. + + """ + response_body = wb_get_sponsor_latest_works(q) + response = Response(response=response_body, + status=200, mimetype="application/rss+xml") + response.headers["Content-Type"] = "text/xml; charset=utf-8" + return response + + @main.route('/use/' + q_pattern) def show_use(q): """Return HTML rendering for specific use. diff --git a/scholia/rss.py b/scholia/rss.py index f664debe0..a3a505025 100644 --- a/scholia/rss.py +++ b/scholia/rss.py @@ -5,6 +5,7 @@ scholia.rss venue-latest-works scholia.rss topic-latest-works scholia.rss organization-latest-works + scholia.rss sponsor-latest-works Description: Functions related to feed. @@ -21,6 +22,9 @@ ... $ python -m scholia.rss organization-latest-works Q1137652 + ... 
+ + $ python -m scholia.rss sponsor-latest-works Q1377836 References ---------- @@ -168,6 +172,31 @@ ORDER BY DESC(?date) """ +ORGANIZATION_SPARQL_QUERY = """ +SELECT ?work ?workLabel ?date (?author AS ?description) +WITH {{ + SELECT + (MIN(?dates) AS ?date) + ?work + (GROUP_CONCAT(DISTINCT ?author_label; separator=', ') AS ?author) + WHERE {{ + ?work p:P859 ?sponsor_statement ; + wdt:P577 ?datetimes . + ?sponsor_statement ps:P859 wd:{q} . + BIND(xsd:date(?datetimes) AS ?dates) + OPTIONAL {{ + ?work wdt:P50 / rdfs:label ?author_label . + FILTER(LANG(?author_label) = 'en') + }} + }} + GROUP BY ?work + LIMIT 10 +}} AS %result +WHERE {{ + INCLUDE %result + SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} +}} ORDER BY DESC(?date) +""" def _value(item, field): return item[field]['value'] if field in item else '' @@ -419,6 +448,52 @@ def wb_get_organization_latest_works(q): return rss_body +def wb_get_sponsor_latest_works(q): + """Return feed for latest work from a sponsor. + + Parameters + ---------- + q : str + Wikidata identifer + + Returns + ------- + rss : str + RSS-formatted feed with latest work from a sponsor. 
+ + """ + if not q: + return '' + + rss_body = '\n' + rss_body += '\n' + rss_body += ' \n' + rss_body += ' Scholia - Latest articles sponsored by ' + \ + q + '\n' + rss_body += " The sponsor's most " + \ + "recent articles\n" + rss_body += (' https://tools.wmflabs.org/' + 'scholia/sponsor/\n') + rss_body += ' \n' + + query = ORGANIZATION_SPARQL_QUERY.format(q=q) + url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql' + params = {'query': query, 'format': 'json'} + response = requests.get(url, params=params) + data = response.json() + + rss_body += entities_to_works_rss(data['results']['bindings']) + + rss_body += ' \n' + rss_body += '' + + return rss_body + + def main(): """Handle command-line arguments.""" from docopt import docopt @@ -438,6 +513,9 @@ def main(): elif arguments['organization-latest-works']: q = arguments[''] print(wb_get_organization_latest_works(q)) + elif arguments['sponsor-latest-works']: + q = arguments[''] + print(wb_get_sponsor_latest_works(q)) else: assert False From 98b22f140a4cef33979753928222675b127173e8 Mon Sep 17 00:00:00 2001 From: Daniel Mietchen Date: Tue, 1 Jan 2019 01:34:09 +0100 Subject: [PATCH 07/47] typo fix --- scholia/app/templates/author.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scholia/app/templates/author.html b/scholia/app/templates/author.html index fed66a6a1..286fee810 100644 --- a/scholia/app/templates/author.html +++ b/scholia/app/templates/author.html @@ -303,6 +303,6 @@

Citing authors

-Missing citing authors here? Then go to missing page. +Missing citing authors here? Then go to the missing page. {% endblock %} From 99d3eccf31acb0d9110d574bb0b21ddb07000370 Mon Sep 17 00:00:00 2001 From: Egon Willighagen Date: Fri, 4 Jan 2019 12:05:40 +0100 Subject: [PATCH 08/47] Recognize preprints as works. --- scholia/query.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scholia/query.py b/scholia/query.py index 040a3950c..681ec9d74 100644 --- a/scholia/query.py +++ b/scholia/query.py @@ -556,6 +556,7 @@ def q_to_class(q): 'Q571', # book 'Q191067', # article 'Q253623', # patent + 'Q580922', # preprint 'Q1980247', # chapter 'Q3331189', # edition 'Q5707594', # news article From 870cf5dfbfc3f447a3bc5957924a366f806b608a Mon Sep 17 00:00:00 2001 From: fnielsen Date: Mon, 7 Jan 2019 21:08:49 +0100 Subject: [PATCH 09/47] Fix long line for style check --- scholia/query.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scholia/query.py b/scholia/query.py index 282cf5121..6e2ac838c 100644 --- a/scholia/query.py +++ b/scholia/query.py @@ -245,7 +245,8 @@ def pubmed_to_qs(pmid): True """ - query = 'select ?work where {{ ?work wdt:P698 "{pmid}" }}'.format(pmid=pmid) + query = 'select ?work where {{ ?work wdt:P698 "{pmid}" }}'.format( + pmid=pmid) url = 'https://query.wikidata.org/sparql' params = {'query': query, 'format': 'json'} From f1808d19ce45773a9c440b896651c038e9a88f95 Mon Sep 17 00:00:00 2001 From: fnielsen Date: Mon, 7 Jan 2019 21:09:15 +0100 Subject: [PATCH 10/47] Fix whitespace for style check --- scholia/scrape/nips.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scholia/scrape/nips.py b/scholia/scrape/nips.py index 44963687d..d410c9788 100644 --- a/scholia/scrape/nips.py +++ b/scholia/scrape/nips.py @@ -334,7 +334,7 @@ def scrape_paper_from_url(url): # entry['title'] = tree.xpath("//h2[@class='subtitle']")[0].text title_element = tree.xpath("//h2[@class='subtitle']")[0] entry['title'] = "".join(text for text in 
title_element.itertext()) - + authors_element = tree.xpath("//ul[@class='authors']")[0] entry['authors'] = [element.text for element in authors_element.xpath('li/a')] From 79cb5ce502267cacd5c614634218f0f75b3a7424 Mon Sep 17 00:00:00 2001 From: fnielsen Date: Mon, 7 Jan 2019 21:21:27 +0100 Subject: [PATCH 11/47] Fix style --- scholia/rss.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scholia/rss.py b/scholia/rss.py index a3a505025..d575ec179 100644 --- a/scholia/rss.py +++ b/scholia/rss.py @@ -194,10 +194,13 @@ }} AS %result WHERE {{ INCLUDE %result - SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} + SERVICE wikibase:label {{ + bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". + }} }} ORDER BY DESC(?date) """ + def _value(item, field): return item[field]['value'] if field in item else '' From d7d07e16c1fdb28a42d486643b366185f7914fa9 Mon Sep 17 00:00:00 2001 From: fnielsen Date: Tue, 8 Jan 2019 01:20:31 +0100 Subject: [PATCH 12/47] Implement #611 --- scholia/app/templates/event_empty.html | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/scholia/app/templates/event_empty.html b/scholia/app/templates/event_empty.html index 94438b8f4..25de69326 100644 --- a/scholia/app/templates/event_empty.html +++ b/scholia/app/templates/event_empty.html @@ -5,6 +5,39 @@