Skip to content

Commit

Permalink
citations
Browse files Browse the repository at this point in the history
  • Loading branch information
laugustyniak committed Jun 13, 2023
1 parent d70eed6 commit dc736be
Show file tree
Hide file tree
Showing 4 changed files with 3,140 additions and 29 deletions.
2 changes: 1 addition & 1 deletion mms_benchmark/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
'doc_host': 'https://Brand24-AI.github.io/mms_benchmark',
'git_url': 'https://github.com/Brand24-AI/mms_benchmark',
'lib_path': 'mms_benchmark'},
'syms': {}}
'syms': {'mms_benchmark.citations': {}}}
16 changes: 16 additions & 0 deletions mms_benchmark/citations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from typing import Dict, Union

import bibtexparser
from datasets import DatasetDict


def get_citations(dataset: DatasetDict, citation_as_dict: bool = True) -> Dict[str, Union[Dict, str]]:
    """Map each original dataset name to its BibTeX citation.

    ``dataset.citation`` is split on the marker ``"% Datasets: "``. Each
    resulting section is expected to look like::

        name_a, name_b
        @article{...}

    i.e. a comma-separated list of dataset names on the first line, followed
    by the BibTeX text shared by those datasets. Any text before the first
    marker is ignored.

    Args:
        dataset: Object whose ``citation`` attribute holds the combined
            citation text. Only ``dataset.citation`` is read.
            NOTE(review): confirm that callers pass an object that actually
            exposes ``.citation``.
        citation_as_dict: If true, each value is the first entry produced by
            ``bibtexparser.loads`` for the section; otherwise the raw
            citation text of the section is returned unchanged.

    Returns:
        A dict keyed by dataset name (surrounding whitespace removed). When
        ``citation_as_dict`` is true, every name gets its own copy of the
        parsed entry, so mutating one value does not affect the others.

    Raises:
        ValueError: If a section has no newline separating the dataset list
            from the citation text.
        IndexError: If ``citation_as_dict`` is true and parsing a section
            yields no entries.
    """
    sections = dataset.citation.split("% Datasets: ")[1:]
    original_dataset_to_bibtex = {}
    for section in sections:
        header, newline, citation = section.partition("\n")
        if not newline:
            raise ValueError(f"No citation text found after dataset list: {header!r}")

        # Split on the bare comma and strip, so "a,b", "a,  b" and a trailing
        # "\r" from CRLF line endings all yield clean dataset names.
        names = [name.strip() for name in header.split(",")]

        # Parsing is independent of the dataset name: do it once per section
        # instead of once per name.
        entry = bibtexparser.loads(citation).entries[0] if citation_as_dict else None

        for name in names:
            if not name:
                continue
            # Copy the parsed entry so each key owns an independent dict.
            original_dataset_to_bibtex[name] = dict(entry) if citation_as_dict else citation
    return original_dataset_to_bibtex
42 changes: 14 additions & 28 deletions nbs/00_dataset_card.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@
"- bibliography: references.bib\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
"from nbdev.showdoc import *"
]
},
{
"attachments": {},
"cell_type": "markdown",
Expand Down Expand Up @@ -518,17 +528,11 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": [
"#| eval: false\n",
"from typing import Dict, Union\n",
"\n",
"import bibtexparser\n",
"\n",
"from datasets import DatasetDict"
"We can load citations as strings - easy adding to bibtex."
]
},
{
Expand All @@ -537,25 +541,7 @@
"metadata": {},
"outputs": [],
"source": [
"#| eval: false\n",
"def get_citations(dataset: DatasetDict, citation_as_dict: bool = True) -> Dict[str, Union[Dict, str]]:\n",
" lines = dataset.citation.split(\"% Datasets: \")[1:]\n",
" original_dataset_to_bibtex = {}\n",
" for line in lines:\n",
" original_datasets, citation = line.split(\"\\n\", maxsplit=1)\n",
" dataset_list = original_datasets.split(\", \")\n",
"\n",
" for dataset in dataset_list:\n",
" original_dataset_to_bibtex[dataset] = bibtexparser.loads(citation).entries[0] if citation_as_dict else citation\n",
" return original_dataset_to_bibtex"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"We can load citations as strings - easy adding to bibtex."
"from mms_benchmark.citations import get_citations"
]
},
{
Expand Down
Loading

0 comments on commit dc736be

Please sign in to comment.