Skip to content

Commit

Permalink
Merge e7ea3bb into e1c8d8e
Browse files Browse the repository at this point in the history
  • Loading branch information
mariagrandury authored Sep 13, 2023
2 parents e1c8d8e + e7ea3bb commit 7b0e605
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 19 deletions.
25 changes: 25 additions & 0 deletions .github/scripts/reorder_table_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pandas as pd

# Script configuration: the CSV file is rewritten in place, and the column
# currently at position `original_index` is moved to position `final_index`
# (both zero-based).
file_name = 'datasets.csv'

original_index = 5
final_index = 4


def move_column(df, original_index, final_index):
    """Return *df* with one column moved to a new position.

    Args:
        df: The DataFrame whose columns are reordered. It is not modified.
        original_index: Zero-based position of the column to move.
        final_index: Zero-based position the column occupies in the result.

    Returns:
        A DataFrame with the same columns and rows as *df*, where the column
        formerly at ``original_index`` now sits at ``final_index`` and the
        remaining columns keep their relative order.

    Raises:
        IndexError: If either index is negative or not smaller than the
            number of columns.
    """
    n_columns = df.shape[1]
    if not (0 <= original_index < n_columns and 0 <= final_index < n_columns):
        raise IndexError("Invalid column index specified.")

    # Work on positions rather than labels so that duplicated column names
    # cannot drop or duplicate columns in the result.
    order = list(range(n_columns))
    order.insert(final_index, order.pop(original_index))
    return df.iloc[:, order]


def main():
    """Reorder the columns of ``file_name`` in place and report the result."""
    # Read every cell as plain text and keep empty/"NA"-like strings as they
    # are: this script only reorders columns and must not alter cell content
    # through type or missing-value inference.
    df = pd.read_csv(file_name, dtype=str, keep_default_na=False)

    try:
        df_reordered = move_column(df, original_index, final_index)
    except IndexError as err:
        # Exit with a non-zero status so an automated run is marked as failed
        # instead of silently leaving the file untouched.
        raise SystemExit(str(err)) from err

    df_reordered.to_csv(file_name, index=False)

    print("Columns reordered successfully. Result saved to", file_name)


if __name__ == "__main__":
    main()
38 changes: 19 additions & 19 deletions datasets.csv
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
nombre,tareas,idioma,página_web,github,paper,hf_dataset_name,hf_contributor_handle,dominio,pais
BasCrawl,modelado del lenguaje,eu,https://doi.org/10.5281/zenodo.7313092,,,,,general,España
Biomedical Spanish CBOW Word Embeddings in Floret,"modelado del lenguaje,CBOW (Continuous Bag Of Words)",es,https://doi.org/10.5281/zenodo.7314041,https://arxiv.org/abs/2109.07765,,,,clinico,España
CSIC Spanish Corpus,modelado del lenguaje,es,https://doi.org/10.5281/zenodo.7313126,,,,,academico,España
Catalonia Independence Corpus,clasificación de sentimientos,"ca, es",,https://github.com/ixa-ehu/catalonia-independence-corpus,https://www.aclweb.org/anthology/2020.lrec-1.171/,catalonia_independence,lewtun,rrss,España
HEAD-QA,preguntas de opción múltiple,es,https://aghie.github.io/head-qa/,https://github.com/aghie/head-qa,https://www.aclweb.org/anthology/P19-1092/,head_qa,mariagrandury,clinico,España
InfoLibros Corpus,modelado del lenguaje,es,https://doi.org/10.5281/zenodo.7313105,,,,,literatura,Varios
Large Spanish Corpus,"modelado del lenguaje,pre-entrenamiento",es,,https://github.com/josecannete/spanish-corpora,,large_spanish_corpus,lewtun,general,Varios
Mucho Cine,clasificación de sentimientos,es,http://www.lsi.us.es/~fermin/index.php/Datasets,,,muchocine,mapmeld,general,?
Spanish Billion Words,"modelado del lenguaje,pre-entrenamiento",es,https://crscardellino.github.io/SBWCE/,,,spanish_billion_words,mariagrandury,general,Varios
Spanish Biomedical Crawled Corpus,modelado del lenguaje,es,https://doi.org/10.5281/zenodo.5513237,,https://arxiv.org/abs/2109.07765,,,clinico,España
Spanish CBOW Word Embeddings in FastText,"modelado del lenguaje,FastText",es,https://doi.org/10.5281/zenodo.5044988,,,http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6405,,genera,España
Spanish CBOW Word Embeddings in Floret,"modelado del lenguaje,CBOW (Continuous Bag Of Words)",es,https://doi.org/10.5281/zenodo.7314098,,,,,general,España
Spanish Legal Domain Corpora,modelado del lenguaje,es,https://doi.org/10.5281/zenodo.5495529,https://github.com/PlanTL-GOB-ES/lm-legal-es,https://arxiv.org/abs/2110.12201,,,legal,España
Spanish Legal Domain Word & Sub-Word Embeddings,modelado del lenguaje,es,https://doi.org/10.5281/zenodo.5036147,https://github.com/PlanTL-GOB-ES/lm-legal-es,https://arxiv.org/abs/2110.12201,,,legal,España
Spanish Skip-Gram Word Embeddings in FastText,"modelado del lenguaje,FastText",es,https://doi.org/10.5281/zenodo.5046525,,,http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6405,,general,España
TDX Thesis Spanish Corpus,modelado del lenguaje,"ca, es",https://doi.org/10.5281/zenodo.7313149,,,,,academico,España
WikiCorpus,"modelado del lenguaje,POS (Part of Speech)","ca, en, es",https://www.cs.upc.edu/~nlp/wikicorpus/,,https://www.cs.upc.edu/~nlp/papers/reese10.pdf,wikicorpus,albertvillanova,general,Varios
eHealth-KD,NER (Named Entity Recognition),es,https://knowledge-learning.github.io/ehealthkd-2020/,https://github.com/knowledge-learning/ehealthkd-2020,http://ceur-ws.org/Vol-2664/eHealth-KD_overview.pdf,ehealth_kd,mariagrandury,clinico,España
nombre,tareas,dominio,idioma,pais,página_web,github,paper,hf_dataset_name,hf_contributor_handle
BasCrawl,modelado del lenguaje,general,euskera,España,https://doi.org/10.5281/zenodo.7313092,,,,
Biomedical Spanish CBOW Word Embeddings in Floret,"modelado del lenguaje,CBOW (Continuous Bag Of Words)",clinico,español,España,https://doi.org/10.5281/zenodo.7314041,,https://arxiv.org/abs/2109.07765,,
CSIC Spanish Corpus,modelado del lenguaje,academico,español,España,https://doi.org/10.5281/zenodo.7313126,,,,
Catalonia Independence Corpus,clasificación de sentimientos,rrss,"catalán, español",España,,https://github.com/ixa-ehu/catalonia-independence-corpus,https://www.aclweb.org/anthology/2020.lrec-1.171/,catalonia_independence,lewtun
HEAD-QA,preguntas de opción múltiple,clinico,español,España,https://aghie.github.io/head-qa/,https://github.com/aghie/head-qa,https://www.aclweb.org/anthology/P19-1092/,head_qa,mariagrandury
InfoLibros Corpus,modelado del lenguaje,literatura,español,Varios,https://doi.org/10.5281/zenodo.7313105,,,,
Large Spanish Corpus,"modelado del lenguaje,pre-entrenamiento",general,español,Varios,,https://github.com/josecannete/spanish-corpora,,large_spanish_corpus,lewtun
Mucho Cine,clasificación de sentimientos,general,español,Varios,http://www.lsi.us.es/~fermin/index.php/Datasets,,,muchocine,mapmeld
Spanish Billion Words,"modelado del lenguaje,pre-entrenamiento",general,español,Varios,https://crscardellino.github.io/SBWCE/,,,spanish_billion_words,mariagrandury
Spanish Biomedical Crawled Corpus,modelado del lenguaje,clinico,español,España,https://doi.org/10.5281/zenodo.5513237,,https://arxiv.org/abs/2109.07765,,
Spanish CBOW Word Embeddings in FastText,"modelado del lenguaje,FastText",general,español,España,https://doi.org/10.5281/zenodo.5044988,,http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6405,,
Spanish CBOW Word Embeddings in Floret,"modelado del lenguaje,CBOW (Continuous Bag Of Words)",general,español,España,https://doi.org/10.5281/zenodo.7314098,,,,
Spanish Legal Domain Corpora,modelado del lenguaje,legal,español,España,https://doi.org/10.5281/zenodo.5495529,https://github.com/PlanTL-GOB-ES/lm-legal-es,https://arxiv.org/abs/2110.12201,,
Spanish Legal Domain Word & Sub-Word Embeddings,modelado del lenguaje,legal,español,España,https://doi.org/10.5281/zenodo.5036147,https://github.com/PlanTL-GOB-ES/lm-legal-es,https://arxiv.org/abs/2110.12201,,
Spanish Skip-Gram Word Embeddings in FastText,"modelado del lenguaje,FastText",general,español,España,https://doi.org/10.5281/zenodo.5046525,,http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6405,,
TDX Thesis Spanish Corpus,modelado del lenguaje,academico,"catalán, español",España,https://doi.org/10.5281/zenodo.7313149,,,,
WikiCorpus,"modelado del lenguaje,POS (Part of Speech)",general,"catalán, español, inglés",Varios,https://www.cs.upc.edu/~nlp/wikicorpus/,,https://www.cs.upc.edu/~nlp/papers/reese10.pdf,wikicorpus,albertvillanova
eHealth-KD,reconocimiento de entidades nombradas (NER),clinico,español,España,https://knowledge-learning.github.io/ehealthkd-2020/,https://github.com/knowledge-learning/ehealthkd-2020,http://ceur-ws.org/Vol-2664/eHealth-KD_overview.pdf,ehealth_kd,mariagrandury

0 comments on commit 7b0e605

Please sign in to comment.