Skip to content


Merge branch 'main' of
Browse files Browse the repository at this point in the history
  • Loading branch information
cbadenes committed May 5, 2022
2 parents 622e78d + b502336 commit 31107fe
Show file tree
Hide file tree
Showing 23 changed files with 549 additions and 33,018 deletions.
5,163 changes: 0 additions & 5,163 deletions application/datasets/LC-QuAD_2.0/data/LC-Quad_Dataset.csv

This file was deleted.

4 changes: 3 additions & 1 deletion application/datasets/VANiLLA/
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import multiprocessing as mp
import pandas as pd

datasetUrl = "data/VANiLLA_Test.json"

def JSONLineToDict(JSONRoute):
Funcion auxiliar que dado un archivo json con JSONObjects en cada linea,
Expand Down Expand Up @@ -116,4 +118,4 @@ def retriever(pool, rows, counter, JSONroute, queryURL, csvRoute, writeHeader =
queryUrl = "http://localhost:5000/eqakg/dbpedia/en?evidence=true"
#queryUrl = ""

retriever(pool,rows,counter,"data/Vanilla_Dataset_Test.json",queryUrl,"results/VANiLLA.csv", writeHeader=True)
retriever(pool,rows,counter,datasetUrl,queryUrl,"results/VANiLLA.csv", writeHeader=True)
6 changes: 3 additions & 3 deletions application/datasets/VQuAnDa/
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import time
import multiprocessing as mp
import pandas as pd
import traceback
from pprint import pprint

datasetUrl = "data/VQuAnDA_test.json"

def jsonToDict(route):
Expand Down Expand Up @@ -120,4 +120,4 @@ def retriever(pool, rows, counter, JSONroute, queryURL, csvRoute, writeHeader =
queryUrl = "http://localhost:5000/muheqa/dbpedia/en?evidence=true"
#queryUrl = ""

retriever(pool,rows,counter,"data/test.json",queryUrl,"results/VQuAnDa.csv", writeHeader=True)
retriever(pool,rows,counter,datasetUrl,queryUrl,"results/VQuAnDa.csv", writeHeader=True)
72 changes: 0 additions & 72 deletions application/datasets/

This file was deleted.

40 changes: 40 additions & 0 deletions ui/pages/
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import streamlit as st
from utils import dbManager
from utils import parseDatasets

dbDirection = "mongodb://localhost:27017"

def main():

#Subtitulo de la seccion de gestion de conjuntos de datos
st.subheader('Dataset Management')

#Texto del cuerpo de la pagina web
You may upload your dataset below. For it to be processed and uploaded to our database, please follow these guidelines:
- 1. Upload your dataset either on .CSV or .JSON format.
- 2. JSONs may be on JSON lines or JSON array format.
- 3. Answers should be on the "answer" column/key, and Questions on the "question" column/key.
- 4. If your Answer is verbalized, you shall name its key/column "verbalized_answer", and format it with the answer between brackets, i.e. "Fernando Alonso was born in [Oviedo]."
""", unsafe_allow_html=True)

inputBuffer = st.file_uploader("Upload an Image", type=["csv","json"])

if inputBuffer:
db = dbManager.DbManager(dbDirection)
filename =
splitFilename = filename.split(".")
datasetDict = parseDatasets.parseDataset(inputBuffer, isCsv=(splitFilename[1] == "csv"))
datasetName = splitFilename[0].lower()
if datasetDict:
db.importDataset(datasetDict, datasetName)
if datasetName in db.getCollections():
st.success("✨ Your dataset has been registered on our database!")
st.write("A dataset with name ", datasetName, "and length ", len(datasetDict), " questions has been registered on MongoDB")
st.error("We could not upload your dataset on our database. Please contact the administrator.")
st.error("Your dataset could not be processed correctly. Please revise the format or contact the administrator")
except Exception as e:
13 changes: 0 additions & 13 deletions ui/pages/

This file was deleted.

140 changes: 72 additions & 68 deletions ui/pages/ → ui/pages/
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
import streamlit as st
import pytz
import requests
from annotated_text import annotated_text
import operator
from utils import db
import random
from utils import dbManager
import streamlit as st
from utils import spreadManager
from datetime import datetime
from annotated_text import annotated_text

Variables globales:
- timezone: Huso horario cuyas horas vamos a usar en nuestra hoja
- knowledgeBases: Lista de bases de conocimiento para nuestra consulta
- QAService: Url del servicio de Question-Answering
- dbDirection: Direccion de la base de datos
- spreadsheet: Nombre del Libro de Calculo
- spreadsheet_id: Identificador de nuestro Libro de Calculo
- validationSheet: Nombre de la Hoja a modificar (hoja de validacion)

timezone = pytz.timezone("Europe/Madrid")

knowledgeBases = ["wikidata","dbpedia","cord19"]
QAService = ""
dbDirection = "mongodb://localhost:27017"

spreadsheet = "MuHeQa_Validation"
spreadsheetId = "1TY6Tj1OwITOW3o1nYRFFRY1bunvHNImUj-J0omRq4-I"
validationSheet = "Validation"

def queryJSON(queryURL, question):
Expand All @@ -20,48 +43,43 @@ def queryJSON(queryURL, question):
def main():

@st.cache(show_spinner=False, allow_output_mutation=True)
def getAnswers(data):
def getAnswers(question):
Funcion auxiliar que obtiene una lista con todas las respuestas sobre las distintas bases de conocimiento
answerList = [


for i in knowledgeBases:
queryURL = "" + i + "/en?evidence=true"
answer = queryJSON(queryURL,data["question"])
queryURL = QAService + i + "/en?evidence=true"
answer = queryJSON(queryURL,question)
#Si la respuesta es distinta de None, guardamos la fuente y agregamos la respuesta a la lista de contestaciones
if answer:
answer["source"] = i

return answerList

def annotateContext(response):
def annotateContext(response, answer, context, answerStart, answerEnd):
Funcion auxiliar que anota la respuesta sobre el texto de evidencia
#Por defecto la etiqueta del texto anotado sera "ANSWER" y el color verde
tag = "ANSWER"
color = "#adff2f"
#Guardamos la respuesta, el contexto, y su principio y final en el texto
answer = response["answer"]
context = response["evidence"]["summary"]
answerStart = response["evidence"]["start"]
answerEnd = response["evidence"]["end"]
#Buscamos la respuesta en el texto
answerInText = (response["evidence"]["summary"])[answerStart:answerEnd]
#Si la respuesta en el texto es distinta de la respuesta en el json:
if answer != answerInText:
#Cambiamos la etiqueta a "EVIDENCE" y el color a a azul
tag = "EVIDENCE"
color = "#8ef"
#Marcamos en el texto de evidencia la respuesta
#Marcamos en el texto de evidencia la respuesta y lo mostramos en la interfaz

#Creamos la conexion para la base de datos de validacion
worksheet = db.connectToSheet()

#Creamos la conexion para la base de datos (datasets) y el Libro de Calculo (validacion)
spread = spreadManager.SpreadManager(spreadsheet, spreadsheetId, validationSheet)
db = dbManager.DbManager(dbDirection)

#Subtitulo de la seccion de pregunta y respuesta
st.subheader('MuHeQa UI - Question Answering over Multiple and Heterogeneous Knowledge Bases')

Expand All @@ -70,22 +88,12 @@ def annotateContext(response):
Write any question below or use a random one from a pre-loaded datasets!
""", unsafe_allow_html=True)

#Lista de Hojas de Calculo con Datasets en nuestro Libro
datasetList = db.getDatasetsInSheet(worksheet)

#Obtenemos el contenido de cada una de estas hojas
recordList = []
#Creamos una lista de listas para dicho contenido, donde cada lista sera un dataset (hoja)
for i in datasetList:

#Lista de Hojas de Calculo con Datasets en nuestra base de datos
selectorList = ["All"]

#Buscador para realizar preguntas
question = st.text_input("")

#Creamos la lista para el selector
selectorList = ["All"]
#Quitamos "_Validation" del nombre de las hojas del Libro de Calculo
selectorList.extend([i.split("_")[0] for i in datasetList])

#Selector para el Dataset del que provendran las preguntas aleatorias
dataset = st.selectbox("Select a DataSet", selectorList)
Expand All @@ -96,24 +104,14 @@ def annotateContext(response):
modelAnswer = None

if randomQuestion:
randomDict = random.choice(random.choices(recordList, weights=map(len, recordList))[0])
randomDict = db.getRandomDocument(1,dataset)[0]
question = randomDict["question"]
modelAnswer = randomDict["answer"]

data = {
'question': question,
'answerNumber': 10

#Establecemos el titulo de la barra lateral
#Control deslizante para el numero de respuestas a mostrar
answerNumber = st.sidebar.slider('How many relevant answers do you want?', 1, 10, 5)

#Lista de bases de conocimiento sobre las que haremos nuestra consulta
knowledgeBases = ["wikidata","dbpedia","cord19"]

answerNumber = st.sidebar.slider('How many relevant answers do you want?', 1, 10, 1)
if question:
st.write("**Question: **", question)
if modelAnswer:
Expand All @@ -125,37 +123,43 @@ def annotateContext(response):
#Mensaje de carga para las preguntas. Se muestra mientras que estas se obtienen.
with st.spinner(text=':hourglass: Looking for answers...'):
counter = 0
buttonKey = 1
results = getAnswers(data)
highestScoreAnswer = {}
results = getAnswers(question)
results.sort(key = operator.itemgetter('confidence'), reverse = True)
for response in results:
for idx,response in enumerate(results):
if counter >= answerNumber:
counter += 1
answer = response['answer']
if answer:
if answer and answer != "-":
context = "..." + response["evidence"]["summary"] + "..."
source = response["source"]
relevance = response["confidence"]
confidence = response["confidence"]
annotateContext(response, answer, context, response["evidence"]["start"], response["evidence"]["end"])
st.write("**Answer: **", answer)
st.write('**Relevance:** ', relevance , '**Source:** ' , source)
col1, col2 = st.columns([1,1])
with col1:
isRight = st.button("👍", buttonKey)
with col2:
isWrong = st.button("👎", buttonKey + 1)
buttonKey += 2
#Si se pulsa el boton de correcto/incorrecto:
if isRight or isWrong:
#Mensaje de que el input del usuario ha sido registrado
st.success("✨ Thanks for your input!")
#Insertamos en la Spreadsheet de Google
#db.insert(conn, [[question,source,answer,isRight]])
#Reseteamos los valores de los botones
isRight = False
isWrong = False

st.write('**Relevance:** ', confidence , '**Source:** ' , source)
if idx == 0:
highestScoreAnswer = {
"answer": answer,
"confidence": confidence
st.write("Please rate if our answer has been helpful to you so we can further improve our system!")
#Botones para validar la respuesta por parte del usuario en columnas separadas
col1, col2 = st.columns([1,1])
with col1:
isRight = st.button("👍")
with col2:
isWrong = st.button("👎")

#Si se pulsa el boton de correcto/incorrecto:
if isRight or isWrong:
#Insertamos en la Spreadsheet de Google
spread.insertRow([[question, highestScoreAnswer["answer"], str(highestScoreAnswer["confidence"]), isRight, str(]])
#Reseteamos los valores de los botones
isRight = False
isWrong = False
#Mensaje de que el input del usuario ha sido registrado
st.success("✨ Thanks for your input!")

#Checkbox. Si tenemos respuesta y la caja es marcada, imprimimos las respuestas JSON obtenidas.
if question and st.sidebar.checkbox('Show JSON Response', key = 0):
Expand Down

0 comments on commit 31107fe

Please sign in to comment.