Skip to content

Commit

Permalink
Merge branch 'main' of github.com:librairy/muheqa
Browse files Browse the repository at this point in the history
  • Loading branch information
cbadenes committed May 5, 2022
2 parents 622e78d + b502336 commit 31107fe
Show file tree
Hide file tree
Showing 23 changed files with 549 additions and 33,018 deletions.
5,163 changes: 0 additions & 5,163 deletions application/datasets/LC-QuAD_2.0/data/LC-Quad_Dataset.csv

This file was deleted.

4 changes: 3 additions & 1 deletion application/datasets/VANiLLA/retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import multiprocessing as mp
import pandas as pd

datasetUrl = "data/VANiLLA_Test.json"

def JSONLineToDict(JSONRoute):
'''
Funcion auxiliar que dado un archivo json con JSONObjects en cada linea,
Expand Down Expand Up @@ -116,4 +118,4 @@ def retriever(pool, rows, counter, JSONroute, queryURL, csvRoute, writeHeader =
queryUrl = "http://localhost:5000/eqakg/dbpedia/en?evidence=true"
#queryUrl = "https://librairy.linkeddata.es/eqakg/dbpedia/en?text=false"

retriever(pool,rows,counter,"data/Vanilla_Dataset_Test.json",queryUrl,"results/VANiLLA.csv", writeHeader=True)
retriever(pool,rows,counter,datasetUrl,queryUrl,"results/VANiLLA.csv", writeHeader=True)
6 changes: 3 additions & 3 deletions application/datasets/VQuAnDa/retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import time
import multiprocessing as mp
import pandas as pd
import traceback
from pprint import pprint

datasetUrl = "data/VQuAnDA_test.json"

def jsonToDict(route):
'''
Expand Down Expand Up @@ -120,4 +120,4 @@ def retriever(pool, rows, counter, JSONroute, queryURL, csvRoute, writeHeader =
queryUrl = "http://localhost:5000/muheqa/dbpedia/en?evidence=true"
#queryUrl = "https://librairy.linkeddata.es/muheqa/dbpedia/en?evidence=true"

retriever(pool,rows,counter,"data/test.json",queryUrl,"results/VQuAnDa.csv", writeHeader=True)
retriever(pool,rows,counter,datasetUrl,queryUrl,"results/VQuAnDa.csv", writeHeader=True)
72 changes: 0 additions & 72 deletions application/datasets/parseDatasets.py

This file was deleted.

40 changes: 40 additions & 0 deletions ui/pages/datasetManagement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import streamlit as st
from utils import dbManager
from utils import parseDatasets

dbDirection = "mongodb://localhost:27017"

def main():
    """Render the dataset-management page and import an uploaded dataset.

    Shows the upload guidelines and a file uploader restricted to CSV/JSON
    files. When a file is uploaded, its name is split into a base name and an
    extension; the buffer is handed to ``parseDatasets.parseDataset`` (with
    ``isCsv`` set according to the extension) and the result is passed to
    ``DbManager.importDataset`` under the lower-cased base name. Success is
    reported only if that name then appears in ``DbManager.getCollections()``.

    Any exception raised while parsing or importing is rendered on the page
    with ``st.exception`` instead of propagating.
    """
    # Subtitle of the dataset-management section
    st.subheader('Dataset Management')

    # Body text of the page (upload guidelines)
    st.markdown("""
You may upload your dataset below. For it to be processed and uploaded to our database, please follow these guidelines:
- 1. Upload your dataset either on .CSV or .JSON format.
- 2. JSONs may be on JSON lines or JSON array format.
- 3. Answers should be on the "answer" column/key, and Questions on the "question" column/key.
- 4. If your Answer is verbalized, you shall name its key/column "verbalized_answer", and format it with the answer between brackets, i.e. "Fernando Alonso was born in [Oviedo]."
""", unsafe_allow_html=True)

    # The widget only accepts datasets, so the label says so
    # (it previously read "Upload an Image").
    inputBuffer = st.file_uploader("Upload a dataset", type=["csv","json"])

    # Nothing uploaded yet: nothing else to render.
    if not inputBuffer:
        return

    try:
        # Split on the LAST dot so names such as "my.data.csv" keep their full
        # base name and are still recognised as CSV. Splitting on every dot and
        # reading index 1 mis-detected those files and raised IndexError when
        # the name had no dot at all.
        baseName, dot, extension = inputBuffer.name.rpartition(".")
        if not dot or not baseName:
            st.error("Your dataset could not be processed correctly. Please revise the format or contact the administrator")
            return

        db = dbManager.DbManager(dbDirection)
        # Compare the extension case-insensitively so ".CSV" is handled as CSV.
        datasetDict = parseDatasets.parseDataset(inputBuffer, isCsv=(extension.lower() == "csv"))
        datasetName = baseName.lower()

        if not datasetDict:
            st.error("Your dataset could not be processed correctly. Please revise the format or contact the administrator")
            return

        db.importDataset(datasetDict, datasetName)
        # Confirm the import by checking that the collection is now listed.
        if datasetName in db.getCollections():
            st.success("✨ Your dataset has been registered on our database!")
            st.write("A dataset with name ", datasetName, "and length ", len(datasetDict), " questions has been registered on MongoDB")
        else:
            st.error("We could not upload your dataset on our database. Please contact the administrator.")
    except Exception as e:
        # Page-level boundary: show the failure in the UI rather than crash.
        st.exception(e)
13 changes: 0 additions & 13 deletions ui/pages/dataset_upload.py

This file was deleted.

140 changes: 72 additions & 68 deletions ui/pages/question_answering.py → ui/pages/questionAnswering.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,32 @@
import streamlit as st
import pytz
import requests
from annotated_text import annotated_text
import operator
from utils import db
import random
from utils import dbManager
import streamlit as st
from utils import spreadManager
from datetime import datetime
from annotated_text import annotated_text

"""
Variables globales:
- timezone: Huso horario cuyas horas vamos a usar en nuestra hoja
- knowledgeBases: Lista de bases de conocimiento para nuestra consulta
- QAService: Url del servicio de Question-Answering
- dbDirection: Direccion de la base de datos
- spreadsheet: Nombre del Libro de Calculo
- spreadsheet_id: Identificador de nuestro Libro de Calculo
- validationSheet: Nombre de la Hoja a modificar (hoja de validacion)
"""

timezone = pytz.timezone("Europe/Madrid")

knowledgeBases = ["wikidata","dbpedia","cord19"]
QAService = "http://127.0.0.1:5000/muheqa/"
dbDirection = "mongodb://localhost:27017"

spreadsheet = "MuHeQa_Validation"
spreadsheetId = "1TY6Tj1OwITOW3o1nYRFFRY1bunvHNImUj-J0omRq4-I"
validationSheet = "Validation"

def queryJSON(queryURL, question):
"""
Expand All @@ -20,48 +43,43 @@ def queryJSON(queryURL, question):
def main():

@st.cache(show_spinner=False, allow_output_mutation=True)
def getAnswers(data):
def getAnswers(question):
"""
Funcion auxiliar que obtiene una lista con todas las respuestas sobre las distintas bases de conocimiento
"""
answerList = [

]

for i in knowledgeBases:
queryURL = "http://127.0.0.1:5000/muheqa/" + i + "/en?evidence=true"
answer = queryJSON(queryURL,data["question"])
queryURL = QAService + i + "/en?evidence=true"
answer = queryJSON(queryURL,question)
#Si la respuesta es distinta de None, guardamos la fuente y agregamos la respuesta a la lista de contestaciones
if answer:
answer["source"] = i
answerList.append(answer)

return answerList

def annotateContext(response):
def annotateContext(response, answer, context, answerStart, answerEnd):
'''
Funcion auxiliar que anota la respuesta sobre el texto de evidencia
'''
#Por defecto la etiqueta del texto anotado sera "ANSWER" y el color verde
tag = "ANSWER"
color = "#adff2f"
#Guardamos la respuesta, el contexto, y su principio y final en el texto
answer = response["answer"]
context = response["evidence"]["summary"]
answerStart = response["evidence"]["start"]
answerEnd = response["evidence"]["end"]
#Buscamos la respuesta en el texto
answerInText = (response["evidence"]["summary"])[answerStart:answerEnd]
#Si la respuesta en el texto es distinta de la respuesta en el json:
if answer != answerInText:
#Cambiamos la etiqueta a "EVIDENCE" y el color a a azul
tag = "EVIDENCE"
color = "#8ef"
#Marcamos en el texto de evidencia la respuesta
#Marcamos en el texto de evidencia la respuesta y lo mostramos en la interfaz
annotated_text(context[:answerStart],(answerInText,tag,color),context[answerEnd:],)

#Creamos la conexion para la base de datos de validacion
worksheet = db.connectToSheet()

#Creamos la conexion para la base de datos (datasets) y el Libro de Calculo (validacion)
spread = spreadManager.SpreadManager(spreadsheet, spreadsheetId, validationSheet)
db = dbManager.DbManager(dbDirection)

#Subtitulo de la seccion de pregunta y respuesta
st.subheader('MuHeQa UI - Question Answering over Multiple and Heterogeneous Knowledge Bases')

Expand All @@ -70,22 +88,12 @@ def annotateContext(response):
Write any question below or use a random one from a pre-loaded datasets!
""", unsafe_allow_html=True)

#Lista de Hojas de Calculo con Datasets en nuestro Libro
datasetList = db.getDatasetsInSheet(worksheet)

#Obtenemos el contenido de cada una de estas hojas
recordList = []
#Creamos una lista de listas para dicho contenido, donde cada lista sera un dataset (hoja)
for i in datasetList:
recordList.append(db.getRecordsInSheet(i))

#Lista de Hojas de Calculo con Datasets en nuestra base de datos
selectorList = ["All"]
selectorList.extend(db.getCollections())

#Buscador para realizar preguntas
question = st.text_input("")

#Creamos la lista para el selector
selectorList = ["All"]
#Quitamos "_Validation" del nombre de las hojas del Libro de Calculo
selectorList.extend([i.split("_")[0] for i in datasetList])

#Selector para el Dataset del que provendran las preguntas aleatorias
dataset = st.selectbox("Select a DataSet", selectorList)
Expand All @@ -96,24 +104,14 @@ def annotateContext(response):
modelAnswer = None

if randomQuestion:
randomDict = random.choice(random.choices(recordList, weights=map(len, recordList))[0])
print(randomDict)
randomDict = db.getRandomDocument(1,dataset)[0]
question = randomDict["question"]
modelAnswer = randomDict["answer"]

data = {
'question': question,
'answerNumber': 10
}

#Establecemos el titulo de la barra lateral
st.sidebar.subheader('Options')
#Control deslizante para el numero de respuestas a mostrar
answerNumber = st.sidebar.slider('How many relevant answers do you want?', 1, 10, 5)

#Lista de bases de conocimiento sobre las que haremos nuestra consulta
knowledgeBases = ["wikidata","dbpedia","cord19"]

answerNumber = st.sidebar.slider('How many relevant answers do you want?', 1, 10, 1)
if question:
st.write("**Question: **", question)
if modelAnswer:
Expand All @@ -125,37 +123,43 @@ def annotateContext(response):
#Mensaje de carga para las preguntas. Se muestra mientras que estas se obtienen.
with st.spinner(text=':hourglass: Looking for answers...'):
counter = 0
buttonKey = 1
results = getAnswers(data)
highestScoreAnswer = {}
results = getAnswers(question)
results.sort(key = operator.itemgetter('confidence'), reverse = True)
for response in results:
for idx,response in enumerate(results):
if counter >= answerNumber:
break
counter += 1
answer = response['answer']
if answer:
if answer and answer != "-":
context = "..." + response["evidence"]["summary"] + "..."
source = response["source"]
relevance = response["confidence"]
annotateContext(response)
confidence = response["confidence"]
annotateContext(response, answer, context, response["evidence"]["start"], response["evidence"]["end"])
st.write("**Answer: **", answer)
st.write('**Relevance:** ', relevance , '**Source:** ' , source)
col1, col2 = st.columns([1,1])
with col1:
isRight = st.button("πŸ‘", buttonKey)
with col2:
isWrong = st.button("πŸ‘Ž", buttonKey + 1)
buttonKey += 2
#Si se pulsa el boton de correcto/incorrecto:
if isRight or isWrong:
#Mensaje de que el input del usuario ha sido registrado
st.success("✨ Thanks for your input!")
#Insertamos en la Spreadsheet de Google
#db.insert(conn, [[question,source,answer,isRight]])
#Reseteamos los valores de los botones
isRight = False
isWrong = False

st.write('**Relevance:** ', confidence , '**Source:** ' , source)
if idx == 0:
highestScoreAnswer = {
"answer": answer,
"confidence": confidence
}
st.write("Please rate if our answer has been helpful to you so we can further improve our system!")
#Botones para validar la respuesta por parte del usuario en columnas separadas
col1, col2 = st.columns([1,1])
with col1:
isRight = st.button("πŸ‘")
with col2:
isWrong = st.button("πŸ‘Ž")

#Si se pulsa el boton de correcto/incorrecto:
if isRight or isWrong:
#Insertamos en la Spreadsheet de Google
spread.insertRow([[question, highestScoreAnswer["answer"], str(highestScoreAnswer["confidence"]), isRight, str(datetime.now(tz=timezone))]])
#Reseteamos los valores de los botones
isRight = False
isWrong = False
#Mensaje de que el input del usuario ha sido registrado
st.success("✨ Thanks for your input!")

#Checkbox. Si tenemos respuesta y la caja es marcada, imprimimos las respuestas JSON obtenidas.
if question and st.sidebar.checkbox('Show JSON Response', key = 0):
Expand Down
Loading

0 comments on commit 31107fe

Please sign in to comment.