-
Notifications
You must be signed in to change notification settings - Fork 0
/
arxivbot.py
58 lines (52 loc) · 2.01 KB
/
arxivbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import aiohttp
import datetime
from bs4 import BeautifulSoup
from maubot import Plugin
from maubot.handlers import command
# Pattern for links to arXiv abstract pages. The one capture group wraps the
# whole pattern, so the captured text is the complete URL.
regex_expr = (
    r"(https?://arxiv\.org/abs/"  # http or https, arxiv.org host, /abs/ path
    r"(?:[a-z\-]*/)?"  # optional lowercase/hyphen prefix segment, e.g. "hep-th/"
    r"[0-9]+\.?[0-9]+)"  # digits, an optional dot, then more digits
)
class ArXivBot(Plugin):
    """Maubot plugin that replies to arXiv abstract links with paper metadata.

    Every URL matched by ``regex_expr`` is fetched, the ``citation_*`` meta
    tags in the page head are read, and a Markdown summary is sent as a reply.
    """

    # Shown in the reply for any metadata field the fetched page lacks.
    _MISSING = "n/a"

    @staticmethod
    async def _request(url: str) -> str:
        """Fetch ``url`` with a GET request and return the body as text.

        Raises:
            aiohttp.ClientResponseError: If the server answers with an HTTP
                error status.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                # Fail here rather than parse an error page as if it were a
                # paper's abstract page.
                response.raise_for_status()
                return await response.text()

    @staticmethod
    def _output_text(d: dict) -> str:
        """Render the metadata dict ``d`` as the Markdown reply body.

        ``d`` must provide the keys ``date``, ``title``, ``authors``,
        ``abstract`` and ``pdf``.
        """
        msg = (
            f'**Date**: {d["date"]}\n\n'
            f'**Title**: {d["title"]}\n\n'
            f'**Authors**: {d["authors"]}\n\n'
            f'**Abstract**: {d["abstract"]}\n\n'
            f'**PDF**: {d["pdf"]}'
        )
        return msg

    @staticmethod
    def _format_author(raw: str) -> str:
        """Reorder a ``"Last, First"`` entry so the last piece comes first.

        An entry without ``", "`` is returned unchanged; joining its last and
        first piece would otherwise print the same name twice.
        """
        parts = raw.split(", ")
        if len(parts) < 2:
            return raw
        return " ".join([parts[-1], parts[0]])

    async def _parse_arxiv(self, url: str) -> dict:
        """Download ``url`` and extract the paper metadata from its meta tags.

        Returns:
            A dict with the keys ``title``, ``authors``, ``date``,
            ``abstract`` and ``pdf``. Fields whose meta tag is absent hold
            the ``_MISSING`` placeholder instead of being left undefined.

        Raises:
            aiohttp.ClientResponseError: Propagated from ``_request``.
        """
        xml = await self._request(url)
        soup = BeautifulSoup(xml, features="xml")

        # Start from placeholders so that a page lacking one of the tags still
        # yields a complete dict instead of raising UnboundLocalError.
        title = date = abstract = pdf = self._MISSING
        authors = []

        head = soup.head
        meta_tags = head.find_all("meta") if head is not None else []
        for elem in meta_tags:
            name = elem.get("name")
            content = elem.get("content")
            if name is None or content is None:
                continue
            if name == "citation_title":
                title = content
            elif name == "citation_author":
                authors.append(self._format_author(content))
            elif name == "citation_date":
                try:
                    parsed = datetime.datetime.strptime(content, "%Y/%m/%d")
                except ValueError:
                    # Unexpected date format: show the raw value rather than
                    # dropping the whole reply.
                    date = content
                else:
                    date = parsed.strftime("%d.%m.%Y")
            elif name == "citation_pdf_url":
                pdf = content
            elif name == "citation_abstract":
                abstract = content.strip()

        return {
            "title": title,
            "authors": ", ".join(authors) or self._MISSING,
            "date": date,
            "abstract": abstract,
            "pdf": pdf,
        }

    @command.passive(regex_expr, multiple=True, multiline=True)
    async def arxiv(self, evt, matches):
        """Reply to ``evt`` once for every arXiv link found in the message.

        Each entry of ``matches`` is unpacked as a pair whose second item is
        used as the URL to fetch.
        """
        for _, match in matches:
            try:
                d = await self._parse_arxiv(match)
            except Exception:
                # Handler boundary: one broken link must not prevent replies
                # for the remaining links in the same message.
                self.log.exception("Failed to fetch arXiv metadata for %s", match)
                continue
            out = self._output_text(d)
            await evt.reply(out, markdown=True)