-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser-aoo.go
95 lines (80 loc) · 2.5 KB
/
parser-aoo.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main
import (
	"bytes"
	"fmt"
	"html"
	"log"
	"strings"

	"github.com/PuerkitoBio/goquery"
	rss "github.com/gorilla/feeds"
)
// parseAoo is the Archive of Our Own (AO3) notification email parser.
//
// It walks the given notifications and, for every message whose subject
// contains "posted Chapter", scans the <a> tags of the HTML body to find the
// author, work and chapter links. Each message that yields all three becomes
// one feed item whose ID and link are the chapter URL, whose title is
// "<work> - <chapter>", and whose timestamps are the notification date.
//
// Messages with a non-matching subject, an unparsable HTML body, or a missing
// author/work/chapter link are logged and skipped. The returned slice is never
// nil; it is empty when no notification produced an item.
func parseAoo(notifications []*NotificationEmail) []*rss.Item {
	items := []*rss.Item{}
	for _, notification := range notifications {
		// Other emails from AO3 will not contain this string in the subject.
		if !strings.Contains(notification.Subj, "posted Chapter") {
			continue
		}
		log.Printf("Subj: %s", notification.Subj)

		// Parse the HTML part of the message.
		soup, err := goquery.NewDocumentFromReader(bytes.NewBufferString(notification.Html))
		if err != nil {
			// The document must not be used after a failed parse, so really
			// skip this message instead of only saying so.
			log.Printf("Message failed to parse, skipping: %v", err)
			continue
		}

		// thingLink pairs the visible text of an anchor with its target.
		type thingLink struct {
			Text string
			URL  string
		}
		var author, work, chapter thingLink

		// Produce the information we're going to use in the feed item
		// -- here, by investigating every <a> tag
		// and picking out the ones we know the meaning of.
		soup.Find("a").Each(func(_ int, s *goquery.Selection) {
			link, exists := s.Attr("href")
			if !exists {
				return
			}
			// The emails sometimes carry "http" instead of "https" links;
			// normalize them so the prefix checks below match.
			link = strings.Replace(link, "http://", "https://", -1)
			switch {
			case strings.HasPrefix(link, "https://archiveofourown.org/users"):
				author = thingLink{Text: s.Text(), URL: link}
			case strings.HasPrefix(link, "https://archiveofourown.org/works"):
				// Chapter links live under the work URL and are told apart
				// by containing "chapters".
				if strings.Contains(link, "chapters") {
					chapter = thingLink{Text: s.Text(), URL: link}
				} else {
					work = thingLink{Text: s.Text(), URL: link}
				}
			}
		})

		if (author == thingLink{} || chapter == thingLink{} || work == thingLink{}) {
			log.Print("failed to parse critical data out of the message, skipping...")
			continue
		}

		// We have the info we needed, so build the feed item.
		// The link texts and URLs come from the email body, so they are
		// HTML-escaped before being embedded in the description markup.
		esc := html.EscapeString
		item := &rss.Item{
			IsPermaLink: "true",
			Author:      &rss.Author{Name: author.Text},
			Link: &rss.Link{
				Rel:  "alternate",
				Href: chapter.URL,
			},
			Id:      chapter.URL,
			Title:   fmt.Sprintf("%s - %s", work.Text, chapter.Text),
			Updated: notification.Date,
			Created: notification.Date,
			Description: fmt.Sprintf(`
<p><a href="%s">%s</a> posted an update for story
<a href="%s">%s</a>:
<a href="%s">%s</a></p>
`, esc(author.URL), esc(author.Text), esc(work.URL), esc(work.Text), esc(chapter.URL), esc(chapter.Text)),
		}
		items = append(items, item)
	}
	return items
}