-
Notifications
You must be signed in to change notification settings - Fork 2
/
getdoc.go
111 lines (98 loc) · 3.12 KB
/
getdoc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
// Package getdoc provides a way to transform Telegram TL documentation into
// machine-readable format.
package getdoc
import (
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/gotd/getdoc/href"
)
// Doc is the top-level container of the extracted documentation.
//
// It bundles an Index together with four string-keyed maps holding
// constructors, types, methods and errors. Every field is serialized to JSON
// under the lower-case name given in its struct tag.
//
// NOTE(review): the map keys are presumably the entity names — confirm
// against the code that populates them.
type Doc struct {
	Index        Index                  `json:"index"`
	Constructors map[string]Constructor `json:"constructors"`
	Types        map[string]Type        `json:"types"`
	Methods      map[string]Method      `json:"methods"`
	Errors       map[string]Error       `json:"errors"`
}
// docTitle returns the text content of the "#dev_page_title" element of doc
// with leading and trailing whitespace removed.
func docTitle(doc *goquery.Document) string {
	title := doc.Find("#dev_page_title").Text()
	return strings.TrimSpace(title)
}
// docDescription collects description lines and links from the direct children
// of "#dev_page_content".
//
// Each non-empty <p> child is passed through href.Replace, its text is split
// on newlines, and every non-blank trimmed line is appended to desc. The result
// of href.Replace is run through addHost and appended to links. Iteration over
// the children stops after the first child carrying the "clearfix" class (that
// child itself is still processed).
func docDescription(doc *goquery.Document) (desc, links []string) {
	content := doc.Find("#dev_page_content")
	content.Each(func(_ int, root *goquery.Selection) {
		root.Children().EachWithBreak(func(_ int, child *goquery.Selection) bool {
			// Anything that is not a paragraph with text contributes nothing,
			// but may still be the "clearfix" stop marker.
			if !child.Is("p") || child.Text() == "" {
				return !child.HasClass("clearfix")
			}

			// href.Replace must run before the text is read below: the
			// original code relies on this ordering.
			found := href.Replace(child)

			paragraph := strings.TrimSpace(child.Text())
			for _, line := range strings.Split(paragraph, "\n") {
				if line = strings.TrimSpace(line); line != "" {
					desc = append(desc, line)
				}
			}
			links = append(links, addHost(found)...)

			return !child.HasClass("clearfix")
		})
	})
	return desc, links
}
// docTableAfterFunc returns the body rows ("tbody > tr") of the first <table>
// that follows a marker element among the direct children of
// "#dev_page_content".
//
// A child is a marker when after reports true for it. The after predicate is
// consulted for every visited child before the table check, so a <table> for
// which after returns true is treated as a marker, not as the result.
//
// If no such table exists, an empty, non-nil selection is returned.
func docTableAfterFunc(doc *goquery.Document, after func(s *goquery.Selection) bool) *goquery.Selection {
	var found *goquery.Selection
	seenMarker := false

	children := doc.Find("#dev_page_content").Children()
	children.EachWithBreak(func(_ int, node *goquery.Selection) bool {
		switch {
		case after(node):
			// Table title located; the next <table> sibling is the target.
			seenMarker = true
		case seenMarker && node.Is("table"):
			// Target table located; no need to look any further.
			found = node
			return false
		}
		return true
	})

	if found != nil {
		return found.First().Find("tbody > tr")
	}
	return &goquery.Selection{}
}
// ParamDescription holds the documentation of a single parameter: its name,
// its description text and the links associated with it.
//
// In JSON the fields appear as "name", "description" and "links"; "links" is
// omitted when the slice is empty.
type ParamDescription struct {
	Name        string   `json:"name"`
	Description string   `json:"description"`
	Links       []string `json:"links,omitempty"`
}
// docParams extracts parameter documentation from the table that follows the
// "Parameters" header of doc.
//
// The returned map is keyed by parameter name; each value is the matching
// ParamDescription. Only rows with exactly three <td> cells are used: the
// first cell is the name and the third is the description. Rows of any other
// width are skipped. The map is never nil.
func docParams(doc *goquery.Document) map[string]ParamDescription {
	isParamsHeader := func(s *goquery.Selection) bool {
		if s.Find("#parameters").Length() > 0 {
			return true
		}
		// Some pages have no such selector, so we try to detect "Parameters" header by text.
		//
		// TODO(tdakkota): try to parse attributes
		return strings.HasPrefix(s.Text(), "Parameters")
	}

	params := make(map[string]ParamDescription)

	rows := docTableAfterFunc(doc, isParamsHeader)
	rows.Each(func(_ int, tr *goquery.Selection) {
		var (
			cells []string
			links []string
		)
		tr.Find("td").Each(func(_ int, td *goquery.Selection) {
			// links is overwritten for every cell, so after the loop it holds
			// the links of the last cell in the row only.
			links = addHost(href.Replace(td))
			cells = append(cells, td.Text())
		})

		if len(cells) != 3 {
			return
		}

		name, description := cells[0], cells[2]
		params[name] = ParamDescription{
			Name:        name,
			Description: description,
			Links:       links,
		}
	})

	return params
}