-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathOpenHubExtractor.py
More file actions
96 lines (73 loc) · 2.69 KB
/
OpenHubExtractor.py
File metadata and controls
96 lines (73 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/python
import csv
import sys

try:
    # Python 3 home of the parser base class.
    from html.parser import HTMLParser
except ImportError:
    # Python 2 module name, which this script was originally written against.
    from HTMLParser import HTMLParser

import requests
# Name of the project currently being scraped. The main loop at the bottom of
# this file rebinds it for every row of the project CSV, and
# MyHTMLParser.handle_endtag reads it when it prints a result line.
project = ""
class MyHTMLParser(HTMLParser):
    """Pull per-language line counts out of an OpenHub languages-summary page.

    The parser is a small state machine driven by the HTMLParser callbacks:

    * a ``<tr>`` whose first attribute is named ``class`` opens a row;
    * the first text seen after an ``<a>`` inside that row becomes the
      language name;
    * the first text seen after the next ``<td>`` becomes the code-line count;
    * once the code-line count is known, the fourth ``<td>`` whose first
      attribute value is ``center`` marks the total-line count;
    * the text following a ``<span>`` whose first attribute value is
      ``pull-right`` becomes the percentage (``%`` removed, whitespace
      stripped).

    When the row's ``</tr>`` arrives, one
    ``project|language|codeLines|totalLines|percentage`` line is printed if
    the percentage is at least 0.5 and the language is contained in the
    module-level ``languages`` collection. Both ``project`` and ``languages``
    are module globals that are looked up at call time, so they must be bound
    before a matching row is closed.
    """

    # --- "the next matching event belongs to ..." flags -------------------
    foundBlock = False        # inside a <tr class=...> row
    foundLanguage = False     # an <a> was opened inside the current row
    foundCodeLines = False    # the <td> after the language link was opened
    foundTotalLines = False   # the total-lines <td> was opened
    foundPercentage = False   # a <span ... "pull-right"> was opened

    # --- values collected for the current row (-1 / "" mean "not seen") ---
    language = ""
    codeLines = -1
    totalLines = -1
    percentage = -1

    # Number of centered <td> cells seen since the code-lines cell.
    tdCounter = 0

    def handle_starttag(self, tag, attrs):
        """Advance the state machine on an opening tag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        HTMLParser. Only the first pair is ever inspected.
        """
        if attrs:
            first_name, first_value = attrs[0][0], attrs[0][1]
        else:
            # No attributes: none of the attribute-based branches can match.
            first_name = first_value = None

        # The order of these branches is significant and must be preserved.
        if self.foundBlock and tag == 'a':
            self.foundLanguage = True
        elif self.foundLanguage and tag == 'td':
            # First cell after the language link holds the code-line count.
            self.foundCodeLines = True
            self.foundLanguage = False
        elif self.codeLines != -1 and tag == 'td' and first_value == "center":
            # Let three centered cells pass; the fourth one is total lines.
            if self.tdCounter == 3:
                self.tdCounter = 0
                self.foundTotalLines = True
            else:
                self.tdCounter += 1
        elif tag == 'tr' and first_name == "class":
            self.foundBlock = True
        elif tag == 'span' and first_value == "pull-right":
            self.foundPercentage = True

    def handle_data(self, data):
        """Store a text node into whichever field the flags say is pending."""
        if self.foundLanguage and self.language == "":
            self.language = data
        elif self.foundCodeLines and self.codeLines == -1:
            self.foundCodeLines = False
            self.codeLines = data

        # These two checks are independent of the chain above.
        if self.foundTotalLines:
            self.foundTotalLines = False
            self.totalLines = data
        if self.foundPercentage:
            self.foundPercentage = False
            self.percentage = data.replace("%", "").strip()

    def handle_endtag(self, tag):
        """Emit the collected row on ``</tr>`` and reset all per-row state."""
        if tag != 'tr' or not self.foundBlock:
            return

        # Final output
        if self._percentage_value() >= 0.5 and self.language in languages:
            print(project + "|" + self.language + "|" + self.codeLines + "|" + self.totalLines + "|" + self.percentage)

        self._reset_row()

    def _percentage_value(self):
        """Return the collected percentage as a float, or -1.0 if unusable.

        A cell whose text is not a number must not abort the whole run with a
        ValueError; -1.0 is below the output threshold, so the row is skipped.
        """
        try:
            return float(self.percentage)
        except (TypeError, ValueError):
            return -1.0

    def _reset_row(self):
        """Return every per-row flag, counter and value to its initial state.

        Clearing the transient flags and ``tdCounter`` as well as the values
        keeps a short or irregular row from shifting the column detection of
        the rows that follow it.
        """
        self.foundBlock = False
        self.foundLanguage = False
        self.foundCodeLines = False
        self.foundTotalLines = False
        self.foundPercentage = False
        self.tdCounter = 0
        self.language = ""
        self.codeLines = -1
        self.totalLines = -1
        self.percentage = -1
# OpenHub main URL; {0} is replaced by the project name.
URL = "https://www.openhub.net/p/{0}/analyses/latest/languages_summary"

# Seconds to wait on one request before giving up on that project. Without a
# timeout a single stalled connection blocks the whole batch indefinitely.
REQUEST_TIMEOUT = 30


def _open_csv(path):
    """Open *path* for csv.reader with a mode valid for the running Python."""
    if sys.version_info[0] < 3:
        # Python 2: universal-newline text mode, as the script always used.
        return open(path, 'rU')
    # Python 3: 'U' is rejected by newer releases; the csv module asks for
    # newline='' so that it can handle line endings itself.
    return open(path, 'r', newline='')


# Command-line arguments: project CSV file, then language CSV file.
if len(sys.argv) < 3:
    sys.stderr.write("usage: %s PROJECT_CSV LANGUAGE_CSV\n" % sys.argv[0])
    sys.exit(2)
projectFile = sys.argv[1]
languageFile = sys.argv[2]

# Languages to report; MyHTMLParser reads this module-level name.
languages = set()

print("Project|Language|CodeLines|TotalLines|Percentage")

# The first column of every non-empty row is a language name.
with _open_csv(languageFile) as f:
    freader = csv.reader(f)
    for row in freader:
        if row:
            languages.add(row[0])

# The first column of every non-empty row is a project name.
with _open_csv(projectFile) as f:
    freader = csv.reader(f, delimiter=',', quoting=csv.QUOTE_NONE)
    for row in freader:
        if not row:
            # A blank line yields an empty row; there is nothing to fetch.
            continue
        # Rebinds the module-level name that MyHTMLParser prints.
        project = row[0]
        try:
            resp = requests.get(url=URL.format(project), timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # Report on stderr so the pipe-delimited stdout stays clean, then
            # carry on with the remaining projects.
            sys.stderr.write("skipping %s: %s\n" % (project, err))
            continue
        # A fresh parser per page.
        parser = MyHTMLParser()
        parser.feed(resp.text)