-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathOpenHubCodeLocationExtractor.py
More file actions
45 lines (33 loc) · 1.1 KB
/
OpenHubCodeLocationExtractor.py
File metadata and controls
45 lines (33 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/python
import csv
import sys

try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2

import requests
# Module-level defaults, bound once when the file is loaded.
# NOTE(review): `i` does not appear to be read anywhere in this file -
# confirm before removing.
project, i = "", 0
class MyHTMLParser(HTMLParser):
    """Report the text of ``<td class="col-md-4">`` cells as code locations.

    For every such cell the first non-blank text chunk is stripped, printed
    to stdout and appended to ``codeLocations``. Text outside those cells is
    ignored.
    """

    # Class-level defaults; instances rebind them while parsing.
    foundCodeLocation = False  # True while inside a matching cell, before its text
    codeLocation = ""          # kept for backward compatibility; always reset to ""

    def __init__(self, *args, **kwargs):
        # Explicit base-class call instead of super(): on Python 2 the
        # HTMLParser base is an old-style class and super() fails on it.
        HTMLParser.__init__(self, *args, **kwargs)
        # Every code location reported so far, in document order.
        self.codeLocations = []

    def handle_starttag(self, tag, attrs):
        """Arm the capture flag when a ``<td class="col-md-4">`` cell opens."""
        if tag != 'td':
            return
        # Look the class up by name so attribute order does not matter, and
        # re-evaluate on every <td> so a cell whose </td> was omitted cannot
        # leave the flag armed for the following cell.
        self.foundCodeLocation = dict(attrs).get('class') == "col-md-4"
        self.codeLocation = ""

    def handle_endtag(self, tag):
        """Disarm the capture flag when the cell closes without any text."""
        if tag == 'td':
            self.foundCodeLocation = False

    def handle_data(self, data):
        """Report the first non-blank text chunk inside a matching cell."""
        if not self.foundCodeLocation:
            return
        text = data.strip()
        if not text:
            # Layout whitespace before the real content: keep waiting.
            return
        print(text)
        self.codeLocations.append(text)
        # Only the first chunk of a cell is reported.
        self.foundCodeLocation = False
        self.codeLocation = ""
# OpenHub enlistments page for one project; {0} is replaced by the project name.
URL = "https://www.openhub.net/p/{0}/enlistments"

# Seconds to wait for OpenHub before giving up on a single project.
REQUEST_TIMEOUT = 30


def _open_project_file(path):
    """Open the project list in a mode the csv module accepts on this Python."""
    if sys.version_info[0] >= 3:
        # The 'U' mode flag was removed in Python 3.11; newline='' hands line
        # endings to the csv module untouched, as its documentation asks.
        return open(path, 'r', newline='')
    return open(path, 'rU')


def main(argv=None):
    """Print the code locations of every project listed in a '|'-separated file.

    For each row the first field is taken as the project name and echoed as
    ``<name>|``. The project's enlistments page is then fetched and fed to
    MyHTMLParser, which prints what it finds. Blank rows are skipped. A
    project whose page cannot be fetched is reported on stderr and skipped,
    so one failure does not abort the whole run.

    argv: argument vector (defaults to sys.argv); argv[1] is the file path.
    Returns 0 on completion, 2 when the file argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "OpenHubCodeLocationExtractor.py"
        sys.stderr.write("usage: {0} PROJECT_FILE\n".format(prog))
        return 2

    with _open_project_file(argv[1]) as f:
        freader = csv.reader(f, delimiter='|', quoting=csv.QUOTE_NONE)
        for row in freader:
            # csv yields [] for blank lines; there is no project to look up.
            if not row or not row[0].strip():
                continue
            project = row[0]
            print(project + "|")
            try:
                resp = requests.get(url=URL.format(project),
                                    timeout=REQUEST_TIMEOUT)
                # Do not parse an error page as if it were the enlistments list.
                resp.raise_for_status()
            except requests.RequestException as err:
                sys.stderr.write(
                    "skipping {0}: {1}\n".format(project, err))
                continue
            parser = MyHTMLParser()
            parser.feed(resp.text)
            # Flush any text the parser is still buffering.
            parser.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())