-
Notifications
You must be signed in to change notification settings - Fork 0
/
Web Scraping.py
85 lines (74 loc) · 2.25 KB
/
Web Scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
LISTING_URL = "https://dot.report/usdot/KS/Wichita"
OUTPUT_CSV = 'Pleasehojabhai.csv'
MISSING = '-'  # placeholder stored for any field that could not be scraped
PAGE_SETTLE_SECONDS = 5  # fixed pause after each detail-page navigation

ROW_XPATH = "//table/tbody/tr"
LINK_XPATH = "//table/tbody/tr/td/a"
COMPANY_NAME_XPATH = '//tbody/tr[2]/td[1]'
# Previously '///div[@ class="jumbotron"]/h1'; a triple slash is not valid
# XPath, so that lookup always failed and the DOT number was always '-'.
DOT_NUMBER_XPATH = '//div[@class="jumbotron"]/h1'
# NOTE(review): the address locator is an empty string in the original script,
# so the address lookup can never succeed. Fill in the real XPath; until then
# every address-derived column is written as MISSING.
ADDRESS_XPATH = ''
PHONE_XPATH = '//tbody/tr[4]/td[2]'

# Column order of the exported CSV (matches the order of the record dict).
COLUMNS = ["Company Name", "Dot Number", "Address", "Phone Number",
           "City", "State", "Zip"]


def pick_detail_links(hrefs, row_count):
    """Return the subset of listing hrefs that will be visited.

    The selection reproduces the original loop, which addressed the anchors
    with 1-based XPath positions ``2 .. row_count - 1``; in list terms that
    is ``hrefs[1:row_count - 1]``.

    NOTE(review): this skips the first anchor and stops before the position
    equal to the row count -- confirm that window is intentional.

    Args:
        hrefs: href values of all anchors found in the listing table, in
            document order (entries may be ``None``).
        row_count: number of table rows found on the listing page.

    Returns:
        A list of non-empty hrefs inside the window described above.
    """
    # max() keeps the stop index from going negative (which would slice from
    # the end) when the table has no rows.
    stop = max(row_count - 1, 1)
    return [href for href in hrefs[1:stop] if href]


def split_address(address):
    """Split a scraped address block into ``(street, city, state, zip)``.

    The first line is taken as the street. On the last line, the text before
    the first comma is the city; the whitespace-separated tokens after the
    last comma give the state (first token) and the zip code (last token).
    Any part that cannot be determined is returned as ``MISSING``.

    Args:
        address: raw element text, ``MISSING``, an empty string or ``None``.

    Returns:
        A 4-tuple of strings ``(street, city, state, zip_code)``.
    """
    text = (address or '').strip()
    if not text or text == MISSING:
        return MISSING, MISSING, MISSING, MISSING

    lines = text.split('\n')
    street = lines[0].strip() or MISSING

    pieces = lines[-1].split(',')
    city = pieces[0].strip() or MISSING
    tokens = pieces[-1].split()  # split() also drops the empty strings
    state = tokens[0] if tokens else MISSING
    # A single token cannot be both the state and the zip code.
    zip_code = tokens[-1] if len(tokens) > 1 else MISSING
    return street, city, state, zip_code


def text_or_missing(driver, xpath):
    """Return the text of the first element matching *xpath*, or ``MISSING``.

    An empty *xpath* is treated as "no locator configured" and yields
    ``MISSING`` without contacting the browser. WebDriver errors (for
    example, the element not being present) also yield ``MISSING`` so that
    one absent field does not abort the whole run.
    """
    if not xpath:
        return MISSING
    try:
        return driver.find_element(By.XPATH, xpath).text
    except WebDriverException:
        return MISSING


def scrape_company(driver, link):
    """Open one detail page and return its fields as a dict keyed by column."""
    driver.get(link)
    time.sleep(PAGE_SETTLE_SECONDS)

    company_name = text_or_missing(driver, COMPANY_NAME_XPATH)
    dot_number = text_or_missing(driver, DOT_NUMBER_XPATH)
    street, city, state, zip_code = split_address(
        text_or_missing(driver, ADDRESS_XPATH))
    phone = text_or_missing(driver, PHONE_XPATH)

    return {
        "Company Name": company_name,
        "Dot Number": dot_number,
        "Address": street,
        "Phone Number": phone,
        "City": city,
        "State": state,
        "Zip": zip_code,
    }


def main():
    """Scrape the listing page, visit each selected link and export a CSV.

    Prints the number of table rows found and the resulting DataFrame, then
    writes the DataFrame to ``OUTPUT_CSV`` without the index column.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.get(LISTING_URL)
        row_count = len(driver.find_elements(By.XPATH, ROW_XPATH))
        print(row_count)

        # Fetch every anchor once instead of one XPath query per position.
        hrefs = [anchor.get_attribute('href')
                 for anchor in driver.find_elements(By.XPATH, LINK_XPATH)]
        data = [scrape_company(driver, link)
                for link in pick_detail_links(hrefs, row_count)]
    finally:
        # Always release the browser, even if a navigation fails midway.
        driver.quit()

    df_data = pd.DataFrame(data, columns=COLUMNS)
    print(df_data)
    df_data.to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()