# scrapVid.py
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
class ScrapVideos:
    """Collect the video URLs referenced by a web page and save them to a file.

    The page at ``url`` is downloaded, every ``<video>`` tag (and any
    ``<source>`` tag nested inside one) is inspected for a ``src`` attribute,
    and the resulting absolute URLs are written, one per line, to
    ``videolink.txt`` inside ``output_folder``.
    """

    # Seconds to wait for the server before giving up. Without a timeout
    # ``requests.get`` can block indefinitely on an unresponsive host.
    DEFAULT_TIMEOUT = 10

    def __init__(self, url, output_folder, timeout=DEFAULT_TIMEOUT):
        """Store the scraping configuration.

        Args:
            url: Address of the page to scan for videos.
            output_folder: Directory that will receive ``videolink.txt``;
                it is created on demand.
            timeout: Seconds to wait for the HTTP response (passed straight
                to ``requests.get``).
        """
        self.url = url
        self.output_folder = output_folder
        self.timeout = timeout

    @staticmethod
    def _video_sources(soup):
        """Return the raw ``src`` values of all <video> tags and their <source> children.

        Entries are ``None`` for tags that carry no ``src`` attribute; the
        caller is expected to filter them out.
        """
        sources = []
        for video_tag in soup.find_all('video'):
            sources.append(video_tag.get('src'))
            # Many pages leave <video> without src and list the media files
            # in nested <source> elements instead.
            sources.extend(tag.get('src') for tag in video_tag.find_all('source'))
        return sources

    @staticmethod
    def _absolute_urls(base_url, sources):
        """Resolve raw ``src`` values against ``base_url``.

        Missing, empty and whitespace-only values are skipped (joining an
        empty string would yield the page URL itself), and repeated URLs are
        kept only once, in first-seen order.
        """
        stripped = (src.strip() for src in sources if isinstance(src, str))
        resolved = (urljoin(base_url, src) for src in stripped if src)
        # dict.fromkeys removes duplicates while preserving insertion order.
        return list(dict.fromkeys(resolved))

    def extract_and_save_videos(self):
        """Download the page, extract its video URLs and write them to disk.

        Failures are reported on stdout rather than raised: an invalid URL,
        any other ``requests`` error, and any ``OSError`` while creating the
        folder or writing the file each produce their own message.
        """
        try:
            # Send an HTTP GET request to the webpage and get the HTML content
            response = requests.get(self.url, timeout=self.timeout)
            response.raise_for_status()

            # Parse the HTML content and gather the absolute video URLs
            soup = BeautifulSoup(response.text, 'html.parser')
            video_urls = self._absolute_urls(self.url, self._video_sources(soup))

            # Create the output folder if it doesn't exist
            os.makedirs(self.output_folder, exist_ok=True)

            # Save video URLs to videolink.txt
            videolink_path = os.path.join(self.output_folder, 'videolink.txt')
            with open(videolink_path, 'w', encoding='utf-8') as file:
                file.write('\n'.join(video_urls))
            print(f"Video links saved to {videolink_path}")
        except requests.exceptions.MissingSchema:
            print(f"Skipping download from {self.url} (Invalid URL)")
        except requests.exceptions.RequestException as e:
            print(f"Failed to fetch content from {self.url}: {e}")
        except OSError as e:
            print(f"Failed to save video links: {e}")