-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
50 lines (38 loc) · 1.17 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import boto3
import os
import json
import feedparser
from dotenv import load_dotenv
load_dotenv()
# Runtime configuration, read from the process environment. load_dotenv()
# is called just above, so these values may originate from a .env file.
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
CHECKPOINT_FILE_NAME = os.getenv("CHECKPOINT_FILE_NAME")
# May be None when unset; set_checkpoint() compares it against "false".
TESTING_MODE = os.getenv("TESTING_MODE")

# Both S3 settings are mandatory. Abort with a non-zero status so that a
# calling shell or scheduler sees the misconfiguration as a failure.
# SystemExit is raised directly because the bare exit() helper is injected
# by the site module and is not guaranteed to exist in every interpreter.
if not S3_BUCKET_NAME:
    print("Populate .env with S3_BUCKET_NAME")
    raise SystemExit(1)
if not CHECKPOINT_FILE_NAME:
    print("Populate .env with CHECKPOINT_FILE_NAME")
    raise SystemExit(1)
def get_checkpoint():
    """Fetch the checkpoint object from S3 and return it decoded from JSON.

    Reads the object stored under CHECKPOINT_FILE_NAME in the bucket
    S3_BUCKET_NAME and parses its body with json.loads.

    Returns:
        The value obtained by decoding the object's body as JSON.
    """
    response = boto3.client("s3").get_object(
        Bucket=S3_BUCKET_NAME,
        Key=CHECKPOINT_FILE_NAME,
    )
    raw_body = response["Body"].read()
    return json.loads(raw_body)
def set_checkpoint(checkpoint_object):
    """Persist the checkpoint to S3, or only print it when in testing mode.

    The checkpoint is serialised with json.dumps and written to
    CHECKPOINT_FILE_NAME in S3_BUCKET_NAME only when TESTING_MODE is the
    string "false" (compared case-insensitively). Any other value,
    including an unset TESTING_MODE, is treated as testing mode: the
    checkpoint is printed and nothing is uploaded.

    Args:
        checkpoint_object: JSON-serialisable checkpoint data.
    """
    # os.getenv yields None for an unset variable; fall back to "" so that
    # case is handled as testing mode instead of raising AttributeError.
    if (TESTING_MODE or "").lower() != "false":
        print(f"Testing mode on - checkpoint: {checkpoint_object}")
        return

    s3_client = boto3.client("s3")
    s3_client.put_object(
        Body=json.dumps(checkpoint_object),
        Bucket=S3_BUCKET_NAME,
        Key=CHECKPOINT_FILE_NAME,
    )
def get_feed_fields(url):
    """Return the field names of the first entry of the feed at *url*.

    Args:
        url: Passed unchanged to feedparser.parse.

    Returns:
        The keys() of the first parsed entry, or None when the parsed
        feed has no entries.
    """
    entries = feedparser.parse(url).entries
    if not entries:
        return None
    return entries[0].keys()