From 7cc22c9add4ffa94953c7bb2a6c07b0b013b61b3 Mon Sep 17 00:00:00 2001 From: maxmarzolf Date: Fri, 30 Apr 2021 17:23:40 -0400 Subject: [PATCH 1/3] Mostly formatting - Added urllib3 to requirements.txt - Abbreviated props dictionary creation as dictionary literal in utility.py - Fixed typo in get_explanation function - Made formatting PEP 8 compliant --- apod/utility.py | 15 ++++++++------- apod_parser/apod_object_parser.py | 16 ++++++++++------ application.py | 18 ++++++++++-------- requirements.txt | 4 +++- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/apod/utility.py b/apod/utility.py index 16f8e82..5c4360c 100644 --- a/apod/utility.py +++ b/apod/utility.py @@ -13,6 +13,7 @@ import json import re import urllib3 as urllib + # import urllib.request LOG = logging.getLogger(__name__) @@ -46,11 +47,13 @@ def _get_thumbs(data): return video_thumb + # function that returns only last URL if there are multiple URLs stacked together def _get_last_url(data): regex = re.compile("(?:.(?!http[s]?://))+$") return regex.findall(data)[0] + def _get_apod_chars(dt, thumbs): media_type = 'image' if dt: @@ -60,7 +63,7 @@ def _get_apod_chars(dt, thumbs): apod_url = '%sastropix.html' % BASE LOG.debug('OPENING URL:' + apod_url) res = requests.get(apod_url) - + if res.status_code == 404: return None # LOG.error(f'No APOD entry for URL: {apod_url}') @@ -95,10 +98,8 @@ def _get_apod_chars(dt, thumbs): media_type = 'other' data = '' - props = {} + props = {'explanation': _explanation(soup), 'title': _title(soup)} - props['explanation'] = _explanation(soup) - props['title'] = _title(soup) copyright_text = _copyright(soup) if copyright_text: props['copyright'] = copyright_text @@ -130,7 +131,7 @@ def _title(soup): try: # Handler for later APOD entries number_of_center_elements = len(soup.find_all('center')) - if(number_of_center_elements == 2): + if number_of_center_elements == 2: center_selection = soup.find_all('center')[0] bold_selection = center_selection.find_all('b')[0] title = bold_selection.text.strip(' ') @@ -146,7 +147,7 @@ def _title(soup): title = title.encode('latin1').decode('cp1252') except Exception as ex: LOG.error(str(ex)) - + return title except Exception: # Handler for early APOD entries @@ -262,7 +263,7 @@ def _date(soup): _today = datetime.date.today() for line in soup.text.split('\n'): today_year = str(_today.year) - yesterday_year = str((_today-datetime.timedelta(days=1)).year) + yesterday_year = str((_today - datetime.timedelta(days=1)).year) # Looks for the first line that starts with the current year. # This also checks yesterday's year so it doesn't break on January 1st at 00:00 UTC # before apod.nasa.gov uploads a new image. diff --git a/apod_parser/apod_object_parser.py b/apod_parser/apod_object_parser.py index df2d627..c51c1c1 100644 --- a/apod_parser/apod_object_parser.py +++ b/apod_parser/apod_object_parser.py @@ -3,6 +3,7 @@ import os from PIL import Image + def get_data(api_key): raw_response = requests.get(f'https://api.nasa.gov/planetary/apod?api_key={api_key}').text response = json.loads(raw_response) @@ -14,9 +15,9 @@ def get_date(response): return date -def get_explaination(response): - explaination = response['explanation'] - return explaination +def get_explanation(response): + explanation = response['explanation'] + return explanation def get_hdurl(response): @@ -28,7 +29,8 @@ def get_media_type(response): media_type = response['media_type'] return media_type -def get_service_version(response): + +def get_service_version(response): service_version = response['service_version'] return service_version @@ -37,16 +39,18 @@ def get_title(response): service_version = response['title'] return service_version + def get_url(response): url = response['url'] return url + def download_image(url, date): - if os.path.isfile(f'{date}.png') == False: + if not os.path.isfile(f'{date}.png'): raw_image = requests.get(url).content with open(f'{date}.jpg', 'wb') as file: file.write(raw_image) - + else: return FileExistsError diff --git a/application.py b/application.py index d9ce8ac..35f640d 100644 --- a/application.py +++ b/application.py @@ -13,8 +13,9 @@ @author=JustinGOSSES @email=justin.c.gosses@nasa.gov """ import sys + sys.path.insert(0, "../lib") -### justin edit +# justin edit sys.path.insert(1, ".") from datetime import datetime, date @@ -24,8 +25,8 @@ from apod.utility import parse_apod, get_concepts import logging -#### added by justin for EB -#from wsgiref.simple_server import make_server +# added by justin for EB +# from wsgiref.simple_server import make_server application = Flask(__name__) CORS(application) @@ -44,9 +45,9 @@ try: with open('alchemy_api.key', 'r') as f: ALCHEMY_API_KEY = f.read() -#except FileNotFoundError: +# except FileNotFoundError: except IOError: - LOG.info('WARNING: NO alchemy_api.key found, concept_tagging is NOT supported') + LOG.info('WARNING: NO alchemy_api.key found, concept_tagging is NOT supported') def _abort(code, msg, usage=True): @@ -92,7 +93,7 @@ def _apod_handler(dt, use_concept_tags=False, use_default_today_date=False, thum served through the API. """ try: - + page_props = parse_apod(dt, use_default_today_date, thumbs) if not page_props: return None @@ -167,7 +168,7 @@ def _get_json_for_random_dates(count, use_concept_tags, thumbs): for date_ordinal in random_date_ordinals: dt = date.fromordinal(date_ordinal) data = _apod_handler(dt, use_concept_tags, date_ordinal == today_ordinal, thumbs) - + # Handle case where no data is available if not data: continue @@ -214,7 +215,7 @@ def _get_json_for_date_range(start_date, end_date, use_concept_tags, thumbs): while start_ordinal <= end_ordinal: # get data dt = date.fromordinal(start_ordinal) - + data = _apod_handler(dt, use_concept_tags, start_ordinal == today_ordinal, thumbs) # Handle case where no data is available @@ -245,6 +246,7 @@ def home(): methodname=APOD_METHOD_NAME, usage=_usage(joinstr='", "', prestr='"') + '"') + @application.route('/static/') def serve_static(asset_path): return current_app.send_static_file(asset_path) diff --git a/requirements.txt b/requirements.txt index a6c5724..0d835e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,6 @@ nose==1.3.7 setupext-janitor==1.0.0 bs4==0.0.1 mock>=3.0.0 -Pillow==7.1.2 \ No newline at end of file +Pillow==7.1.2 +urllib3~=1.26.4 +setuptools~=56.0.0 \ No newline at end of file From 6bf1d3df7a81ae849fabf0e38a273cc7f034fa00 Mon Sep 17 00:00:00 2001 From: maxmarzolf Date: Tue, 8 Jun 2021 12:27:24 -0400 Subject: [PATCH 2/3] Pytests --- apod/utility.py | 31 ++++++--- requirements.txt | 42 +++++++----- tests/apod/test_utility.py | 137 +++++++++++++++++++++++-------------- 3 files changed, 134 insertions(+), 76 deletions(-) diff --git a/apod/utility.py b/apod/utility.py index 5c4360c..238be2e 100644 --- a/apod/utility.py +++ b/apod/utility.py @@ -23,8 +23,10 @@ BASE = 'https://apod.nasa.gov/apod/' -# function for getting video thumbnails def _get_thumbs(data): + """ + Function for getting video thumbnails + """ global video_thumb if "youtube" in data or "youtu.be" in data: # get ID from YouTube URL @@ -48,19 +50,32 @@ def _get_thumbs(data): return video_thumb -# function that returns only last URL if there are multiple URLs stacked together def _get_last_url(data): + """ + Function that returns only last URL if there are multiple URLs stacked together + """ regex = re.compile("(?:.(?!http[s]?://))+$") return regex.findall(data)[0] -def _get_apod_chars(dt, thumbs): - media_type = 'image' +def _format_url(dt): + """ + Returns url for APOD page + """ if dt: date_str = dt.strftime('%y%m%d') apod_url = '%sap%s.html' % (BASE, date_str) else: apod_url = '%sastropix.html' % BASE + return apod_url + + +def _get_apod_chars(dt, thumbs): + """ + Gets data from APOD page + """ + media_type = 'image' + apod_url = _format_url(dt) LOG.debug('OPENING URL:' + apod_url) res = requests.get(apod_url) @@ -131,7 +146,7 @@ def _title(soup): try: # Handler for later APOD entries number_of_center_elements = len(soup.find_all('center')) - if number_of_center_elements == 2: + if (number_of_center_elements == 2): center_selection = soup.find_all('center')[0] bold_selection = center_selection.find_all('b')[0] title = bold_selection.text.strip(' ') @@ -284,7 +299,7 @@ def _date(soup): raise Exception('Date not found in soup data.') -def parse_apod(dt, use_default_today_date=False, thumbs=False): +def _image_url(dt, use_default_today_date=False, thumbs=False): """ Accepts a date in '%Y-%m-%d' format. Returns the URL of the APOD image of that day, noting that @@ -293,8 +308,8 @@ def parse_apod(dt, use_default_today_date=False, thumbs=False): LOG.debug('apod chars called date:' + str(dt)) try: - return _get_apod_chars(dt, thumbs) - + data = _get_apod_chars(dt, thumbs) + return data['url'] except Exception as ex: # handle edge case where the service local time diff --git a/requirements.txt b/requirements.txt index 0d835e6..c3000e2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,29 @@ -# This requirements file lists all third-party dependencies for this project. -# -# Run 'pip install -r requirements.txt -t lib/' to install these dependencies -# in `lib/` subdirectory. -# -# Note: The `lib` directory is added to `sys.path` by `appengine_config.py`. -flask>=1.0.2 -flask-cors>=3.0.7 -gunicorn==19.5.0 -Jinja2>=2.8 -Werkzeug>=0.10.4 +attrs==21.2.0 beautifulsoup4==4.5.3 -requests>=2.20.0 +bs4==0.0.1 +certifi==2020.12.5 +chardet==4.0.0 +click==7.1.2 coverage==4.1 +Flask==1.1.2 +Flask-Cors==3.0.10 +gunicorn==19.5.0 +idna==2.10 +iniconfig==1.1.1 +itsdangerous==1.1.0 +Jinja2==2.11.3 +MarkupSafe==1.1.1 +mock==4.0.3 nose==1.3.7 -setupext-janitor==1.0.0 -bs4==0.0.1 -mock>=3.0.0 +packaging==20.9 Pillow==7.1.2 -urllib3~=1.26.4 -setuptools~=56.0.0 \ No newline at end of file +pluggy==0.13.1 +py==1.10.0 +pyparsing==2.4.7 +pytest==6.2.4 +requests==2.25.1 +setupext-janitor==1.0.0 +six==1.15.0 +toml==0.10.2 +urllib3==1.26.4 +Werkzeug==1.0.1 diff --git a/tests/apod/test_utility.py b/tests/apod/test_utility.py index ed17e7c..73a40f3 100644 --- a/tests/apod/test_utility.py +++ b/tests/apod/test_utility.py @@ -1,29 +1,29 @@ #!/bin/sh/python -# coding= utf-8 -import unittest -from apod import utility -import logging - -logging.basicConfig(level=logging.DEBUG) +# coding= utf-8 +import pytest +from bs4 import BeautifulSoup +import requests from datetime import datetime -class TestApod(unittest.TestCase): - """Test the extraction of APOD characteristics.""" - - TEST_DATA = { - 'normal page, copyright' : + +import apod.utility + +BASE = 'https://apod.nasa.gov/apod/' +TEST_URL = 'https://apod.nasa.gov/apod/ap170322.html' +TEST_DATA = { + 'normal page, copyright': { "datetime": datetime(2017, 3, 22), "copyright": 'Robert Gendler', - "date": "2017-03-22", - "explanation": "In cosmic brush strokes of glowing hydrogen gas, this beautiful skyscape unfolds across the plane of our Milky Way Galaxy near the northern end of the Great Rift and the center of the constellation Cygnus the Swan. A 36 panel mosaic of telescopic image data, the scene spans about six degrees. Bright supergiant star Gamma Cygni (Sadr) to the upper left of the image center lies in the foreground of the complex gas and dust clouds and crowded star fields. Left of Gamma Cygni, shaped like two luminous wings divided by a long dark dust lane is IC 1318 whose popular name is understandably the Butterfly Nebula. The more compact, bright nebula at the lower right is NGC 6888, the Crescent Nebula. Some distance estimates for Gamma Cygni place it at around 1,800 light-years while estimates for IC 1318 and NGC 6888 range from 2,000 to 5,000 light-years.", - "hdurl": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-L.jpg", - "media_type": "image", - "service_version": "v1", - "title": "Central Cygnus Skyscape", - "url": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-1024.jpg", + "date": "2017-03-22", + "explanation": "In cosmic brush strokes of glowing hydrogen gas, this beautiful skyscape unfolds across the plane of our Milky Way Galaxy near the northern end of the Great Rift and the center of the constellation Cygnus the Swan. A 36 panel mosaic of telescopic image data, the scene spans about six degrees. Bright supergiant star Gamma Cygni (Sadr) to the upper left of the image center lies in the foreground of the complex gas and dust clouds and crowded star fields. Left of Gamma Cygni, shaped like two luminous wings divided by a long dark dust lane is IC 1318 whose popular name is understandably the Butterfly Nebula. The more compact, bright nebula at the lower right is NGC 6888, the Crescent Nebula. Some distance estimates for Gamma Cygni place it at around 1,800 light-years while estimates for IC 1318 and NGC 6888 range from 2,000 to 5,000 light-years.", + "hdurl": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-L.jpg", + "media_type": "image", + "service_version": "v1", + "title": "Central Cygnus Skyscape", + "url": "https://apod.nasa.gov/apod/image/1703/Cygnus-New-1024.jpg", }, - 'newer page, Reprocessing & copyright' : + 'newer page, Reprocessing & copyright': { "datetime": datetime(2017, 2, 8), "copyright": "Jes�s M.Vargas & Maritxu Poyal", @@ -34,8 +34,8 @@ class TestApod(unittest.TestCase): "service_version": "v1", "title": "The Butterfly Nebula from Hubble", "url": "https://apod.nasa.gov/apod/image/1702/Butterfly_HubbleVargas_960.jpg" - }, - 'older page, copyright' : + }, + 'older page, copyright': { "datetime": datetime(2015, 11, 15), "copyright": "Sean M. Sabatini", @@ -46,11 +46,11 @@ class TestApod(unittest.TestCase): "service_version": "v1", "title": "Leonids Over Monument Valley", "url": "https://apod.nasa.gov/apod/image/1511/leonidsmonuments_sabatini_960.jpg" - }, - 'older page, copyright #2' : + }, + 'older page, copyright #2': { "datetime": datetime(2013, 3, 11), - # this illustrates problematic, but still functional parsing of the copyright + # this illustrates problematic, but still functional parsing of the copyright "copyright": 'Martin RietzeAlien Landscapes on Planet Earth', "date": "2013-03-11", "explanation": "Why does a volcanic eruption sometimes create lightning? Pictured above, the Sakurajima volcano in southern Japan was caught erupting in early January. Magma bubbles so hot they glow shoot away as liquid rock bursts through the Earth's surface from below. The above image is particularly notable, however, for the lightning bolts caught near the volcano's summit. Why lightning occurs even in common thunderstorms remains a topic of research, and the cause of volcanic lightning is even less clear. Surely, lightning bolts help quench areas of opposite but separated electric charges. One hypothesis holds that catapulting magma bubbles or volcanic ash are themselves electrically charged, and by their motion create these separated areas. Other volcanic lightning episodes may be facilitated by charge-inducing collisions in volcanic dust. Lightning is usually occurring somewhere on Earth, typically over 40 times each second.", @@ -59,8 +59,8 @@ class TestApod(unittest.TestCase): "service_version": "v1", "title": "Sakurajima Volcano with Lightning", "url": "https://apod.nasa.gov/apod/image/1303/volcano_reitze_960.jpg" - }, - 'older page, no copyright' : + }, + 'older page, no copyright': { "datetime": datetime(1998, 6, 19), "date": "1998-06-19", @@ -72,7 +72,7 @@ class TestApod(unittest.TestCase): "title": "Good Morning Mars", "url": "https://apod.nasa.gov/apod/image/9806/tharsis_mgs.jpg" }, - 'older page, no copyright, #2' : + 'older page, no copyright, #2': { "datetime": datetime(2012, 8, 30), "date": "2012-08-30", @@ -84,27 +84,62 @@ class TestApod(unittest.TestCase): "title": "Apollo 11 Landing Site Panorama", "url": "https://apod.nasa.gov/apod/image/1208/a11pan1040226lftsm600.jpg" }, - } - - def _test_harness(self, test_title, data): - - print ("Testing "+test_title) - - # make the call - values = utility.parse_apod(data['datetime']) - - # Test returned properties - for prop in values.keys(): - if prop == "copyright": - print(str(values['copyright'])) - self.assertEqual(values[prop], data[prop], "Test of property: "+prop) - - - def test_apod_characteristics(self): - - for page_type in TestApod.TEST_DATA.keys(): - self._test_harness(page_type, TestApod.TEST_DATA[page_type]) - - - - +} + + +def soups_on(): + url = apod.utility._format_url(TEST_DATA['normal page, copyright']['datetime']) + response = requests.get(url) + soup = BeautifulSoup(response.text, 'html.parser') + return soup + + +def test__get_last_url(): + urls = 'https://www.google.com https://www.yahoo.com https://www.bing.com' + test_call = apod.utility._get_last_url(urls) + assert test_call == 'https://www.bing.com' + + +# hit other if statement +def test__format_url(): + url = apod.utility._format_url(TEST_DATA['normal page, copyright']['datetime']) + assert url == TEST_URL + + +def test__get_apod_chars(): + data = apod.utility._get_apod_chars(TEST_DATA['normal page, copyright']['datetime'], '') + assert data['copyright'] == 'Robert Gendler' + assert data['date'] == '2017-03-22' + assert data['hdurl'] == 'https://apod.nasa.gov/apod/image/1703/Cygnus-New-L.jpg' + assert data['media_type'] == 'image' + assert data['title'] == 'Central Cygnus Skyscape' + assert data['url'] == 'https://apod.nasa.gov/apod/image/1703/Cygnus-New-1024.jpg' + + +def test__title(): + soup = soups_on() + title = apod.utility._title(soup) + assert title == 'Central Cygnus Skyscape' + + +def test__copyright(): + soup = soups_on() + copyright = apod.utility._copyright(soup) + assert copyright == TEST_DATA['normal page, copyright']['copyright'] + + +def test__explanation(): + soup = soups_on() + explanation = apod.utility._explanation(soup) + assert explanation == TEST_DATA['normal page, copyright']['explanation'] + + +# def test__date(): +# soup = soups_on() +# date = apod.utility._date(soup) +# assert date == TEST_DATA['normal page, copyright']['date'] + + +def test__image_url(): + image_url = apod.utility._image_url(TEST_DATA['normal page, copyright']['datetime']) + assert image_url == TEST_DATA['normal page, copyright']['url'] From 2d4cdfa213e31817da843b32afb089c9a656271e Mon Sep 17 00:00:00 2001 From: Max Date: Wed, 24 Aug 2022 15:05:21 -0400 Subject: [PATCH 3/3] Delete requirements.txt Removing unnecessary packages unique to my environment. --- requirements.txt | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c3000e2..0000000 --- a/requirements.txt +++ /dev/null @@ -1,29 +0,0 @@ -attrs==21.2.0 -beautifulsoup4==4.5.3 -bs4==0.0.1 -certifi==2020.12.5 -chardet==4.0.0 -click==7.1.2 -coverage==4.1 -Flask==1.1.2 -Flask-Cors==3.0.10 -gunicorn==19.5.0 -idna==2.10 -iniconfig==1.1.1 -itsdangerous==1.1.0 -Jinja2==2.11.3 -MarkupSafe==1.1.1 -mock==4.0.3 -nose==1.3.7 -packaging==20.9 -Pillow==7.1.2 -pluggy==0.13.1 -py==1.10.0 -pyparsing==2.4.7 -pytest==6.2.4 -requests==2.25.1 -setupext-janitor==1.0.0 -six==1.15.0 -toml==0.10.2 -urllib3==1.26.4 -Werkzeug==1.0.1