Skip to content

Commit

Permalink
update version and enable exception
Browse files Browse the repository at this point in the history
  • Loading branch information
HurinHu committed Dec 26, 2022
1 parent b5931f6 commit d9936f0
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 20 deletions.
54 changes: 36 additions & 18 deletions GoogleNews/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

### MODULES
import re
import urllib.request
Expand Down Expand Up @@ -79,11 +78,15 @@ def __init__(self,lang="en",period="",start="",end="",encode="utf-8",region=None
self.__start = start
self.__end = end
self.__encode = encode
self.__version = '1.6.5'
self.__exception = False
self.__version = '1.6.6'

def getVersion(self):
    """Return the version string stored on this instance."""
    current_version = self.__version
    return current_version

def enableException(self, enable=True):
    """Switch exception propagation on or off.

    Parameter:
    enable = value stored in the instance's private exception flag
             (defaults to True). The error handlers of the fetching and
             sorting methods check this flag: when it is truthy they
             re-raise the caught error after printing it, otherwise
             they only print it.
    """
    flag = enable
    self.__exception = flag

def set_lang(self, lang):
    """Store the language code to use for subsequent requests.

    Parameter:
    lang = language code; get_news() lower-cases it and sends it as
           the ``hl`` query parameter of the Google News URL
    """
    new_lang = lang
    self.__lang = new_lang

Expand Down Expand Up @@ -142,7 +145,6 @@ def build_response(self):
def page_at(self, page=1):
"""
Retrieves a specific page from google.com in the news sections into __results.
Parameter:
page = number of the page to be retrieved
"""
Expand Down Expand Up @@ -191,13 +193,15 @@ def page_at(self, page=1):
self.response.close()
except Exception as e_parser:
print(e_parser)
pass
if self.__exception:
raise Exception(e_parser)
else:
pass
return results

def get_page(self, page=1):
"""
Retrieves a specific page from google.com in the news sections into __results.
Parameter:
page = number of the page to be retrieved
"""
Expand Down Expand Up @@ -232,7 +236,7 @@ def get_page(self, page=1):
tmp_date = ''
tmp_datetime=None
try:
tmp_desc = item.find("div", {"role" : "heading"}).next_sibling.text
tmp_desc = item.find("div", {"role" : "heading"}).next_sibling.text.replace('\n','')
except Exception:
tmp_desc = ''
try:
Expand All @@ -245,30 +249,33 @@ def get_page(self, page=1):
self.response.close()
except Exception as e_parser:
print(e_parser)
pass
if self.__exception:
raise Exception(e_parser)
else:
pass

def getpage(self, page=1):
    """Legacy alias of get_page().

    Kept so that code written against older releases keeps working
    after an upgrade; do not remove it.

    Parameter:
    page = number of the page to be retrieved (forwarded unchanged)
    """
    fetch = self.get_page
    fetch(page)

def get_news(self, key="",deamplify=False):
if key != '':
key = "+".join(key.split(" "))
# if period is set, add it to the query
if self.__period != "":
key += f"+when:{self.__period}"
self.url = 'https://news.google.com/search?q={}&hl={}'.format(key,self.__lang.lower())
key = "+".join(key.split(" "))
else:
# if no query, users still can use period
if self.__period != "":
self.url += f"when:{self.__period}"
self.url = 'https://news.google.com/?hl={}'.format(self.__lang)
key += f"when:{self.__period}"
key = urllib.request.quote(key.encode(self.__encode))
self.url = 'https://news.google.com/search?q={}&hl={}'.format(key,self.__lang.lower())

print(self.url)
try:
self.req = urllib.request.Request(self.url, headers=self.headers)
self.response = urllib.request.urlopen(self.req)
self.page = self.response.read()
self.content = Soup(self.page, "html.parser")
articles = self.content.select('div[class="NiLAwe y6IFtc R7GTQ keNKEd j7vNaf nID9nc"]')
articles = self.content.select('article')
for article in articles:
try:
# title
Expand All @@ -278,7 +285,7 @@ def get_news(self, key="",deamplify=False):
title=None
# description
try:
desc=article.find('span').text
desc=None
except:
desc=None
# date
Expand Down Expand Up @@ -316,21 +323,28 @@ def get_news(self, key="",deamplify=False):
site=article.find("time").parent.find("a").text
except:
site=None
try:
media=article.find("div").find("a").text
except:
media=None
# collection
self.__results.append({'title':title,
'desc':desc,
'date':date,
'datetime':define_date(date),
'link':link,
'img':img,
'media':None,
'media':media,
'site':site})
except Exception as e_article:
print(e_article)
self.response.close()
except Exception as e_parser:
print(e_parser)
pass
if self.__exception:
raise Exception(e_parser)
else:
pass

def total_count(self):
    """Return the total count currently stored on this instance.

    clear() resets the stored value to 0.
    """
    count = self.__totalcount
    return count
Expand All @@ -348,6 +362,10 @@ def results(self,sort=False):
results.sort(key = lambda x:x['datetime'],reverse=True)
except Exception as e_sort:
print(e_sort)
if self.__exception:
raise Exception(e_sort)
else:
pass
results=self.__results
return results

Expand All @@ -367,4 +385,4 @@ def clear(self):
self.__texts = []
self.__links = []
self.__results = []
self.__totalcount = 0
self.__totalcount = 0
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ googlenews = GoogleNews()
```
print(googlenews.getVersion())
```
- Enable raising exceptions on errors
```
googlenews.enableException(True)
```
- Optionally choose a language
```
googlenews = GoogleNews(lang='en')
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="GoogleNews",
version="1.6.5",
version="1.6.6",
author="Hurin Hu",
author_email="[email protected]",
description="Google News search for Python",
Expand Down
2 changes: 1 addition & 1 deletion test/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class TestStringMethods(unittest.TestCase):

def testVersion(self):
# Verifies that the version string reported by GoogleNews.getVersion()
# contains the expected release number.
googlenews = GoogleNews()
# NOTE(review): two consecutive assignments follow; this looks like the
# removed/added pair of a diff view. The second value is the one in effect.
version = '1.6.5'
version = '1.6.6'
# assertIn performs a containment check (version in getVersion()),
# not a strict equality comparison.
self.assertIn(version, googlenews.getVersion())
print('Latest version matched')

Expand Down

0 comments on commit d9936f0

Please sign in to comment.