From d9936f08b9ddfd788365631db79bfd328b81d0ca Mon Sep 17 00:00:00 2001 From: HurinHu Date: Tue, 27 Dec 2022 11:55:45 +1300 Subject: [PATCH] update version and enable exception --- GoogleNews/__init__.py | 54 ++++++++++++++++++++++++++++-------------- README.md | 4 ++++ setup.py | 2 +- test/test_search.py | 2 +- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/GoogleNews/__init__.py b/GoogleNews/__init__.py index e9a2700..35cf1b2 100644 --- a/GoogleNews/__init__.py +++ b/GoogleNews/__init__.py @@ -1,4 +1,3 @@ - ### MODULES import re import urllib.request @@ -79,11 +78,15 @@ def __init__(self,lang="en",period="",start="",end="",encode="utf-8",region=None self.__start = start self.__end = end self.__encode = encode - self.__version = '1.6.5' + self.__exception = False + self.__version = '1.6.6' def getVersion(self): return self.__version + def enableException(self, enable=True): + self.__exception = enable + def set_lang(self, lang): self.__lang = lang @@ -142,7 +145,6 @@ def build_response(self): def page_at(self, page=1): """ Retrieves a specific page from google.com in the news sections into __results. - Parameter: page = number of the page to be retrieved """ @@ -191,13 +193,15 @@ def page_at(self, page=1): self.response.close() except Exception as e_parser: print(e_parser) - pass + if self.__exception: + raise Exception(e_parser) + else: + pass return results def get_page(self, page=1): """ Retrieves a specific page from google.com in the news sections into __results. - Parameter: page = number of the page to be retrieved """ @@ -232,7 +236,7 @@ def get_page(self, page=1): tmp_date = '' tmp_datetime=None try: - tmp_desc = item.find("div", {"role" : "heading"}).next_sibling.text + tmp_desc = item.find("div", {"role" : "heading"}).next_sibling.text.replace('\n','') except Exception: tmp_desc = '' try: @@ -245,7 +249,10 @@ def get_page(self, page=1): self.response.close() except Exception as e_parser: print(e_parser) - pass + if self.__exception: + raise Exception(e_parser) + else: + pass def getpage(self, page=1): """Don't remove this, will affect old version user when upgrade""" @@ -253,22 +260,22 @@ def getpage(self, page=1): def get_news(self, key="",deamplify=False): if key != '': - key = "+".join(key.split(" ")) - # if period is set, add it to the query if self.__period != "": key += f"+when:{self.__period}" - self.url = 'https://news.google.com/search?q={}&hl={}'.format(key,self.__lang.lower()) + key = "+".join(key.split(" ")) else: - # if no query, users still can use period if self.__period != "": - self.url += f"when:{self.__period}" - self.url = 'https://news.google.com/?hl={}'.format(self.__lang) + key += f"when:{self.__period}" + key = urllib.request.quote(key.encode(self.__encode)) + self.url = 'https://news.google.com/search?q={}&hl={}'.format(key,self.__lang.lower()) + + print(self.url) try: self.req = urllib.request.Request(self.url, headers=self.headers) self.response = urllib.request.urlopen(self.req) self.page = self.response.read() self.content = Soup(self.page, "html.parser") - articles = self.content.select('div[class="NiLAwe y6IFtc R7GTQ keNKEd j7vNaf nID9nc"]') + articles = self.content.select('article') for article in articles: try: # title @@ -278,7 +285,7 @@ def get_news(self, key="",deamplify=False): title=None # description try: - desc=article.find('span').text + desc=None except: desc=None # date @@ -316,6 +323,10 @@ def get_news(self, key="",deamplify=False): site=article.find("time").parent.find("a").text except: site=None + try: + media=article.find("div").find("a").text + except: + media=None # collection self.__results.append({'title':title, 'desc':desc, @@ -323,14 +334,17 @@ def get_news(self, key="",deamplify=False): 'datetime':define_date(date), 'link':link, 'img':img, - 'media':None, + 'media':media, 'site':site}) except Exception as e_article: print(e_article) self.response.close() except Exception as e_parser: print(e_parser) - pass + if self.__exception: + raise Exception(e_parser) + else: + pass def total_count(self): return self.__totalcount @@ -348,6 +362,10 @@ def results(self,sort=False): results.sort(key = lambda x:x['datetime'],reverse=True) except Exception as e_sort: print(e_sort) + if self.__exception: + raise Exception(e_sort) + else: + pass results=self.__results return results @@ -367,4 +385,4 @@ def clear(self): self.__texts = [] self.__links = [] self.__results = [] - self.__totalcount = 0 + self.__totalcount = 0 \ No newline at end of file diff --git a/README.md b/README.md index 339c011..97ab9c5 100755 --- a/README.md +++ b/README.md @@ -29,6 +29,10 @@ googlenews = GoogleNews() ``` print(googlenews.getVersion()) ``` +- Enable to throw exception +``` +googlenews.enableException(True) +``` - Optional choose language ``` googlenews = GoogleNews(lang='en') diff --git a/setup.py b/setup.py index 7c05fa3..fb0aad5 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="GoogleNews", - version="1.6.5", + version="1.6.6", author="Hurin Hu", author_email="hurin@live.ca", description="Google News search for Python", diff --git a/test/test_search.py b/test/test_search.py index b548fce..3b09514 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -51,7 +51,7 @@ class TestStringMethods(unittest.TestCase): def testVersion(self): googlenews = GoogleNews() - version = '1.6.5' + version = '1.6.6' self.assertIn(version, googlenews.getVersion()) print('Latest version matched')