From 2eee77d6eb804940928d211ddfc80106c85064d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E9=98=B3?= <1070942929@qq.com>
Date: Thu, 7 Dec 2017 11:42:19 +0800
Subject: [PATCH 1/2] MacOS Python3.6.3 PyCharm

---
 .gitignore    |  3 +++
 .idea/vcs.xml |  6 +++++
 download.py   | 63 ++++++++++++++++++++++++++++-----------------------
 3 files changed, 44 insertions(+), 28 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 .idea/vcs.xml

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b2f5cb7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/.idea
+/pdfs
+**/.DS_Store
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/download.py b/download.py
index 1442527..d0ec812 100644
--- a/download.py
+++ b/download.py
@@ -3,7 +3,7 @@ import re
 from six.moves.urllib.request import urlopen
 from six.moves.urllib.error import HTTPError
-import urllib2
+import urllib
 import shutil
 import argparse
 import mistune
@@ -13,48 +13,55 @@ import requests
 
 # encoding=utf8
-import sys
+import sys
+
+
+# reload(sys)
+# sys.setdefaultencoding('utf8')
 
-reload(sys)
-sys.setdefaultencoding('utf8')
 
 def download_pdf(link, location, name):
     try:
         response = requests.get(link)
         with open(os.path.join(location, name), 'wb') as f:
-            f.write(response.content)
-            f.close()
+            f.write(response.content)
+            f.close()
     except HTTPError:
-        print('>>> Error 404: cannot be downloaded!\n')
-        raise
+        print('>>> Error 404: cannot be downloaded!\n')
+        raise
     except socket.timeout:
-        print(" ".join(("can't download", link, "due to connection timeout!")) )
+        print(" ".join(("can't download", link, "due to connection timeout!")))
         raise
 
+
 def clean_pdf_link(link):
     if 'arxiv' in link:
-        link = link.replace('abs', 'pdf')
-        if not(link.endswith('.pdf')):
+        link = link.replace('abs', 'pdf')
+        if not (link.endswith('.pdf')):
             link = '.'.join((link, 'pdf'))
     print(link)
     return link
 
-def clean_text(text, replacements = {':': '_', ' ': '_', '/': '_', '.': '', '"': ''}):
+
+def clean_text(text, replacements={':': '_', ' ': '_', '/': '_', '.': '', '"': ''}):
     for key, rep in replacements.items():
         text = text.replace(key, rep)
-    return text
+    return text
+
+
+def print_title(title, pattern="-"):
+    print('\n'.join(("", title, pattern * len(title))))
 
-def print_title(title, pattern = "-"):
-    print('\n'.join(("", title, pattern * len(title))))
 
 def get_extension(link):
     extension = os.path.splitext(link)[1][1:]
     if extension in ['pdf', 'html']:
         return extension
     if 'pdf' in extension:
-        return 'pdf'
-    return 'pdf'
+        return 'pdf'
+    return 'pdf'
+
 
 def shorten_title(title):
     m1 = re.search('[[0-9]*]', title)
@@ -62,16 +69,16 @@ def shorten_title(title):
     if m1:
         title = m1.group(0)
     if m2:
-        title = ' '.join((title, m2.group(0)))
-    return title[:50] + ' [...]'
+        title = ' '.join((title, m2.group(0)))
+    return title[:50] + ' [...]'
 
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description = 'Download all the PDF/HTML links into README.md')
+    parser = argparse.ArgumentParser(description='Download all the PDF/HTML links into README.md')
     parser.add_argument('-d', action="store", dest="directory")
-    parser.add_argument('--no-html', action="store_true", dest="nohtml", default = False)
-    parser.add_argument('--overwrite', action="store_true", default = False)
+    parser.add_argument('--no-html', action="store_true", dest="nohtml", default=False)
+    parser.add_argument('--overwrite', action="store_true", default=False)
     results = parser.parse_args()
 
     output_directory = 'pdfs' if results.directory is None else results.directory
@@ -81,8 +88,8 @@ def shorten_title(title):
     if results.overwrite and os.path.exists(output_directory):
         shutil.rmtree(output_directory)
 
-    with open('README.md') as readme:
-        readme_html = mistune.markdown(readme.read())
+    with open('README.md', 'rb') as readme:
+        readme_html = mistune.markdown(readme.read().decode('utf-8'))
 
     readme_soup = BeautifulSoup.BeautifulSoup(readme_html, "html.parser")
     point = readme_soup.find_all('h1')[1]
@@ -95,7 +102,7 @@ def shorten_title(title):
             h1_directory = os.path.join(output_directory, clean_text(point.text))
             current_directory = h1_directory
         elif point.name == 'h2':
-            current_directory = os.path.join(h1_directory, clean_text(point.text))
+            current_directory = os.path.join(h1_directory, clean_text(point.text))
         if not os.path.exists(current_directory):
             os.makedirs(current_directory)
         print_title(point.text)
@@ -111,7 +118,7 @@ def shorten_title(title):
                 try:
                     name = clean_text(point.text.split('[' + ext + ']')[0])
                     fullname = '.'.join((name, ext))
-                    if not os.path.exists('/'.join((current_directory, fullname)) ):
+                    if not os.path.exists('/'.join((current_directory, fullname))):
                         download_pdf(link, current_directory, '.'.join((name, ext)))
                 except KeyboardInterrupt:
                     try:
@@ -122,8 +129,8 @@ def shorten_title(title):
                     break
                 except:
                     failures.append(point.text)
-
-        point = point.next_sibling
+
+        point = point.next_sibling
 
     print('Done!')
    if failures:

From 9ee93a4d09ba292c083e1f00611d316f02fec295 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E9=98=B3?= <1070942929@qq.com>
Date: Sun, 17 Dec 2017 23:40:31 +0800
Subject: [PATCH 2/2] ignore ./idea

---
 .idea/vcs.xml | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 .idea/vcs.xml

diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-  </component>
-</project>
\ No newline at end of file
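
The substance of PATCH 1/2 is the Python 2 to Python 3 port of download.py: the reload(sys) / sys.setdefaultencoding('utf8') hack is commented out, urllib2 is swapped for urllib, and README.md is opened in binary mode and decoded explicitly rather than relying on a process-wide default encoding. A minimal standalone sketch of that README-parsing pattern follows; it assumes mistune 0.x and beautifulsoup4 are installed and a UTF-8 README.md sits in the working directory, and the link walk at the end is illustrative, not part of the patch.

    # Python 3 pattern adopted by the patch: read README.md as bytes and
    # decode UTF-8 explicitly -- no reload(sys)/sys.setdefaultencoding() hack.
    import mistune
    import bs4 as BeautifulSoup  # same import alias download.py uses

    with open('README.md', 'rb') as readme:
        readme_html = mistune.markdown(readme.read().decode('utf-8'))

    # Parse the rendered HTML and list the links the downloader would visit.
    readme_soup = BeautifulSoup.BeautifulSoup(readme_html, "html.parser")
    for anchor in readme_soup.find_all('a'):
        print(anchor.get('href'))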