Merge pull request #35 from theslavicbear/development

Development
theslavicbear · Dec 1, 2019 · e31dd99 · e31dd99
2 parents 4692fda + b69977f
commit e31dd99
Show file tree

Hide file tree

Showing 4 changed files with 92 additions and 4 deletions.
diff --git a/Ebook-Publisher.py b/Ebook-Publisher.py
@@ -24,6 +24,7 @@
     'www.classicreader.com':lambda x:Classicreader.Classicreader(x),
     'chyoa.com':lambda x:Chyoa.Chyoa(x),
     'www.wattpad.com':lambda x:Wattpad.Wattpad(x),
+    'nhentai.net':lambda x:Nhentai.Nhentai(x),
 }
 
 #function for making text files
@@ -57,12 +58,14 @@ def MakeEpub(site):
     book.add_author(site.author)
     c=[]
 
-    if type(site) is not Literotica.Literotica:
+    if type(site) is not Literotica.Literotica and type(site) is not Nhentai.Nhentai:
         toc=()
         for i in range(len(site.rawstoryhtml)):
             c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
             if type(site) is Chyoa.Chyoa:
                 c[i].content='<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.truestoryhttml[i]
+            elif type(site) is Nhentai.Nhentai:
+                c[i].content=site.truestoryhttml[i]
             else:
                 c[i].content='<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.rawstoryhtml[i].prettify()
             book.add_item(c[i])
@@ -71,7 +74,13 @@ def MakeEpub(site):
         book.toc=toc
         book.spine.append('nav')
 
-    #fallback method
+    elif type(site) is Nhentai.Nhentai:
+        c.append(epub.EpubHtml(title='none', file_name='Chapter 1.xhtml', lang='en'))
+        c[0].content=site.truestoryhttml[0]
+        book.add_item(c[0])
+        book.spine.append('nav')
+
+    #fallback method    
     else:
         c.append(epub.EpubHtml(title=site.title, file_name='Story.xhtml', lang='en'))
         c[0].content=site.storyhtml
@@ -84,7 +93,7 @@ def MakeEpub(site):
         book.spine.append(i)
     epub.write_epub(wd+site.title+'.epub', book, {})
 
-    if type(site) is Chyoa.Chyoa:
+    if type(site) is Chyoa.Chyoa or type(site) is Nhentai.Nhentai:
         if site.hasimages == True:
             with ZipFile(wd+site.title+'.epub', 'a') as myfile:
                 i=1

diff --git a/README.md b/README.md
@@ -8,6 +8,7 @@ A Python tool for converting online stories into portable formats
 * classicreader.com (The site does not appear to be working as of 11/30/19)
 * chyoa.com (rudimentary support: Input the last page you wish to include, and the code will work backwards towards the beginning of the story. You will be asked to input customizable names if they are found)
 * wattpad.com
+* nhentai.net (alpha support: EPUB only. Multithreading multiple nhentai galleries seems to cause issues. Image rendering may also vary with your EPUB reader, e.g. images are low quality in Okular; in the Calibre reader they are high quality, but a blank page is inserted between pages.)
 
 ## Currently supported file types:
 * plain text files

diff --git a/Site/Nhentai.py b/Site/Nhentai.py
@@ -0,0 +1,78 @@
+import requests
+from bs4 import BeautifulSoup
+import sys
+from Site import Common
+from time import sleep
+
+
+class Nhentai:
+    """Scrape one nhentai.net gallery into the fields the epub builder reads.
+
+    The gallery is exposed as a single chapter: truestoryhttml[0] holds one
+    <img> tag per recorded page and images holds the matching source URLs.
+    """
+
+    def __init__(self, url):
+        """Fetch the gallery page at url, then every page it links to.
+
+        Prints a message and re-raises if the gallery page cannot be fetched.
+        """
+        self.title=''
+        self.chapters=['']
+        #initial author only for title page
+        self.author=''
+        self.temp=[]
+        self.rawstoryhtml=['']
+        #single entry: every page of the gallery is accumulated into it
+        self.truestoryhttml=['']
+        self.length=1
+        self.pbar=None
+        self.url=url
+        self.images=[]
+        self.hasimages = True
+        try:
+            page=requests.get(self.url)
+        except requests.exceptions.RequestException:
+            #re-raise: without a response there is nothing to parse below
+            print('Error accessing website: try checking internet connection and url')
+            raise
+        soup=BeautifulSoup(page.content, 'html.parser')
+
+        self.title = soup.find('meta', attrs={'itemprop':'name'}).get('content')
+        for au in soup.find_all('a', attrs={'class':'tag'}):
+            #a tag whose href starts with /artist/ is taken as the author
+            if (au.get('href') or '').startswith('/artist/'):
+                self.author=au.get_text()
+
+        #every anchor marked rel="nofollow" is treated as a gallery page link
+        for i in soup.find_all('a'):
+            if i.get('rel')==['nofollow']:
+                self.AddPage(i.get('href'))
+                #pause between page requests
+                sleep(1)
+        #progress bar will be updated on image download for accuracy
+
+    def AddPage(self, url):
+        """Record the image of one gallery page and append its <img> tag.
+
+        url is the site-relative href of the page. A page that cannot be
+        fetched or has no image is reported and skipped.
+        """
+        try:
+            page=requests.get('https://nhentai.net'+url.rstrip())
+        except requests.exceptions.RequestException:
+            print('Error accessing website: try checking internet connection and url')
+            return
+        soup=BeautifulSoup(page.content, 'html.parser')
+
+        try:
+            src=soup.find('section', attrs={'id':'image-container'}).find('img').get('src')
+        except AttributeError:
+            #find() returned None: the expected container or img is missing
+            print('Error in: '+url)
+            print(soup.prettify())
+            return
+        self.images.append(src)
+        #numbered after the append so the tag matches the image's 1-based position
+        self.truestoryhttml[0]+='<p><img src="img'+str(len(self.images))+'.jpg" /></p>'
+
diff --git a/Site/__init__.py b/Site/__init__.py
@@ -1 +1 @@
-__all__=["Literotica","Fanfiction","Classicreader","Chyoa","Wattpad", "Common"]
+__all__=["Literotica","Fanfiction","Classicreader","Chyoa","Wattpad","Nhentai", "Common"]
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		__all__=["Literotica","Fanfiction","Classicreader","Chyoa","Wattpad", "Common"]
		__all__=["Literotica","Fanfiction","Classicreader","Chyoa","Wattpad","Nhentai", "Common"]