Merge pull request #41 from theslavicbear/HTMLdev

Htmldev
theslavicbear · Dec 25, 2019 · 10886f8 · 10886f8
2 parents 78234ea + bf71aa8
commit 10886f8
Show file tree

Hide file tree

Showing 5 changed files with 78 additions and 40 deletions.
diff --git a/Ebook-Publisher.py b/Ebook-Publisher.py
@@ -16,6 +16,7 @@
 import queue
 import shutil
 from zipfile import ZipFile
+from time import sleep
 
 #Master dict of supported sites
 sites={
@@ -36,22 +37,29 @@ def MakeText(site):
         published.write('by '+site.author+'\n\n')
         published.write(site.story)
         published.close()
-    '''else:
-        if site.hasimages == True:
-            if not os.path.exists(wd+site.title):
-                os.makedirs(wd+site.title)
-            i = 1
-            zeros = '0' * (len(str(len(site.images)))-1)
-            print(zeros)
-            for url in site.images:
-                if i > 9:
-                    zeros='0'
-                elif i > 99:
-                    zeros = ''
-                with open(wd+site.title+'/'+zeros+str(i)+'.jpg', 'wb') as myimg:
-                    myimg.write(GetImage(url))
-                i=i+1
-    '''
+
+def MakeHTML(site):
+    if (type(site) is Chyoa.Chyoa or type(site) is Nhentai.Nhentai) and site.hasimages:
+        published=open(wd+site.title+'/'+site.title+'.html', 'w')
+    else:
+        published=open(wd+site.title+'.html', 'w')
+    published.write('<!DOCTYPE html>\n')
+    published.write('<html lang="en">\n')
+    published.write('<head>\n<title>'+site.title+' by '+site.author+'</title>\n</head>\n')
+    published.write('<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href='+site.url+'>'+site.url+'</a>\n')
+    for i in range(len(site.rawstoryhtml)):
+        if type(site) is Chyoa.Chyoa:
+            published.write('<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.truestoryhttml[i])
+        elif type(site) is Nhentai.Nhentai:
+            published.write(site.truestoryhttml[i])
+        elif type(site) is Literotica.Literotica:
+            published.write(site.storyhtml)
+        else:
+            published.write('<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.rawstoryhtml[i].prettify())
+    published.write('</html>')
+
+
+    published.close()
 
 def GetImage(url):
     req = urllib.request.Request(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})
@@ -63,7 +71,7 @@ def MakeEpub(site):
     book=epub.EpubBook()
     book.set_identifier(site.url)
     titlepage=epub.EpubHtml(title='Title Page', file_name='Title.xhtml', lang='en')
-    titlepage.content='<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href=\'url\'>'+site.url+'</a>'
+    titlepage.content='<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href='+site.url+'</a>'
     #add summary information
     try:
         titlepage.content+='<br /><p>'+site.summary+'</p>'
@@ -116,7 +124,14 @@ def MakeEpub(site):
             with ZipFile(wd+site.title+'.epub', 'a') as myfile:
                 i=1
                 for url in site.images:
-                    with myfile.open('EPUB/img'+str(i)+'.jpg', 'w') as myimg:
+                    zeros = '0' * (len(str(site.isize))-1)
+                    if len(zeros)>1 and i > 9:
+                        zeros='0'
+                    elif len(zeros)==1 and i > 9:
+                        zeros = ''
+                    if i > 99:
+                        zeros = ''
+                    with myfile.open('EPUB/'+zeros+str(i)+'.jpg', 'w') as myimg:
                         myimg.write(GetImage(url))
                     i=i+1
 
@@ -134,6 +149,8 @@ def MakeClass(url):
         if ftype=='epub':
             #for site in s:
             MakeEpub(site)
+        elif ftype=='html':
+            MakeHTML(site)
         else:
             #for site in s:
             MakeText(site)
@@ -143,7 +160,7 @@ def MakeClass(url):
 #setting up commandline argument parser
 parser=argparse.ArgumentParser()
 parser.add_argument('url', help='The URL of the story you want', nargs='?')
-parser.add_argument('-o','--output-type', help='The file type you want', choices=['txt', 'epub'])
+parser.add_argument('-o','--output-type', help='The file type you want', choices=['txt', 'epub', 'html'], default='txt')
 parser.add_argument('-f','--file', help="Use text file containing a list of URLs instead of single URL", action='store_true')
 parser.add_argument('-d','--directory', help="Directory to place output files. Default ./")
 parser.add_argument('-q','--quiet', help="Turns off most terminal output", action='store_true')
@@ -172,8 +189,7 @@ def MakeClass(url):
     wd=args.directory
 Common.wd = wd
 
-if args.output_type == 'epub':
-    Common.opf = 'epub'
+Common.opf = args.output_type
 
 cwd=os.getcwd()
 #TODO should use non-relative path
@@ -199,19 +215,24 @@ def MakeClass(url):
     #the multithreaded variant
     if args.t:
         lock = threading.Lock()
+        threads = 0
         for i in urls:
             t=threading.Thread(target=MakeClass, args=(i,), daemon=True)
             t.start()
-        siteThreads = threading.active_count()
-        while siteThreads>1:
+            threads +=1
+        #siteThreads = threading.active_count()
+        while threads>0:
             s=q.get()
-            siteThreads-=1
-
+            #threading.active_count()-=1
+            #sleep(.01)
+            threads -=1
     else:
         for i in urls:
             #site=MakeClass(i)
             if ftype=='epub':
                 MakeEpub(MakeClass(i))
+            elif ftype=='html':
+                MakeHTML(MakeClass(i))
             else:
                 MakeText(MakeClass(i))
 
@@ -223,9 +244,9 @@ def MakeClass(url):
         sys.exit()
     if ftype=='epub':
         MakeEpub(site)
+    elif ftype=='html':
+        MakeHTML(site)
     else:
         MakeText(site)
     while threading.active_count()>1:
-        pass
-    #if type(site) is Nhentai.Nhentai and site.pbar is not None:
-        #site.pbar.End()
+        sleep(.01)
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # Ebook-Publisher
 A Python tool for converting online stories into portable formats
 
-**Download Ebook-Publisher by cloning the git repository `git clone https://github.com/theslavicbear/Ebook-Publisher.git` or downloading the zip of the latest release (generally more stable, as I tend to push directly to master) and running the Ebook-Publisher.py file. At a minimum, you must supply one URL from a supported site as a command line argument. With no other options, you will receive a text file with the story contents. Please see the below help message for a list of possible options to improve your experience, e.g. multiple URL inputs, concurrent downloads, and/or EPUB formatted output files.**
+**Download Ebook-Publisher by cloning the git repository `git clone https://github.com/theslavicbear/Ebook-Publisher.git` or downloading the zip of the latest release (generally more stable, as I tend to push directly to master) and running the Ebook-Publisher.py file. At a minimum, you must supply one URL from a supported site as a command-line argument. With no other options, you will receive a text file with the story contents. Please see the help message below for a list of possible options to improve your experience, e.g. multiple URL inputs, concurrent downloads, and/or EPUB/HTML-formatted output files.**
 
 Ebook-Publisher is my pet project, and the project that I currently have spent the most time and effort on. As such, I welcome criticism, requests for improvement, and bug reports. Please open an issue for any of the preceding.
 
@@ -17,6 +17,7 @@ Ebook-Publisher is my pet project, and the project that I currently have spent t
 ## Currently supported file types:
 * plain text files
 * epub ebook files
+* html files (For nhentai galleries, the output is the same as the default .txt output, plus an html file in the gallery folder that makes it easy to view the gallery in a web browser. Chyoa stories don't grab images yet.)
 
 Want more sites supported? Open an Issue and ask for its support or add support for the site yourself! 
 

diff --git a/Site/Chyoa.py b/Site/Chyoa.py
@@ -88,12 +88,9 @@ def __init__(self, url):
                     simg['src']='img'+str(len(self.images))+'.jpg'
                     self.hasimages = True
 
-
         temp=str(soup.find('div', attrs={'class': 'chapter-content'}))
 
 
-
-
 
         self.questions.insert(0, soup.find_all('h2')[1].get_text())
         temp+='<h2>'+self.questions[0]+'</h2>'
@@ -147,6 +144,10 @@ def __init__(self, url):
             self.truestoryhttml[i]=self.truestoryhttml[i].replace('  </span>\n  ', '</span> ')
 
         self.story=self.story.replace('\n', '\n\n')
+
+        if Common.images and self.hasimages and Common.opf=='html':
+            for i in range(0,len(self.images)):
+                Common.imageDL(self.title, self.images[i], i+1, size=len(self.images))
         #print(self.story)
         #print(self.truestoryhttml[len(self.truestoryhttml)-1])
         #for i in range(len(self.renames)):

diff --git a/Site/Common.py b/Site/Common.py
@@ -14,7 +14,7 @@ def prnt(out, f=False):
     if not quiet and not f:
         print(out)
 
-def imageDL(title, url, size, num, pbar):
+def imageDL(title, url, num,  size=0, pbar=None):
     if not os.path.exists(wd+title):
         os.makedirs(wd+title)
     zeros = '0' * (len(str(size))-1)
@@ -25,10 +25,14 @@ def imageDL(title, url, size, num, pbar):
         zeros = ''
     if num > 99:
         zeros = ''
+    if pbar is None:
+        zeros = 'img' #TODO fix this for Chyoa stories so that image files don't have to be prepended with 'img' and no zeros
     #print(zeros)
     with open(wd+title+'/'+zeros+str(num)+'.jpg', 'wb') as myimg:
         myimg.write(GetImage(url))
-    pbar.Update()
+    if pbar is not None:
+        pbar.Update()
+    #queue.put()
 
 
 def GetImage(url):

diff --git a/Site/Nhentai.py b/Site/Nhentai.py
@@ -4,6 +4,7 @@
 from Site import Common
 from time import sleep
 import threading
+#import queue
 
 class Nhentai:
 
@@ -23,6 +24,7 @@ def __init__(self, url):
         self.images=[] #testing images
         self.hasimages = True
         self.isize=0
+        #self.q = queue.Queue()
         try:
             page=requests.get(self.url)
         except:
@@ -36,9 +38,10 @@ def __init__(self, url):
 
 
         self.truestoryhttml.append('')
-        if Common.opf=='txt':
+        self.isize=len(soup.find_all('a', attrs={'rel':'nofollow'}))
+
+        if Common.opf in ('html','txt'):
 
-            self.isize=len(soup.find_all('a', attrs={'rel':'nofollow'}))
             self.pbar = Common.Progress(self.isize)
         for i in soup.find_all('a', attrs={'rel':'nofollow'}):
             #print(i.get('rel'))
@@ -66,10 +69,18 @@ def AddPage(self, url):
         except:
             print('Error in: '+url)
             #print(soup.prettify())
-        if Common.opf != 'txt':
-            self.truestoryhttml[0]=self.truestoryhttml[0]+'<p><img src="img'+str(len(self.images))+'.jpg" /></p>'
-        else:
-            t=threading.Thread(target=Common.imageDL, args=(self.title, thisimage, self.isize, len(self.images), self.pbar), daemon=True)
+        if Common.opf in ('epub', 'html'):
+            zeros = '0' * (len(str(self.isize))-1)
+            num = len(self.images)
+            if len(zeros)>1 and num > 9:
+                zeros='0'
+            elif len(zeros)==1 and num > 9:
+                zeros = ''
+            if num > 99:
+                zeros = ''
+            self.truestoryhttml[0]=self.truestoryhttml[0]+'<p><img src="'+zeros+str(num)+'.jpg" /></p>\n'
+        if Common.opf in ('txt', 'html'):
+            t=threading.Thread(target=Common.imageDL, args=(self.title, thisimage, len(self.images), self.isize, self.pbar), daemon=False)
             t.start()
             #Common.imageDL(self.title, thisimage, self.isize, len(self.images))
             #self.pbar.Update()