Feature 3.3 #90

Merged · 17 commits · Oct 6, 2023
Ebook-Publisher.py (30 changes: 23 additions & 7 deletions)
@@ -1,4 +1,6 @@
#!/usr/bin/env python3
Version = '3.3.0'

import sys
from Site import *
import urllib.parse
@@ -32,17 +34,20 @@
#function for making text files
def MakeText(site):
if type(site) is not Nhentai.Nhentai:
published=open(wd+site.title+'.txt', 'w', encoding="utf-8")
title_stripped=site.title.replace('*', '').replace(':', '').replace('?', '').replace('"', '').replace('/', '').replace('\\', '').replace('<', '').replace('>', '').replace('|', '')
published=open(wd+title_stripped+'.txt', 'w', encoding="utf-8")
published.write(site.title+Common.lineEnding)
published.write('by '+site.author+Common.lineEnding)
published.write(site.story)
published.close()

def MakeHTML(site):

title_stripped=site.title.replace('*', '').replace(':', '').replace('?', '').replace('"', '').replace('/', '').replace('\\', '').replace('<', '').replace('>', '').replace('|', '')
if (type(site) is Chyoa.Chyoa or type(site) is Nhentai.Nhentai) and site.hasimages:
published=open(wd+site.title+'/'+site.title+'.html', 'w', encoding="utf-8")
published=open(wd+title_stripped+'/'+site.title+'.html', 'w', encoding="utf-8")
else:
published=open(wd+site.title+'.html', 'w', encoding="utf-8")
published=open(wd+title_stripped+'.html', 'w', encoding="utf-8")
published.write('<!DOCTYPE html>\n')
published.write('<html lang="en">\n')
published.write('<style>\n'+styleSheet+'\n</style>')
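
The character-stripping chain above now appears verbatim in MakeText, MakeHTML, and (further down) MakeEpub. A shared helper could keep the three call sites in sync; a minimal sketch, assuming a hypothetical strip_title function that is not part of this PR:

import re

def strip_title(title):
    # Equivalent to the replace() chain above: drop the characters
    # that are invalid in Windows filenames.
    return re.sub(r'[*:?"/\\<>|]', '', title)

# e.g. strip_title('What?: A Story') == 'What A Story'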
@@ -120,9 +125,10 @@ def MakeEpub(site):
c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
else:
if not site.partial:
c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
c.append(epub.EpubHtml(title=site.chapters[i], file_name='nfChapter'+str(site.pageIDs[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
#c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
else:
c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
c.append(epub.EpubHtml(title=site.chapters[i], file_name='nfChapter'+str(site.pageIDs[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
c[i].content='<h2>\n'+site.chapters[i]+'\n</h2>\n'+str(site.epubrawstoryhtml[i])
elif type(site) is Nhentai.Nhentai:
c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
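
The tocTitle expressions above key everything off the depth string (e.g. '1.2.1'): each nesting level appends two characters ('.N'), so int(len(depth)/2) counts levels. A worked example with an assumed depth value:

depth = '1.2.1'                            # assumed three-level page
indent = ' _' * int((len(depth) / 2) + 1)  # ' _ _ _'
number = int((len(depth) / 2) + 2)         # 4
branch = depth.split('.')[-1]              # '1'
# the non-partial tocTitle starts ' _ _ _ 4.1 ' followed by the chapter title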
@@ -153,7 +159,8 @@ def MakeEpub(site):
#book.spine.append('nav')
for i in c:
book.spine.append(i)
epub.write_epub(wd+site.title+'.epub', book)
title_stripped=site.title.replace('*', '').replace(':', '').replace('?', '').replace('"', '').replace('/', '').replace('\\', '').replace('<', '').replace('>', '').replace('|', '')
epub.write_epub(wd+title_stripped+'.epub', book)

if type(site) is Nhentai.Nhentai:
if site.hasimages == True:
@@ -172,7 +179,7 @@ def MakeEpub(site):
i=i+1
elif type(site) is Chyoa.Chyoa:
if site.hasimages == True:
with ZipFile(wd+site.title+'.epub', 'a') as myfile:
with ZipFile(wd+title_stripped+'.epub', 'a') as myfile:
i=1
for num in Common.urlDict[site.url]:
try:
@@ -221,6 +228,7 @@ def getCSS():
return args.css



#setting up commandline argument parser
parser=argparse.ArgumentParser()
parser.add_argument('url', help='The URL of the story you want', nargs='*')
@@ -235,9 +243,15 @@ def getCSS():
parser.add_argument('--chyoa-force-forwards', help='Force Chyoa stories to be scraped forwards if not given page 1', action='store_true')
parser.add_argument('--eol', help='end of line character for .txt output format, must be enclosed in single quotes', default='\n\n')
parser.add_argument('--chyoa-update', help='Checks if story already exists in output directory, and skips it if it has not been updated on the server since file was created.', action='store_true')
parser.add_argument('--usr', help='Chyoa username to log in with.')
parser.add_argument('--pswd', help='Chyoa password to log in with.')
args=parser.parse_args()

#print(args.output_type)
if args.usr is not None and args.pswd is not None:
Common.chyoa_name=args.usr
Common.chyoa_pass=args.pswd
Common.GetChyoaSession()

if args.quiet:
Common.quiet=True
@@ -247,6 +261,8 @@ def getCSS():
Common.images=True
args.file=True
stdin=False

Common.prnt('Ebook-Publisher '+str(Version))
if not sys.stdin.isatty():
stdin=True
elif not args.url:
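The new --usr/--pswd pair only takes effect when both flags are given (the if above checks both), in which case Common.GetChyoaSession() logs in before any pages are fetched; passing just one of the two silently skips the login. A usage sketch, with a placeholder story URL:

python3 Ebook-Publisher.py --usr yourname --pswd 'yourpassword' https://chyoa.com/story/example
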
README.md (2 changes: 2 additions & 0 deletions)
@@ -59,6 +59,8 @@ optional arguments:
quotes
--chyoa-update Checks if story already exists in output directory, and skips it if it has
not been updated on the server since file was created.
--usr USR Chyoa username to log in with
--pswd PSWD Chyoa password to log in with
```


Site/Chyoa.py (107 changes: 74 additions & 33 deletions)
@@ -52,8 +52,12 @@ def __init__(self, url):
self.partial = False
self.partialStart=1
self.ogUrl=self.url
self.pageIDs=[]
self.pageIDIter=0
self.pageIDDict={}

page = Common.RequestPage(url)

page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
@@ -65,20 +69,14 @@ def __init__(self, url):
try:
self.title=soup.find('h1').get_text()
self.backwards = False

except:
pass





elif not self.backwards:
self.partial = True


#get update timestamp:
if (self.backwards or self.partial) and Common.chyoaDupCheck:
if (self.backwards or not self.partial) and Common.chyoaDupCheck:
date=soup.find('p', attrs={'class':'dates'}).strong.get_text()
#date='Jun 18, 2022'
timestamp=datetime.strptime(date, "%b %d, %Y")
Expand All @@ -96,9 +94,9 @@ def __init__(self, url):
return None

if self.backwards or self.partial:
self.authors.insert(0,soup.find_all('a')[7].get_text())
self.authors.insert(0,soup.find('p', class_='meta').find('a').get_text())
else:
self.authors.insert(0,soup.find_all('a')[5].get_text())
self.authors.insert(0,soup.find('p', class_='meta').find('a').get_text())
self.chapters.insert(0, soup.find('h1').get_text())
self.summary=soup.find('p', attrs={'class': 'synopsis'}).get_text()
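
The update check above parses Chyoa's human-readable date (the commented-out sample, 'Jun 18, 2022') with a fixed format string. For reference:

from datetime import datetime

# Format matches dates like 'Jun 18, 2022' from the page's 'dates' element.
timestamp = datetime.strptime('Jun 18, 2022', '%b %d, %Y')
# timestamp == datetime(2022, 6, 18, 0, 0)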

@@ -166,7 +164,10 @@ def __init__(self, url):
#self.backwards = not Common.chyoa_force_forwards
for i in soup.find_all('a'):
if i.text.strip()=='Previous Chapter' and self.backwards:
self.AddPrevPage(i.get('href'))
newLink=i.get('href')
while newLink is not None:
newLink=self.AddPrevPage(newLink)

self.backwards = True
break

@@ -218,6 +219,7 @@ def __init__(self, url):
j+=1
self.Pages.extend(urls)
j=1
self.pageQueue=[]
for u in urls:
if Common.mt and not self.partial:
chapNum = int(soup.find('p', attrs={'class':'meta'}).get_text().split()[1])
@@ -226,20 +228,41 @@ def __init__(self, url):
else:
if Common.mt:
Common.prnt('Warning: Cannot multithread partial Chyoa story: '+self.url+'\nUsing default method to download an unknown number of pages')
defArgs=(u, str(j), 1, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], None)
self.pageQueue.append(defArgs)
while self.pageQueue!=[]:
#n=self.pageQueue[0]
self.AddNextPage(self.pageQueue.pop(0))

self.AddNextPage(u, j, 1, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], None)
j+=1
if Common.mt and not self.partial:
i = int(numChapters)-1
print("Pages to add: "+str(i))
while i >0:
#print(str(i))
self.q.get()
try:
self.q.get(timeout=30)
except queue.Empty as e:
print("Unsure if all threads ended. Expected reamining pages: "+str(i))
break
i-=1
#print(threading.active_count())
for page in self.Pages:
self.addPage(page)
self.pageQueue=[]

for page in self.Pages:
self.pageQueue.append(page)
while self.pageQueue!=[]:
self.addPage(self.pageQueue.pop(0))
#for page in self.epubtemp:
#print(self.pageIDDict)
for p in range(len(self.epubtemp)):
for d in self.depth:
if (self.epubtemp[p].count('href="'+d+'.xhtml"')) > 0:
try:
self.epubtemp[p]=self.epubtemp[p].replace('href="'+d+'.xhtml"', 'href="nfChapter'+str(self.pageIDDict[d])+'.xhtml"')
except KeyError as k:
print("Key error at: "+d)
print("Please report this error to the developer.")

try:
self.pbar.End()
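
The loop above retargets internal links: pages were previously written as '<depth>.xhtml' (e.g. '1.2.xhtml') and are now written as 'nfChapter<ID>.xhtml', so every depth-based href is rewritten through pageIDDict once all pages have been added. A condensed sketch of the rewrite, with invented dictionary values:

# depth string -> page ID, recorded while pages were added (values illustrative)
page_id_dict = {'1': 0, '1.1': 1, '1.2': 2}
html = '<a href="1.2.xhtml">Next Chapter</a>'
for depth, page_id in page_id_dict.items():
    html = html.replace('href="' + depth + '.xhtml"',
                        'href="nfChapter' + str(page_id) + '.xhtml"')
# html == '<a href="nfChapter2.xhtml">Next Chapter</a>'

Keeping the closing '.xhtml"' in the search string is what stops depth '1' from matching the prefix of '1.2'.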
@@ -323,14 +346,14 @@ def __init__(self, url):


def AddPrevPage(self, url):
page = Common.RequestPage(url)
page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
return None

soup=BeautifulSoup(page.content, 'html.parser')
self.authors.insert(0,soup.find_all('a')[7].get_text())
self.authors.insert(0,soup.find('p', class_='meta').find('a').get_text())
self.chapters.insert(0, soup.find('h1').get_text())

if Common.images:
Expand All @@ -348,21 +371,29 @@ def AddPrevPage(self, url):
self.pbar.Update()
for i in soup.find_all('a'):
if i.text.strip()=='Previous Chapter':
self.AddPrevPage(i.get('href'))
return
return i.get('href')
#gets author name if on last/first page I guess
self.authors[0]=soup.find_all('a')[5].get_text()


def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink, prevLinkId):
page = Common.RequestPage(url)
self.authors[0]=soup.find('p', class_='meta').find('a').get_text()
return None

#def AddNextPage(self, (url, depth, prevChapNum, prevLink, epubPrevLink, currLink, prevLinkId)):
def AddNextPage(self, args):
url=args[0]
depth=args[1]
prevChapNum=args[2]
prevLink=args[3]
epubPrevLink=args[4]
currLink=args[5]
prevLinkId=args[6]

page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
return None

soup=BeautifulSoup(page.content, 'html.parser')
self.authors.append(soup.find_all('a')[7].get_text())
self.authors.append(soup.find('p', class_='meta').find('a').get_text())
self.chapters.append(soup.find('h1').get_text())

epubCurrLink='\n<a href="'+str(depth)+'.xhtml">'+'Previous Chapter'+'</a>\n<br />'
@@ -425,6 +456,10 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,
#Checks if new page was a link backwards and exits if so
chapNum = int(soup.find('p', attrs={'class':'meta'}).get_text().split()[1])

self.pageIDs.append(self.pageIDIter)
self.pageIDDict[depth]=self.pageIDIter
self.pageIDIter+=1

if prevChapNum >= chapNum:
return None

Expand All @@ -441,9 +476,10 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,

return


n2=[]
for i,j in zip(nextpagesurl, nextpagesdepth):
self.AddNextPage(i.get('href'), str(depth)+'.'+str(j), chapNum, currLink, epubCurrLink, nextLink, currLinkId)
n2.append([i.get('href'), str(depth)+'.'+str(j), chapNum, currLink, epubCurrLink, nextLink, currLinkId])
self.pageQueue[0:0]=n2
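
This prepend is the heart of the recursion-to-queue rewrite: instead of recursing into each child page, AddNextPage pushes the child argument tuples onto the front of pageQueue and returns, and the loop in __init__ drains the queue. Prepending preserves depth-first visit order while sidestepping Python's recursion limit on very deep stories. The pattern, reduced to a generic sketch (names are not the repo's):

def walk(first_page, children_of):
    # children_of(page) visits one page and returns its child pages,
    # like AddNextPage building n2 above.
    queue = [first_page]
    while queue:
        page = queue.pop(0)
        queue[0:0] = children_of(page)  # prepend -> depth-first order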

def ThreadAdd(self, url, depth, renames, oldnames, chapNum, currLink, epubCurrLink, nextLink, currLinkId, ogUrl):
#if self.Pages.count(url)>1:
@@ -468,16 +504,21 @@ def addPage(self, page):
self.epubtemp.extend(page.epubtemp)
self.temp.extend(page.temp)

#for j in range(1, page.epubtemp.count(page.depth)+1):
# self.epubtemp.replace(page.depth, self.pageIDIter+'.'+str(j), 1)
self.pageIDs.append(self.pageIDIter)
self.pageIDDict[page.depth]=self.pageIDIter
self.pageIDIter+=1

if page.children !=[]:
for zzz in range(0, len(page.children)):
#try:
while isinstance(page.children[zzz], str):
self.q.get()
#print('waiting for thread to finish')
#prepend child pages to the queue
self.pageQueue[0:0]=page.children


self.addPage(page.children[zzz])
#except AttributeError as E:
#print('Error after '+ str(self.depth))

class Page:

@@ -516,14 +557,14 @@ def __init__(self, url, depth, renames, oldnames, q, prevChapNum, prevLink, epub

def AddNextPage(self, url, depth):
#print(url)
page = Common.RequestPage(url)
page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
return None

soup=BeautifulSoup(page.content, 'html.parser')
self.author=(soup.find_all('a')[7].get_text())
self.author=(soup.find('p', class_='meta').find('a').get_text())
self.chapter=(soup.find('h1').get_text())

