Feature 3.3 #90

Merged · 17 commits · Oct 6, 2023
Ebook-Publisher.py (30 changes: 23 additions & 7 deletions)
@@ -1,4 +1,6 @@
#!/usr/bin/env python3
Version = '3.3.0'

import sys
from Site import *
import urllib.parse
@@ -32,17 +34,20 @@
#function for making text files
def MakeText(site):
if type(site) is not Nhentai.Nhentai:
published=open(wd+site.title+'.txt', 'w', encoding="utf-8")
title_stripped=site.title.replace('*', '').replace(':', '').replace('?', '').replace('"', '').replace('/', '').replace('\\', '').replace('<', '').replace('>', '').replace('|', '')
published=open(wd+title_stripped+'.txt', 'w', encoding="utf-8")
published.write(site.title+Common.lineEnding)
published.write('by '+site.author+Common.lineEnding)
published.write(site.story)
published.close()

def MakeHTML(site):

title_stripped=site.title.replace('*', '').replace(':', '').replace('?', '').replace('"', '').replace('/', '').replace('\\', '').replace('<', '').replace('>', '').replace('|', '')
if (type(site) is Chyoa.Chyoa or type(site) is Nhentai.Nhentai) and site.hasimages:
published=open(wd+site.title+'/'+site.title+'.html', 'w', encoding="utf-8")
published=open(wd+title_stripped+'/'+site.title+'.html', 'w', encoding="utf-8")
else:
published=open(wd+site.title+'.html', 'w', encoding="utf-8")
published=open(wd+title_stripped+'.html', 'w', encoding="utf-8")
published.write('<!DOCTYPE html>\n')
published.write('<html lang="en">\n')
published.write('<style>\n'+styleSheet+'\n</style>')
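
The character-stripping chain above now appears verbatim in MakeText, MakeHTML, and (further down) MakeEpub. A shared helper could keep the three call sites in sync; a minimal sketch, assuming a hypothetical strip_title function that is not part of this PR:

import re

def strip_title(title):
    # Equivalent to the replace() chain above: drop the characters
    # that are invalid in Windows filenames.
    return re.sub(r'[*:?"/\\<>|]', '', title)

# e.g. strip_title('What?: A Story') == 'What A Story'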
@@ -120,9 +125,10 @@ def MakeEpub(site):
c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
else:
if not site.partial:
c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
c.append(epub.EpubHtml(title=site.chapters[i], file_name='nfChapter'+str(site.pageIDs[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
#c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
else:
c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
c.append(epub.EpubHtml(title=site.chapters[i], file_name='nfChapter'+str(site.pageIDs[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
c[i].content='<h2>\n'+site.chapters[i]+'\n</h2>\n'+str(site.epubrawstoryhtml[i])
elif type(site) is Nhentai.Nhentai:
c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
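
The tocTitle expressions above key everything off the depth string (e.g. '1.2.1'): each nesting level appends two characters ('.N'), so int(len(depth)/2) counts levels. A worked example with an assumed depth value:

depth = '1.2.1'                            # assumed three-level page
indent = ' _' * int((len(depth) / 2) + 1)  # ' _ _ _'
number = int((len(depth) / 2) + 2)         # 4
branch = depth.split('.')[-1]              # '1'
# the non-partial tocTitle starts ' _ _ _ 4.1 ' followed by the chapter title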
@@ -153,7 +159,8 @@ def MakeEpub(site):
#book.spine.append('nav')
for i in c:
book.spine.append(i)
epub.write_epub(wd+site.title+'.epub', book)
title_stripped=site.title.replace('*', '').replace(':', '').replace('?', '').replace('"', '').replace('/', '').replace('\\', '').replace('<', '').replace('>', '').replace('|', '')
epub.write_epub(wd+title_stripped+'.epub', book)

if type(site) is Nhentai.Nhentai:
if site.hasimages == True:
@@ -172,7 +179,7 @@ def MakeEpub(site):
i=i+1
elif type(site) is Chyoa.Chyoa:
if site.hasimages == True:
with ZipFile(wd+site.title+'.epub', 'a') as myfile:
with ZipFile(wd+title_stripped+'.epub', 'a') as myfile:
i=1
for num in Common.urlDict[site.url]:
try:
@@ -221,6 +228,7 @@ def getCSS():
return args.css



#setting up commandline argument parser
parser=argparse.ArgumentParser()
parser.add_argument('url', help='The URL of the story you want', nargs='*')
@@ -235,9 +243,15 @@ def getCSS():
parser.add_argument('--chyoa-force-forwards', help='Force Chyoa stories to be scraped forwards if not given page 1', action='store_true')
parser.add_argument('--eol', help='end of line character for .txt output format, must be enclosed in single quotes', default='\n\n')
parser.add_argument('--chyoa-update', help='Checks if story already exists in output directory, and skips it if it has not been updated on the server since file was created.', action='store_true')
parser.add_argument('--usr', help='Chyoa username to log in with.')
parser.add_argument('--pswd', help='Chyoa password to log in with.')
args=parser.parse_args()

#print(args.output_type)
if args.usr is not None and args.pswd is not None:
Common.chyoa_name=args.usr
Common.chyoa_pass=args.pswd
Common.GetChyoaSession()

if args.quiet:
Common.quiet=True
@@ -247,6 +261,8 @@ def getCSS():
Common.images=True
args.file=True
stdin=False

Common.prnt('Ebook-Publisher '+str(Version))
if not sys.stdin.isatty():
stdin=True
elif not args.url:
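The new --usr/--pswd pair only takes effect when both flags are given (the if above checks both), in which case Common.GetChyoaSession() logs in before any pages are fetched; passing just one of the two silently skips the login. A usage sketch, with a placeholder story URL:

python3 Ebook-Publisher.py --usr yourname --pswd 'yourpassword' https://chyoa.com/story/example
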
README.md (2 changes: 2 additions & 0 deletions)
@@ -59,6 +59,8 @@ optional arguments:
quotes
--chyoa-update Checks if story already exists in output directory, and skips it if it has
not been updated on the server since file was created.
--usr USR Chyoa username to log in with
--pswd PSWD Chyoa password to log in with
```


Site/Chyoa.py (107 changes: 74 additions & 33 deletions)
@@ -52,8 +52,12 @@ def __init__(self, url):
self.partial = False
self.partialStart=1
self.ogUrl=self.url
self.pageIDs=[]
self.pageIDIter=0
self.pageIDDict={}

page = Common.RequestPage(url)

page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
@@ -65,20 +69,14 @@ def __init__(self, url):
try:
self.title=soup.find('h1').get_text()
self.backwards = False

except:
pass





elif not self.backwards:
self.partial = True


#get update timestamp:
if (self.backwards or self.partial) and Common.chyoaDupCheck:
if (self.backwards or not self.partial) and Common.chyoaDupCheck:
date=soup.find('p', attrs={'class':'dates'}).strong.get_text()
#date='Jun 18, 2022'
timestamp=datetime.strptime(date, "%b %d, %Y")
Expand All @@ -96,9 +94,9 @@ def __init__(self, url):
return None

if self.backwards or self.partial:
self.authors.insert(0,soup.find_all('a')[7].get_text())
self.authors.insert(0,soup.find('p', class_='meta').find('a').get_text())
else:
self.authors.insert(0,soup.find_all('a')[5].get_text())
self.authors.insert(0,soup.find('p', class_='meta').find('a').get_text())
self.chapters.insert(0, soup.find('h1').get_text())
self.summary=soup.find('p', attrs={'class': 'synopsis'}).get_text()
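
The update check above parses Chyoa's human-readable date (the commented-out sample, 'Jun 18, 2022') with a fixed format string. For reference:

from datetime import datetime

# Format matches dates like 'Jun 18, 2022' from the page's 'dates' element.
timestamp = datetime.strptime('Jun 18, 2022', '%b %d, %Y')
# timestamp == datetime(2022, 6, 18, 0, 0)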

@@ -166,7 +164,10 @@ def __init__(self, url):
#self.backwards = not Common.chyoa_force_forwards
for i in soup.find_all('a'):
if i.text.strip()=='Previous Chapter' and self.backwards:
self.AddPrevPage(i.get('href'))
newLink=i.get('href')
while newLink is not None:
newLink=self.AddPrevPage(newLink)

self.backwards = True
break

@@ -218,6 +219,7 @@ def __init__(self, url):
j+=1
self.Pages.extend(urls)
j=1
self.pageQueue=[]
for u in urls:
if Common.mt and not self.partial:
chapNum = int(soup.find('p', attrs={'class':'meta'}).get_text().split()[1])
@@ -226,20 +228,41 @@ def __init__(self, url):
else:
if Common.mt:
Common.prnt('Warning: Cannot multithread partial Chyoa story: '+self.url+'\nUsing default method to download an unknown number of pages')
defArgs=(u, str(j), 1, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], None)
self.pageQueue.append(defArgs)
while self.pageQueue!=[]:
#n=self.pageQueue[0]
self.AddNextPage(self.pageQueue.pop(0))

self.AddNextPage(u, j, 1, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], None)
j+=1
if Common.mt and not self.partial:
i = int(numChapters)-1
print("Pages to add: "+str(i))
while i >0:
#print(str(i))
self.q.get()
try:
self.q.get(timeout=30)
except queue.Empty as e:
print("Unsure if all threads ended. Expected reamining pages: "+str(i))
break
i-=1
#print(threading.active_count())
for page in self.Pages:
self.addPage(page)
self.pageQueue=[]

for page in self.Pages:
self.pageQueue.append(page)
while self.pageQueue!=[]:
self.addPage(self.pageQueue.pop(0))
#for page in self.epubtemp:
#print(self.pageIDDict)
for p in range(len(self.epubtemp)):
for d in self.depth:
if (self.epubtemp[p].count('href="'+d+'.xhtml"')) > 0:
try:
self.epubtemp[p]=self.epubtemp[p].replace('href="'+d+'.xhtml"', 'href="nfChapter'+str(self.pageIDDict[d])+'.xhtml"')
except KeyError as k:
print("Key error at: "+d)
print("Please report this error to the developer.")

try:
self.pbar.End()
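
The loop above retargets internal links: pages were previously written as '<depth>.xhtml' (e.g. '1.2.xhtml') and are now written as 'nfChapter<ID>.xhtml', so every depth-based href is rewritten through pageIDDict once all pages have been added. A condensed sketch of the rewrite, with invented dictionary values:

# depth string -> page ID, recorded while pages were added (values illustrative)
page_id_dict = {'1': 0, '1.1': 1, '1.2': 2}
html = '<a href="1.2.xhtml">Next Chapter</a>'
for depth, page_id in page_id_dict.items():
    html = html.replace('href="' + depth + '.xhtml"',
                        'href="nfChapter' + str(page_id) + '.xhtml"')
# html == '<a href="nfChapter2.xhtml">Next Chapter</a>'

Keeping the closing '.xhtml"' in the search string is what stops depth '1' from matching the prefix of '1.2'.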
@@ -323,14 +346,14 @@ def __init__(self, url):


def AddPrevPage(self, url):
page = Common.RequestPage(url)
page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
return None

soup=BeautifulSoup(page.content, 'html.parser')
self.authors.insert(0,soup.find_all('a')[7].get_text())
self.authors.insert(0,soup.find('p', class_='meta').find('a').get_text())
self.chapters.insert(0, soup.find('h1').get_text())

if Common.images:
Expand All @@ -348,21 +371,29 @@ def AddPrevPage(self, url):
self.pbar.Update()
for i in soup.find_all('a'):
if i.text.strip()=='Previous Chapter':
self.AddPrevPage(i.get('href'))
return
return i.get('href')
#gets author name if on last/first page I guess
self.authors[0]=soup.find_all('a')[5].get_text()


def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink, prevLinkId):
page = Common.RequestPage(url)
self.authors[0]=soup.find('p', class_='meta').find('a').get_text()
return None

#def AddNextPage(self, (url, depth, prevChapNum, prevLink, epubPrevLink, currLink, prevLinkId)):
def AddNextPage(self, args):
url=args[0]
depth=args[1]
prevChapNum=args[2]
prevLink=args[3]
epubPrevLink=args[4]
currLink=args[5]
prevLinkId=args[6]

page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
return None

soup=BeautifulSoup(page.content, 'html.parser')
self.authors.append(soup.find_all('a')[7].get_text())
self.authors.append(soup.find('p', class_='meta').find('a').get_text())
self.chapters.append(soup.find('h1').get_text())

epubCurrLink='\n<a href="'+str(depth)+'.xhtml">'+'Previous Chapter'+'</a>\n<br />'
@@ -425,6 +456,10 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,
#Checks if new page was a link backwards and exits if so
chapNum = int(soup.find('p', attrs={'class':'meta'}).get_text().split()[1])

self.pageIDs.append(self.pageIDIter)
self.pageIDDict[depth]=self.pageIDIter
self.pageIDIter+=1

if prevChapNum >= chapNum:
return None

Expand All @@ -441,9 +476,10 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,

return


n2=[]
for i,j in zip(nextpagesurl, nextpagesdepth):
self.AddNextPage(i.get('href'), str(depth)+'.'+str(j), chapNum, currLink, epubCurrLink, nextLink, currLinkId)
n2.append([i.get('href'), str(depth)+'.'+str(j), chapNum, currLink, epubCurrLink, nextLink, currLinkId])
self.pageQueue[0:0]=n2
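
This prepend is the heart of the recursion-to-queue rewrite: instead of recursing into each child page, AddNextPage pushes the child argument tuples onto the front of pageQueue and returns, and the loop in __init__ drains the queue. Prepending preserves depth-first visit order while sidestepping Python's recursion limit on very deep stories. The pattern, reduced to a generic sketch (names are not the repo's):

def walk(first_page, children_of):
    # children_of(page) visits one page and returns its child pages,
    # like AddNextPage building n2 above.
    queue = [first_page]
    while queue:
        page = queue.pop(0)
        queue[0:0] = children_of(page)  # prepend -> depth-first order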

def ThreadAdd(self, url, depth, renames, oldnames, chapNum, currLink, epubCurrLink, nextLink, currLinkId, ogUrl):
#if self.Pages.count(url)>1:
@@ -468,16 +504,21 @@ def addPage(self, page):
self.epubtemp.extend(page.epubtemp)
self.temp.extend(page.temp)

#for j in range(1, page.epubtemp.count(page.depth)+1):
# self.epubtemp.replace(page.depth, self.pageIDIter+'.'+str(j), 1)
self.pageIDs.append(self.pageIDIter)
self.pageIDDict[page.depth]=self.pageIDIter
self.pageIDIter+=1

if page.children !=[]:
for zzz in range(0, len(page.children)):
#try:
while isinstance(page.children[zzz], str):
self.q.get()
#print('waiting for thread to finish')
#prepend child pages to the queue
self.pageQueue[0:0]=page.children


self.addPage(page.children[zzz])
#except AttributeError as E:
#print('Error after '+ str(self.depth))

class Page:

@@ -516,14 +557,14 @@ def __init__(self, url, depth, renames, oldnames, q, prevChapNum, prevLink, epub

def AddNextPage(self, url, depth):
#print(url)
page = Common.RequestPage(url)
page = Common.RequestPageChyoa(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

if page is None:
print('Could not complete request for page: ' + url)
return None

soup=BeautifulSoup(page.content, 'html.parser')
self.author=(soup.find_all('a')[7].get_text())
self.author=(soup.find('p', class_='meta').find('a').get_text())
self.chapter=(soup.find('h1').get_text())

