Feature 3.2 #79

Merged · merged 5 commits · Jul 18, 2021
28 changes: 23 additions & 5 deletions Ebook-Publisher.py
@@ -33,8 +33,8 @@
 def MakeText(site):
     if type(site) is not Nhentai.Nhentai:
         published=open(wd+site.title+'.txt', 'w', encoding="utf-8")
-        published.write(site.title+'\n')
-        published.write('by '+site.author+'\n\n')
+        published.write(site.title+Common.lineEnding)
+        published.write('by '+site.author+Common.lineEnding)
         published.write(site.story)
         published.close()

@@ -54,11 +54,20 @@ def MakeHTML(site):
         for i in range(len(site.rawstoryhtml)):
             published.write('<p><a href="#Chapter '+str(i)+'">'+site.chapters[i]+'</a></p>\n')
     elif not site.backwards:
+        j=0
         for i in range(len(site.rawstoryhtml)):
             if i!=0:
-                published.write('<p><a href="#'+str(site.depth[i-1])+'">'+str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]+'</a></p>\n')
+                if site.partial:
+                    published.write('<p><a href="#'+str(site.depth[i-1])+'">'+str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]+'</a></p>\n')
+                else:
+                    published.write('<p><a href="#'+str(site.depth[i-1])+'">'+str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]+'</a></p>\n')
             else:
-                published.write('<p><a href="#Chapter '+str(i)+'">'+'1.1 '+site.chapters[i]+'</a></p>\n')
+                if site.partial:
+                    j=site.partialStart
+                    published.write('<p><a href="#Chapter '+str(i)+'">'+str(j)+'. '+site.chapters[i]+'</a></p>\n')
+                    j+=1
+                else:
+                    published.write('<p><a href="#Chapter '+str(i)+'">'+'1.1 '+site.chapters[i]+'</a></p>\n')
     else:
         for i in range(len(site.rawstoryhtml)):
             published.write('<p><a href="#Chapter '+str(i)+'">'+site.chapters[i]+'</a></p>\n')
@@ -110,7 +119,10 @@ def MakeEpub(site):
             if i == 0:
                 c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
             else:
-                c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
+                if not site.partial:
+                    c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
+                else:
+                    c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
             c[i].content='<h2>\n'+site.chapters[i]+'\n</h2>\n'+str(site.epubrawstoryhtml[i])
         elif type(site) is Nhentai.Nhentai:
             c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
@@ -221,6 +233,8 @@ def getCSS():
 parser.add_argument('-n', '--no-duplicates', help='Skips stories if they have already been downloaded', action='store_true')
 parser.add_argument('-s', '--css', '--style-sheet', help='either a CSS string or a .css file to use for formatting', default='')
 parser.add_argument('--chyoa-force-forwards', help='Force Chyoa stories to be scraped forwards if not given page 1', action='store_true')
+parser.add_argument('--eol', help='end of line character for .txt output format, must be enclosed in single quotes', default='\n\n')
+parser.add_argument('--chyoa-update', help='Checks if story already exists in output directory, and skips it if it has not been updated on the server since file was created.', action='store_true')
 args=parser.parse_args()
 
 #print(args.output_type)
@@ -245,6 +259,10 @@
 if args.chyoa_force_forwards:
     Common.chyoa_force_forwards=True
 
+if args.chyoa_update:
+    Common.chyoaDupCheck=True
+
+Common.lineEnding=args.eol.encode('latin-1', 'backslashreplace').decode('unicode-escape')
 
 if args.directory is None:
     wd='./'
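
A note on the --eol plumbing above: argparse delivers the flag's value with literal backslash sequences (typing --eol '\r\n' in a shell yields the four characters backslash, r, backslash, n), so the script round-trips the string through latin-1/unicode-escape to recover real control characters before storing it in Common.lineEnding. A minimal standalone sketch of that decode, where raw stands in for args.eol:

    raw = '\\r\\n'   # what argparse sees when the user types --eol '\r\n'
    decoded = raw.encode('latin-1', 'backslashreplace').decode('unicode-escape')
    assert decoded == '\r\n'   # the literal escapes become a real CR/LF pair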
62 changes: 47 additions & 15 deletions Site/Chyoa.py
@@ -10,6 +10,7 @@
 import queue
 import copy
 import urllib.parse
+from datetime import datetime
 lock = Lock()
 lock2 = Lock()
 
@@ -43,12 +44,14 @@ def __init__(self, url):
         self.images=[] #testing images
         self.hasimages = False
         self.duplicate = False
-        self.backwards = True
+        self.backwards = not Common.chyoa_force_forwards
         self.depth = []
         self.quiet = Common.quiet
         self.epubnextpages = []
         self.nextLinks=[]
 
+        self.partial = False
+        self.partialStart=1
         self.ogUrl=self.url
 
         page = Common.RequestPage(url)
@@ -66,12 +69,33 @@ def __init__(self, url):
             except:
                 pass
 
+        elif not self.backwards:
+            self.partial = True
+
+        #get update timestamp:
+        if (self.backwards or self.partial) and Common.chyoaDupCheck:
+            date=soup.find('p', attrs={'class':'dates'}).strong.get_text()
+            #date='Jun 18, 2022'
+            timestamp=datetime.strptime(date, "%b %d, %Y")
+            #print(timestamp)
+            if not Common.CheckDuplicateTime(self.title, timestamp):
+                Common.prnt('Story not updated: '+self.url, f=True)
+                self.duplicate = True
+                return None
+
+        #check duplicate with timestamp
 
         if Common.dup:
             if Common.CheckDuplicate(self.title):
                 self.duplicate = True
                 return None
 
-        if self.backwards:
+        if self.backwards or self.partial:
             self.authors.insert(0,soup.find_all('a')[7].get_text())
         else:
             self.authors.insert(0,soup.find_all('a')[5].get_text())
@@ -80,7 +104,8 @@ def __init__(self, url):
 
         tmp=soup.find('p', attrs={'class': 'meta'}).get_text()
         t=[s for s in tmp.split() if s.isdigit()]
         self.length=int(t[0])
+        self.partialStart=self.length
 
 
         if soup.find('form', attrs={'id':'immersion-form'}) is not None:
@@ -138,9 +163,9 @@ def __init__(self, url):
 
 
         #if soup.find('a').text.strip()==
-        self.backwards = False
+        #self.backwards = not Common.chyoa_force_forwards
         for i in soup.find_all('a'):
-            if i.text.strip()=='Previous Chapter':
+            if i.text.strip()=='Previous Chapter' and self.backwards:
                 self.AddPrevPage(i.get('href'))
                 self.backwards = True
                 break
@@ -159,8 +184,11 @@ def __init__(self, url):
         numChapters=numChapters.replace(',','')
         try:
             if not Common.mt:
-                self.pbar=Common.Progress(int(numChapters))
-                self.pbar.Update()
+                if self.partial:
+                    print('Downloading an unknown number of pages')
+                else:
+                    self.pbar=Common.Progress(int(numChapters))
+                    self.pbar.Update()
         except:
             pass
 
@@ -191,14 +219,17 @@ def __init__(self, url):
         self.Pages.extend(urls)
         j=1
         for u in urls:
-            if Common.mt:
+            if Common.mt and not self.partial:
                 chapNum = int(soup.find('p', attrs={'class':'meta'}).get_text().split()[1])
                 firstLinkId=None
                 threading.Thread(target=self.ThreadAdd, args=(u, j, self.renames, self.oldnames, chapNum, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], firstLinkId, self.url), daemon=True).start() #TODO
             else:
+                if Common.mt:
+                    Common.prnt('Warning: Cannot multithread partial Chyoa story: '+self.url+'\nUsing default method to download an unknown number of pages')
+
                 self.AddNextPage(u, j, 1, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], None)
             j+=1
-        if Common.mt:
+        if Common.mt and not self.partial:
             i = int(numChapters)-1
             print("Pages to add: "+str(i))
             while i >0:
@@ -272,7 +303,7 @@ def __init__(self, url):
 
-        self.story=self.story.replace('\n', '\n\n')
+        self.story=self.story.replace('\n', Common.lineEnding)
 
         for i in range(0,len(self.truestoryhttml)):
             self.rawstoryhtml[i]=BeautifulSoup(self.truestoryhttml[i], 'html.parser')
@@ -348,7 +379,8 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,
         temp='<div id="'+str(depth)+'">'+str(temp2)
         self.questions.append(soup.find('header', attrs={'class':"question-header"}).get_text())
         temp+='<h2>'+self.questions[-1]+'</h2>\n</div>'
-        #Common.prnt(str(depth))
+        if self.partial:
+            Common.prnt(str(depth))
         j = 1
 
         nextpages=[]
@@ -414,9 +446,9 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,
                     self.AddNextPage(i.get('href'), str(depth)+'.'+str(j), chapNum, currLink, epubCurrLink, nextLink, currLinkId)
 
     def ThreadAdd(self, url, depth, renames, oldnames, chapNum, currLink, epubCurrLink, nextLink, currLinkId, ogUrl):
-        if self.Pages.count(url)>1:
-            print("found issue at" + str(url))
-            return None
+        #if self.Pages.count(url)>1:
+        #    print("found issue at" + str(url))
+        #    return None
         self.Pages[self.Pages.index(url)]=(Page(url, depth, renames, oldnames, self.q, chapNum, currLink, epubCurrLink, nextLink, currLinkId, ogUrl))
 
     def addPage(self, page):
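
The partial-story handling above feeds the table-of-contents changes in Ebook-Publisher.py: partialStart is seeded from the entry page's chapter number (self.partialStart=self.length), and the MakeHTML/MakeEpub expressions add it into their numbering so a mid-story download keeps the server's chapter numbers instead of restarting at 1. A restatement of the rule those write calls implement, where toc_label is an illustrative name that does not appear in the PR:

    # depth is a dotted path such as '1.2.1'; each extra level appends two
    # characters, so len(depth)/2 tracks how far below the entry page a chapter sits.
    def toc_label(depth, partial, partial_start):
        if partial:
            number = int(partial_start + len(depth) / 2 + 1)   # continue the server's numbering
        else:
            number = int(len(depth) / 2 + 2)                   # full story: branch pages start at 2
        return str(number) + '.' + depth.split('.')[-1]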
35 changes: 31 additions & 4 deletions Site/Common.py
@@ -1,4 +1,5 @@
 import sys, urllib, os, requests, time
+from datetime import datetime
 
 #Module contains common functions needed by sites
 
@@ -16,14 +17,16 @@
 
 dup = False
 
+chyoaDupCheck=False
+
 chyoa_force_forwards=False
 
 mt = False
 
 urlDict= {}
 
 def prnt(out, f=False):
-    if not quiet and not f:
+    if not quiet or f:
         print(out)
 
 def imageDL(title, url, num, size=0, pbar=None, queue=None):


def CheckDuplicate(title):
if opf == 'epub':
if any(x in ('epub', 'EPUB') for x in opf):
return os.path.isfile(wd+title+'.epub')
elif opf == 'txt':
elif any(x in ('txt', 'TXT') for x in opf):
return os.path.isfile(wd+title+'.txt') or os.path.exists(wd+title)
elif opf == 'html':
elif any(x in ('html', 'HTML') for x in opf):
return os.path.isfile(wd+title+'.html') or os.path.exists(wd+title)

def CheckDuplicateTime(title, timeObject):
if any(x in ('epub', 'EPUB') for x in opf):
if os.path.isfile(wd+title+'.epub'):
#print(time.ctime(os.path.getmtime(wd+title+'.epub')))
if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title+'.epub')), '%a %b %d %H:%M:%S %Y'):
return True
elif any(x in ('txt', 'TXT') for x in opf):
if os.path.isfile(wd+title+'.txt'):
if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title+'.txt')), '%a %b %d %H:%M:%S %Y'):
return True
elif os.path.exists(wd+title):
if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title)), '%a %b %d %H:%M:%S %Y'):
return True

elif any(x in ('html', 'HTML') for x in opf):
if os.path.isfile(wd+title+'.html'):
if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title+'.html')), '%a %b %d %H:%M:%S %Y'):
return True
elif os.path.exists(wd+title):
#print(datetime.strptime(time.ctime(os.path.getmtime(wd+title)), '%a %b %d %H:%M:%S %Y'))
if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title)), '%a %b %d %H:%M:%S %Y'):
return True
return False


def GetImage(url):
try:
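
CheckDuplicateTime answers one question: is the story's last-update date on the server newer than the output file's modification time? Only then does it return True and allow a re-download. The strptime(time.ctime(...)) round-trip above is an indirect way to build a datetime from an mtime; a shorter equivalent, shown here only for clarity (is_stale is an illustrative name, not part of the PR):

    from datetime import datetime
    import os

    def is_stale(path, server_time):
        # True when the server copy postdates the file on disk
        # (local time, second resolution, matching the ctime round-trip)
        return server_time > datetime.fromtimestamp(os.path.getmtime(path))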
2 changes: 1 addition & 1 deletion Site/Wattpad.py
@@ -67,7 +67,7 @@ def __init__(self, url):
         for i in range(0, len(self.rawstoryhtml)):
             self.story=self.story+self.chapters[i]+'\n'
             self.story=self.story+self.rawstoryhtml[i].get_text()
-        self.story=self.story.replace('\n', '\n\n')
+        self.story=self.story.replace('\n', Common.lineEnding)
 
     def addNextPage(self, url):
         soup=BeautifulSoup(self.requestPage(url).content, 'html.parser')
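
Taken together, the new options are driven from the command line. A hypothetical invocation (the story URL and the positional-argument shape are assumed; this diff does not show them):

    python Ebook-Publisher.py --eol '\r\n' --chyoa-update https://chyoa.com/story/example

Here --eol '\r\n' makes the .txt writers in Ebook-Publisher.py, Chyoa.py, and Wattpad.py emit Windows line endings via Common.lineEnding, and --chyoa-update sets Common.chyoaDupCheck so an already-downloaded Chyoa story is skipped unless the server reports a newer update date.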