Merge pull request #79 from theslavicbear/feature-3.2
Feature 3.2
theslavicbear authored Jul 18, 2021
2 parents d21b1ab + 4c74d69 commit 8ea2183
Showing 4 changed files with 102 additions and 25 deletions.
28 changes: 23 additions & 5 deletions Ebook-Publisher.py
@@ -33,8 +33,8 @@
 def MakeText(site):
     if type(site) is not Nhentai.Nhentai:
         published=open(wd+site.title+'.txt', 'w', encoding="utf-8")
-        published.write(site.title+'\n')
-        published.write('by '+site.author+'\n\n')
+        published.write(site.title+Common.lineEnding)
+        published.write('by '+site.author+Common.lineEnding)
         published.write(site.story)
         published.close()

@@ -54,11 +54,20 @@ def MakeHTML(site):
             for i in range(len(site.rawstoryhtml)):
                 published.write('<p><a href="#Chapter '+str(i)+'">'+site.chapters[i]+'</a></p>\n')
         elif not site.backwards:
+            j=0
             for i in range(len(site.rawstoryhtml)):
                 if i!=0:
-                    published.write('<p><a href="#'+str(site.depth[i-1])+'">'+str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]+'</a></p>\n')
+                    if site.partial:
+                        published.write('<p><a href="#'+str(site.depth[i-1])+'">'+str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]+'</a></p>\n')
+                    else:
+                        published.write('<p><a href="#'+str(site.depth[i-1])+'">'+str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]+'</a></p>\n')
                 else:
-                    published.write('<p><a href="#Chapter '+str(i)+'">'+'1.1 '+site.chapters[i]+'</a></p>\n')
+                    if site.partial:
+                        j=site.partialStart
+                        published.write('<p><a href="#Chapter '+str(i)+'">'+str(j)+'. '+site.chapters[i]+'</a></p>\n')
+                        j+=1
+                    else:
+                        published.write('<p><a href="#Chapter '+str(i)+'">'+'1.1 '+site.chapters[i]+'</a></p>\n')
         else:
             for i in range(len(site.rawstoryhtml)):
                 published.write('<p><a href="#Chapter '+str(i)+'">'+site.chapters[i]+'</a></p>\n')
@@ -110,7 +119,10 @@ def MakeEpub(site):
             if i == 0:
                 c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
             else:
-                c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
+                if not site.partial:
+                    c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((len(site.depth[i-1])/2)+2))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
+                else:
+                    c.append(epub.EpubHtml(title=site.chapters[i], file_name=str(site.depth[i-1])+'.xhtml', lang='en', tocTitle=str(' _'*int((len(site.depth[i-1])/2)+1))+' '+str(int((site.partialStart+len(site.depth[i-1])/2)+1))+'.'+site.depth[i-1].split('.')[-1]+' '+site.chapters[i]))
             c[i].content='<h2>\n'+site.chapters[i]+'\n</h2>\n'+str(site.epubrawstoryhtml[i])
         elif type(site) is Nhentai.Nhentai:
             c.append(epub.EpubHtml(title=site.chapters[i], file_name='Chapter '+str(i+1)+'.xhtml', lang='en'))
@@ -221,6 +233,8 @@ def getCSS():
 parser.add_argument('-n', '--no-duplicates', help='Skips stories if they have already been downloaded', action='store_true')
 parser.add_argument('-s', '--css', '--style-sheet', help='either a CSS string or a .css file to use for formatting', default='')
 parser.add_argument('--chyoa-force-forwards', help='Force Chyoa stories to be scraped forwards if not given page 1', action='store_true')
+parser.add_argument('--eol', help='end of line character for .txt output format, must be enclosed in single quotes', default='\n\n')
+parser.add_argument('--chyoa-update', help='Checks if story already exists in output directory, and skips it if it has not been updated on the server since file was created.', action='store_true')
 args=parser.parse_args()

 #print(args.output_type)
@@ -245,6 +259,10 @@
 if args.chyoa_force_forwards:
     Common.chyoa_force_forwards=True

+if args.chyoa_update:
+    Common.chyoaDupCheck=True
+
+Common.lineEnding=args.eol.encode('latin-1', 'backslashreplace').decode('unicode-escape')

 if args.directory is None:
     wd='./'
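
Reviewer's note on the new --eol option (a minimal sketch, not part of the commit): argparse hands the backslash sequences over literally, and the encode/decode round-trip on the Common.lineEnding line above turns them into real control characters.

```python
# Minimal sketch of the --eol decoding used above, assuming a POSIX shell
# that passes --eol '\r\n' through as four literal characters.
raw = r'\r\n'
decoded = raw.encode('latin-1', 'backslashreplace').decode('unicode-escape')
assert decoded == '\r\n'  # now two control characters, not four literals
```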
62 changes: 47 additions & 15 deletions Site/Chyoa.py
@@ -10,6 +10,7 @@
 import queue
 import copy
 import urllib.parse
+from datetime import datetime
 lock = Lock()
 lock2 = Lock()
@@ -43,12 +44,14 @@ def __init__(self, url):
         self.images=[] #testing images
         self.hasimages = False
         self.duplicate = False
-        self.backwards = True
+        self.backwards = not Common.chyoa_force_forwards
         self.depth = []
         self.quiet = Common.quiet
         self.epubnextpages = []
         self.nextLinks=[]

+        self.partial = False
+        self.partialStart=1
         self.ogUrl=self.url

         page = Common.RequestPage(url)

@@ -66,12 +69,33 @@ def __init__(self, url):
            except:
                pass

+
+
+
+
+        elif not self.backwards:
+            self.partial = True
+
+
+        #get update timestamp:
+        if (self.backwards or self.partial) and Common.chyoaDupCheck:
+            date=soup.find('p', attrs={'class':'dates'}).strong.get_text()
+            #date='Jun 18, 2022'
+            timestamp=datetime.strptime(date, "%b %d, %Y")
+            #print(timestamp)
+            if not Common.CheckDuplicateTime(self.title, timestamp):
+                Common.prnt('Story not updated: '+self.url, f=True)
+                self.duplicate= True
+                return None
+
+        #check duplicate with timestamp
+
         if Common.dup:
             if Common.CheckDuplicate(self.title):
                 self.duplicate = True
                 return None

-        if self.backwards:
+        if self.backwards or self.partial:
             self.authors.insert(0,soup.find_all('a')[7].get_text())
         else:
             self.authors.insert(0,soup.find_all('a')[5].get_text())
Expand All @@ -80,7 +104,8 @@ def __init__(self, url):

tmp=soup.find('p', attrs={'class': 'meta'}).get_text()
t=[s for s in tmp.split() if s.isdigit()]
self.length=int(t[0])
self.length=int(t[0])
self.partialStart=self.length


if soup.find('form', attrs={'id':'immersion-form'}) is not None:
@@ -138,9 +163,9 @@ def __init__(self, url):


         #if soup.find('a').text.strip()==
-        self.backwards = False
+        #self.backwards = not Common.chyoa_force_forwards
         for i in soup.find_all('a'):
-            if i.text.strip()=='Previous Chapter':
+            if i.text.strip()=='Previous Chapter' and self.backwards:
                 self.AddPrevPage(i.get('href'))
                 self.backwards = True
                 break
@@ -159,8 +184,11 @@ def __init__(self, url):
         numChapters=numChapters.replace(',','')
         try:
             if not Common.mt:
-                self.pbar=Common.Progress(int(numChapters))
-                self.pbar.Update()
+                if self.partial:
+                    print('Downloading an unknown number of pages')
+                else:
+                    self.pbar=Common.Progress(int(numChapters))
+                    self.pbar.Update()
         except:
             pass

@@ -191,14 +219,17 @@ def __init__(self, url):
             self.Pages.extend(urls)
             j=1
             for u in urls:
-                if Common.mt:
+                if Common.mt and not self.partial:
                     chapNum = int(soup.find('p', attrs={'class':'meta'}).get_text().split()[1])
                     firstLinkId=None
                     threading.Thread(target=self.ThreadAdd, args=(u, j, self.renames, self.oldnames, chapNum, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], firstLinkId, self.url), daemon=True).start() #TODO
                 else:
+                    if Common.mt:
+                        Common.prnt('Warning: Cannot multithread partial Chyoa story: '+self.url+'\nUsing default method to download an unknown number of pages')
+
                     self.AddNextPage(u, j, 1, '<a href="#Chapter 0">Previous Chapter</a>\n<br />', '\n<a href="'+'Chapter 1'+'.xhtml">'+'Previous Chapter'+'</a>\n<br />', self.nextLinks[j-1], None)
                 j+=1
-            if Common.mt:
+            if Common.mt and not self.partial:
                 i = int(numChapters)-1
                 print("Pages to add: "+str(i))
                 while i >0:
@@ -272,7 +303,7 @@ def __init__(self, url):



-        self.story=self.story.replace('\n', '\n\n')
+        self.story=self.story.replace('\n', Common.lineEnding)

         for i in range(0,len(self.truestoryhttml)):
             self.rawstoryhtml[i]=BeautifulSoup(self.truestoryhttml[i], 'html.parser')
@@ -348,7 +379,8 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,
         temp='<div id="'+str(depth)+'">'+str(temp2)
         self.questions.append(soup.find('header', attrs={'class':"question-header"}).get_text())
         temp+='<h2>'+self.questions[-1]+'</h2>\n</div>'
-        #Common.prnt(str(depth))
+        if self.partial:
+            Common.prnt(str(depth))
         j = 1

         nextpages=[]
@@ -414,9 +446,9 @@ def AddNextPage(self, url, depth, prevChapNum, prevLink, epubPrevLink, currLink,
                 self.AddNextPage(i.get('href'), str(depth)+'.'+str(j), chapNum, currLink, epubCurrLink, nextLink, currLinkId)

     def ThreadAdd(self, url, depth, renames, oldnames, chapNum, currLink, epubCurrLink, nextLink, currLinkId, ogUrl):
-        if self.Pages.count(url)>1:
-            print("found issue at" + str(url))
-            return None
+        #if self.Pages.count(url)>1:
+        #    print("found issue at" + str(url))
+        #    return None
         self.Pages[self.Pages.index(url)]=(Page(url, depth, renames, oldnames, self.q, chapNum, currLink, epubCurrLink, nextLink, currLinkId, ogUrl))

     def addPage(self, page):
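
A minimal sketch (not part of the commit) of what the new timestamp check in __init__ does with the date string it scrapes; 'Jun 18, 2022' is the sample value left commented out in the diff:

```python
from datetime import datetime

# The 'dates' <p> on a Chyoa page yields a string like 'Jun 18, 2022'.
# strptime parses it to midnight of that day; Common.CheckDuplicateTime
# then compares it against the local file's modification time.
timestamp = datetime.strptime('Jun 18, 2022', '%b %d, %Y')
print(timestamp)  # 2022-06-18 00:00:00
```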
35 changes: 31 additions & 4 deletions Site/Common.py
@@ -1,4 +1,5 @@
 import sys, urllib, os, requests, time
+from datetime import datetime

 #Module contains common functions needed by sites

Expand All @@ -16,14 +17,16 @@

dup = False

chyoaDupCheck=False

chyoa_force_forwards=False

mt = False

urlDict= {}

def prnt(out, f=False):
if not quiet and not f:
if not quiet or f:
print(out)

def imageDL(title, url, num, size=0, pbar=None, queue=None):
@@ -52,13 +55,37 @@ def imageDL(title, url, num, size=0, pbar=None, queue=None):


 def CheckDuplicate(title):
-    if opf == 'epub':
+    if any(x in ('epub', 'EPUB') for x in opf):
         return os.path.isfile(wd+title+'.epub')
-    elif opf == 'txt':
+    elif any(x in ('txt', 'TXT') for x in opf):
         return os.path.isfile(wd+title+'.txt') or os.path.exists(wd+title)
-    elif opf == 'html':
+    elif any(x in ('html', 'HTML') for x in opf):
         return os.path.isfile(wd+title+'.html') or os.path.exists(wd+title)

+def CheckDuplicateTime(title, timeObject):
+    if any(x in ('epub', 'EPUB') for x in opf):
+        if os.path.isfile(wd+title+'.epub'):
+            #print(time.ctime(os.path.getmtime(wd+title+'.epub')))
+            if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title+'.epub')), '%a %b %d %H:%M:%S %Y'):
+                return True
+    elif any(x in ('txt', 'TXT') for x in opf):
+        if os.path.isfile(wd+title+'.txt'):
+            if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title+'.txt')), '%a %b %d %H:%M:%S %Y'):
+                return True
+        elif os.path.exists(wd+title):
+            if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title)), '%a %b %d %H:%M:%S %Y'):
+                return True
+
+    elif any(x in ('html', 'HTML') for x in opf):
+        if os.path.isfile(wd+title+'.html'):
+            if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title+'.html')), '%a %b %d %H:%M:%S %Y'):
+                return True
+        elif os.path.exists(wd+title):
+            #print(datetime.strptime(time.ctime(os.path.getmtime(wd+title)), '%a %b %d %H:%M:%S %Y'))
+            if timeObject > datetime.strptime(time.ctime(os.path.getmtime(wd+title)), '%a %b %d %H:%M:%S %Y'):
+                return True
+    return False
+
+
 def GetImage(url):
     try:
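
The mtime comparison in CheckDuplicateTime round-trips the file time through time.ctime() and strptime(). A minimal sketch (assuming a writable working directory and a hypothetical example.txt) showing that the round-trip equals datetime.fromtimestamp() truncated to whole seconds:

```python
import os, time
from datetime import datetime

open('example.txt', 'w').close()          # throwaway file for getmtime()
mtime = os.path.getmtime('example.txt')
parsed = datetime.strptime(time.ctime(mtime), '%a %b %d %H:%M:%S %Y')
# Same instant in local time, with sub-second precision dropped:
assert parsed == datetime.fromtimestamp(mtime).replace(microsecond=0)
```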
2 changes: 1 addition & 1 deletion Site/Wattpad.py
@@ -67,7 +67,7 @@ def __init__(self, url):
         for i in range(0, len(self.rawstoryhtml)):
             self.story=self.story+self.chapters[i]+'\n'
             self.story=self.story+self.rawstoryhtml[i].get_text()
-        self.story=self.story.replace('\n', '\n\n')
+        self.story=self.story.replace('\n', Common.lineEnding)

     def addNextPage(self, url):
         soup=BeautifulSoup(self.requestPage(url).content, 'html.parser')
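
For context, the effect of swapping the hard-coded '\n\n' for Common.lineEnding (a sketch using the default value):

```python
# With the default --eol of '\n\n', every line break in the scraped story
# becomes a blank line between paragraphs; a custom --eol changes that.
story = 'Chapter 1\nSome text.'
print(story.replace('\n', '\n\n'))
```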
