Skip to content

Commit

Permalink
Merge pull request #41 from theslavicbear/HTMLdev
Browse files Browse the repository at this point in the history
Htmldev
  • Loading branch information
theslavicbear authored Dec 25, 2019
2 parents 78234ea + bf71aa8 commit 10886f8
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 40 deletions.
77 changes: 49 additions & 28 deletions Ebook-Publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import queue
import shutil
from zipfile import ZipFile
from time import sleep

#Master dict of supported sites
sites={
Expand All @@ -36,22 +37,29 @@ def MakeText(site):
published.write('by '+site.author+'\n\n')
published.write(site.story)
published.close()
'''else:
if site.hasimages == True:
if not os.path.exists(wd+site.title):
os.makedirs(wd+site.title)
i = 1
zeros = '0' * (len(str(len(site.images)))-1)
print(zeros)
for url in site.images:
if i > 9:
zeros='0'
elif i > 99:
zeros = ''
with open(wd+site.title+'/'+zeros+str(i)+'.jpg', 'wb') as myimg:
myimg.write(GetImage(url))
i=i+1
'''

def MakeHTML(site):
    """Write the story held by *site* to a standalone HTML file.

    The output path is built from the module-level ``wd`` prefix and
    ``site.title``. For Chyoa and Nhentai stories whose ``hasimages`` flag is
    set, the file is placed inside the ``wd + site.title`` folder; otherwise
    it is written directly under ``wd``.

    The document consists of a title/author header, a link back to
    ``site.url``, and then one section per entry of ``site.rawstoryhtml``.
    What is written for each entry depends on the concrete site type:

    * Chyoa: chapter heading plus ``site.truestoryhttml[i]``
    * Nhentai: ``site.truestoryhttml[i]`` only
    * Literotica: ``site.storyhtml``
    * anything else: chapter heading plus ``site.rawstoryhtml[i].prettify()``

    Args:
        site: A populated site object exposing ``title``, ``author``, ``url``,
            ``rawstoryhtml`` and the per-type attributes listed above.

    Raises:
        OSError: If the output file cannot be opened or written (for example
            when the target folder does not exist).
    """
    # Local import so this function does not depend on the file's import block.
    from html import escape

    is_chyoa = isinstance(site, Chyoa.Chyoa)
    is_nhentai = isinstance(site, Nhentai.Nhentai)
    is_literotica = isinstance(site, Literotica.Literotica)

    # `hasimages` is only consulted for the two site types that are checked
    # for it here, hence the short-circuit ordering.
    if (is_chyoa or is_nhentai) and site.hasimages:
        path = wd+site.title+'/'+site.title+'.html'
    else:
        path = wd+site.title+'.html'

    # The URL goes into an attribute value, so it must be quoted and escaped;
    # an unquoted href breaks on '&', '=', whitespace or '>' in the URL.
    safe_url = escape(site.url, quote=True)

    # Explicit UTF-8 (declared in <meta> as well) keeps non-ASCII story text
    # from failing under a narrow locale encoding; `with` guarantees the
    # handle is closed even if a write raises.
    with open(path, 'w', encoding='utf-8') as published:
        published.write('<!DOCTYPE html>\n')
        published.write('<html lang="en">\n')
        published.write('<head>\n<meta charset="utf-8">\n<title>'+site.title+' by '+site.author+'</title>\n</head>\n')
        published.write('<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href="'+safe_url+'">'+safe_url+'</a>\n')
        # Index-based loop: the same position is used across several parallel
        # lists (chapters, truestoryhttml, rawstoryhtml) depending on the type.
        for i in range(len(site.rawstoryhtml)):
            if is_chyoa:
                published.write('<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.truestoryhttml[i])
            elif is_nhentai:
                published.write(site.truestoryhttml[i])
            elif is_literotica:
                published.write(site.storyhtml)
            else:
                published.write('<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.rawstoryhtml[i].prettify())
        published.write('</html>')

def GetImage(url):
req = urllib.request.Request(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})
Expand All @@ -63,7 +71,7 @@ def MakeEpub(site):
book=epub.EpubBook()
book.set_identifier(site.url)
titlepage=epub.EpubHtml(title='Title Page', file_name='Title.xhtml', lang='en')
titlepage.content='<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href=\'url\'>'+site.url+'</a>'
titlepage.content='<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href='+site.url+'</a>'
#add summary information
try:
titlepage.content+='<br /><p>'+site.summary+'</p>'
Expand Down Expand Up @@ -116,7 +124,14 @@ def MakeEpub(site):
with ZipFile(wd+site.title+'.epub', 'a') as myfile:
i=1
for url in site.images:
with myfile.open('EPUB/img'+str(i)+'.jpg', 'w') as myimg:
zeros = '0' * (len(str(site.isize))-1)
if len(zeros)>1 and i > 9:
zeros='0'
elif len(zeros)==1 and i > 9:
zeros = ''
if i > 99:
zeros = ''
with myfile.open('EPUB/'+zeros+str(i)+'.jpg', 'w') as myimg:
myimg.write(GetImage(url))
i=i+1

Expand All @@ -134,6 +149,8 @@ def MakeClass(url):
if ftype=='epub':
#for site in s:
MakeEpub(site)
elif ftype=='html':
MakeHTML(site)
else:
#for site in s:
MakeText(site)
Expand All @@ -143,7 +160,7 @@ def MakeClass(url):
#setting up commandline argument parser
parser=argparse.ArgumentParser()
parser.add_argument('url', help='The URL of the story you want', nargs='?')
parser.add_argument('-o','--output-type', help='The file type you want', choices=['txt', 'epub'])
parser.add_argument('-o','--output-type', help='The file type you want', choices=['txt', 'epub', 'html'], default='txt')
parser.add_argument('-f','--file', help="Use text file containing a list of URLs instead of single URL", action='store_true')
parser.add_argument('-d','--directory', help="Directory to place output files. Default ./")
parser.add_argument('-q','--quiet', help="Turns off most terminal output", action='store_true')
Expand Down Expand Up @@ -172,8 +189,7 @@ def MakeClass(url):
wd=args.directory
Common.wd = wd

if args.output_type == 'epub':
Common.opf = 'epub'
Common.opf = args.output_type

cwd=os.getcwd()
#TODO should use non-relative path
Expand All @@ -199,19 +215,24 @@ def MakeClass(url):
#the multithreaded variant
if args.t:
lock = threading.Lock()
threads = 0
for i in urls:
t=threading.Thread(target=MakeClass, args=(i,), daemon=True)
t.start()
siteThreads = threading.active_count()
while siteThreads>1:
threads +=1
#siteThreads = threading.active_count()
while threads>0:
s=q.get()
siteThreads-=1

#threading.active_count()-=1
#sleep(.01)
threads -=1
else:
for i in urls:
#site=MakeClass(i)
if ftype=='epub':
MakeEpub(MakeClass(i))
elif ftype=='html':
MakeHTML(MakeClass(i))
else:
MakeText(MakeClass(i))

Expand All @@ -223,9 +244,9 @@ def MakeClass(url):
sys.exit()
if ftype=='epub':
MakeEpub(site)
elif ftype=='html':
MakeHTML(site)
else:
MakeText(site)
while threading.active_count()>1:
pass
#if type(site) is Nhentai.Nhentai and site.pbar is not None:
#site.pbar.End()
sleep(.01)
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Ebook-Publisher
A Python tool for converting online stories into portable formats

**Download Ebook-Publisher by cloning the git repository `git clone https://github.com/theslavicbear/Ebook-Publisher.git` or downloading the zip of the latest release (generally more stable, as I tend to push directly to master) and running the Ebook-Publisher.py file. At a minimum, you must supply one URL from a supported site as a command line argument. With no other options, you will receive a text file with the story contents. Please see the below help message for a list of possible options to improve your experience, e.g. multiple URL inputs, concurrent downloads, and/or EPUB formatted output files.**
**Download Ebook-Publisher by cloning the git repository `git clone https://github.com/theslavicbear/Ebook-Publisher.git` or downloading the zip of the latest release (generally more stable, as I tend to push directly to master) and running the Ebook-Publisher.py file. At a minimum, you must supply one URL from a supported site as a command line argument. With no other options, you will receive a text file with the story contents. Please see the below help message for a list of possible options to improve your experience, e.g. multiple URL inputs, concurrent downloads, and/or EPUB/HTML formatted output files.**

Ebook-Publisher is my pet project, and the project that I currently have spent the most time and effort on. As such, I welcome criticism, requests for improvement, and bug reports. Please open an issue for any of the preceding.

Expand All @@ -17,6 +17,7 @@ Ebook-Publisher is my pet project, and the project that I currently have spent t
## Currently supported file types:
* plain text files
* epub ebook files
* html files (for nhentai galleries, the output is the same as the default .txt output, plus an HTML file in the folder that makes it easy to view the gallery in a web browser; Chyoa stories don't grab images yet)

Want more sites supported? Open an Issue and ask for its support or add support for the site yourself!

Expand Down
7 changes: 4 additions & 3 deletions Site/Chyoa.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,9 @@ def __init__(self, url):
simg['src']='img'+str(len(self.images))+'.jpg'
self.hasimages = True


temp=str(soup.find('div', attrs={'class': 'chapter-content'}))





self.questions.insert(0, soup.find_all('h2')[1].get_text())
temp+='<h2>'+self.questions[0]+'</h2>'
Expand Down Expand Up @@ -147,6 +144,10 @@ def __init__(self, url):
self.truestoryhttml[i]=self.truestoryhttml[i].replace(' </span>\n ', '</span> ')

self.story=self.story.replace('\n', '\n\n')

if Common.images and self.hasimages and Common.opf=='html':
for i in range(0,len(self.images)):
Common.imageDL(self.title, self.images[i], i+1, size=len(self.images))
#print(self.story)
#print(self.truestoryhttml[len(self.truestoryhttml)-1])
#for i in range(len(self.renames)):
Expand Down
8 changes: 6 additions & 2 deletions Site/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def prnt(out, f=False):
if not quiet and not f:
print(out)

def imageDL(title, url, size, num, pbar):
def imageDL(title, url, num, size=0, pbar=None):
if not os.path.exists(wd+title):
os.makedirs(wd+title)
zeros = '0' * (len(str(size))-1)
Expand All @@ -25,10 +25,14 @@ def imageDL(title, url, size, num, pbar):
zeros = ''
if num > 99:
zeros = ''
if pbar is None:
zeros = 'img' #TODO fix this for Chyoa stories so that image files don't have to be prepended with 'img' and no zeros
#print(zeros)
with open(wd+title+'/'+zeros+str(num)+'.jpg', 'wb') as myimg:
myimg.write(GetImage(url))
pbar.Update()
if pbar is not None:
pbar.Update()
#queue.put()


def GetImage(url):
Expand Down
23 changes: 17 additions & 6 deletions Site/Nhentai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from Site import Common
from time import sleep
import threading
#import queue

class Nhentai:

Expand All @@ -23,6 +24,7 @@ def __init__(self, url):
self.images=[] #testing images
self.hasimages = True
self.isize=0
#self.q = queue.Queue()
try:
page=requests.get(self.url)
except:
Expand All @@ -36,9 +38,10 @@ def __init__(self, url):


self.truestoryhttml.append('')
if Common.opf=='txt':
self.isize=len(soup.find_all('a', attrs={'rel':'nofollow'}))

if Common.opf in ('html','txt'):

self.isize=len(soup.find_all('a', attrs={'rel':'nofollow'}))
self.pbar = Common.Progress(self.isize)
for i in soup.find_all('a', attrs={'rel':'nofollow'}):
#print(i.get('rel'))
Expand Down Expand Up @@ -66,10 +69,18 @@ def AddPage(self, url):
except:
print('Error in: '+url)
#print(soup.prettify())
if Common.opf != 'txt':
self.truestoryhttml[0]=self.truestoryhttml[0]+'<p><img src="img'+str(len(self.images))+'.jpg" /></p>'
else:
t=threading.Thread(target=Common.imageDL, args=(self.title, thisimage, self.isize, len(self.images), self.pbar), daemon=True)
if Common.opf in ('epub', 'html'):
zeros = '0' * (len(str(self.isize))-1)
num = len(self.images)
if len(zeros)>1 and num > 9:
zeros='0'
elif len(zeros)==1 and num > 9:
zeros = ''
if num > 99:
zeros = ''
self.truestoryhttml[0]=self.truestoryhttml[0]+'<p><img src="'+zeros+str(num)+'.jpg" /></p>\n'
if Common.opf in ('txt', 'html'):
t=threading.Thread(target=Common.imageDL, args=(self.title, thisimage, len(self.images), self.isize, self.pbar), daemon=False)
t.start()
#Common.imageDL(self.title, thisimage, self.isize, len(self.images))
#self.pbar.Update()
Expand Down

0 comments on commit 10886f8

Please sign in to comment.