Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Htmldev #41

Merged
merged 5 commits into from
Dec 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 49 additions & 28 deletions Ebook-Publisher.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import queue
import shutil
from zipfile import ZipFile
from time import sleep

#Master dict of supported sites
sites={
Expand All @@ -36,22 +37,29 @@ def MakeText(site):
published.write('by '+site.author+'\n\n')
published.write(site.story)
published.close()
'''else:
if site.hasimages == True:
if not os.path.exists(wd+site.title):
os.makedirs(wd+site.title)
i = 1
zeros = '0' * (len(str(len(site.images)))-1)
print(zeros)
for url in site.images:
if i > 9:
zeros='0'
elif i > 99:
zeros = ''
with open(wd+site.title+'/'+zeros+str(i)+'.jpg', 'wb') as myimg:
myimg.write(GetImage(url))
i=i+1
'''

def MakeHTML(site):
    """Write the story held by *site* to a standalone HTML file.

    The file is named ``<title>.html`` and is written under the output
    directory ``wd``.  For Chyoa and Nhentai stories that have images it is
    written inside the ``<title>/`` sub-directory instead.

    Args:
        site: A populated site object.  ``title``, ``author``, ``url`` and
            ``rawstoryhtml`` are always read; ``hasimages``, ``chapters``,
            ``truestoryhttml`` and ``storyhtml`` are read depending on the
            concrete site type.
    """
    has_image_dir = (type(site) is Chyoa.Chyoa or type(site) is Nhentai.Nhentai) and site.hasimages
    if has_image_dir:
        # The sub-directory is normally created by the image downloader, but
        # nothing guarantees that has happened, so make sure it exists.
        os.makedirs(wd+site.title, exist_ok=True)
        path = wd+site.title+'/'+site.title+'.html'
    else:
        path = wd+site.title+'.html'

    # 'with' guarantees the file is closed even if a write below raises.
    with open(path, 'w') as published:
        published.write('<!DOCTYPE html>\n')
        published.write('<html lang="en">\n')
        published.write('<head>\n<title>'+site.title+' by '+site.author+'</title>\n</head>\n')
        # The href value is quoted: unquoted attribute values may not contain
        # '=', which appears in any URL with a query string.
        published.write('<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href="'+site.url+'">'+site.url+'</a>\n')
        for i in range(len(site.rawstoryhtml)):
            if type(site) is Chyoa.Chyoa:
                published.write('<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.truestoryhttml[i])
            elif type(site) is Nhentai.Nhentai:
                published.write(site.truestoryhttml[i])
            elif type(site) is Literotica.Literotica:
                # NOTE(review): the whole storyhtml is written once per
                # rawstoryhtml entry -- confirm this cannot duplicate content.
                published.write(site.storyhtml)
            else:
                published.write('<h2>\n'+site.chapters[i]+'\n</h2>\n'+site.rawstoryhtml[i].prettify())
        published.write('</html>')

def GetImage(url):
req = urllib.request.Request(url, headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})
Expand All @@ -63,7 +71,7 @@ def MakeEpub(site):
book=epub.EpubBook()
book.set_identifier(site.url)
titlepage=epub.EpubHtml(title='Title Page', file_name='Title.xhtml', lang='en')
titlepage.content='<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href=\'url\'>'+site.url+'</a>'
titlepage.content='<h1>'+site.title+'</h1><h3>by '+site.author+'</h3><br /><a href='+site.url+'</a>'
#add summary information
try:
titlepage.content+='<br /><p>'+site.summary+'</p>'
Expand Down Expand Up @@ -116,7 +124,14 @@ def MakeEpub(site):
with ZipFile(wd+site.title+'.epub', 'a') as myfile:
i=1
for url in site.images:
with myfile.open('EPUB/img'+str(i)+'.jpg', 'w') as myimg:
zeros = '0' * (len(str(site.isize))-1)
if len(zeros)>1 and i > 9:
zeros='0'
elif len(zeros)==1 and i > 9:
zeros = ''
if i > 99:
zeros = ''
with myfile.open('EPUB/'+zeros+str(i)+'.jpg', 'w') as myimg:
myimg.write(GetImage(url))
i=i+1

Expand All @@ -134,6 +149,8 @@ def MakeClass(url):
if ftype=='epub':
#for site in s:
MakeEpub(site)
elif ftype=='html':
MakeHTML(site)
else:
#for site in s:
MakeText(site)
Expand All @@ -143,7 +160,7 @@ def MakeClass(url):
#setting up commandline argument parser
parser=argparse.ArgumentParser()
parser.add_argument('url', help='The URL of the story you want', nargs='?')
parser.add_argument('-o','--output-type', help='The file type you want', choices=['txt', 'epub'])
parser.add_argument('-o','--output-type', help='The file type you want', choices=['txt', 'epub', 'html'], default='txt')
parser.add_argument('-f','--file', help="Use text file containing a list of URLs instead of single URL", action='store_true')
parser.add_argument('-d','--directory', help="Directory to place output files. Default ./")
parser.add_argument('-q','--quiet', help="Turns off most terminal output", action='store_true')
Expand Down Expand Up @@ -172,8 +189,7 @@ def MakeClass(url):
wd=args.directory
Common.wd = wd

if args.output_type == 'epub':
Common.opf = 'epub'
Common.opf = args.output_type

cwd=os.getcwd()
#TODO should use non-relative path
Expand All @@ -199,19 +215,24 @@ def MakeClass(url):
#the multithreaded variant
if args.t:
lock = threading.Lock()
threads = 0
for i in urls:
t=threading.Thread(target=MakeClass, args=(i,), daemon=True)
t.start()
siteThreads = threading.active_count()
while siteThreads>1:
threads +=1
#siteThreads = threading.active_count()
while threads>0:
s=q.get()
siteThreads-=1

#threading.active_count()-=1
#sleep(.01)
threads -=1
else:
for i in urls:
#site=MakeClass(i)
if ftype=='epub':
MakeEpub(MakeClass(i))
elif ftype=='html':
MakeHTML(MakeClass(i))
else:
MakeText(MakeClass(i))

Expand All @@ -223,9 +244,9 @@ def MakeClass(url):
sys.exit()
if ftype=='epub':
MakeEpub(site)
elif ftype=='html':
MakeHTML(site)
else:
MakeText(site)
while threading.active_count()>1:
pass
#if type(site) is Nhentai.Nhentai and site.pbar is not None:
#site.pbar.End()
sleep(.01)
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Ebook-Publisher
A Python tool for converting online stories into portable formats

**Download Ebook-Publisher by cloning the git repository `git clone https://github.com/theslavicbear/Ebook-Publisher.git` or downloading the zip of the latest release (generally more stable, as I tend to push directly to master) and running the Ebook-Publisher.py file. At a minimum, you must supply one URL from a supported site as a command line argument. With no other options, you will receive a text file with the story contents. Please see the below help message for a list of possible options to improve your experience, e.g. multiple URL inputs, concurrent downloads, and/or EPUB formatted output files.**
**Download Ebook-Publisher by cloning the git repository `git clone https://github.com/theslavicbear/Ebook-Publisher.git` or downloading the zip of the latest release (generally more stable, as I tend to push directly to master) and running the Ebook-Publisher.py file. At a minimum, you must supply one URL from a supported site as a command line argument. With no other options, you will receive a text file with the story contents. Please see the below help message for a list of possible options to improve your experience, e.g. multiple URL inputs, concurrent downloads, and/or EPUB/HTML formatted output files.**

Ebook-Publisher is my pet project, and the one I have spent the most time and effort on so far. As such, I welcome criticism, requests for improvement, and bug reports. Please open an issue for any of these.

Expand All @@ -17,6 +17,7 @@ Ebook-Publisher is my pet project, and the project that I currently have spent t
## Currently supported file types:
* plain text files
* epub ebook files
* html files (For nhentai galleries, the output is the same as the default/.txt output, plus an html file in the folder that makes it easy to view the gallery in a web browser. Chyoa stories don't grab images yet)

Want more sites supported? Open an Issue and ask for its support or add support for the site yourself!

Expand Down
7 changes: 4 additions & 3 deletions Site/Chyoa.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,9 @@ def __init__(self, url):
simg['src']='img'+str(len(self.images))+'.jpg'
self.hasimages = True


temp=str(soup.find('div', attrs={'class': 'chapter-content'}))





self.questions.insert(0, soup.find_all('h2')[1].get_text())
temp+='<h2>'+self.questions[0]+'</h2>'
Expand Down Expand Up @@ -147,6 +144,10 @@ def __init__(self, url):
self.truestoryhttml[i]=self.truestoryhttml[i].replace(' </span>\n ', '</span> ')

self.story=self.story.replace('\n', '\n\n')

if Common.images and self.hasimages and Common.opf=='html':
for i in range(0,len(self.images)):
Common.imageDL(self.title, self.images[i], i+1, size=len(self.images))
#print(self.story)
#print(self.truestoryhttml[len(self.truestoryhttml)-1])
#for i in range(len(self.renames)):
Expand Down
8 changes: 6 additions & 2 deletions Site/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def prnt(out, f=False):
if not quiet and not f:
print(out)

def imageDL(title, url, size, num, pbar):
def imageDL(title, url, num, size=0, pbar=None):
if not os.path.exists(wd+title):
os.makedirs(wd+title)
zeros = '0' * (len(str(size))-1)
Expand All @@ -25,10 +25,14 @@ def imageDL(title, url, size, num, pbar):
zeros = ''
if num > 99:
zeros = ''
if pbar is None:
zeros = 'img' #TODO fix this for Chyoa stories so that image files don't have to be prepended with 'img' and no zeros
#print(zeros)
with open(wd+title+'/'+zeros+str(num)+'.jpg', 'wb') as myimg:
myimg.write(GetImage(url))
pbar.Update()
if pbar is not None:
pbar.Update()
#queue.put()


def GetImage(url):
Expand Down
23 changes: 17 additions & 6 deletions Site/Nhentai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from Site import Common
from time import sleep
import threading
#import queue

class Nhentai:

Expand All @@ -23,6 +24,7 @@ def __init__(self, url):
self.images=[] #testing images
self.hasimages = True
self.isize=0
#self.q = queue.Queue()
try:
page=requests.get(self.url)
except:
Expand All @@ -36,9 +38,10 @@ def __init__(self, url):


self.truestoryhttml.append('')
if Common.opf=='txt':
self.isize=len(soup.find_all('a', attrs={'rel':'nofollow'}))

if Common.opf in ('html','txt'):

self.isize=len(soup.find_all('a', attrs={'rel':'nofollow'}))
self.pbar = Common.Progress(self.isize)
for i in soup.find_all('a', attrs={'rel':'nofollow'}):
#print(i.get('rel'))
Expand Down Expand Up @@ -66,10 +69,18 @@ def AddPage(self, url):
except:
print('Error in: '+url)
#print(soup.prettify())
if Common.opf != 'txt':
self.truestoryhttml[0]=self.truestoryhttml[0]+'<p><img src="img'+str(len(self.images))+'.jpg" /></p>'
else:
t=threading.Thread(target=Common.imageDL, args=(self.title, thisimage, self.isize, len(self.images), self.pbar), daemon=True)
if Common.opf in ('epub', 'html'):
zeros = '0' * (len(str(self.isize))-1)
num = len(self.images)
if len(zeros)>1 and num > 9:
zeros='0'
elif len(zeros)==1 and num > 9:
zeros = ''
if num > 99:
zeros = ''
self.truestoryhttml[0]=self.truestoryhttml[0]+'<p><img src="'+zeros+str(num)+'.jpg" /></p>\n'
if Common.opf in ('txt', 'html'):
t=threading.Thread(target=Common.imageDL, args=(self.title, thisimage, len(self.images), self.isize, self.pbar), daemon=False)
t.start()
#Common.imageDL(self.title, thisimage, self.isize, len(self.images))
#self.pbar.Update()
Expand Down