Skip to content
This repository has been archived by the owner on Dec 3, 2019. It is now read-only.

MEDUSA_URL_PREFIX, and exception handling + logging fixes when running in multiprocessing #13

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
de23099
Add support for rendering with a URL prefix
hyperair Jan 14, 2014
2360905
Call Renderer.get_paths() before set_script_prefix
hyperair Jan 14, 2014
d2644f8
Reduce chunksize to 1 when using multiprocessing
hyperair Feb 17, 2014
378adc9
Reduce chunksize to 1 for other renderers as well, not just disk
hyperair Feb 18, 2014
e75e35b
Handle cases when MEDUSA_URL_PREFIX isn't set
hyperair Feb 20, 2014
507503b
Move batch-processing/generate logic into BaseStaticSiteRenderer
hyperair Feb 20, 2014
f9a90c1
Move some of the render_path() logic into BaseStaticSiteRenderer
hyperair Feb 21, 2014
4b820ed
Factor directory index file name derivation into separate method
hyperair Feb 24, 2014
270347e
Move _s3_render_path into S3StaticSiteRenderer.render_path
hyperair Feb 26, 2014
276caa8
Move _gae_render_path into GAEStaticSiteRenderer
hyperair Feb 26, 2014
2c80b88
Fix up typo in variable name
hyperair Feb 26, 2014
fc3bd4b
Initialize BaseStaticSiteRenderer.client to None
hyperair Feb 26, 2014
6d5eac3
Fix missing import of os
hyperair Feb 26, 2014
5c78ff1
Fix NameError: global name 'mime' is not defined
hyperair Feb 27, 2014
41a04df
Fix missing mimetypes import
hyperair Feb 27, 2014
af569b0
Fix missing mimetype name in appengine.py
hyperair Feb 27, 2014
dba06d6
Forward render_path return values so they can be properly reaped by .…
hyperair Feb 27, 2014
0e99a28
Use QueueHandler from logutils when logging in a multiprocessing context
hyperair Feb 28, 2014
1027477
Move all log setup code into separate submodule
hyperair Mar 3, 2014
73def02
Replace all print-based logging with proper logging calls
hyperair Mar 3, 2014
30c9379
Don't use super() with logging.Handler
hyperair Mar 3, 2014
65d6154
Fix finalize_logger handling
hyperair Mar 3, 2014
9e3f795
Initialize and finalize logger on BaseStaticSiteRenderer
hyperair Mar 4, 2014
13851b7
Use logger in get_static_renderers for logging
hyperair Mar 6, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions django_medusa/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from django.conf import settings
import logging


class ProxyLogHandler(logging.Handler):
    """
    Handler that forwards every record it receives to another logger.

    get_logger() installs it as the sink of the queue listener, so records
    taken off the multiprocessing queue are replayed on the base logger.
    """
    def __init__(self, logger):
        """
        :param logger: the ``logging.Logger`` that records are forwarded to.
        """
        # Called explicitly instead of via super(); presumably because
        # logging.Handler is an old-style class on older Python 2 releases.
        logging.Handler.__init__(self)
        self.__logger = logger

    def emit(self, record):
        """
        Replay ``record`` on the target logger if its level allows it.

        ``Logger.handle()`` does not perform the level check that the normal
        ``Logger.info()``/``Logger.log()`` entry points do, so it is applied
        here; otherwise forwarded records would bypass the level configured
        for the target logger.
        """
        if self.__logger.isEnabledFor(record.levelno):
            self.__logger.handle(record)

# Queue listener started by get_logger() in multiprocessing mode; stays None
# until then and is reset to None by finalize_logger().
listener = None

def get_logger():
    """
    Return the logger that renderers should log through.

    When ``settings.MEDUSA_MULTITHREAD`` is unset or false this is simply the
    base logger.  Otherwise it is a dedicated, non-propagating logger whose
    records are pushed onto a ``multiprocessing.Queue``; a ``QueueListener``
    started here drains that queue and replays the records on the base
    logger through ``ProxyLogHandler``.

    The queue and listener are only created while the logger's ``setup_done``
    attribute is unset or false; ``finalize_logger()`` stops the listener and
    resets that flag.
    """
    global listener

    # Same lookup the renderers use: a missing setting means "not
    # multithreaded" rather than an AttributeError.
    if not getattr(settings, 'MEDUSA_MULTITHREAD', False):
        return get_base_logger()

    # Imported after the early return, so logutils is only needed when
    # multiprocessing mode is actually enabled.
    from logutils.queue import QueueHandler, QueueListener
    from multiprocessing import Queue

    mplogger = logging.getLogger(__name__ + '.__multiprocessing__')
    if not getattr(mplogger, 'setup_done', False):
        # finalize_logger() leaves the previous QueueHandler attached; drop
        # it so records are not also fed into a queue nobody drains.
        stale_handlers = [handler for handler in mplogger.handlers
                          if isinstance(handler, QueueHandler)]
        for handler in stale_handlers:
            mplogger.removeHandler(handler)

        logqueue = Queue()

        mplogger.setLevel(logging.DEBUG)
        mplogger.addHandler(QueueHandler(logqueue))
        mplogger.propagate = False
        mplogger.setup_done = True

        listener = QueueListener(logqueue,
                                 ProxyLogHandler(get_base_logger()))
        listener.start()

    return mplogger

def finalize_logger():
    """
    Shut down the multiprocessing log listener, if one is running.

    Marks the logger returned by ``get_logger()`` as no longer set up, stops
    the listener and clears the module-level reference.  Does nothing when
    no listener has been started.
    """
    global listener

    if listener is None:
        return

    get_logger().setup_done = False
    listener.stop()
    listener = None

def get_base_logger():
    """
    Return the logger named after this module.
    """
    logger_name = __name__
    return logging.getLogger(logger_name)
18 changes: 15 additions & 3 deletions django_medusa/management/commands/staticsitegen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from django.conf import settings
from django.core.management.base import BaseCommand
from django.core.urlresolvers import set_script_prefix
from django_medusa.renderers import StaticSiteRenderer
from django_medusa.utils import get_static_renderers

Expand All @@ -12,8 +14,18 @@ class Command(BaseCommand):
def handle(self, *args, **options):
    """
    Generate the static site with every registered renderer.

    Initializes the output, instantiates all renderers, resolves their
    paths, applies ``settings.MEDUSA_URL_PREFIX`` as the script prefix when
    it is configured, runs each renderer and finally finalizes the output.
    """
    StaticSiteRenderer.initialize_output()

    renderers = []
    for renderer_class in get_static_renderers():
        renderers.append(renderer_class())

    # Touch ``paths`` on every renderer so it is memoized before the script
    # prefix changes; this avoids script prefix pollution of the paths.
    for renderer in renderers:
        renderer.paths

    prefix = getattr(settings, 'MEDUSA_URL_PREFIX', None)
    if prefix is not None:
        set_script_prefix(prefix)

    # Only now, with the prefix in place, render the pages.
    for renderer in renderers:
        renderer.generate()

    StaticSiteRenderer.finalize_output()
120 changes: 45 additions & 75 deletions django_medusa/renderers/appengine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import print_function
from django.conf import settings
from django.test.client import Client
from ..log import get_logger
from .base import BaseStaticSiteRenderer
import os

Expand All @@ -10,55 +11,53 @@
'htm', 'html', 'css', 'xml', 'json', 'js', 'yaml', 'txt'
)


# Unfortunately split out from the class at the moment to allow rendering with
# several processes via `multiprocessing`.
# TODO: re-implement within the class if possible?
def _gae_render_path(args):
client, path, view = args
if not client:
client = Client()
if path:
DEPLOY_DIR = settings.MEDUSA_DEPLOY_DIR
realpath = path
if path.startswith("/"):
realpath = realpath[1:]

if path.endswith("/"):
needs_ext = True
else:
needs_ext = False

output_dir = os.path.abspath(os.path.join(
DEPLOY_DIR,
"deploy",
os.path.dirname(realpath)
))
if not os.path.exists(output_dir):
os.makedirs(output_dir)
outpath = os.path.join(DEPLOY_DIR, "deploy", realpath)
class GAEStaticSiteRenderer(BaseStaticSiteRenderer):
"""
A variation of BaseStaticSiteRenderer that deploys directly to S3
rather than to the local filesystem.

Settings:
* GAE_APP_ID
* MEDUSA_DEPLOY_DIR
"""
def render_path(self, path=None, view=None):
if not path:
return None

DEPLOY_DIR = settings.MEDUSA_DEPLOY_DIR

resp = client.get(path)
if resp.status_code != 200:
raise Exception
resp = self._render(path)

mimetype = resp['Content-Type'].split(";", 1)[0]
# Force get_outpath to always use index.html by passing text/html
# mimetype
rel_outpath = os.path.join("deploy",
self.get_outpath(path, 'text/html'))
outpath = os.path.join(self.DEPLOY_DIR, rel_outpath)

if needs_ext:
outpath += "index.html"
# Ensure the directories exist
try:
os.makedirs(os.path.dirname(outpath))
except OSError:
pass

self.logger.info("Saving file to %s", outpath)

print(outpath)
with open(outpath, 'w') as f:
f.write(resp.content)

rel_outpath = outpath.replace(
os.path.abspath(DEPLOY_DIR) + "/",
""
)
if ((not needs_ext) and path.endswith(STANDARD_EXTENSIONS))\
or (mimetype == "text/html"):
# Either has obvious extension OR it's a regular HTML file.
mimetype = resp['Content-Type'].split(';', 1)[0]

if (mimetype == 'text/html' or
(not path.endswith('/') and
outpath.endswith(STANDARD_EXTENSIONS))):
# Either has obvious extension OR it's a regular HTML file
return None

return "# req since this url does not end in an extension and also\n"\
"# has non-html mime: %s\n"\
"- url: %s\n"\
Expand All @@ -68,22 +67,10 @@ def _gae_render_path(args):
mimetype, path, rel_outpath, rel_outpath, mimetype
)


class GAEStaticSiteRenderer(BaseStaticSiteRenderer):
"""
A variation of BaseStaticSiteRenderer that deploys directly to S3
rather than to the local filesystem.

Settings:
* GAE_APP_ID
* MEDUSA_DEPLOY_DIR
"""
def render_path(self, path=None, view=None):
return _gae_render_path((self.client, path, view))

@classmethod
def initialize_output(cls):
print("Initializing output directory with `app.yaml`.")
super(GAEStaticSiteRenderer, cls).initialize_output()
cls.logger.info("Initializing output directory with `app.yaml`")

# Initialize the MEDUSA_DEPLOY_DIR with an `app.yaml` and `deploy`
# directory which stores the static files on disk.
Expand Down Expand Up @@ -113,7 +100,7 @@ def initialize_output(cls):

@classmethod
def finalize_output(cls):
print("Finalizing `app.yaml`.")
cls.logger.info("Finalizing app.yaml")

DEPLOY_DIR = settings.MEDUSA_DEPLOY_DIR
app_yaml = os.path.abspath(os.path.join(
Expand Down Expand Up @@ -156,34 +143,17 @@ def finalize_output(cls):
)
app_yaml_f.close()

print("You should now be able to deploy this to Google App Engine")
print("by performing the following command:")
print("appcfg.py update %s" % os.path.abspath(DEPLOY_DIR))
cls.logger.info("You should now be able to deploy this to "
"Google App Engine by performing the following "
"command:\n"
"appcfg.py update %s", os.path.abspath(DEPLOY_DIR))

super(GAEStaticSiteRenderer, cls).finalize_output()

def generate(self):
DEPLOY_DIR = settings.MEDUSA_DEPLOY_DIR

# Generate the site
if getattr(settings, "MEDUSA_MULTITHREAD", False):
# Upload up to ten items at once via `multiprocessing`.
from multiprocessing import Pool

print("Uploading with up to 10 upload processes...")
pool = Pool(10)

handlers = pool.map(
_gae_render_path,
((None, path, None) for path in self.paths),
chunksize=5
)
pool.close()
pool.join()
else:
# Use standard, serial upload.
self.client = Client()
handlers = []
for path in self.paths:
handlers.append(self.render_path(path=path))
handlers = super(GAEStaticSiteRenderer, self).generate()

DEPLOY_DIR = settings.MEDUSA_DEPLOY_DIR
app_yaml = os.path.abspath(os.path.join(
Expand Down
97 changes: 93 additions & 4 deletions django_medusa/renderers/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
from __future__ import print_function
from django.conf import settings
from django.test.client import Client
from django_medusa.log import get_logger, finalize_logger
import mimetypes
import os

__all__ = ['COMMON_MIME_MAPS', 'BaseStaticSiteRenderer']


Expand All @@ -15,13 +22,20 @@
"text/css": ".css",
}

class RenderError(Exception):
    """
    Signals that rendering a path failed.
    """

class BaseStaticSiteRenderer(object):
"""
This default renderer writes the given URLs (defined in get_paths())
into static files on the filesystem by getting the view's response
through the Django testclient.
"""
def __init__(self):
    """
    Create a renderer that does not hold a test client yet.
    """
    # Left unset here: _render() falls back to a fresh Client when this is
    # empty, and generate() installs one for serial runs.
    self.client = None

@classmethod
def initialize_output(cls):
Expand All @@ -33,7 +47,9 @@ def initialize_output(cls):
Management command calls this once before iterating over all
renderer instances.
"""
pass
# Store logger on BaseStaticSiteRenderer so that all derivative classes
# can access this instance.
BaseStaticSiteRenderer.logger = get_logger()

@classmethod
def finalize_output(cls):
Expand All @@ -46,7 +62,8 @@ def finalize_output(cls):
Management command calls this once after iterating over all
renderer instances.
"""
pass
finalize_logger()
BaseStaticSiteRenderer.logger = None

def get_paths(self):
""" Override this in a subclass to define the URLs to process """
Expand All @@ -61,9 +78,81 @@ def paths(self):
self._paths = p
return p

def _render(self, path=None, view=None):
    """
    Fetch ``path`` through the Django test client and return the response.

    ``self.client`` is used when it is set; otherwise a new ``Client`` is
    created for this request.  ``view`` is accepted but not used.

    Raises ``RenderError`` when the response status is not 200.
    """
    response = (self.client or Client()).get(path)

    if response.status_code == 200:
        return response

    raise RenderError("Path {0} did not return status 200".format(path))

@classmethod
def get_outpath(cls, path, content_type):
    """
    Map a URL path to the relative file path its output is written to.

    One leading slash is dropped so the result is not absolute.  A path
    that ends in a slash is treated as a directory and gets the index file
    name from ``get_dirsuffix(content_type)`` appended; this includes the
    site root ``/``, which maps to the bare index file name.  Any other
    path is returned as is (minus the leading slash).
    """
    # Decide this on the original path: once the leading slash is stripped
    # the site root ("/") becomes "" and no longer looks like a directory.
    is_directory = path.endswith('/')

    relative = path[1:] if path.startswith('/') else path

    if not is_directory:
        return relative

    return os.path.join(relative, cls.get_dirsuffix(content_type))

@classmethod
def get_dirsuffix(cls, content_type):
    """
    Return the index file name for a directory-style path.

    The media type is the part of ``content_type`` before the first ``;``.
    Its extension is looked up in ``COMMON_MIME_MAPS`` first, then via
    ``mimetypes.guess_extension``; ``.html`` is used when neither yields
    one.  The result is ``index`` followed by that extension.
    """
    mime = content_type.split(';', 1)[0]
    guessed = mimetypes.guess_extension(mime)
    extension = COMMON_MIME_MAPS.get(mime, guessed) or '.html'
    return 'index' + extension

def render_path(self, path=None, view=None):
    """
    Render a single path; not implemented on the base renderer.
    """
    raise NotImplementedError()

def generate(self):
    """
    Render every path in ``self.paths`` and return the results as a list.

    With ``settings.MEDUSA_MULTITHREAD`` enabled the paths are distributed
    over a pool of ``cpu_count()`` worker processes; otherwise they are
    rendered one after another in this process using a test client stored
    on ``self.client``.  Each entry of the returned list is whatever
    ``PageGenerator`` returned for the corresponding path.
    """
    arglist = ((path, None) for path in self.paths)
    generator = PageGenerator(self)

    if getattr(settings, "MEDUSA_MULTITHREAD", False):
        from multiprocessing import Pool, cpu_count

        processes = cpu_count()
        self.logger.info("Generating with up to %s processes...",
                         processes)
        pool = Pool(processes)
        try:
            # chunksize=1: each task handed to a worker is a single path.
            return pool.map(generator, arglist, chunksize=1)
        finally:
            # Always release the workers, even when map() raised.
            pool.close()
            pool.join()

    self.client = Client()

    # Built eagerly: a bare map() is lazy on Python 3 and would render
    # nothing unless the caller happened to consume the result.
    return [generator(args) for args in arglist]


class PageGenerator(object):
    """
    Picklable callable that renders one path through a renderer instance.

    multiprocessing is unable to transfer a bound method object into a
    pickle, so an instance of this helper is mapped over the paths and
    bounces each call back into the renderer.
    """
    def __init__(self, renderer):
        """
        :param renderer: object providing ``logger`` and ``render_path()``.
        """
        self.renderer = renderer

    def __call__(self, args):
        """
        Render the path described by ``args``.

        :param args: sequence whose first item is the path; the whole
            sequence is unpacked into ``renderer.render_path()``.
        :returns: the value returned by ``render_path()``, or ``None`` when
            rendering raised an exception (which is logged with traceback).
        """
        path = args[0]
        logger = self.renderer.logger

        try:
            logger.info("Generating %s...", path)
            retval = self.renderer.render_path(*args)
            logger.info("Generated %s successfully", path)
            return retval

        except Exception:
            # Best effort: one failing page must not abort the whole run.
            # Only Exception is caught, so KeyboardInterrupt/SystemExit can
            # still stop the process.
            logger.error("Could not generate %s", path, exc_info=True)
            return None
Loading