diff --git a/requirements.txt b/requirements.txt index bbfb0c74..742d908f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,7 @@ django-tastypie==0.9.16 django-waffle==0.9.1 -e git+git://github.com/scieloorg/django-htmlmin.git#egg=django-htmlmin -e git+git://github.com/scieloorg/django-cache-machine.git#egg=django-cache-machine -packtools==0.4dev-20140908 +packtools==0.5dev-20140924 Celery django-celery django-kombu diff --git a/scielomanager/articletrack/templates/articletrack/notice_detail.html b/scielomanager/articletrack/templates/articletrack/notice_detail.html index 8bb5f1bf..0537ef3b 100644 --- a/scielomanager/articletrack/templates/articletrack/notice_detail.html +++ b/scielomanager/articletrack/templates/articletrack/notice_detail.html @@ -148,20 +148,17 @@
{% trans "Detail:"%} {{ xml_data.can_be_analyzed.1 }}
+{{ xml_data.can_be_analyzed.1 }}
-{% trans "Detail:"%} {{ xml_exception }}
{{ error.level|upper }} |
@@ -43,7 +57,7 @@ {{ xml_data.validation_errors.errors_total_count }} {% {{ error.message }} {% endif %} |
- {{ error.count|default:"--" }} | +{{ count|default:"--" }} |
{% trans "packtools version" %}: {{ packtools_version }}
+ {% endblock main_content %} {% block extrafooter %} diff --git a/scielomanager/validator/templatetags/__init__.py b/scielomanager/validator/templatetags/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scielomanager/validator/templatetags/clean_uri.py b/scielomanager/validator/templatetags/clean_uri.py new file mode 100644 index 00000000..b8971838 --- /dev/null +++ b/scielomanager/validator/templatetags/clean_uri.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +import os + +from django.utils.safestring import mark_safe +from django import template + +register = template.Library() + +def clean_uri(text): + if text.startswith('http'): + return mark_safe(text) + else: + return mark_safe(os.path.basename(text)) + +register.filter('clean_uri', clean_uri) + diff --git a/scielomanager/validator/tests/doubles.py b/scielomanager/validator/tests/doubles.py new file mode 100644 index 00000000..4f19e176 --- /dev/null +++ b/scielomanager/validator/tests/doubles.py @@ -0,0 +1,42 @@ +import packtools + +# packtools.stylechecker double + + +class XMLValidatorDouble(packtools.XMLValidator): + def __init__(self, file, dtd=None, no_doctype=False): + pass + + def validate_all(self, fail_fast=False): + return True, None + + @property + def meta(self): + return { + 'article_title': 'HIV/AIDS knowledge among men who have sex with men: applying the item response theory', + 'issue_year': '2014', + 'journal_title': u'Revista de Sa\xfade P\xfablica', + 'journal_pissn': '0034-8910', + 'journal_eissn': '1518-8787', + 'issue_number': '2', + 'issue_volume': '48' + } + + +class XMLValidatorAnnotationsDouble(XMLValidatorDouble): + def annotate_errors(self, fail_fast=False): + return "some annotations in xml string" + + def validate_all(self, fail_fast=False): + error_list = [] + + class DummyError(object): + line = 1 + column = 6 + message = u'Premature end of data in tag xml line 1, line 1, column 6' + level_name = 'ERROR' + + for x in xrange(0,6): + 
error_list.append(DummyError()) + + return False, error_list diff --git a/scielomanager/validator/tests/tests_pages.py b/scielomanager/validator/tests/tests_pages.py index c2b84fa3..a579fbe2 100644 --- a/scielomanager/validator/tests/tests_pages.py +++ b/scielomanager/validator/tests/tests_pages.py @@ -13,7 +13,10 @@ from waffle import Flag -from articletrack.tests import doubles +from . import doubles +import pkg_resources + +PACKTOOLS_VERSION = pkg_resources.get_distribution('packtools').version def _get_test_xml_abspath(filename): @@ -55,12 +58,15 @@ def _addWaffleFlag(self): Flag.objects.create(name='packtools_validator', everyone=True) def _mocker_replace_stylechecker(self, with_annotations=False): - XML = self.mocker.replace('packtools.stylechecker.XML') - XML(mocker.ANY) + XMLValidator = self.mocker.replace('packtools.stylechecker.XMLValidator') + XMLValidator(mocker.ANY) if with_annotations: - self.mocker.result(doubles.StylecheckerAnnotationsDouble(mocker.ANY)) + self.mocker.result(doubles.XMLValidatorAnnotationsDouble(mocker.ANY)) + lxml_tostring = self.mocker.replace('lxml.etree.tostring') + lxml_tostring(mocker.ANY, mocker.KWARGS) + self.mocker.result("some annotations in xml string") else: - self.mocker.result(doubles.StylecheckerDouble(mocker.ANY)) + self.mocker.result(doubles.XMLValidatorDouble(mocker.ANY)) self.mocker.replay() def test_status_code_stylechecker_without_waffle_flag(self): @@ -127,7 +133,7 @@ def test_submit_empty_form_is_not_valid(self): self.assertEqual(form_errors, expected_errors) self.assertFalse(hasattr(response.context, 'results')) - def test_submite_invalid_url_then_form_is_not_valid(self): + def test_submit_invalid_url_then_form_is_not_valid(self): # with self._addWaffleFlag() page = self.app.get( @@ -168,17 +174,20 @@ def test_submit_valid_url_then_form_is_valid(self): self.assertTemplateUsed(response, 'validator/packtools.html') self.assertEqual(response.status_code, 200) self.assertTrue(response.context['form'].is_valid()) 
- import pkg_resources - packtools_version = pkg_resources.get_distribution('packtools').version + expected_results = { - 'validation_errors': {'errors_total_count': 0, 'results': []}, - 'annotations': None, - 'can_be_analyzed': (True, None), - 'packtools_version': packtools_version + 'results': None, + 'xml_exception': None, + 'packtools_version': PACKTOOLS_VERSION } - self.assertEqual(response.context['results'], expected_results) + self.assertEqual(response.context['results'], expected_results['results']) + self.assertEqual(response.context['xml_exception'], expected_results['xml_exception']) + self.assertEqual(response.context['packtools_version'], expected_results['packtools_version']) def test_submit_text_file_then_form_not_valid(self): + """ + Submitting a text file will raise a form validation error + """ # with self._addWaffleFlag() test_file = get_temporary_text_file() @@ -201,6 +210,9 @@ def test_submit_text_file_then_form_not_valid(self): self.assertEqual(form.errors, expected_errors) def test_submit_image_file_then_form_not_valid(self): + """ + Submitting an image file will raise a form validation error + """ # with self._addWaffleFlag() test_file = get_temporary_image_file() @@ -222,7 +234,11 @@ def test_submit_image_file_then_form_not_valid(self): } self.assertEqual(form.errors, expected_errors) - def test_submit_valid_xml_file_then_form_not_valid(self): + def test_submit_valid_xml_file_then_get_annotations_form_valid(self): + """ + Submitting an xml file that generates annotations will leave the form valid, + and xml validation will return annotations + """ # with self._addWaffleFlag() self._mocker_replace_stylechecker(with_annotations=True) @@ -238,13 +254,24 @@ def test_submit_valid_xml_file_then_form_not_valid(self): ) # then form = response.context['form'] + xml_exception = response.context['xml_exception'] results = response.context['results'] self.assertEqual(200, response.status_code) self.assertTrue(response.context['form'].is_valid()) -
self.assertEqual((True, None), results['can_be_analyzed']) - self.assertIsNotNone(results['validation_errors']) - self.assertIsNotNone(results['annotations']) - self.assertTrue(len(results['validation_errors']['results']) > 0) - self.assertTemplateUsed('validator/packtools.html') + + self.assertIsNotNone(results) # have some results + self.assertEqual(results.keys(), ['validation_errors', 'meta', 'annotations']) + expected_meta = { + 'article_title': 'HIV/AIDS knowledge among men who have sex with men: applying the item response theory', + 'issue_year': '2014', + 'journal_title': u'Revista de Sa\xfade P\xfablica', + 'journal_pissn': '0034-8910', + 'journal_eissn': '1518-8787', + 'issue_number': '2', + 'issue_volume': '48' + } + self.assertEqual(results['meta'], expected_meta) + self.assertEqual(len(results['validation_errors']), 1) + self.assertEqual(results['annotations'], "some annotations in xml string") diff --git a/scielomanager/validator/utils.py b/scielomanager/validator/utils.py index b4c2f5a6..ee6c32d2 100644 --- a/scielomanager/validator/utils.py +++ b/scielomanager/validator/utils.py @@ -2,7 +2,7 @@ import logging import lxml import pkg_resources -from packtools import stylechecker +import packtools logger = logging.getLogger(__name__) @@ -12,154 +12,60 @@ PACKTOOLS_VERSION = None -class ErrorCollection(object): - - def __init__(self): - self._errors = [] - - def add_object_error(self, error_obj=None, line='--', column='--', message='', level="ERROR", allow_repeted=True): - if error_obj: - line = getattr(error_obj, 'line', line) - column = getattr(error_obj, 'column', column) - message = getattr(error_obj, 'message', message) - level = getattr(error_obj, 'level', level) - - error_data = { - 'line': line, - 'column': column, - 'message': message, - 'level': level, - } - if (error_data not in self._errors) or allow_repeted: - # if error was not included yet, let's append it. 
- # if error already included but allow_repeted == True, it's ok to append it again. - self._errors.append(error_data) - - def add_exception_error(self, exception_instance, allow_repeted=True): - message = exception_instance.message - if hasattr(exception_instance, 'position'): - line, column = exception_instance.position - else: - line, column = None, None - self.add_object_error(error_obj=None, line=line, column=column, message=message, allow_repeted=allow_repeted) - - def add_list_of_errors(self, iterable, allow_repeted=True): - if iterable: - for error in iterable: - self.add_object_error(error, allow_repeted=allow_repeted) - - def get_list(self): - return self._errors - - def get_list_uniques_and_counts(self): - """ - return a list, with dicts of errors (without repeted) and - each error has the count (of ocurrences) of this error - """ - unique_errors = [] # collect errors without repeated - occurs = [] # collect error counts, mapped by index of the unique_errors list - for error in self._errors: - if error in unique_errors: - # already included, only increments count - error_idx = unique_errors.index(error) - occurs[error_idx] += 1 - else: - unique_errors.append(error) - error_idx = unique_errors.index(error) - occurs.append(1) - result = [] - for error in unique_errors: - elem = error - error_idx = unique_errors.index(error) - elem['count'] = occurs[error_idx] - result.append(elem) - return result - - def get_total_count(self): - """ return the total count of errors found in validation """ - return len(self._errors) - - def get_lines(self): - result = [] - for error in self._errors: - line = error['line'] - if line not in result and line > 0: - result.append(str(line)) - return result - - - -class StyleCheckerAnalyzer(object): - target_input = None - _target_data = None - _can_be_analyzed = (False, "Can't be analyzed") - _can_be_analyzed_as_exception = False - _annotations = None - _error_collection = None - _packtools_version = PACKTOOLS_VERSION - - def 
__init__(self, target_input): - if not bool(target_input): - raise ValueError("Can't analyze, target is None or empty") - self.target_input = target_input - try: - self._target_data = stylechecker.XML(self.target_input) - self._can_be_analyzed = (True, "") - except lxml.etree.XMLSyntaxError as e: - self._target_data = e - self._can_be_analyzed_as_exception = True - except IOError as e: - self._can_be_analyzed = (False, "IOError while starting Stylechecker.XML(), please verify if the input is correct") - except Exception as e: - self._can_be_analyzed = (False, "Error while starting Stylechecker.XML()") - self._validation_errors = {'results': [], 'errors_total_count': 0, } - self._error_collection = ErrorCollection() - - def get_validation_errors(self): - """ - returns a dict like { 'results' : ... , 'error_lines': 0} - 'results' is a dict with a structure necessary to display errors table (error level, line, cols, message) - 'errors_total_count' is a number that indicates the total of errors detected in validation - """ - self._validation_errors['results'] = self._error_collection.get_list_uniques_and_counts() - self._validation_errors['errors_total_count'] = self._error_collection.get_total_count() - return self._validation_errors - - def get_version(self): - return self._packtools_version - - def analyze(self): - results = { - 'can_be_analyzed': (False, "Can't be analyzed"), - 'annotations': None, - 'validation_errors': None, - 'packtools_version': self.get_version() - } - if self._can_be_analyzed_as_exception: - # in case of exceptions: self._target_data is the exception - self._annotations = self._target_data.message - self._error_collection.add_exception_error(self._target_data, allow_repeted=False) - results['can_be_analyzed'] = (True, None) - elif self._can_be_analyzed[0]: - try: - vs_status, vs_errors = self._target_data.validate_style() - v_status, v_errors = self._target_data.validate() - except Exception as e: - self._annotations = e.message - 
self._error_collection.add_exception_error(e, allow_repeted=False) - results['can_be_analyzed'] = (True, None) - else: - if not vs_status or not v_status: # have errors - self._target_data.annotate_errors() - self._annotations = str(self._target_data) - if not vs_status: - self._error_collection.add_list_of_errors(vs_errors) - if not v_status: - self._error_collection.add_list_of_errors(v_errors) - results['can_be_analyzed'] = (True, None) - else: - results['can_be_analyzed'] = self._can_be_analyzed - - results['annotations'] = self._annotations - results['validation_errors'] = self.get_validation_errors() - return results +def count(target, collection, key): + """Total target count on collection. + + :param key: callable to get the comparison value + """ + occurences = sum([1 for item in collection if key(item) == key(target)]) + return occurences + + +def make_error_filter(key): + """Filtering function factory + + :param key: callable to get the filtering value + """ + known_errors = set() + def err_filter(err): + _err = key(err) + + is_known = _err in known_errors + if is_known == False: + known_errors.add(_err) + + return not is_known + return err_filter + + +def analyze_xml(file): + """Analyzes `file` against packtools' XMLValidator. 
+ """ + result = err = None + + try: + xml = packtools.XMLValidator(file) + + except (lxml.etree.XMLSyntaxError, IOError, ValueError) as e: + err = e + + else: + status, errors = xml.validate_all() + + if not status: + err_xml = lxml.etree.tostring(xml.annotate_errors(), + pretty_print=True, encoding='utf-8', xml_declaration=True) + + err_list = ((error, count(error, errors, lambda x: x.message)) for error in errors) + + err_filter = make_error_filter(lambda x: x[0].message) + unique_err_list = filter(err_filter, err_list) + + result = { + 'annotations': err_xml, + 'validation_errors': unique_err_list, + 'meta': xml.meta, + } + + return result, err + diff --git a/scielomanager/validator/views.py b/scielomanager/validator/views.py index 956aaa8b..d00b9a25 100644 --- a/scielomanager/validator/views.py +++ b/scielomanager/validator/views.py @@ -8,16 +8,11 @@ from . import utils -def __prepare_and_analyze(data_input): - """ Normalize input to feed the stylechecker and obtain results """ - analyzer = utils.StyleCheckerAnalyzer(data_input) - return analyzer.analyze() - - @waffle_flag('packtools_validator') def packtools_home(request, template_name='validator/packtools.html'): context = { 'SETTINGS_MAX_UPLOAD_SIZE' : settings.VALIDATOR_MAX_UPLOAD_SIZE, + 'packtools_version': utils.PACKTOOLS_VERSION, } form = forms.StyleCheckerForm() @@ -26,12 +21,14 @@ def packtools_home(request, template_name='validator/packtools.html'): if form.is_valid(): type = form.cleaned_data['type'] if type == 'url': - url = form.cleaned_data['url'] - results = __prepare_and_analyze(url) + xml_file = form.cleaned_data['url'] else: xml_file = request.FILES['file'] - results = __prepare_and_analyze(xml_file) + + results, exc = utils.analyze_xml(xml_file) context['results'] = results + context['xml_exception'] = getattr(exc, 'message', None) + else: form = forms.StyleCheckerForm()