Skip to content

Commit d6d3b45

Browse files
committed
Merge branch 'feature/win_builds' into develop
2 parents f16c7e2 + 8ae14ab commit d6d3b45

29 files changed

+2129
-1473
lines changed

CHANGELOG

+8
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,17 @@ v0.4.1 (unreleased):
88
includes when denoting tags, but not comments.
99
- Fixed the behavior of preserve_spacing in Template.add() and keep_field in
1010
Template.remove() on parameters with hidden keys.
11+
- Removed _ListProxy.detach(). SmartLists now use weak references and their
12+
children are garbage-collected properly.
1113
- Fixed parser bugs involving:
1214
- templates with completely blank names;
1315
- templates with newlines and comments.
16+
- Heavy refactoring and fixes to the C tokenizer, including:
17+
- corrected a design flaw in text handling, allowing for substantial speed
18+
improvements when parsing long strings of plain text;
19+
- implemented new Python 3.3 PEP 393 Unicode APIs.
20+
- Fixed various bugs in SmartList, including one that was causing memory issues
21+
on 64-bit builds of Python 2 on Windows.
1422
- Fixed some bugs in the release scripts.
1523

1624
v0.4 (released May 23, 2015):

appveyor.yml

+64
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
# This config file is used by appveyor.com to build Windows release binaries
2+
3+
version: 0.4.1.dev0-b{build}
4+
5+
branches:
6+
only:
7+
- master
8+
9+
skip_tags: true
10+
11+
environment:
12+
global:
13+
# See: http://stackoverflow.com/a/13751649/163740
14+
WRAPPER: "cmd /E:ON /V:ON /C .\\scripts\\win_wrapper.cmd"
15+
PIP: "%WRAPPER% %PYTHON%\\Scripts\\pip.exe"
16+
SETUPPY: "%WRAPPER% %PYTHON%\\python setup.py --with-extension"
17+
PYPI_USERNAME: "earwigbot"
18+
PYPI_PASSWORD:
19+
secure: gOIcvPxSC2ujuhwOzwj3v8xjq3CCYd8keFWVnguLM+gcL0e02qshDHy7gwZZwj0+
20+
21+
matrix:
22+
- PYTHON: "C:\\Python27"
23+
PYTHON_VERSION: "2.7"
24+
PYTHON_ARCH: "32"
25+
26+
- PYTHON: "C:\\Python27-x64"
27+
PYTHON_VERSION: "2.7"
28+
PYTHON_ARCH: "64"
29+
30+
- PYTHON: "C:\\Python33"
31+
PYTHON_VERSION: "3.3"
32+
PYTHON_ARCH: "32"
33+
34+
- PYTHON: "C:\\Python33-x64"
35+
PYTHON_VERSION: "3.3"
36+
PYTHON_ARCH: "64"
37+
38+
- PYTHON: "C:\\Python34"
39+
PYTHON_VERSION: "3.4"
40+
PYTHON_ARCH: "32"
41+
42+
- PYTHON: "C:\\Python34-x64"
43+
PYTHON_VERSION: "3.4"
44+
PYTHON_ARCH: "64"
45+
46+
install:
47+
- "%PIP% install wheel twine"
48+
49+
build_script:
50+
- "%SETUPPY% build"
51+
52+
test_script:
53+
- "%SETUPPY% -q test"
54+
55+
after_test:
56+
- "%SETUPPY% bdist_wheel"
57+
58+
on_success:
59+
- "twine upload dist\\* -u %PYPI_USERNAME% -p %PYPI_PASSWORD%"
60+
61+
artifacts:
62+
- path: dist\*
63+
64+
deploy: off

docs/changelog.rst

+13-2
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,24 @@ Unreleased
1313
- Added support for Python 3.5.
1414
- ``<`` and ``>`` are now disallowed in wikilink titles and template names.
1515
This includes when denoting tags, but not comments.
16-
- Fixed the behavior of *preserve_spacing* in :func:`~.Template.add` and
17-
*keep_field* in :func:`~.Template.remove` on parameters with hidden keys.
16+
- Fixed the behavior of *preserve_spacing* in :meth:`.Template.add` and
17+
*keep_field* in :meth:`.Template.remove` on parameters with hidden keys.
18+
- Removed :meth:`._ListProxy.detach`. :class:`.SmartList`\ s now use weak
19+
references and their children are garbage-collected properly.
1820
- Fixed parser bugs involving:
1921

2022
- templates with completely blank names;
2123
- templates with newlines and comments.
2224

25+
- Heavy refactoring and fixes to the C tokenizer, including:
26+
27+
- corrected a design flaw in text handling, allowing for substantial speed
28+
improvements when parsing long strings of plain text;
29+
- implemented new Python 3.3
30+
`PEP 393 <https://www.python.org/dev/peps/pep-0393/>`_ Unicode APIs.
31+
32+
- Fixed various bugs in :class:`.SmartList`, including one that was causing
33+
memory issues on 64-bit builds of Python 2 on Windows.
2334
- Fixed some bugs in the release scripts.
2435

2536
v0.4

mwparserfromhell/compat.py

-2
Original file line number | Diff line number | Diff line change
@@ -18,14 +18,12 @@
1818
bytes = bytes
1919
str = str
2020
range = range
21-
maxsize = sys.maxsize
2221
import html.entities as htmlentities
2322

2423
else:
2524
bytes = str
2625
str = unicode
2726
range = xrange
28-
maxsize = sys.maxint
2927
import htmlentitydefs as htmlentities
3028

3129
del sys

mwparserfromhell/definitions.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -81,10 +81,8 @@ def is_single_only(tag):
8181
"""Return whether or not the given *tag* must exist without a close tag."""
8282
return tag.lower() in SINGLE_ONLY
8383

84-
def is_scheme(scheme, slashes=True, reverse=False):
84+
def is_scheme(scheme, slashes=True):
8585
"""Return whether *scheme* is valid for external links."""
86-
if reverse: # Convenience for C
87-
scheme = scheme[::-1]
8886
scheme = scheme.lower()
8987
if slashes:
9088
return scheme in URI_SCHEMES
+125
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,125 @@
1+
/*
2+
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy of
5+
this software and associated documentation files (the "Software"), to deal in
6+
the Software without restriction, including without limitation the rights to
7+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8+
of the Software, and to permit persons to whom the Software is furnished to do
9+
so, subject to the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included in all
12+
copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
SOFTWARE.
21+
*/
22+
23+
#pragma once
24+
25+
#ifndef PY_SSIZE_T_CLEAN
26+
#define PY_SSIZE_T_CLEAN // See: https://docs.python.org/2/c-api/arg.html
27+
#endif
28+
29+
#include <Python.h>
30+
#include <structmember.h>
31+
#include <bytesobject.h>
32+
33+
/* Compatibility macros */
34+
35+
#if PY_MAJOR_VERSION >= 3
36+
#define IS_PY3K
37+
#endif
38+
39+
#ifndef uint64_t
40+
#define uint64_t unsigned PY_LONG_LONG
41+
#endif
42+
43+
#define malloc PyObject_Malloc // XXX: yuck
44+
#define realloc PyObject_Realloc
45+
#define free PyObject_Free
46+
47+
/* Unicode support macros */
48+
49+
#if defined(IS_PY3K) && PY_MINOR_VERSION >= 3
50+
#define PEP_393
51+
#endif
52+
53+
#ifdef PEP_393
54+
#define Unicode Py_UCS4
55+
#define PyUnicode_FROM_SINGLE(chr) \
56+
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, &(chr), 1)
57+
#else
58+
#define Unicode Py_UNICODE
59+
#define PyUnicode_FROM_SINGLE(chr) \
60+
PyUnicode_FromUnicode(&(chr), 1)
61+
#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE
62+
#endif
63+
64+
/* Error handling macros */
65+
66+
#define BAD_ROUTE self->route_state
67+
#define BAD_ROUTE_CONTEXT self->route_context
68+
#define FAIL_ROUTE(context) { \
69+
self->route_state = 1; \
70+
self->route_context = context; \
71+
}
72+
#define RESET_ROUTE() self->route_state = 0
73+
74+
/* Shared globals */
75+
76+
extern char** entitydefs;
77+
78+
extern PyObject* NOARGS;
79+
extern PyObject* definitions;
80+
81+
/* Structs */
82+
83+
typedef struct {
84+
Py_ssize_t capacity;
85+
Py_ssize_t length;
86+
#ifdef PEP_393
87+
PyObject* object;
88+
int kind;
89+
void* data;
90+
#else
91+
Py_UNICODE* data;
92+
#endif
93+
} Textbuffer;
94+
95+
struct Stack {
96+
PyObject* stack;
97+
uint64_t context;
98+
Textbuffer* textbuffer;
99+
struct Stack* next;
100+
};
101+
typedef struct Stack Stack;
102+
103+
typedef struct {
104+
PyObject* object; /* base PyUnicodeObject object */
105+
Py_ssize_t length; /* length of object, in code points */
106+
#ifdef PEP_393
107+
int kind; /* object's kind value */
108+
void* data; /* object's raw unicode buffer */
109+
#else
110+
Py_UNICODE* buf; /* object's internal buffer */
111+
#endif
112+
} TokenizerInput;
113+
114+
typedef struct {
115+
PyObject_HEAD
116+
TokenizerInput text; /* text to tokenize */
117+
Stack* topstack; /* topmost stack */
118+
Py_ssize_t head; /* current position in text */
119+
int global; /* global context */
120+
int depth; /* stack recursion depth */
121+
int cycles; /* total number of stack recursions */
122+
int route_state; /* whether a BadRoute has been triggered */
123+
uint64_t route_context; /* context when the last BadRoute was triggered */
124+
int skip_style_tags; /* temp fix for the sometimes broken tag parser */
125+
} Tokenizer;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,105 @@
1+
/*
2+
Copyright (C) 2012-2015 Ben Kurtovic <ben.kurtovic@gmail.com>
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy of
5+
this software and associated documentation files (the "Software"), to deal in
6+
the Software without restriction, including without limitation the rights to
7+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
8+
of the Software, and to permit persons to whom the Software is furnished to do
9+
so, subject to the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included in all
12+
copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20+
SOFTWARE.
21+
*/
22+
23+
#pragma once
24+
25+
/* Local contexts */
26+
27+
#define LC_TEMPLATE 0x0000000000000007
28+
#define LC_TEMPLATE_NAME 0x0000000000000001
29+
#define LC_TEMPLATE_PARAM_KEY 0x0000000000000002
30+
#define LC_TEMPLATE_PARAM_VALUE 0x0000000000000004
31+
32+
#define LC_ARGUMENT 0x0000000000000018
33+
#define LC_ARGUMENT_NAME 0x0000000000000008
34+
#define LC_ARGUMENT_DEFAULT 0x0000000000000010
35+
36+
#define LC_WIKILINK 0x0000000000000060
37+
#define LC_WIKILINK_TITLE 0x0000000000000020
38+
#define LC_WIKILINK_TEXT 0x0000000000000040
39+
40+
#define LC_EXT_LINK 0x0000000000000180
41+
#define LC_EXT_LINK_URI 0x0000000000000080
42+
#define LC_EXT_LINK_TITLE 0x0000000000000100
43+
44+
#define LC_HEADING 0x0000000000007E00
45+
#define LC_HEADING_LEVEL_1 0x0000000000000200
46+
#define LC_HEADING_LEVEL_2 0x0000000000000400
47+
#define LC_HEADING_LEVEL_3 0x0000000000000800
48+
#define LC_HEADING_LEVEL_4 0x0000000000001000
49+
#define LC_HEADING_LEVEL_5 0x0000000000002000
50+
#define LC_HEADING_LEVEL_6 0x0000000000004000
51+
52+
#define LC_TAG 0x0000000000078000
53+
#define LC_TAG_OPEN 0x0000000000008000
54+
#define LC_TAG_ATTR 0x0000000000010000
55+
#define LC_TAG_BODY 0x0000000000020000
56+
#define LC_TAG_CLOSE 0x0000000000040000
57+
58+
#define LC_STYLE 0x0000000000780000
59+
#define LC_STYLE_ITALICS 0x0000000000080000
60+
#define LC_STYLE_BOLD 0x0000000000100000
61+
#define LC_STYLE_PASS_AGAIN 0x0000000000200000
62+
#define LC_STYLE_SECOND_PASS 0x0000000000400000
63+
64+
#define LC_DLTERM 0x0000000000800000
65+
66+
#define LC_SAFETY_CHECK 0x000000007F000000
67+
#define LC_HAS_TEXT 0x0000000001000000
68+
#define LC_FAIL_ON_TEXT 0x0000000002000000
69+
#define LC_FAIL_NEXT 0x0000000004000000
70+
#define LC_FAIL_ON_LBRACE 0x0000000008000000
71+
#define LC_FAIL_ON_RBRACE 0x0000000010000000
72+
#define LC_FAIL_ON_EQUALS 0x0000000020000000
73+
#define LC_HAS_TEMPLATE 0x0000000040000000
74+
75+
#define LC_TABLE 0x0000001F80000000
76+
#define LC_TABLE_CELL_LINE_CONTEXTS 0x0000001A00000000
77+
#define LC_TABLE_OPEN 0x0000000080000000
78+
#define LC_TABLE_CELL_OPEN 0x0000000100000000
79+
#define LC_TABLE_CELL_STYLE 0x0000000200000000
80+
#define LC_TABLE_ROW_OPEN 0x0000000400000000
81+
#define LC_TABLE_TD_LINE 0x0000000800000000
82+
#define LC_TABLE_TH_LINE 0x0000001000000000
83+
84+
/* Global contexts */
85+
86+
#define GL_HEADING 0x1
87+
88+
/* Aggregate contexts */
89+
90+
#define AGG_FAIL (LC_TEMPLATE | LC_ARGUMENT | LC_WIKILINK | LC_EXT_LINK_TITLE | LC_HEADING | LC_TAG | LC_STYLE | LC_TABLE_OPEN)
91+
#define AGG_UNSAFE (LC_TEMPLATE_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_TITLE | LC_TEMPLATE_PARAM_KEY | LC_ARGUMENT_NAME)
92+
#define AGG_DOUBLE (LC_TEMPLATE_PARAM_KEY | LC_TAG_CLOSE | LC_TABLE_ROW_OPEN)
93+
#define AGG_NO_WIKILINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK_URI)
94+
#define AGG_NO_EXT_LINKS (LC_TEMPLATE_NAME | LC_ARGUMENT_NAME | LC_WIKILINK_TITLE | LC_EXT_LINK)
95+
96+
/* Tag contexts */
97+
98+
#define TAG_NAME 0x01
99+
#define TAG_ATTR_READY 0x02
100+
#define TAG_ATTR_NAME 0x04
101+
#define TAG_ATTR_VALUE 0x08
102+
#define TAG_QUOTED 0x10
103+
#define TAG_NOTE_SPACE 0x20
104+
#define TAG_NOTE_EQUALS 0x40
105+
#define TAG_NOTE_QUOTE 0x80

0 commit comments

Comments
 (0)