Skip to content

Commit 50818bc

Browse files
committed
Replaced _tagStack with _contentFlags, tweaked DefaultHandler
That fixed tautologistics/node-htmlparser#29.
1 parent 590eaad commit 50818bc

File tree

3 files changed

+109
-91
lines changed

3 files changed

+109
-91
lines changed

lib/DefaultHandler.js

+31-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ var ElementType = require("./ElementType.js");
33
function DefaultHandler(callback, options) {
44
this.dom = [];
55
this._done = false;
6+
this._inSpecialTag = false;
67
this._tagStack = [];
78
if (options) {
89
this._options = options;
@@ -45,6 +46,7 @@ var emptyTags = {
4546
DefaultHandler.prototype.reset = function() {
4647
this.dom = [];
4748
this._done = false;
49+
this._inSpecialTag = false;
4850
this._tagStack = [];
4951
};
5052
//Signals the handler that parsing is done
@@ -71,19 +73,36 @@ DefaultHandler.prototype._closeTag = function(name) {
7173

7274
var pos = this._tagStack.length - 1;
7375
while (pos !== -1 && this._tagStack[pos--].name !== name) {}
74-
if (++pos !== 0 || this._tagStack[0].name === name)
75-
this._tagStack.splice(pos, this._tagStack.length);
76+
if (pos !== -1 || this._tagStack[0].name === name)
77+
this._tagStack.splice(pos + 1);
7678
};
7779

7880
DefaultHandler.prototype._addDomElement = function(element) {
7981
if (!this._options.verbose) delete element.raw;
8082

81-
var lastTag = this._tagStack[this._tagStack.length - 1];
83+
var lastTag = this._tagStack[this._tagStack.length - 1],
84+
tmp;
8285
if (!lastTag) this.dom.push(element);
8386
else {
8487
//There are parent elements
85-
if (!lastTag.children) lastTag.children = [element];
86-
else lastTag.children.push(element);
88+
if (!lastTag.children) {
89+
lastTag.children = [element];
90+
return;
91+
}
92+
tmp = lastTag.children[lastTag.children.length - 1];
93+
if (
94+
element.type === ElementType.Comment &&
95+
tmp.type === ElementType.Comment
96+
) {
97+
tmp.data += element.data;
98+
if (this._options.verbose) tmp.raw = tmp.data;
99+
} else if (this._inSpecialTag && element.type === ElementType.Text) {
100+
if (tmp.type !== ElementType.Text) lastTag.children.push(element);
101+
else {
102+
tmp.data += element.data;
103+
if (this._options.verbose) tmp.raw = tmp.data;
104+
}
105+
} else lastTag.children.push(element);
87106
}
88107
};
89108

@@ -92,6 +111,13 @@ DefaultHandler.prototype._openTag = function(element) {
92111

93112
this._addDomElement(element);
94113

114+
if (
115+
element.type === ElementType.Script ||
116+
element.type === ElementType.Style
117+
) {
118+
this._inSpecialTag = true;
119+
}
120+
95121
//Don't add tags to the tag stack that can't have children
96122
if (!this._isEmptyTag(element.name)) this._tagStack.push(element);
97123
};

lib/Parser.js

+75-83
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ function Parser(handler, options) {
1111

1212
this._buffer = "";
1313
this._prevTagSep = "";
14+
this._contentFlags = 0;
1415
this._done = false;
15-
this._tagStack = [];
1616
this._elements = [];
1717
this._current = 0;
1818
this._location = {
@@ -82,7 +82,7 @@ Parser.prototype.done = function() {
8282
this._elements.push(element);
8383
}
8484

85-
this.writeHandler(true);
85+
this.writeHandler();
8686
this._handler.done();
8787
};
8888

@@ -92,14 +92,14 @@ Parser.prototype.reset = function() {
9292
this._prevTagSep = "";
9393
this._done = false;
9494
this._current = 0;
95+
this._contentFlags = 0;
9596
this._location = {
9697
row: 0,
9798
col: 0,
9899
charOffset: 0,
99100
inBuffer: 0
100101
};
101102
this._parseState = ElementType.Text;
102-
this._tagStack = [];
103103
this._elements = [];
104104
this._handler.reset();
105105
};
@@ -132,13 +132,18 @@ var parseTagName = function(data) {
132132
return match[1] + match[2];
133133
};
134134

135+
//Special tags that are treated differently
136+
var SpecialTags = {};
137+
SpecialTags[ElementType.Style] = 1; //2^0
138+
SpecialTags[ElementType.Script] = 2; //2^1
139+
SpecialTags["w"] = 4; //2^2 - if set, append prev tag sep to data
140+
SpecialTags[ElementType.Comment] = 8; //2^3
141+
135142
//Parses through HTML text and returns an array of found elements
136143
Parser.prototype.parseTags = function() {
137-
var buffer = this._buffer,
138-
stack = this._tagStack;
144+
var buffer = this._buffer;
139145

140146
var next,
141-
type,
142147
tagSep,
143148
rawData,
144149
element,
@@ -174,75 +179,65 @@ Parser.prototype.parseTags = function() {
174179
elementName = "";
175180
}
176181

177-
type = stack[stack.length - 1];
178-
179182
//This section inspects the current tag stack and modifies the current
180183
//element if we're actually parsing a special area (script/comment/style tag)
181-
if (type === ElementType.Comment) {
184+
if (this._contentFlags === 0) {
185+
/*do nothing*/
186+
} else if (this._contentFlags >= SpecialTags[ElementType.Comment]) {
182187
//We're currently in a comment tag
183-
184-
prevElement = this._elements[this._elements.length - 1];
188+
elementType = ElementType.Comment; //Change the current element's type to a comment
185189

186190
if (tagSep === ">" && rawData.substr(-2) === "--") {
187191
//comment ends
188-
stack.pop();
189-
rawData = rawData.slice(0, -2);
190-
//If the previous element is a comment, append the current text to it
191-
if (prevElement && prevElement.type === ElementType.Comment) {
192-
//Previous element was a comment
193-
prevElement.data = prevElement.raw += rawData;
194-
//This causes the current element to not be added to the element list
195-
rawData = elementData = "";
196-
elementType = ElementType.Text;
197-
} else elementType = ElementType.Comment; //Change the current element's type to a comment
198-
} else {
199-
//Still in a comment tag
200-
elementType = ElementType.Comment;
201-
//If the previous element is a comment, append the current text to it
202-
if (prevElement && prevElement.type === ElementType.Comment) {
203-
prevElement.data = prevElement.raw += rawData + tagSep;
204-
//This causes the current element to not be added to the element list
205-
rawData = elementData = "";
206-
elementType = ElementType.Text;
207-
} else elementData = rawData += tagSep;
208-
}
209-
} else if (type === ElementType.Script && elementName === "/script")
210-
stack.pop();
211-
else if (type === ElementType.Style && elementName === "/style")
212-
stack.pop();
192+
this._contentFlags -= SpecialTags[ElementType.Comment];
193+
elementData = rawData = rawData.slice(0, -2);
194+
} else elementData = rawData += tagSep;
195+
this._prevTagSep = tagSep;
196+
}
197+
//if it's a closing tag, remove the flag
213198
else if (
214-
!this._options.xmlMode &&
215-
(type === ElementType.Script || type === ElementType.Style)
199+
this._contentFlags >= SpecialTags[ElementType.Script] &&
200+
elementName === "/script"
201+
) {
202+
this._contentFlags %= SpecialTags["w"]; //remove the written flag
203+
this._contentFlags -= SpecialTags[ElementType.Script];
204+
} else if (
205+
this._contentFlags >= SpecialTags[ElementType.Style] &&
206+
elementName === "/style"
216207
) {
217-
//special behaviour for script & style tags
218-
if (rawData.substring(0, 3) !== "!--") {
219-
//Make sure we're not in a comment
220-
//All data from here to style close is now a text element
221-
elementType = ElementType.Text;
222-
//If the previous element is text, append the current text to it
223-
prevElement = this._elements[this._elements.length - 1];
224-
if (prevElement && prevElement.type === ElementType.Text) {
225-
prevElement.data = prevElement.raw +=
226-
this._prevTagSep + rawData;
227-
//This causes the current element to not be added to the element list
228-
rawData = elementData = "";
229-
} else elementData = rawData; //The previous element was not text
208+
this._contentFlags %= SpecialTags["w"]; //remove the written flag
209+
this._contentFlags -= SpecialTags[ElementType.Style];
210+
}
211+
//special behaviour for script & style tags
212+
//Make sure we're not in a comment
213+
else if (!this._options.xmlMode && rawData.substring(0, 3) !== "!--") {
214+
//All data from here to style close is now a text element
215+
elementType = ElementType.Text;
216+
//If the previous element is text, append the last tag sep to element
217+
if (this._contentFlags >= SpecialTags["w"]) {
218+
elementData = rawData = this._prevTagSep + rawData;
219+
} else {
220+
//The previous element was not text
221+
this._contentFlags += SpecialTags["w"];
222+
elementData = rawData;
230223
}
224+
this._prevTagSep = tagSep;
231225
}
232226

233227
//Processing of non-special tags
234228
if (elementType === ElementType.Tag) {
235229
if (rawData.substring(0, 3) === "!--") {
236230
//This tag is really comment
237231
elementType = ElementType.Comment;
238-
elementData = rawData = rawData.substr(3);
232+
this._contentFlags %= SpecialTags["w"]; //remove the written flag
239233
//Check if the comment is terminated in the current element
240234
if (tagSep === ">" && rawData.substr(-2) === "--")
241-
elementData = rawData = rawData.slice(0, -2);
235+
elementData = rawData = rawData.slice(3, -2);
242236
else {
243237
//It's not so push the comment onto the tag stack
244-
rawData += tagSep;
245-
stack.push(ElementType.Comment);
238+
elementData = rawData = rawData.substr(3) + tagSep;
239+
this._contentFlags += SpecialTags[ElementType.Comment];
240+
this._prevTagSep = tagSep;
246241
}
247242
} else {
248243
includeName = true;
@@ -259,13 +254,17 @@ Parser.prototype.parseTags = function() {
259254
} else if (elementName === "script") {
260255
elementType = ElementType.Script;
261256
//Special tag, push onto the tag stack if not terminated
262-
if (elementData.substr(-1) !== "/")
263-
stack.push(ElementType.Script);
257+
if (elementData.substr(-1) !== "/") {
258+
this._contentFlags += SpecialTags[ElementType.Script];
259+
this._prevTagSep = tagSep;
260+
}
264261
} else if (elementName === "style") {
265262
elementType = ElementType.Style;
266263
//Special tag, push onto the tag stack if not terminated
267-
if (elementData.substr(-1) !== "/")
268-
stack.push(ElementType.Style);
264+
if (elementData.substr(-1) !== "/") {
265+
this._contentFlags += SpecialTags[ElementType.Style];
266+
this._prevTagSep = tagSep;
267+
}
269268
}
270269
}
271270
}
@@ -298,32 +297,26 @@ Parser.prototype.parseTags = function() {
298297
/*
299298
switch(elementType){
300299
case ElementType.Text:
301-
this._handler.ontext(rawData);
302-
break;
303-
case ElementType.Tag:
304-
case ElementType.Style:
305-
case ElementType.Script:
306-
if(elementName[0] === "/") this._handler.onclosetag(elementName.substr(1));
307-
else this._handler.onopentag(elementName, parseAttributes(elementData));
300+
this._handler.writeText(element);
308301
break;
309302
case ElementType.Comment:
310-
this._handler.oncomment(rawData);
303+
this._handler.writeComment(element);
311304
break;
312305
case ElementType.Directive:
313-
this._handler.onprocessinginstruction(rawData);
306+
this._handler.writeDirective(element);
314307
break;
315-
default: throw Error("Unsupported type: " + elementType);
308+
//case ElementType.Tag:
309+
//case ElementType.Style:
310+
//case ElementType.Script:
311+
default:
312+
if(elementName[0] === "/") this._handler._closeTag(elementName.substr(1));
313+
else this._handler._openTag(elementName, parseAttributes(elementData));
316314
}
317315
*/
318316

319317
//If tag self-terminates, add an explicit, separate closing tag
320-
if (
321-
elementType !== ElementType.Text &&
322-
elementType !== ElementType.Comment &&
323-
elementType !== ElementType.Directive &&
324-
elementData.substr(-1) === "/"
325-
) {
326-
//this._handler.onclosetag(elementName);
318+
if (tagTypes[elementType] && elementData.substr(-1) === "/") {
319+
//this._handler._closeTag(elementName);
327320
this._elements.push({
328321
raw: (elementName = "/" + elementName),
329322
data: elementName,
@@ -334,7 +327,6 @@ Parser.prototype.parseTags = function() {
334327
}
335328
this._parseState = tagSep === "<" ? ElementType.Tag : ElementType.Text;
336329
this._current = next + 1;
337-
this._prevTagSep = tagSep;
338330
}
339331

340332
if (this._options.includeLocation) {
@@ -360,17 +352,18 @@ Parser.prototype.getLocation = function(startTag) {
360352
(end = this._current), (chunk = false);
361353
}
362354

363-
var rows = this._buffer
364-
.substring(l.charOffset, (l.charOffset = end))
365-
.split("\n"),
355+
var rows = this._buffer.substring(l.charOffset, end).split("\n"),
366356
rowNum = rows.length - 1;
367357

358+
l.charOffset = end;
368359
l.inBuffer += rowNum;
369360

370361
var num = rows[rowNum].replace(_reRow, "").length;
371-
if (rowNum == 0) l.col += num;
362+
if (rowNum === 0) l.col += num;
372363
else l.col = num;
373364

365+
if (arguments.length === 0) return;
366+
374367
return {
375368
line: l.row + l.inBuffer + 1,
376369
col: l.col + (chunk ? 0 : 1)
@@ -394,8 +387,7 @@ var validateHandler = function(handler) {
394387
};
395388

396389
//Writes parsed elements out to the handler
397-
Parser.prototype.writeHandler = function(forceFlush) {
398-
if (this._tagStack.length && !forceFlush) return;
390+
Parser.prototype.writeHandler = function() {
399391
while (this._elements.length) {
400392
var element = this._elements.shift();
401393
switch (element.type) {

tests/23-template_script_tags.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ exports.options = {
33
handler: {},
44
parser: {}
55
};
6-
exports.html = '<script type="text/template"> <h1>Heading1</h1></script>';
6+
exports.html = '<script type="text/template"><h1>Heading1</h1></script>';
77
exports.expected = [
88
{
99
raw: 'script type="text/template"',
@@ -13,8 +13,8 @@ exports.expected = [
1313
attribs: { type: "text/template" },
1414
children: [
1515
{
16-
raw: " <h1>Heading1</h1>",
17-
data: " <h1>Heading1</h1>",
16+
raw: "<h1>Heading1</h1>",
17+
data: "<h1>Heading1</h1>",
1818
type: "text"
1919
}
2020
]

0 commit comments

Comments
 (0)