Skip to content

Commit

Permalink
Change <ref> syntax
Browse files Browse the repository at this point in the history
Instead of a nonstandard void element <ref spec=FOO>, put the reference name in the element's text content: <ref>FOO</ref>.

This allows us to use standard HTML parser/serializer tooling with the HTML source, e.g. for whatwg/html-build#279.
  • Loading branch information
domenic committed Aug 6, 2023
1 parent 59c6e78 commit 4cabebb
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 76 deletions.
4 changes: 2 additions & 2 deletions Syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,11 @@ To produce the familiar bracketed references to other specifications, e.g.

> There are a number of dynamic selectors that can be used with HTML. This section defines when these selectors match HTML elements. [[SELECTORS]](https://html.spec.whatwg.org/#refsSELECTORS) [[CSSUI]](https://html.spec.whatwg.org/#refsCSSUI)
you can use the `<ref>` void element:
you can use the (nonstandard) `<ref>` element:

```html
<p>There are a number of dynamic selectors that can be used with HTML. This section defines when
these selectors match HTML elements. <ref spec=SELECTORS> <ref spec=CSSUI></p>
these selectors match HTML elements. <ref>SELECTORS</ref> <ref>CSSUI</ref></p>
```

These match against a bibliography, which is a manually-maintained and sorted `<dl>` at the end of the source file, identified by having the ID "`ref-list`". Its entries look like the following:
Expand Down
120 changes: 52 additions & 68 deletions src/html/htmlparser.pas
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,6 @@ TToken = record // this is an expensive type, as it is basically all token
const
Marker = nil;
var
FProprietaryVoids: specialize PlasticArray <TCanonicalString, TCanonicalString>;
FInputStream: TInputStream;
{$IFDEF PARSEERROR} FOnParseError: TParseErrorHandler; {$ENDIF}
FTokeniserState: TTokeniserState;
Expand Down Expand Up @@ -314,7 +313,6 @@ TToken = record // this is an expensive type, as it is basically all token
procedure SpoonFeed(const Data: UTF8String); // call this any number of times until all characters have been provided
{$ENDIF}
procedure SpoonFeed(const Data: Pointer; const Length: QWord); // call this any number of times until all characters have been provided
procedure RegisterProperietaryVoidElements(const TagNames: array of TCanonicalString);
function Parse(): TDocument; // then call this
// XXX need a fragment parsing mode (if we support fragment parsing, set FFragmentParsingMode to true)
{$IFDEF PARSEERROR} property OnParseError: TParseErrorHandler read FOnParseError write FOnParseError; {$ENDIF}
Expand Down Expand Up @@ -1136,15 +1134,6 @@ procedure THTMLParser.SpoonFeed(const Data: Pointer; const Length: QWord);
FInputStream.PushData(Data, Length);
end;

// Adds each tag name in TagNames to FProprietaryVoids, the parser's list of
// extra (non-standard) tag names. The "any other start tag" branch of the
// in-body insertion mode checks that list: when the token's tag name is in
// it, the element is popped off the stack of open elements immediately
// after being inserted.
//
// TagNames: the tag names to add; each one is pushed in order.
//
// When assertions are compiled in ({$IFOPT C+}), this asserts that
// FInputStream.WasStarted is false -- presumably meaning names must be
// registered before any input is processed; confirm against TInputStream.
//
// NOTE(review): "Properietary" in the procedure name looks like a
// misspelling of "Proprietary"; the spelling is kept because the
// declaration and callers use it.
procedure THTMLParser.RegisterProperietaryVoidElements(const TagNames: array of TCanonicalString);
var
Name: TCanonicalString;
begin
{$IFOPT C+} Assert(not FInputStream.WasStarted); {$ENDIF}
for Name in TagNames do
FProprietaryVoids.Push(Name); // append this name to the registered list
end;

function THTMLParser.Parse(): TDocument;
var
OldKind: TTokenKind;
Expand Down Expand Up @@ -1936,7 +1925,7 @@ procedure THTMLParser.Tokenise();
end;
BogusComment();
end;

procedure TryForCDATASection(); inline;
begin
// seen [
Expand Down Expand Up @@ -2125,7 +2114,7 @@ procedure THTMLParser.Tokenise();
CommentSize := 0;
repeat
case (FInputStream.CurrentCharacter.Value) of
$003E, kEOF: break;
$003E, kEOF: break;
$0000: Inc(CommentSize, FFFD.Length);
else Inc(CommentSize, FInputStream.CurrentCharacterLength);
end;
Expand Down Expand Up @@ -2562,7 +2551,7 @@ procedure THTMLParser.Tokenise();
{$IFDEF PARSEERROR} ParseError('unexpected U+0000 in attribute name'); {$ENDIF}
FCurrentToken.CurrentAttributeName.Append($FFFD);
end;
$0022, $0027, $003C:
$0022, $0027, $003C:
begin
{$IFDEF PARSEERROR} ParseError('invalid character in attribute name'); {$ENDIF}
FCurrentToken.CurrentAttributeName.Append(FInputStream.CurrentCharacter);
Expand Down Expand Up @@ -5907,7 +5896,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
if (Token.TagName = eBody) then
begin
{$IFDEF PARSEERROR} ParseError('unexpected body start tag'); {$ENDIF}
if ((FStackOfOpenElements.Length < 2) or
if ((FStackOfOpenElements.Length < 2) or
(not FStackOfOpenElements[1].IsIdentity(nsHTML, eBody)) or
(StackOfOpenElementsHas(nsHTML, eTemplate))) then
exit; // ignore the token
Expand All @@ -5930,7 +5919,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
if (Token.TagName = eFrameset) then
begin
{$IFDEF PARSEERROR} ParseError('unexpected body frameset start tag'); {$ENDIF}
if ((FStackOfOpenElements.Length < 2) or
if ((FStackOfOpenElements.Length < 2) or
(not FStackOfOpenElements[1].IsIdentity(nsHTML, eBody))) then
exit; // ignore the token
if (not FFramesetOkFlag) then
Expand Down Expand Up @@ -6298,11 +6287,6 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
// any other start tag
ReconstructTheActiveFormattingElements();
InsertAnHTMLElementFor(Token);
if (FProprietaryVoids.Contains(Token.TagName)) then
begin
FStackOfOpenElements.Pop();
{$IFDEF PARSEERROR} Token.AcknowledgeSelfClosingFlag(); {$ENDIF}
end;
end;
tkEndTag:
// in this section things are hoisted also
Expand Down Expand Up @@ -6381,7 +6365,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
if ((Token.TagName = eDiv) or
(Token.TagName = ePre) or
(Token.TagName = eOL) or
(Token.TagName = eDL)) then
(Token.TagName = eDL)) then
begin
if (not StackOfOpenElementsHasInScope(Token.TagName)) then
begin
Expand Down Expand Up @@ -6467,7 +6451,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
//(Token.TagName = ePre) or // hoisted
(Token.TagName = eSection) or
(Token.TagName = eSummary) or
(Token.TagName = eUL)) then
(Token.TagName = eUL)) then
begin
if (not StackOfOpenElementsHasInScope(Token.TagName)) then
begin
Expand Down Expand Up @@ -6815,7 +6799,7 @@ procedure THTMLParser.TheInTableInsertionMode(var Token: TToken);
exit;
end
else
if ((Token.TagName = eBody) or
if ((Token.TagName = eBody) or
(Token.TagName = eCaption) or
(Token.TagName = eCol) or
(Token.TagName = eColGroup) or
Expand Down Expand Up @@ -7869,7 +7853,7 @@ procedure THTMLParser.TheAfterFramesetInsertionMode(var Token: TToken);
end;
end;
tkExtraSpaceCharacter: InsertCharacters(Token.ExtraChars);
{$IFDEF PARSEERROR} tkExtraCharacters: ParseError('unexpected character token after frameset', Length(Token.ExtraChars)); {$ENDIF} // $R-
{$IFDEF PARSEERROR} tkExtraCharacters: ParseError('unexpected character token after frameset', Length(Token.ExtraChars)); {$ENDIF} // $R-
tkComment: InsertAComment(Token);
{$IFDEF PARSEERROR} tkDOCTYPE: ParseError('unexpected DOCTYPE'); {$ENDIF}
tkStartTag:
Expand Down Expand Up @@ -8131,49 +8115,49 @@ procedure THTMLParser.TheRulesForParsingTokensInForeignContent(var Token: TToken
tkComment: InsertAComment(Token); // http://bugs.freepascal.org/view.php?id=26403
{$IFDEF PARSEERROR} tkDOCTYPE: ParseError('unexpected DOCTYPE'); {$ENDIF}
tkStartTag:
if ((Token.TagName = eB) or
(Token.TagName = eBig) or
(Token.TagName = eBlockQuote) or
(Token.TagName = eBody) or
(Token.TagName = eBr) or
(Token.TagName = eCenter) or
(Token.TagName = eCode) or
(Token.TagName = eDD) or
(Token.TagName = eDiv) or
(Token.TagName = eDL) or
(Token.TagName = eDT) or
(Token.TagName = eEm) or
(Token.TagName = eEmbed) or
(Token.TagName = eH1) or
(Token.TagName = eH2) or
(Token.TagName = eH3) or
(Token.TagName = eH4) or
(Token.TagName = eH5) or
(Token.TagName = eH6) or
(Token.TagName = eHead) or
(Token.TagName = eHR) or
(Token.TagName = eI) or
(Token.TagName = eImg) or
(Token.TagName = eLI) or
(Token.TagName = eListing) or
(Token.TagName = eMenu) or
(Token.TagName = eMeta) or
(Token.TagName = eNoBr) or
(Token.TagName = eOL) or
(Token.TagName = eP) or
(Token.TagName = ePre) or
(Token.TagName = eRuby) or
(Token.TagName = eS) or
(Token.TagName = eSmall) or
(Token.TagName = eSpan) or
(Token.TagName = eStrong) or
(Token.TagName = eStrike) or
(Token.TagName = eSub) or
(Token.TagName = eSup) or
(Token.TagName = eTable) or
(Token.TagName = eTT) or
(Token.TagName = eU) or
(Token.TagName = eUL) or
if ((Token.TagName = eB) or
(Token.TagName = eBig) or
(Token.TagName = eBlockQuote) or
(Token.TagName = eBody) or
(Token.TagName = eBr) or
(Token.TagName = eCenter) or
(Token.TagName = eCode) or
(Token.TagName = eDD) or
(Token.TagName = eDiv) or
(Token.TagName = eDL) or
(Token.TagName = eDT) or
(Token.TagName = eEm) or
(Token.TagName = eEmbed) or
(Token.TagName = eH1) or
(Token.TagName = eH2) or
(Token.TagName = eH3) or
(Token.TagName = eH4) or
(Token.TagName = eH5) or
(Token.TagName = eH6) or
(Token.TagName = eHead) or
(Token.TagName = eHR) or
(Token.TagName = eI) or
(Token.TagName = eImg) or
(Token.TagName = eLI) or
(Token.TagName = eListing) or
(Token.TagName = eMenu) or
(Token.TagName = eMeta) or
(Token.TagName = eNoBr) or
(Token.TagName = eOL) or
(Token.TagName = eP) or
(Token.TagName = ePre) or
(Token.TagName = eRuby) or
(Token.TagName = eS) or
(Token.TagName = eSmall) or
(Token.TagName = eSpan) or
(Token.TagName = eStrong) or
(Token.TagName = eStrike) or
(Token.TagName = eSub) or
(Token.TagName = eSup) or
(Token.TagName = eTable) or
(Token.TagName = eTT) or
(Token.TagName = eU) or
(Token.TagName = eUL) or
(Token.TagName = eVar) or
((Token.TagName = eFont) and (Token.HasAttributes(['color', 'face', 'size'])))) then
begin
Expand Down
9 changes: 3 additions & 6 deletions src/wattsi.pas
Original file line number Diff line number Diff line change
Expand Up @@ -1579,8 +1579,7 @@ TCrossReferences = record
else
if (Element.IsIdentity(nsHTML, eRef)) then
begin
ExtractedData := Element.GetAttribute('spec');
ReferenceName := ExtractedData.AsString;
ReferenceName := Element.TextContent.AsString;
New(ListNode);
ListNode^.Value := Element;
ListNode^.Next := References[ReferenceName];
Expand All @@ -1589,12 +1588,11 @@ TCrossReferences = record
NewLink := ConstructHTMLElement(eA);
Scratch := Default(Rope);
Scratch.Append('#refs');
Scratch.AppendDestructively(ExtractedData); // $R-
Scratch.Append(ReferenceName);
NewLink.SetAttributeDestructively('href', Scratch);
ExtractedData := Element.GetAttribute('spec');
Scratch := Default(Rope);
Scratch.Append('[');
Scratch.AppendDestructively(ExtractedData); // $R-
Scratch.Append(ReferenceName);
Scratch.Append(']');
NewLink.AppendChild(TText.CreateDestructively(Scratch));
(Node.ParentNode as TElement).ReplaceChild(NewLink, Node);
Expand Down Expand Up @@ -2961,7 +2959,6 @@ function Main(): Boolean;
Source := ReadFile(SourceFile);
try
Parser := THTMLParser.Create();
Parser.RegisterProperietaryVoidElements([eRef]);
try
try
Parser.SpoonFeed(Source.Start, Source.Length);
Expand Down

0 comments on commit 4cabebb

Please sign in to comment.