Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change <ref> syntax #152

Merged
merged 1 commit into from
Aug 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,11 @@ To produce the familiar bracketed references to other specifications, e.g.

> There are a number of dynamic selectors that can be used with HTML. This section defines when these selectors match HTML elements. [[SELECTORS]](https://html.spec.whatwg.org/#refsSELECTORS) [[CSSUI]](https://html.spec.whatwg.org/#refsCSSUI)

you can use the `<ref>` void element:
you can use the (nonstandard) `<ref>` element:

```html
<p>There are a number of dynamic selectors that can be used with HTML. This section defines when
these selectors match HTML elements. <ref spec=SELECTORS> <ref spec=CSSUI></p>
these selectors match HTML elements. <ref>SELECTORS</ref> <ref>CSSUI</ref></p>
```

These are matched against a bibliography: a manually maintained, sorted `<dl>` at the end of the source file, identified by its ID, "`ref-list`". Its entries look like the following:
Expand Down
120 changes: 52 additions & 68 deletions src/html/htmlparser.pas
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,6 @@ TToken = record // this is an expensive type, as it is basically all token
const
Marker = nil;
var
FProprietaryVoids: specialize PlasticArray <TCanonicalString, TCanonicalString>;
FInputStream: TInputStream;
{$IFDEF PARSEERROR} FOnParseError: TParseErrorHandler; {$ENDIF}
FTokeniserState: TTokeniserState;
Expand Down Expand Up @@ -314,7 +313,6 @@ TToken = record // this is an expensive type, as it is basically all token
procedure SpoonFeed(const Data: UTF8String); // call this any number of times until all characters have been provided
{$ENDIF}
procedure SpoonFeed(const Data: Pointer; const Length: QWord); // call this any number of times until all characters have been provided
procedure RegisterProperietaryVoidElements(const TagNames: array of TCanonicalString);
function Parse(): TDocument; // then call this
// XXX need a fragment parsing mode (if we support fragment parsing, set FFragmentParsingMode to true)
{$IFDEF PARSEERROR} property OnParseError: TParseErrorHandler read FOnParseError write FOnParseError; {$ENDIF}
Expand Down Expand Up @@ -1136,15 +1134,6 @@ procedure THTMLParser.SpoonFeed(const Data: Pointer; const Length: QWord);
FInputStream.PushData(Data, Length);
end;

// Appends every tag name in TagNames to FProprietaryVoids.
// The "any other start tag" branch of TheInBodyInsertionMode consults that
// list: an element whose tag name is in it is popped off the stack of open
// elements immediately after being inserted.
procedure THTMLParser.RegisterProperietaryVoidElements(const TagNames: array of TCanonicalString);
var
Name: TCanonicalString;
begin
// Only compiled when the C+ switch is on: checks that the input stream
// reports WasStarted as false at the time of registration.
{$IFOPT C+} Assert(not FInputStream.WasStarted); {$ENDIF}
for Name in TagNames do
FProprietaryVoids.Push(Name);
end;

function THTMLParser.Parse(): TDocument;
var
OldKind: TTokenKind;
Expand Down Expand Up @@ -1936,7 +1925,7 @@ procedure THTMLParser.Tokenise();
end;
BogusComment();
end;

procedure TryForCDATASection(); inline;
begin
// seen [
Expand Down Expand Up @@ -2125,7 +2114,7 @@ procedure THTMLParser.Tokenise();
CommentSize := 0;
repeat
case (FInputStream.CurrentCharacter.Value) of
$003E, kEOF: break;
$003E, kEOF: break;
$0000: Inc(CommentSize, FFFD.Length);
else Inc(CommentSize, FInputStream.CurrentCharacterLength);
end;
Expand Down Expand Up @@ -2562,7 +2551,7 @@ procedure THTMLParser.Tokenise();
{$IFDEF PARSEERROR} ParseError('unexpected U+0000 in attribute name'); {$ENDIF}
FCurrentToken.CurrentAttributeName.Append($FFFD);
end;
$0022, $0027, $003C:
$0022, $0027, $003C:
begin
{$IFDEF PARSEERROR} ParseError('invalid character in attribute name'); {$ENDIF}
FCurrentToken.CurrentAttributeName.Append(FInputStream.CurrentCharacter);
Expand Down Expand Up @@ -5907,7 +5896,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
if (Token.TagName = eBody) then
begin
{$IFDEF PARSEERROR} ParseError('unexpected body start tag'); {$ENDIF}
if ((FStackOfOpenElements.Length < 2) or
if ((FStackOfOpenElements.Length < 2) or
(not FStackOfOpenElements[1].IsIdentity(nsHTML, eBody)) or
(StackOfOpenElementsHas(nsHTML, eTemplate))) then
exit; // ignore the token
Expand All @@ -5930,7 +5919,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
if (Token.TagName = eFrameset) then
begin
{$IFDEF PARSEERROR} ParseError('unexpected body frameset start tag'); {$ENDIF}
if ((FStackOfOpenElements.Length < 2) or
if ((FStackOfOpenElements.Length < 2) or
(not FStackOfOpenElements[1].IsIdentity(nsHTML, eBody))) then
exit; // ignore the token
if (not FFramesetOkFlag) then
Expand Down Expand Up @@ -6298,11 +6287,6 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
// any other start tag
ReconstructTheActiveFormattingElements();
InsertAnHTMLElementFor(Token);
if (FProprietaryVoids.Contains(Token.TagName)) then
begin
FStackOfOpenElements.Pop();
{$IFDEF PARSEERROR} Token.AcknowledgeSelfClosingFlag(); {$ENDIF}
end;
end;
tkEndTag:
// in this section things are hoisted also
Expand Down Expand Up @@ -6381,7 +6365,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
if ((Token.TagName = eDiv) or
(Token.TagName = ePre) or
(Token.TagName = eOL) or
(Token.TagName = eDL)) then
(Token.TagName = eDL)) then
begin
if (not StackOfOpenElementsHasInScope(Token.TagName)) then
begin
Expand Down Expand Up @@ -6467,7 +6451,7 @@ procedure THTMLParser.TheInBodyInsertionMode(var Token: TToken);
//(Token.TagName = ePre) or // hoisted
(Token.TagName = eSection) or
(Token.TagName = eSummary) or
(Token.TagName = eUL)) then
(Token.TagName = eUL)) then
begin
if (not StackOfOpenElementsHasInScope(Token.TagName)) then
begin
Expand Down Expand Up @@ -6815,7 +6799,7 @@ procedure THTMLParser.TheInTableInsertionMode(var Token: TToken);
exit;
end
else
if ((Token.TagName = eBody) or
if ((Token.TagName = eBody) or
(Token.TagName = eCaption) or
(Token.TagName = eCol) or
(Token.TagName = eColGroup) or
Expand Down Expand Up @@ -7869,7 +7853,7 @@ procedure THTMLParser.TheAfterFramesetInsertionMode(var Token: TToken);
end;
end;
tkExtraSpaceCharacter: InsertCharacters(Token.ExtraChars);
{$IFDEF PARSEERROR} tkExtraCharacters: ParseError('unexpected character token after frameset', Length(Token.ExtraChars)); {$ENDIF} // $R-
{$IFDEF PARSEERROR} tkExtraCharacters: ParseError('unexpected character token after frameset', Length(Token.ExtraChars)); {$ENDIF} // $R-
tkComment: InsertAComment(Token);
{$IFDEF PARSEERROR} tkDOCTYPE: ParseError('unexpected DOCTYPE'); {$ENDIF}
tkStartTag:
Expand Down Expand Up @@ -8131,49 +8115,49 @@ procedure THTMLParser.TheRulesForParsingTokensInForeignContent(var Token: TToken
tkComment: InsertAComment(Token); // http://bugs.freepascal.org/view.php?id=26403
{$IFDEF PARSEERROR} tkDOCTYPE: ParseError('unexpected DOCTYPE'); {$ENDIF}
tkStartTag:
if ((Token.TagName = eB) or
(Token.TagName = eBig) or
(Token.TagName = eBlockQuote) or
(Token.TagName = eBody) or
(Token.TagName = eBr) or
(Token.TagName = eCenter) or
(Token.TagName = eCode) or
(Token.TagName = eDD) or
(Token.TagName = eDiv) or
(Token.TagName = eDL) or
(Token.TagName = eDT) or
(Token.TagName = eEm) or
(Token.TagName = eEmbed) or
(Token.TagName = eH1) or
(Token.TagName = eH2) or
(Token.TagName = eH3) or
(Token.TagName = eH4) or
(Token.TagName = eH5) or
(Token.TagName = eH6) or
(Token.TagName = eHead) or
(Token.TagName = eHR) or
(Token.TagName = eI) or
(Token.TagName = eImg) or
(Token.TagName = eLI) or
(Token.TagName = eListing) or
(Token.TagName = eMenu) or
(Token.TagName = eMeta) or
(Token.TagName = eNoBr) or
(Token.TagName = eOL) or
(Token.TagName = eP) or
(Token.TagName = ePre) or
(Token.TagName = eRuby) or
(Token.TagName = eS) or
(Token.TagName = eSmall) or
(Token.TagName = eSpan) or
(Token.TagName = eStrong) or
(Token.TagName = eStrike) or
(Token.TagName = eSub) or
(Token.TagName = eSup) or
(Token.TagName = eTable) or
(Token.TagName = eTT) or
(Token.TagName = eU) or
(Token.TagName = eUL) or
if ((Token.TagName = eB) or
(Token.TagName = eBig) or
(Token.TagName = eBlockQuote) or
(Token.TagName = eBody) or
(Token.TagName = eBr) or
(Token.TagName = eCenter) or
(Token.TagName = eCode) or
(Token.TagName = eDD) or
(Token.TagName = eDiv) or
(Token.TagName = eDL) or
(Token.TagName = eDT) or
(Token.TagName = eEm) or
(Token.TagName = eEmbed) or
(Token.TagName = eH1) or
(Token.TagName = eH2) or
(Token.TagName = eH3) or
(Token.TagName = eH4) or
(Token.TagName = eH5) or
(Token.TagName = eH6) or
(Token.TagName = eHead) or
(Token.TagName = eHR) or
(Token.TagName = eI) or
(Token.TagName = eImg) or
(Token.TagName = eLI) or
(Token.TagName = eListing) or
(Token.TagName = eMenu) or
(Token.TagName = eMeta) or
(Token.TagName = eNoBr) or
(Token.TagName = eOL) or
(Token.TagName = eP) or
(Token.TagName = ePre) or
(Token.TagName = eRuby) or
(Token.TagName = eS) or
(Token.TagName = eSmall) or
(Token.TagName = eSpan) or
(Token.TagName = eStrong) or
(Token.TagName = eStrike) or
(Token.TagName = eSub) or
(Token.TagName = eSup) or
(Token.TagName = eTable) or
(Token.TagName = eTT) or
(Token.TagName = eU) or
(Token.TagName = eUL) or
(Token.TagName = eVar) or
((Token.TagName = eFont) and (Token.HasAttributes(['color', 'face', 'size'])))) then
begin
Expand Down
9 changes: 3 additions & 6 deletions src/wattsi.pas
Original file line number Diff line number Diff line change
Expand Up @@ -1579,8 +1579,7 @@ TCrossReferences = record
else
if (Element.IsIdentity(nsHTML, eRef)) then
begin
ExtractedData := Element.GetAttribute('spec');
ReferenceName := ExtractedData.AsString;
ReferenceName := Element.TextContent.AsString;
New(ListNode);
ListNode^.Value := Element;
ListNode^.Next := References[ReferenceName];
Expand All @@ -1589,12 +1588,11 @@ TCrossReferences = record
NewLink := ConstructHTMLElement(eA);
Scratch := Default(Rope);
Scratch.Append('#refs');
Scratch.AppendDestructively(ExtractedData); // $R-
Scratch.Append(ReferenceName);
NewLink.SetAttributeDestructively('href', Scratch);
ExtractedData := Element.GetAttribute('spec');
Scratch := Default(Rope);
Scratch.Append('[');
Scratch.AppendDestructively(ExtractedData); // $R-
Scratch.Append(ReferenceName);
Scratch.Append(']');
NewLink.AppendChild(TText.CreateDestructively(Scratch));
(Node.ParentNode as TElement).ReplaceChild(NewLink, Node);
Expand Down Expand Up @@ -2961,7 +2959,6 @@ function Main(): Boolean;
Source := ReadFile(SourceFile);
try
Parser := THTMLParser.Create();
Parser.RegisterProperietaryVoidElements([eRef]);
try
try
Parser.SpoonFeed(Source.Start, Source.Length);
Expand Down