Skip to content

Commit 8395427

Browse files
committed
Add script to auto-generate keyword lists for HTML and XML.
Update HTML attribute list to December 2024.
1 parent a995328 commit 8395427

File tree

9 files changed

+303
-228
lines changed

9 files changed

+303
-228
lines changed

readme.md

+6-6
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Latest development builds (artifacts in Release configuration for each compiler
7474
* [Groovy](tools/lang/Groovy.groovy), up to Groovy 5.0.
7575
* [Haskell](tools/lang/Haskell.hs), up to GHC 9.6.
7676
* [Haxe](tools/lang/Haxe.hx), up to Haxe 4.3.
77-
* [HTML](tools/lang/html.html), up to [WHATWG](https://html.spec.whatwg.org/multipage/) September 2024. [Screenshots](https://github.com/zufuliu/notepad4/wiki/Screenshots#html)
77+
* [HTML](tools/lang/html.html), up to [WHATWG](https://html.spec.whatwg.org/multipage/) December 2024. [Screenshots](https://github.com/zufuliu/notepad4/wiki/Screenshots#html)
7878
* ASP
7979
* ASP.NET
8080
* JSP
@@ -138,16 +138,16 @@ Latest development builds (artifacts in Release configuration for each compiler
138138
* [Texinfo](tools/lang/Texinfo.texi), up to Texinfo 7.1.
139139
* [TOML File](https://toml.io/)
140140
* [TypeScript](tools/lang/TypeScript.ts), up to TypeScript 5.4.
141-
* Visual Basic, VB6, VBA, VB.NET
142-
* VBScript
141+
* Visual Basic, [VB.NET](tools/lang/VB.NET.vb), [VB 6.0 and VBA](tools/lang/VBA.bas)
142+
* [VBScript](tools/lang/VBScript.vbs)
143143
* [VHDL](tools/lang/VHDL.vhdl), up to VHDL 2019.
144144
* Verilog HDL, up to [Verilog](tools/lang/Verilog.v) 2005 and [SystemVerilog](tools/lang/SystemVerilog.sv) 2017.
145145
* [Vim Script](tools/lang/Vim.vim)
146146
* [WebAssembly](https://github.com/WebAssembly/wabt/blob/main/src/lexer-keywords.txt), up to wabt 1.0.
147147
* [WinHex Script](tools/lang/WinHex.whs) and template, up to WinHex 20.9.
148-
* XML Document, [Screenshots](https://github.com/zufuliu/notepad4/wiki/Screenshots#xml)
149-
* XML Schema
150-
* XML Stylesheet
148+
* [XML Document](tools/lang/XML.xml), [Screenshots](https://github.com/zufuliu/notepad4/wiki/Screenshots#xml)
149+
* [XML Schema](https://www.w3.org/XML/Schema)
150+
* [XML Stylesheet](https://www.w3.org/Style/XSL/)
151151
* SGML and DTD
152152
* Apple Property List
153153
* [YAML Document](https://yaml.org/)

scintilla/lexers/LexHTML.cxx

+25-23
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,17 @@ using namespace Lexilla;
2828

2929
namespace {
3030

31+
//KeywordIndex++Autogenerated -- start of section automatically generated
32+
enum {
33+
KeywordIndex_Tag = 0,
34+
KeywordIndex_JavaScript = 1,
35+
KeywordIndex_VBScript = 2,
36+
KeywordIndex_SGML = 3,
37+
KeywordIndex_Attribute = 4,
38+
MaxKeywordSize = 32,
39+
};
40+
//KeywordIndex--Autogenerated -- end of section automatically generated
41+
3142
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
3243
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
3344

@@ -140,26 +151,24 @@ constexpr bool isCommentASPState(int state) noexcept {
140151
|| state == SCE_HB_COMMENTLINE;
141152
}
142153

143-
bool classifyAttribHTML(script_mode inScriptType, Sci_PositionU start, Sci_PositionU end, const WordList &keywords, const WordList &keywordsEvent, LexAccessor &styler) {
154+
bool classifyAttribHTML(script_mode inScriptType, Sci_PositionU start, Sci_PositionU end, const WordList &keywords, LexAccessor &styler) {
144155
int chAttr = SCE_H_ATTRIBUTEUNKNOWN;
145156
bool isLanguageType = false;
146157
if (IsNumberChar(styler[start])) {
147158
chAttr = SCE_H_NUMBER;
148159
} else {
149-
char s[64];
160+
char s[MaxKeywordSize];
150161
styler.GetRangeLowered(start, end, s, sizeof(s));
151-
if (keywords.InList(s) || keywordsEvent.InList(s))
152-
chAttr = SCE_H_ATTRIBUTE;
153162
if (inScriptType == eNonHtmlScript) {
154163
// see https://html.spec.whatwg.org/multipage/scripting.html
155164
if (StrEqualsAny(s, "type", "language")) {
156165
isLanguageType = true;
157166
}
158167
}
168+
if (!keywords || keywords.InList(s)) {
169+
chAttr = SCE_H_ATTRIBUTE;
170+
}
159171
}
160-
if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
161-
// No keywords -> all are known
162-
chAttr = SCE_H_ATTRIBUTE;
163172
styler.ColorTo(end, chAttr);
164173
return isLanguageType;
165174
}
@@ -334,13 +343,6 @@ constexpr bool IsOKBeforeJSRE(int ch) noexcept {
334343
}
335344

336345
void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, LexerWordList keywordLists, Accessor &styler, bool isXml) {
337-
const WordList &keywordsTag = keywordLists[0];
338-
const WordList &keywordsJS = keywordLists[1];
339-
const WordList &keywordsVBS = keywordLists[2];
340-
const WordList &keywordsSGML = keywordLists[5]; // SGML (DTD)
341-
const WordList &keywordsAttr = keywordLists[6];
342-
const WordList &keywordsEvent = keywordLists[7];
343-
344346
int StateToPrint = initStyle;
345347
int state = stateForPrintState(StateToPrint);
346348

@@ -653,10 +655,10 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
653655
// Bounce out of any ASP mode
654656
switch (state) {
655657
case SCE_HJ_WORD:
656-
classifyWordHTJS(styler.GetStartSegment(), i, keywordsJS, styler, inScriptType);
658+
classifyWordHTJS(styler.GetStartSegment(), i, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
657659
break;
658660
case SCE_HB_WORD:
659-
classifyWordHTVB(styler.GetStartSegment(), i, keywordsVBS, styler, inScriptType);
661+
classifyWordHTVB(styler.GetStartSegment(), i, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
660662
break;
661663
case SCE_H_XCCOMMENT:
662664
styler.ColorTo(i, state);
@@ -754,7 +756,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
754756
break;
755757
case SCE_H_SGML_COMMAND:
756758
if (!IsSGMLWordChar(ch)) {
757-
if (isWordHSGML(styler.GetStartSegment(), i, keywordsSGML, styler)) {
759+
if (isWordHSGML(styler.GetStartSegment(), i, keywordLists[KeywordIndex_SGML], styler)) {
758760
styler.ColorTo(i, StateToPrint);
759761
state = SCE_H_SGML_1ST_PARAM;
760762
} else {
@@ -860,7 +862,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
860862
case SCE_H_TAGUNKNOWN:
861863
if (!IsTagContinue(ch) && !((ch == '/') && (chPrev == '<'))) {
862864
int eClass = classifyTagHTML(styler.GetStartSegment(),
863-
i, keywordsTag, styler, tagDontFold, isXml, allowScripts);
865+
i, keywordLists[KeywordIndex_Tag], styler, tagDontFold, isXml, allowScripts);
864866
if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
865867
if (!tagClosing) {
866868
inScriptType = eNonHtmlScript;
@@ -917,7 +919,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
917919
break;
918920
case SCE_H_ATTRIBUTE:
919921
if (!IsAttributeContinue(ch)) {
920-
isLanguageType = classifyAttribHTML(inScriptType, styler.GetStartSegment(), i, keywordsAttr, keywordsEvent, styler);
922+
isLanguageType = classifyAttribHTML(inScriptType, styler.GetStartSegment(), i, keywordLists[KeywordIndex_Attribute], styler);
921923
if (ch == '>') {
922924
styler.ColorTo(i + 1, SCE_H_TAG);
923925
if (inScriptType == eNonHtmlScript) {
@@ -1092,7 +1094,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
10921094
if (state == SCE_HJ_NUMBER) {
10931095
styler.ColorTo(i, statePrintForState(SCE_HJ_NUMBER, inScriptType));
10941096
} else {
1095-
classifyWordHTJS(styler.GetStartSegment(), i, keywordsJS, styler, inScriptType);
1097+
classifyWordHTJS(styler.GetStartSegment(), i, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
10961098
}
10971099
state = SCE_HJ_DEFAULT;
10981100
}
@@ -1199,7 +1201,7 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
11991201
styler.ColorTo(i, statePrintForState(SCE_HB_NUMBER, inScriptType));
12001202
state = SCE_HB_DEFAULT;
12011203
} else {
1202-
state = classifyWordHTVB(styler.GetStartSegment(), i, keywordsVBS, styler, inScriptType);
1204+
state = classifyWordHTVB(styler.GetStartSegment(), i, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
12031205
}
12041206
}
12051207
break;
@@ -1270,10 +1272,10 @@ void ColouriseHyperTextDoc(Sci_PositionU startPos, Sci_Position length, int init
12701272

12711273
switch (state) {
12721274
case SCE_HJ_WORD:
1273-
classifyWordHTJS(styler.GetStartSegment(), lengthDoc, keywordsJS, styler, inScriptType);
1275+
classifyWordHTJS(styler.GetStartSegment(), lengthDoc, keywordLists[KeywordIndex_JavaScript], styler, inScriptType);
12741276
break;
12751277
case SCE_HB_WORD:
1276-
classifyWordHTVB(styler.GetStartSegment(), lengthDoc, keywordsVBS, styler, inScriptType);
1278+
classifyWordHTVB(styler.GetStartSegment(), lengthDoc, keywordLists[KeywordIndex_VBScript], styler, inScriptType);
12771279
break;
12781280
default:
12791281
StateToPrint = statePrintForState(state, inScriptType);

src/EditAutoC.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -708,9 +708,8 @@ enum {
708708
GroovyKeywordIndex_Annotation = 7,
709709
GroovyKeywordIndex_GroovyDoc = 9,
710710
HTMLKeywordIndex_Tag = 0,
711-
HTMLKeywordIndex_Attribute = 6,
712-
HTMLKeywordIndex_EventHandler = 7,
713-
HTMLKeywordIndex_Value = 8,
711+
HTMLKeywordIndex_Attribute = 4,
712+
HTMLKeywordIndex_Value = 5,
714713
HaxeKeywordIndex_Preprocessor = 1,
715714
HaxeKeywordIndex_CommentTag = 8,
716715
InnoKeywordIndex_Directive = 4,
@@ -1021,7 +1020,6 @@ static void AutoC_AddKeyword(WordList &pWList, int iCurrentStyle) noexcept {
10211020
if (iLexer == SCLEX_PHPSCRIPT || iLexer == SCLEX_JAVASCRIPT || iLexer == SCLEX_MARKDOWN) {
10221021
pWList.AddListEx(lexHTML.pKeyWords->pszKeyWords[HTMLKeywordIndex_Tag]);
10231022
pWList.AddListEx(lexHTML.pKeyWords->pszKeyWords[HTMLKeywordIndex_Attribute]);
1024-
pWList.AddListEx(lexHTML.pKeyWords->pszKeyWords[HTMLKeywordIndex_EventHandler]);
10251023
pWList.AddListEx(lexHTML.pKeyWords->pszKeyWords[HTMLKeywordIndex_Value]);
10261024
}
10271025
}

0 commit comments

Comments
 (0)