Skip to content

Commit b8b3497

Browse files
committed
Initial commit
0 parents  commit b8b3497

17 files changed

+7815
-0
lines changed

.gitignore

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/.build/
2+
/.vscode/
3+
/dist/
4+
/node_modules/
5+
6+
*.tgz

.npmignore

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/.build/
2+
/.vscode/
3+
4+
/test/
5+
6+
/etc/build.bash
7+
/etc/mjs-resolver.cjs
8+
9+
/jest.config.mjs
10+
/prettier.config.cjs
11+
/rollup.config.mjs
12+
13+
*.tgz

LICENSE.md

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
© 2023 Yuri Zemskov
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
this software and associated documentation files (the “Software”), to deal in
5+
the Software without restriction, including without limitation the rights to
6+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
the Software, and to permit persons to whom the Software is furnished to do so,
8+
subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in all
11+
copies or substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

README.md

+229
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
# μXML
2+
3+
Minimal and fast non-validating SAX-like XML reader.
4+
5+
- When to use it:
6+
- You just need to communicate with an XML-based API.
7+
- You just need to read XML-based configs or whatever.
8+
- You don’t care about ill-formed or invalid markup.
9+
- You don’t care about comments and processing instructions.
10+
- You don’t care about source locations.
11+
- When **NOT** to use it:
12+
- You need to parse HTML, SVG, JSX, templates, etc.
13+
- You need to validate, debug, or format XML.
14+
- You need to handle comments and/or processing instructions.
15+
- You need to read XML as a stream.
16+
17+
## Usage
18+
19+
```bash
20+
yarn add microxml
21+
```
22+
23+
```bash
24+
npm install microxml
25+
```
26+
27+
```typescript
28+
import { fast_xml, FastBackend } from 'microxml';
29+
30+
class ExampleBackend implements FastBackend {
31+
/** A table of entity definitions. */
32+
defs = new Map([
33+
['foo', '"&bar;"'],
34+
['bar', '<baz/>'],
35+
]);
36+
37+
/** Handle `<?xml...?>` and `<!DOCTYPE>`. */
38+
async head(text: string) {
39+
console.log('prolog %o', text);
40+
}
41+
42+
otag(tag: string, attrs: Map<string, string>) {
43+
console.log('opening tag %o %o', tag, attrs);
44+
}
45+
46+
ctag(tag: string) {
47+
console.log('closing tag %o', tag);
48+
}
49+
50+
text(text: string) {
51+
console.log('text %o', text);
52+
}
53+
}
54+
55+
const src = `
56+
<?xml version="1.0" encoding="UTF-8"?>
57+
<!DOCTYPE example>
58+
<test a="&foo;">
59+
&foo;
60+
</test>
61+
`;
62+
63+
fast_xml(src, new ExampleBackend());
64+
```
65+
66+
## Features and non-features
67+
68+
- <span id="br1"></span> The fastest[¹](#fn1).
69+
- <span id="br2"></span> The smallest[¹](#fn1) (≈1.5kB minified, **no gzip**).
70+
- ~~The smartest.~~
71+
- ~~The strongest.~~
72+
- Unlike many others, **DOES** reparse entity replacements:
73+
- With `x`=`<b>&y;</b>`, `y`=`"<c/>"`:
74+
`<a>&x;</a>` → `<a><b>"<c/>"</b></a>`,
75+
`<a b="&x;" />` → `<a b='<b>"<c/>"</b>'/>`.
76+
- May or may not explode in your face at ill-formed code.
77+
- May or may not explode in your face at invalid code.
78+
- Doesn’t parse `<?xml...?>` and `<!DOCTYPE>` declarations.
79+
- But the `async head(text: string)` hook may do the trick.
80+
- Doesn’t parse HTML.
81+
- Doesn’t parse SVG.
82+
- Doesn’t parse JSX.
83+
- Doesn’t parse templates.
84+
- Doesn’t handle boolean and unquoted attributes `<a b c=d>`.
85+
- Doesn’t handle references without the trailing semicolon `&ampwtf`.
86+
- Doesn’t handle tags without a name `<></>`.
87+
- Doesn’t handle tags like `<script>` and `<style>`.
88+
- Doesn’t handle void tags differently.
89+
- Doesn’t read streaming inputs.
90+
- Doesn’t report source locations.
91+
- Doesn’t report errors.
92+
- Doesn’t trim or collapse whitespace.
93+
- But merges adjacent text chunks.
94+
- Silently ignores comments and processing instructions.
95+
- Silently ignores undefined entities.
96+
- Silently ignores text before the first tag.
97+
- Silently ignores text after the last tag.
98+
- Silently aborts at EOF-terminated attributes and attribute lists.
99+
- Silently aborts at expansion of unterminated attribute lists.
100+
101+
---
102+
103+
1. <span id="fn1"></span> [[↑]](#br1), [[↑]](#br2) Probably.
104+
105+
## API
106+
107+
### `fast_xml(src, impl)`
108+
109+
Read an XML document using the provided implementation.
110+
111+
**Arguments:**
112+
113+
- `src: string` — the XML document source string.
114+
- `impl: FastBackend` — the backend to use.
115+
116+
**Return:**
117+
118+
- `Promise<void>` — a promise that resolves on error or document end.
119+
120+
### `FastBackend`
121+
122+
A backend that provides an entity table and token hooks.
123+
124+
All the properties and hooks are assumed to be mutable.
125+
126+
All the hooks are called as methods, so it’s safe to use `this` in them.
127+
128+
### `defs`
129+
130+
The entity definitions table.
131+
132+
**Type:**
133+
134+
- `Map<string, string>`,
135+
- `undefined`.
136+
137+
Keys are entity names without leading `&` and trailing `;`.
138+
139+
Values are replacements that are allowed to include markup and other
140+
references. When an entity is referenced in the markup mode, its replacement
141+
will be reparsed as markup, with tags, comments, entity references, etc.
142+
handled as usual. When an entity is referenced in an attribute value, everything
143+
except other references is ignored, including `["']` delimiters that normally
144+
terminate the attribute value.
145+
146+
When filling the table from `<!DOCTYPE>` and/or external DTDs, be careful to
147+
expand numeric references and parameter entities `%...;` **before** adding
148+
entries to the table.
149+
150+
The table is never consulted for numeric and predefined entities:
151+
152+
- `&#...;`,
153+
- `&#x...;`,
154+
- `&lt;`,
155+
- `&gt;`,
156+
- `&amp;`,
157+
- `&apos;`,
158+
- `&quot;`.
159+
160+
The table is assumed to be mutable, so it’s safe to update or completely replace
161+
it anytime you want.
162+
163+
### `head(head)`
164+
165+
XML prolog hook.
166+
167+
Triggered even if there is no prolog, or it doesn’t include the `<?xml...?>` or
168+
`<!DOCTYPE>` declaration.
169+
170+
Use it to parse the XML declaration and doctype.
171+
172+
The hook can return a promise, which will be awaited, so you can do async work here.
173+
174+
**Arguments:**
175+
176+
- `head: string` — the XML prolog text.
177+
178+
**Return:**
179+
180+
- `any` — anything you want, possibly awaitable.
181+
182+
### `otag(tag, attrs)`
183+
184+
Opening tag hook. Also triggered for void tags.
185+
186+
**Arguments:**
187+
188+
- `tag: string` — the tag name.
189+
- `attrs: Map<string, string>` — the attributes map.
190+
191+
**Return:**
192+
193+
- `any` — the return value is ignored.
194+
195+
### `ctag(tag)`
196+
197+
Closing tag hook. For void tags, triggered immediately after the `otag` hook.
198+
199+
**Arguments:**
200+
201+
- `tag: string` — the tag name.
202+
203+
**Return:**
204+
205+
- `any` — the return value is ignored.
206+
207+
### `text(text)`
208+
209+
Plain text hook.
210+
211+
Triggered immediately before `otag` or `ctag` with all pending plain text and
212+
<nobr>`<![CDATA[...]]>`</nobr> chunks merged, and only if the merged text is
213+
non-empty.
214+
215+
For example, when parsing a document like
216+
<nobr>`<a>b<!--x-->c<?y?>d<![CDATA[e]]>f</a>`</nobr>, we’ll only trigger `text`
217+
once with the `bcdef` argument.
218+
219+
**Arguments:**
220+
221+
- `text: string` — the plain text string.
222+
223+
**Return:**
224+
225+
- `any` — the return value is ignored.
226+
227+
## License
228+
229+
MIT © 2023 Yuri Zemskov

etc/build.bash

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/usr/bin/env bash
2+
3+
npm run compile
4+
5+
cp ./.build/*.{mts,mts.map} ./dist/
6+
7+
cp ./dist/index.d.mts ./dist/index.d.ts
8+
cp ./dist/index.d.mts.map ./dist/index.d.ts.map
9+
10+
cp ./dist/index.d.mts ./dist/index.d.cts
11+
cp ./dist/index.d.mts.map ./dist/index.d.cts.map
12+
13+
sed -i 's/index\.d\.mts\.map/index.d.ts.map/' ./dist/index.d.ts
14+
sed -i 's/index\.d\.mts\.map/index.d.cts.map/' ./dist/index.d.cts

etc/mjs-resolver.cjs

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
const MJS = /\.mjs$/;
2+
3+
module.exports = (path, options) => {
4+
const resolver = options.defaultResolver;
5+
6+
if (MJS.test(path)) {
7+
try {
8+
return resolver(path.replace(MJS, '.mts'), options);
9+
} catch {}
10+
}
11+
12+
return resolver(path, options);
13+
};

etc/tsconfig.json

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
{
2+
"compilerOptions": {
3+
"allowJs": false,
4+
"checkJs": false,
5+
"composite": false,
6+
"incremental": true,
7+
"noEmit": false,
8+
"noEmitOnError": true,
9+
"emitDeclarationOnly": false,
10+
"isolatedModules": false,
11+
"preserveWatchOutput": true,
12+
"pretty": true,
13+
"noErrorTruncation": true,
14+
"assumeChangesOnlyAffectDirectDependencies": false,
15+
"disableReferencedProjectLoad": false,
16+
"disableSolutionSearching": false,
17+
"disableSourceOfProjectReferenceRedirect": false,
18+
"disableSizeLimit": false,
19+
"target": "esnext",
20+
"downlevelIteration": true,
21+
"useDefineForClassFields": true,
22+
"preserveConstEnums": false,
23+
"preserveValueImports": false,
24+
"experimentalDecorators": false,
25+
"emitDecoratorMetadata": false,
26+
"jsx": "react-jsx",
27+
"jsxImportSource": "react",
28+
"declaration": true,
29+
"declarationMap": true,
30+
"removeComments": false,
31+
"sourceMap": true,
32+
"inlineSources": true,
33+
"noImplicitUseStrict": false,
34+
"alwaysStrict": true,
35+
"newLine": "lf",
36+
"emitBOM": false,
37+
"stripInternal": false,
38+
"module": "nodenext",
39+
"lib": [
40+
"esnext"
41+
],
42+
"moduleResolution": "nodenext",
43+
"moduleDetection": "auto",
44+
"moduleSuffixes": [
45+
""
46+
],
47+
"esModuleInterop": true,
48+
"allowSyntheticDefaultImports": true,
49+
"importsNotUsedAsValues": "remove",
50+
"resolveJsonModule": true,
51+
"importHelpers": true,
52+
"noEmitHelpers": false,
53+
"forceConsistentCasingInFileNames": true,
54+
"maxNodeModuleJsDepth": 0,
55+
"noLib": false,
56+
"noResolve": false,
57+
"allowUmdGlobalAccess": false,
58+
"skipLibCheck": true,
59+
"skipDefaultLibCheck": false,
60+
"preserveSymlinks": false,
61+
"strict": true,
62+
"noImplicitAny": true,
63+
"noImplicitThis": true,
64+
"strictBindCallApply": true,
65+
"strictFunctionTypes": true,
66+
"strictNullChecks": true,
67+
"strictPropertyInitialization": true,
68+
"exactOptionalPropertyTypes": true,
69+
"useUnknownInCatchVariables": true,
70+
"keyofStringsOnly": false,
71+
"noStrictGenericChecks": false,
72+
"noFallthroughCasesInSwitch": true,
73+
"noImplicitOverride": true,
74+
"noImplicitReturns": true,
75+
"noPropertyAccessFromIndexSignature": true,
76+
"noUncheckedIndexedAccess": true,
77+
"noUnusedLocals": true,
78+
"noUnusedParameters": true,
79+
"allowUnreachableCode": true,
80+
"allowUnusedLabels": true,
81+
"suppressExcessPropertyErrors": false,
82+
"suppressImplicitAnyIndexErrors": false,
83+
"diagnostics": false,
84+
"extendedDiagnostics": false,
85+
"traceResolution": false,
86+
"explainFiles": false,
87+
"listFiles": false,
88+
"listEmittedFiles": false
89+
}
90+
}

jest.config.mjs

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
export default {
2+
testMatch: ['**/test/*.mts'],
3+
testPathIgnorePatterns: ['/build/', '/node_modules/'],
4+
resolver: '<rootDir>/etc/mjs-resolver.cjs',
5+
transform: {
6+
'\\.mts$': ['ts-jest', { useESM: true }],
7+
},
8+
moduleFileExtensions: ['js', 'mts'],
9+
};

0 commit comments

Comments
 (0)