Skip to content

Commit

Permalink
Enable wide Unicode support for names
Browse files Browse the repository at this point in the history
  • Loading branch information
viktor-yakubiv committed Jan 11, 2024
1 parent 7f23ba8 commit b34ad7a
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 48 deletions.
33 changes: 22 additions & 11 deletions dev/lib/factory-name.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,29 @@
/**
* @typedef {import('micromark-util-types').Code} Code
* @typedef {import('micromark-util-types').Effects} Effects
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').TokenType} TokenType
*/

import {asciiAlpha, asciiAlphanumeric} from 'micromark-util-character'
import {codes} from 'micromark-util-symbol'
import {asciiAlphanumeric} from 'micromark-util-character'
import {classifyCharacter} from 'micromark-util-classify-character'
import {codes, constants} from 'micromark-util-symbol'

/**
 * Check whether `code` may appear anywhere inside a name.
 *
 * Non-null codes at or below `codes.del` are accepted only when they are a
 * dash, a dot, an underscore, or pass `asciiAlphanumeric`.
 * Every other code (including `null`) is accepted unless `classifyCharacter`
 * puts it in the whitespace group.
 *
 * @param {Code} code
 *   Character code to check.
 * @returns {boolean}
 *   Whether `code` is allowed in a name.
 */
const allowedCharacter = (code) => {
const isLowCode = code !== null && code <= codes.del

if (!isLowCode) {
// Outside the low range the decision is delegated to the classifier.
return classifyCharacter(code) !== constants.characterGroupWhitespace
}

switch (code) {
case codes.dash:
case codes.dot:
case codes.underscore: {
return true
}

default: {
return asciiAlphanumeric(code)
}
}
}

/**
 * Check whether `code` may be the first or last character of a name.
 *
 * An edge character must be allowed in a name in general, must not be
 * classified as punctuation by `classifyCharacter`, and must not be an
 * underscore.
 *
 * @param {Code} code
 *   Character code to check.
 * @returns {boolean}
 *   Whether `code` is allowed at the edge of a name.
 */
const allowedEdgeCharacter = (code) => {
if (!allowedCharacter(code)) {
return false
}

if (classifyCharacter(code) === constants.characterGroupPunctuation) {
return false
}

// Underscore is rejected explicitly, independent of how it is classified.
return code !== codes.underscore
}

/**
* @this {TokenizeContext}
Expand All @@ -22,7 +39,7 @@ export function factoryName(effects, ok, nok, type) {

/** @type {State} */
function start(code) {
if (asciiAlpha(code)) {
if (allowedEdgeCharacter(code)) {
effects.enter(type)
effects.consume(code)
return name
Expand All @@ -33,18 +50,12 @@ export function factoryName(effects, ok, nok, type) {

/** @type {State} */
function name(code) {
if (
code === codes.dash ||
code === codes.underscore ||
asciiAlphanumeric(code)
) {
if (allowedCharacter(code)) {
effects.consume(code)
return name
}

effects.exit(type)
return self.previous === codes.dash || self.previous === codes.underscore
? nok(code)
: ok(code)
return allowedEdgeCharacter(self.previous) ? ok(code) : nok(code)
}
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"micromark-factory-space": "^2.0.0",
"micromark-factory-whitespace": "^2.0.0",
"micromark-util-character": "^2.0.0",
"micromark-util-classify-character": "^2.0.0",
"micromark-util-symbol": "^2.0.0",
"micromark-util-types": "^2.0.0",
"parse-entities": "^4.0.0"
Expand Down
84 changes: 47 additions & 37 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,9 @@ test('micromark-extension-directive (syntax, text)', async function (t) {
}
)

await t.test(
'should not support a colon not followed by an alpha',
async function () {
assert.equal(micromark(':', options()), '<p>:</p>')
}
)
await t.test('should not support a lonely colon', async function () {
assert.equal(micromark(':', options()), '<p>:</p>')
})

await t.test(
'should support a colon followed by an alpha',
Expand All @@ -57,24 +54,17 @@ test('micromark-extension-directive (syntax, text)', async function (t) {
}
)

await t.test(
'should not support a colon followed by a digit',
async function () {
assert.equal(micromark(':9', options()), '<p>:9</p>')
}
)
await t.test('should support a colon followed by a digit', async function () {
assert.equal(micromark(':9', options()), '<p></p>')
})

await t.test(
'should not support a colon followed by a dash',
'should not support a colon followed by a punctuation',
async function () {
assert.equal(micromark(':-', options()), '<p>:-</p>')
}
)

await t.test(
'should not support a colon followed by an underscore',
async function () {
assert.equal(micromark(':_', options()), '<p>:_</p>')
assert.equal(micromark(':.', options()), '<p>:.</p>')
assert.equal(micromark(':\u2014', options()), '<p>:\u2014</p>') // Em dash
}
)

Expand All @@ -86,21 +76,18 @@ test('micromark-extension-directive (syntax, text)', async function (t) {
assert.equal(micromark(':a-b', options()), '<p></p>')
})

await t.test(
'should *not* support a dash at the end of a name',
async function () {
assert.equal(micromark(':a-', options()), '<p>:a-</p>')
}
)

await t.test('should support an underscore in a name', async function () {
assert.equal(micromark(':a_b', options()), '<p></p>')
await t.test('should support unicode alphabets in name', async function () {
// Latin, Greek, Cyrillic respectively
assert.equal(micromark(':xγз', options()), '<p></p>')
})

await t.test(
'should *not* support an underscore at the end of a name',
'should *not* support punctuation at the end of a name',
async function () {
assert.equal(micromark(':a-', options()), '<p>:a-</p>')
assert.equal(micromark(':a_', options()), '<p>:a_</p>')
assert.equal(micromark(':a.', options()), '<p>:a.</p>')
assert.equal(micromark(':a\u2014', options()), '<p>:a\u2014</p>') // Em dash
}
)

Expand Down Expand Up @@ -411,25 +398,37 @@ test('micromark-extension-directive (syntax, leaf)', async function (t) {
)

await t.test(
'should not support two colons followed by a digit',
'should support two colons followed by a digit',
async function () {
assert.equal(micromark('::9', options()), '<p>::9</p>')
assert.equal(micromark('::9', options()), '')
}
)

await t.test(
'should not support two colons followed by a dash',
'should not support two colons followed by punctuation',
async function () {
assert.equal(micromark('::-', options()), '<p>::-</p>')
assert.equal(micromark('::_', options()), '<p>::_</p>')
assert.equal(micromark('::.', options()), '<p>::.</p>')
assert.equal(micromark('::\u2014', options()), '<p>::\u2014</p>') // Em dash
}
)

await t.test('should support a digit in a name', async function () {
assert.equal(micromark('::a9', options()), '')
})

await t.test('should support a dash in a name', async function () {
await t.test('should support punctuation in a name', async function () {
assert.equal(micromark('::a-b', options()), '')
assert.equal(micromark('::a-b', options()), '')
assert.equal(micromark('::a_b', options()), '')
assert.equal(micromark('::a.b', options()), '')
assert.equal(micromark('::a\u2014b', options()), '')
})

await t.test('should support unicode alphabets in name', async function () {
// Latin, Greek, Cyrillic respectively
assert.equal(micromark('::xγз', options()), '')
})

await t.test(
Expand Down Expand Up @@ -773,25 +772,36 @@ test('micromark-extension-directive (syntax, container)', async function (t) {
)

await t.test(
'should not support three colons followed by a digit',
'should support three colons followed by a digit',
async function () {
assert.equal(micromark(':::9', options()), '<p>:::9</p>')
assert.equal(micromark(':::9', options()), '')
}
)

await t.test(
'should not support three colons followed by a dash',
'should not support three colons followed by punctuation',
async function () {
assert.equal(micromark(':::-', options()), '<p>:::-</p>')
assert.equal(micromark(':::_', options()), '<p>:::_</p>')
assert.equal(micromark(':::.', options()), '<p>:::.</p>')
assert.equal(micromark(':::\u2014', options()), '<p>:::\u2014</p>') // Em dash
}
)

await t.test('should support a digit in a name', async function () {
assert.equal(micromark(':::a9', options()), '')
})

await t.test('should support a dash in a name', async function () {
await t.test('should support punctuation in a name', async function () {
assert.equal(micromark(':::a-b', options()), '')
assert.equal(micromark(':::a_b', options()), '')
assert.equal(micromark(':::a.b', options()), '')
assert.equal(micromark(':::a\u2014b', options()), '') // Em dash
})

await t.test('should support unicode alphabets in name', async function () {
// Latin, Greek, Cyrillic respectively
assert.equal(micromark(':::xγз', options()), '')
})

await t.test(
Expand Down

0 comments on commit b34ad7a

Please sign in to comment.