From b34ad7a9c6b7e473b98eb3e114b6bce175c3f045 Mon Sep 17 00:00:00 2001 From: Viktor Yakubiv Date: Thu, 11 Jan 2024 18:27:51 +0200 Subject: [PATCH] Enable wide Unicode support for names --- dev/lib/factory-name.js | 33 ++++++++++------ package.json | 1 + test/index.js | 84 +++++++++++++++++++++++------------------ 3 files changed, 70 insertions(+), 48 deletions(-) diff --git a/dev/lib/factory-name.js b/dev/lib/factory-name.js index bab07a4..1f2f78c 100644 --- a/dev/lib/factory-name.js +++ b/dev/lib/factory-name.js @@ -1,12 +1,29 @@ /** + * @typedef {import('micromark-util-types').Code} Code * @typedef {import('micromark-util-types').Effects} Effects * @typedef {import('micromark-util-types').State} State * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext * @typedef {import('micromark-util-types').TokenType} TokenType */ -import {asciiAlpha, asciiAlphanumeric} from 'micromark-util-character' -import {codes} from 'micromark-util-symbol' +import {asciiAlphanumeric} from 'micromark-util-character' +import {classifyCharacter} from 'micromark-util-classify-character' +import {codes, constants} from 'micromark-util-symbol' + +/** @param {Code} code **/ +const allowedCharacter = (code) => + code !== null && code <= codes.del + ? code === codes.dash || + code === codes.dot || + code === codes.underscore || + asciiAlphanumeric(code) + : classifyCharacter(code) !== constants.characterGroupWhitespace + +/** @param {Code} code **/ +const allowedEdgeCharacter = (code) => + allowedCharacter(code) && + classifyCharacter(code) !== constants.characterGroupPunctuation && + code !== codes.underscore /** * @this {TokenizeContext} @@ -22,7 +39,7 @@ export function factoryName(effects, ok, nok, type) { /** @type {State} */ function start(code) { - if (asciiAlpha(code)) { + if (allowedEdgeCharacter(code)) { effects.enter(type) effects.consume(code) return name @@ -33,18 +50,12 @@ export function factoryName(effects, ok, nok, type) { /** @type {State} */ function name(code) { - if ( - code === codes.dash || - code === codes.underscore || - asciiAlphanumeric(code) - ) { + if (allowedCharacter(code)) { effects.consume(code) return name } effects.exit(type) - return self.previous === codes.dash || self.previous === codes.underscore - ? nok(code) - : ok(code) + return allowedEdgeCharacter(self.previous) ? ok(code) : nok(code) } } diff --git a/package.json b/package.json index 971935c..67505ba 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "micromark-factory-space": "^2.0.0", "micromark-factory-whitespace": "^2.0.0", "micromark-util-character": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", "micromark-util-symbol": "^2.0.0", "micromark-util-types": "^2.0.0", "parse-entities": "^4.0.0" diff --git a/test/index.js b/test/index.js index 03d91f1..45cde9f 100644 --- a/test/index.js +++ b/test/index.js @@ -43,12 +43,9 @@ test('micromark-extension-directive (syntax, text)', async function (t) { } ) - await t.test( - 'should not support a colon not followed by an alpha', - async function () { - assert.equal(micromark(':', options()), '

:

') - } - ) + await t.test('should not support a lonely colon', async function () { + assert.equal(micromark(':', options()), '

:

') + }) await t.test( 'should support a colon followed by an alpha', @@ -57,24 +54,17 @@ test('micromark-extension-directive (syntax, text)', async function (t) { } ) - await t.test( - 'should not support a colon followed by a digit', - async function () { - assert.equal(micromark(':9', options()), '

:9

') - } - ) + await t.test('should support a colon followed by a digit', async function () { + assert.equal(micromark(':9', options()), '

') + }) await t.test( - 'should not support a colon followed by a dash', + 'should not support a colon followed by a punctuation', async function () { assert.equal(micromark(':-', options()), '

:-

') - } - ) - - await t.test( - 'should not support a colon followed by an underscore', - async function () { assert.equal(micromark(':_', options()), '

:_

') + assert.equal(micromark(':.', options()), '

:.

') + assert.equal(micromark(':\u2014', options()), '

:\u2014

') // Em dash } ) @@ -86,21 +76,18 @@ test('micromark-extension-directive (syntax, text)', async function (t) { assert.equal(micromark(':a-b', options()), '

') }) - await t.test( - 'should *not* support a dash at the end of a name', - async function () { - assert.equal(micromark(':a-', options()), '

:a-

') - } - ) - - await t.test('should support an underscore in a name', async function () { - assert.equal(micromark(':a_b', options()), '

') + await t.test('should support unicode alphabets in name', async function () { + // Latin, Greek, Cyrillic respectively + assert.equal(micromark(':xγз', options()), '

') }) await t.test( - 'should *not* support an underscore at the end of a name', + 'should *not* support punctuation at the end of a name', async function () { + assert.equal(micromark(':a-', options()), '

:a-

') assert.equal(micromark(':a_', options()), '

:a_

') + assert.equal(micromark(':a.', options()), '

:a.

') + assert.equal(micromark(':a\u2014', options()), '

:a\u2014

') // Em dash } ) @@ -411,16 +398,19 @@ test('micromark-extension-directive (syntax, leaf)', async function (t) { ) await t.test( - 'should not support two colons followed by a digit', + 'should support two colons followed by a digit', async function () { - assert.equal(micromark('::9', options()), '

::9

') + assert.equal(micromark('::9', options()), '') } ) await t.test( - 'should not support two colons followed by a dash', + 'should not support two colons followed by punctuation', async function () { assert.equal(micromark('::-', options()), '

::-

') + assert.equal(micromark('::_', options()), '

::_

') + assert.equal(micromark('::.', options()), '

::.

') + assert.equal(micromark('::\u2014', options()), '

::\u2014

') // Em dash } ) @@ -428,8 +418,17 @@ test('micromark-extension-directive (syntax, leaf)', async function (t) { assert.equal(micromark('::a9', options()), '') }) - await t.test('should support a dash in a name', async function () { + await t.test('should support punctuation in a name', async function () { + assert.equal(micromark('::a-b', options()), '') assert.equal(micromark('::a-b', options()), '') + assert.equal(micromark('::a_b', options()), '') + assert.equal(micromark('::a.b', options()), '') + assert.equal(micromark('::a\u2014b', options()), '') + }) + + await t.test('should support unicode alphabets in name', async function () { + // Latin, Greek, Cyrillic respectively + assert.equal(micromark('::xγз', options()), '') }) await t.test( @@ -773,16 +772,19 @@ test('micromark-extension-directive (syntax, container)', async function (t) { ) await t.test( - 'should not support three colons followed by a digit', + 'should support three colons followed by a digit', async function () { - assert.equal(micromark(':::9', options()), '

:::9

') + assert.equal(micromark(':::9', options()), '') } ) await t.test( - 'should not support three colons followed by a dash', + 'should not support three colons followed by punctuation', async function () { assert.equal(micromark(':::-', options()), '

:::-

') + assert.equal(micromark(':::_', options()), '

:::_

') + assert.equal(micromark(':::.', options()), '

:::.

') + assert.equal(micromark(':::\u2014', options()), '

:::\u2014

') // Em dash } ) @@ -790,8 +792,16 @@ test('micromark-extension-directive (syntax, container)', async function (t) { assert.equal(micromark(':::a9', options()), '') }) - await t.test('should support a dash in a name', async function () { + await t.test('should support punctuation in a name', async function () { assert.equal(micromark(':::a-b', options()), '') + assert.equal(micromark(':::a_b', options()), '') + assert.equal(micromark(':::a.b', options()), '') + assert.equal(micromark(':::a\u2014b', options()), '') // Em dash + }) + + await t.test('should support unicode alphabets in name', async function () { + // Latin, Greek, Cyrillic respectively + assert.equal(micromark(':::xγз', options()), '') }) await t.test(