From bb1edf671731343d684ad01c3019127cf440dd90 Mon Sep 17 00:00:00 2001
From: Aidan Feldman
Date: Tue, 8 Jan 2019 22:33:51 -0500
Subject: [PATCH] support unicode characters

---
 bootstrap-toc.js | 26 +++++++++++++++++++++-----
 test/toc-test.js |  8 +++++++-
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/bootstrap-toc.js b/bootstrap-toc.js
index 4bb1c1c..534f100 100644
--- a/bootstrap-toc.js
+++ b/bootstrap-toc.js
@@ -20,11 +20,27 @@
 
       generateUniqueIdBase: function(el) {
         var text = $(el).text();
-        var anchor = text
-          .trim()
-          .toLowerCase()
-          .replace(/[^A-Za-z0-9]+/g, "-");
-        return anchor || el.tagName.toLowerCase();
+
+        // adapted from
+        // https://github.com/bryanbraun/anchorjs/blob/65fede08d0e4a705f72f1e7e6284f643d5ad3cf3/anchor.js#L237-L257
+
+        // Regex for finding the non-safe URL characters (many need escaping): & +$,:;=?@"#{}|^~[`%!'<>]./()*\ (newlines, tabs, backspace, & vertical tabs)
+        var nonsafeChars = /[& +$,:;=?@"#{}|^~[`%!'<>\]\.\/\(\)\*\\\n\t\b\v]/g,
+          urlText;
+
+        // Note: we trim hyphens after truncating because truncating can cause dangling hyphens.
+        // Example string: // " ⚡⚡ Don't forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+        urlText = text
+          .trim() // "⚡⚡ Don't forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+          .replace(/\'/gi, "") // "⚡⚡ Dont forget: URL fragments should be i18n-friendly, hyphenated, short, and clean."
+          .replace(nonsafeChars, "-") // "⚡⚡-Dont-forget--URL-fragments-should-be-i18n-friendly--hyphenated--short--and-clean-"
+          .replace(/-{2,}/g, "-") // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated-short-and-clean-"
+          .substring(0, 64) // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated-"
+          .replace(/[\uD800-\uDBFF]$/, "") // substring() counts UTF-16 code units, so it can cut an astral character (e.g. an emoji) in half; drop the orphaned high surrogate
+          .replace(/^-+|-+$/gm, "") // "⚡⚡-Dont-forget-URL-fragments-should-be-i18n-friendly-hyphenated"
+          .toLowerCase(); // "⚡⚡-dont-forget-url-fragments-should-be-i18n-friendly-hyphenated"
+
+        return urlText || el.tagName.toLowerCase();
       },
 
       generateUniqueId: function(el) {
diff --git a/test/toc-test.js b/test/toc-test.js
index abaa256..73e3c9d 100644
--- a/test/toc-test.js
+++ b/test/toc-test.js
@@ -43,7 +43,7 @@ describe("Toc", function() {
       var el = document.createElement("h1");
       el.innerHTML = "Some tExt- with aidan's /. stuff ";
       var base = Toc.helpers.generateUniqueIdBase(el);
-      expect(base).to.eql("some-text-with-aidan-s-stuff");
+      expect(base).to.eql("some-text-with-aidans-stuff");
     });
 
     it("uses the tag name of the element if there's no text", function() {
@@ -51,6 +51,12 @@ describe("Toc", function() {
       var base = Toc.helpers.generateUniqueIdBase(el);
       expect(base).to.eql("h1");
     });
+
+    it("handles unicode", function() {
+      var el = $("<h1>💃 🕺</h1>")[0];
+      var base = Toc.helpers.generateUniqueIdBase(el);
+      expect(base).to.eql("💃-🕺");
+    });
   });
 
   describe(".generateUniqueId()", function() {