Skip to content

Commit

Permalink
Reintroduce anchor detection as configurable step
Browse files Browse the repository at this point in the history
Anchor detection was removed during refactoring. Although a link with
a broken anchor still points to a valid URL, it is nevertheless a
wrong/broken link. This matters especially when markdown is used
to generate pages that are checked with tools like HTMLtest,
which verify that anchors resolve properly.

Therefore I reintroduce anchor detection with this PR, but as a
configurable option. This should still allow the intended
outcome of the refactoring, while ensuring backwards compatibility
for other users who rely on proper anchor handling.

Relates: tcort#24

Signed-off-by: Simon Schrottner <[email protected]>
  • Loading branch information
aepfli committed Jan 11, 2023
1 parent aad0f35 commit b52956e
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 24 deletions.
25 changes: 22 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@ $ npm install --save markdown-link-extractor
```
## API

### markdownLinkExtractor(markdown)
### markdownLinkExtractor(markdown, anchors = false)

Parameters:

* `markdown` text in markdown format.
* `anchors` if anchors should also be extracted.

Returns:

* an array containing the URLs from the links found.
* an object with the following properties:
* `.anchors`: an array of anchor tag strings (e.g. `[ "#foo", "#bar" ]`) - only populated if `anchors` is set to `true`.
* `.links`: an array containing the URLs from the links found.

## Examples

Expand All @@ -26,10 +29,26 @@ const markdownLinkExtractor = require('markdown-link-extractor');

const markdown = readFileSync('README.md', {encoding: 'utf8'});

const links = markdownLinkExtractor(markdown);
const { links } = markdownLinkExtractor(markdown);
links.forEach(link => console.log(link));
```

## Upgrading to v5.0.0

- anchor link extraction reintroduced - be careful when upgrading from a version <`3.x`: the `extended` parameter was removed, and the `anchors` parameter is now in its place.

Code that looked like this:

```
const links = markdownLinkExtractor(str);
```

Should change to this:

```
const { links } = markdownLinkExtractor(str);
```

## Upgrading to v4.0.0

- anchor link extraction no longer supported
Expand Down
22 changes: 20 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,31 @@
const { marked } = require('marked');
const htmlLinkExtractor = require('html-link-extractor');

module.exports = function markdownLinkExtractor(markdown, extended = false) {
module.exports = function markdownLinkExtractor(markdown, anchors = false) {
const detected_anchors = [];

const renderer = {
heading(text, level, raw, slugger) {
if (this.options.headerIds) {
var id = this.options.headerPrefix + slugger.slug(raw);
if(anchors) {
detected_anchors.push(`#${id}`);
}
return "<h" + level + " id=\"" + id + "\">" + text + "</h" + level + ">\n";
} // ignore IDs


return "<h" + level + ">" + text + "</h" + level + ">\n";
}
};

marked.setOptions({
mangle: false, // don't escape autolinked email address with HTML character references.
});

marked.use({ renderer });

const html = marked(markdown);
const links = htmlLinkExtractor(html);
return links;
return { links, detected_anchors };
};
36 changes: 17 additions & 19 deletions test/markdown-link-extractor.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,88 +6,86 @@ var markdownLinkExtractor = require('../');
// Test suite for markdownLinkExtractor. Every call destructures the result
// object: `links` holds the extracted URLs, `anchors` the heading anchors
// collected when the second argument is true.
describe('markdown-link-extractor', function () {

    it('should return an empty array when no links are present', function () {
        const { links } = markdownLinkExtractor('No links here');
        expect(links).to.be.an('array');
        expect(links).to.have.length(0);
    });

    it('should extract links with emojis', function () {
        const { links } = markdownLinkExtractor('**[📣 Foo!](https://www.example.com)**');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('https://www.example.com');
    });

    it('should extract a link in a [tag](http://example.com)', function () {
        const { links } = markdownLinkExtractor('[example](http://www.example.com)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('http://www.example.com');
    });

    it('should extract a hash link in [foobar](#foobar)', function () {
        const { links } = markdownLinkExtractor('[foobar](#foobar)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('#foobar');
    });

    it('should extract a link from inline html <a href="http://foo.bar.test">foo</a>', function () {
        const { links } = markdownLinkExtractor('<a href="http://foo.bar.test">foo</a>');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('http://foo.bar.test');
    });

    it('should extract mailto: link from <test@example.com>', function () {
        // The stray ")" after the autolink is part of the fixture.
        const { links } = markdownLinkExtractor('<test@example.com>)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('mailto:test@example.com');
    });

    it('should extract a link in a with escaped braces [tag](http://example.com\(1\))', function () {
        const { links } = markdownLinkExtractor('[XMLHttpRequest](http://msdn.microsoft.com/library/ie/ms535874\\(v=vs.85\\).aspx)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('http://msdn.microsoft.com/library/ie/ms535874(v=vs.85).aspx');
    });

    it('should extract an image link in a ![tag](http://example.com/image.jpg)', function () {
        const { links } = markdownLinkExtractor('![example](http://www.example.com/image.jpg)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('http://www.example.com/image.jpg');
    });

    it('should extract an image link in a ![tag](foo/image.jpg)', function () {
        const { links } = markdownLinkExtractor('![example](foo/image.jpg)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('foo/image.jpg');
    });

    it('should extract two image links', function () {
        const { links } = markdownLinkExtractor('![img](http://www.example.test/hello.jpg) ![img](hello.jpg)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(2);
        expect(links[0]).to.be('http://www.example.test/hello.jpg');
        expect(links[1]).to.be('hello.jpg');
    });

    it('should extract a bare link http://example.com', function () {
        const { links } = markdownLinkExtractor('This is a link: http://www.example.com');
        expect(links).to.be.an('array');
        expect(links).to.have.length(1);
        expect(links[0]).to.be('http://www.example.com');
    });

    it('should extract multiple links', function () {
        const { links } = markdownLinkExtractor('This is an [example](http://www.example.com). Hope it [works](http://www.example.com/works)');
        expect(links).to.be.an('array');
        expect(links).to.have.length(2);
        expect(links[0]).to.be('http://www.example.com');
        expect(links[1]).to.be('http://www.example.com/works');
    });

    it('should collect anchor tags', function () {
        // Two identical headings: the second anchor is expected with a "-1" suffix.
        const { anchors } = markdownLinkExtractor('# foo\n# foo', true);
        expect(anchors).to.eql(['#foo','#foo-1']);
    });

});

0 comments on commit b52956e

Please sign in to comment.