-
-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
⚗️ Group similar commits together (#50)
- Loading branch information
1 parent
6a3b1b6
commit 06c8f3c
Showing
14 changed files
with
1,037 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,3 +61,4 @@ typings/ | |
.next | ||
|
||
CHANGELOG.json | ||
.vscode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,6 +47,7 @@ function parseCommit(commit) { | |
emoji, | ||
message, | ||
group, | ||
siblings: [], | ||
body: body.join('\n'), | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
const { deburr } = require('lodash') | ||
const levenshtein = require('fast-levenshtein') | ||
|
||
// Similarity threshold: two normalised sentences are considered close when
// their Levenshtein distance is at most this fraction of their average length.
// This is a magic number that comes from various testing — feel free to tweak it.
const MAX_DISTANCE_PERCENT = 0.30
|
||
/**
 * Groups sentences that look alike.
 *
 * Every sentence is first normalised (accents removed, special characters and
 * short words turned into fillers, words sorted and glued together), then the
 * normalised sentences are compared with the Levenshtein distance. A sentence
 * joins the group of the first not-yet-grouped sentence before it whose
 * distance to it is at most MAX_DISTANCE_PERCENT of their average length.
 *
 * @param {string[]} [texts=[]] sentences to group
 * @returns {number[][]} groups of indexes into `texts`; each index appears in
 *   exactly one group, groups are ordered by their first index and the
 *   indexes inside a group are ascending
 */
function groupSentencesByDistance(texts = []) {
  const textsWithSortedWords = texts
    .map(text => (
      // to basic latin characters
      deburr(text)
        // replace special characters by a filler
        .replace(/[^\w\s]/gi, '▩')
        // split words
        .split(' ')
        // short words (fewer than 4 characters: a, of, etc.) are replaced by
        // fillers so they do not weigh on the comparison.
        // NOTE: join() only puts the filler *between* the slots, so a word of
        // n characters becomes n - 1 fillers (1-character words vanish).
        // Kept as is: MAX_DISTANCE_PERCENT is described as tuned by testing,
        // presumably against this very normalisation.
        .map(word => (word.length < 4 ? Array.from({ length: word.length }).join('▩') : word))
        // we sort words, so that word order does not matter
        .sort()
        // we make them a sentence
        .join('')
    ))

  const alreadyProcessedWords = new Set()
  const keyGroups = []

  for (
    let indexFromStart = 0;
    indexFromStart < textsWithSortedWords.length;
    indexFromStart += 1
  ) {
    // already attached to an earlier group
    if (alreadyProcessedWords.has(indexFromStart)) continue

    alreadyProcessedWords.add(indexFromStart)
    const group = [indexFromStart]
    keyGroups.push(group)

    // the reference sentence does not change while scanning the candidates
    const textA = textsWithSortedWords[indexFromStart]

    for (
      let indexFromNext = indexFromStart + 1;
      indexFromNext < textsWithSortedWords.length;
      indexFromNext += 1
    ) {
      // checked first: no need to pay for a distance computation whose
      // result would be thrown away
      if (alreadyProcessedWords.has(indexFromNext)) continue

      const textB = textsWithSortedWords[indexFromNext]
      const distance = levenshtein.get(textA, textB)
      const textAverageLength = (textA.length + textB.length) / 2

      // close distance
      if ((textAverageLength * MAX_DISTANCE_PERCENT) >= distance) {
        group.push(indexFromNext)
        alreadyProcessedWords.add(indexFromNext)
      }
    }
  }

  return keyGroups
}
|
||
module.exports = { | ||
groupSentencesByDistance, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
const { groupSentencesByDistance } = require('./utils') | ||
|
||
describe('utils', () => {
  describe('groupSentencesByDistance', () => {
    // the expected value lists, for each group, the indexes of its messages
    it('should group values together', () => {
      const messages = [
        'add levenshtein', // 0 - group1
        'fix a bug about failures graph', // 1 - group2
        'levenshtein', // 2 - group1
        'fix levenshtein', // 3 - group1
        'nothing to group with me', // 4 - alone
        'fix a graph of failures bug', // 5 - group2
      ]

      expect(groupSentencesByDistance(messages)).toEqual([[0, 2, 3], [1, 5], [4]])
    })

    it('should return no group when there is nothing to group', () => {
      expect(groupSentencesByDistance([])).toEqual([])
      expect(groupSentencesByDistance()).toEqual([])
    })
  })
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.