forked from trekhleb/javascript-algorithms
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add regular expression matching algorithm.
- Loading branch information
Showing
4 changed files
with
244 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
src/algorithms/string/regular-expression-matching/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# Regular Expression Matching | ||
|
||
Given an input string `s` and a pattern `p`, implement regular | ||
expression matching with support for `.` and `*`. | ||
|
||
- `.` Matches any single character. | ||
- `*` Matches zero or more of the preceding element. | ||
|
||
The matching should cover the **entire** input string (not partial). | ||
|
||
**Note** | ||
|
||
- `s` may be empty and contains only lowercase letters `a-z`. | ||
- `p` may be empty and contains only lowercase letters `a-z` and the special characters `.` and `*`. | ||
|
||
## Examples | ||
|
||
**Example #1** | ||
|
||
Input: | ||
``` | ||
s = 'aa' | ||
p = 'a' | ||
``` | ||
|
||
Output: `false` | ||
|
||
Explanation: `a` does not match the entire string `aa`. | ||
|
||
**Example #2** | ||
|
||
Input: | ||
``` | ||
s = 'aa' | ||
p = 'a*' | ||
``` | ||
|
||
Output: `true` | ||
|
||
Explanation: `*` means zero or more of the preceding element, `a`. | ||
Therefore, by repeating `a` once, it becomes `aa`. | ||
|
||
**Example #3** | ||
|
||
Input: | ||
|
||
``` | ||
s = 'ab' | ||
p = '.*' | ||
``` | ||
|
||
Output: `true` | ||
|
||
Explanation: `.*` means "zero or more (`*`) of any character (`.`)". | ||
|
||
**Example #4** | ||
|
||
Input: | ||
|
||
``` | ||
s = 'aab' | ||
p = 'c*a*b' | ||
``` | ||
|
||
Output: `true` | ||
|
||
Explanation: `c*` matches zero `c` characters and `a*` matches two | ||
`a` characters, followed by the literal `b`. Therefore it matches `aab`. | ||
|
||
## References | ||
|
||
- [YouTube](https://www.youtube.com/watch?v=l3hda49XcDE&list=PLLXdhg_r2hKA7DPDsunoDZ-Z769jWn4R8&index=71&t=0s) | ||
- [LeetCode](https://leetcode.com/problems/regular-expression-matching/description/) |
34 changes: 34 additions & 0 deletions
34
src/algorithms/string/regular-expression-matching/__test__/regularExpressionMatching.test.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import regularExpressionMatching from '../regularExpressionMatching'; | ||
|
||
describe('regularExpressionMatching', () => {
  it('should match regular expressions in a string', () => {
    // [string, pattern] pairs where the whole string must match the pattern.
    const matchingPairs = [
      ['', ''],
      ['a', 'a'],
      ['aa', 'aa'],
      ['aab', 'aab'],
      ['aab', 'aa.'],
      ['aab', '.a.'],
      ['aab', '...'],
      ['a', 'a*'],
      ['aaa', 'a*'],
      ['aaab', 'a*b'],
      ['aaabb', 'a*b*'],
      ['aaabb', 'a*b*c*'],
      ['', 'a*'],
      ['xaabyc', 'xa*b.c'],
      ['aab', 'c*a*b*'],
      ['mississippi', 'mis*is*.p*.'],
      ['ab', '.*'],
    ];

    // [string, pattern] pairs where the pattern must NOT match the whole string.
    const nonMatchingPairs = [
      ['', 'a'],
      ['a', ''],
      ['aab', 'aa'],
      ['aab', 'baa'],
      ['aabc', '...'],
      ['aaabbdd', 'a*b*c*'],
      ['mississippi', 'mis*is*p*.'],
      ['ab', 'a*'],
      ['abba', 'a*b*.c'],
      ['abba', '.*c'],
    ];

    matchingPairs.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeTruthy();
    });

    nonMatchingPairs.forEach(([string, pattern]) => {
      expect(regularExpressionMatching(string, pattern)).toBeFalsy();
    });
  });
});
135 changes: 135 additions & 0 deletions
135
src/algorithms/string/regular-expression-matching/regularExpressionMatching.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
const ZERO_OR_MORE_CHARS = '*';
const ANY_CHAR = '.';

/**
 * Checks whether the ENTIRE string matches the pattern, where the pattern
 * may contain two special characters:
 *
 * - `.` matches any single character,
 * - `*` matches zero or more of the preceding pattern element.
 *
 * Dynamic programming approach: cell [row][column] of the matrix tells
 * whether the first `row` characters of the string match the first `column`
 * characters of the pattern.
 *
 * A `*` that has no preceding element (i.e. a pattern that starts with `*`)
 * can't match anything, so it is treated as "no-match".
 *
 * @param {string} string
 * @param {string} pattern
 * @return {boolean}
 */
export default function regularExpressionMatching(string, pattern) {
  /*
   * Let's initiate dynamic programming matrix for this string and pattern.
   * We will have pattern characters on top (as columns) and string characters
   * will be placed to the left of the table (as rows). The matrix has one
   * extra row and one extra column that stand for the empty string and for
   * the empty pattern.
   *
   * Example:
   *
   *     a * b . b
   *   - - - - - -
   * a - - - - - -
   * a - - - - - -
   * b - - - - - -
   * y - - - - - -
   * b - - - - - -
   */
  const matchMatrix = Array.from(
    { length: string.length + 1 },
    () => Array(pattern.length + 1).fill(false),
  );

  // Let's fill the top-left cell with true. This would mean that empty
  // string '' matches to empty pattern ''.
  matchMatrix[0][0] = true;

  // Let's fill the first row of the matrix. Empty string can't match any
  // non-empty pattern.
  //
  // Example:
  // string: ''
  // pattern: 'a.z'
  //
  // The one exception here is patterns like a*b* that match the empty string:
  // every 'x*' pair may be dropped, so we look two columns to the left.
  for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
    const patternIndex = columnIndex - 1;
    // A '*' in the very first pattern position has nothing to repeat, so
    // there is no column to look back to (it would be index -1).
    const hasPrecedingElement = columnIndex >= 2;

    if (pattern[patternIndex] === ZERO_OR_MORE_CHARS && hasPrecedingElement) {
      matchMatrix[0][columnIndex] = matchMatrix[0][columnIndex - 2];
    } else {
      matchMatrix[0][columnIndex] = false;
    }
  }

  // Let's fill the first column with false. That would mean that empty pattern
  // can't match any non-empty string.
  //
  // Example:
  // string: 'ab'
  // pattern: ''
  for (let rowIndex = 1; rowIndex <= string.length; rowIndex += 1) {
    matchMatrix[rowIndex][0] = false;
  }

  // Now let's go through every letter of the pattern and every letter of
  // the string and compare them one by one.
  for (let rowIndex = 1; rowIndex <= string.length; rowIndex += 1) {
    for (let columnIndex = 1; columnIndex <= pattern.length; columnIndex += 1) {
      // Take into account the fact that matrix contains one extra column and row.
      const stringChar = string[rowIndex - 1];
      const patternIndex = columnIndex - 1;
      const patternChar = pattern[patternIndex];

      if (patternChar === ZERO_OR_MORE_CHARS) {
        if (columnIndex < 2) {
          // Leading '*' has no preceding element to repeat: no-match.
          matchMatrix[rowIndex][columnIndex] = false;
        } else {
          /*
           * In case if current pattern character is special '*' character we have
           * two options:
           *
           * 1. Since '*' allows its preceding char to be absent from the string we
           * need to check if string matches the pattern without '*' char and without
           * the char that goes before '*'. That would mean to go two positions left
           * on the same row.
           *
           * 2. Since '*' allows its preceding char to be present in the string many
           * times we need to check if char before '*' is the same as current string
           * char (or is '.'). If so, the current string matches the current pattern
           * in case if the string WITHOUT current char matches the same pattern.
           * This would mean to go one position up in the same column.
           */
          const precedingChar = pattern[patternIndex - 1];
          const matchesWithoutElement = matchMatrix[rowIndex][columnIndex - 2];
          const matchesOneMoreRepeat = (
            (precedingChar === stringChar || precedingChar === ANY_CHAR)
            && matchMatrix[rowIndex - 1][columnIndex]
          );

          matchMatrix[rowIndex][columnIndex] = matchesWithoutElement || matchesOneMoreRepeat;
        }
      } else if (patternChar === stringChar || patternChar === ANY_CHAR) {
        /*
         * In case if current pattern char is the same as current string char
         * or it may be any character (in case if pattern contains '.' char)
         * we need to check if there was a match for the pattern and for the
         * string WITHOUT current char. This would mean that we may copy
         * left-top diagonal value.
         *
         * Example:
         *
         *   a b
         * a 1 -
         * b - 1
         */
        matchMatrix[rowIndex][columnIndex] = matchMatrix[rowIndex - 1][columnIndex - 1];
      } else {
        /*
         * In case if pattern char and string char are different we may
         * treat this case as "no-match".
         *
         * Example:
         *
         *   a b
         * a - -
         * c - 0
         */
        matchMatrix[rowIndex][columnIndex] = false;
      }
    }
  }

  // Bottom-right cell: whole string against whole pattern.
  return matchMatrix[string.length][pattern.length];
}