forked from gnachman/iTerm2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
iTermMinimumSubsequenceMatcher.m
199 lines (173 loc) · 6.33 KB
/
iTermMinimumSubsequenceMatcher.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//
// iTermMinimumSubsequenceMatcher.m
// iTerm2
//
// Created by George Nachman on 4/19/15.
//
//
#import "iTermMinimumSubsequenceMatcher.h"
@implementation iTermMinimumSubsequenceMatcher {
NSString *_query; // The original query
NSArray *_queryChars; // NSNumbers, one for each character in the query.
NSDictionary *_postingLists; // Maps a character to an array of sorted document offsets.
NSMutableArray *_indexes; // 1:1 with _queryChars, gives indexes into matching posting list.
}
- (instancetype)initWithQuery:(NSString *)query {
self = [super init];
if (self) {
_query = [query copy];
NSMutableArray *temp = [NSMutableArray array];
for (int i = 0; i < query.length; i++) {
[temp addObject:@([query characterAtIndex:i])];
}
_queryChars = [temp retain];
}
return self;
}
- (void)dealloc {
[_query release];
[_postingLists release];
[_indexes release];
[_queryChars release];
[super dealloc];
}
// Dictionary mapping letter->array(indexes into document where letter occurs)
// Returns nil if no matches are possible.
- (NSDictionary *)postingListsForDocument:(NSString *)document {
NSMutableDictionary *postingLists = [NSMutableDictionary dictionary];
// Create an empty posting list for each character in the query.
const NSInteger queryLength = _queryChars.count;
for (int i = 0; i < queryLength; i++) {
if (!postingLists[_queryChars[i]]) {
postingLists[_queryChars[i]] = [NSMutableArray array];
}
}
// Add indexes to posting lists.
NSInteger documentLength = document.length;
for (NSInteger i = 0; i < documentLength; i++) {
unichar c = [document characterAtIndex:i];
NSMutableArray *indexArray = postingLists[@(c)];
if (indexArray) {
[indexArray addObject:@(i)];
}
}
// Check for empty posting lists for an early abort.
for (int i = 0; i < queryLength; i++) {
if (![postingLists[_queryChars[i]] count]) {
// The letter query[i] did not occur in document.
return nil;
}
}
return postingLists;
}
// Advance the first index and any subsequent indexes that need to be advanced.
// Returns YES if it was possible to advance, NO if all done.
- (BOOL)advanceIndexes {
// Advance first index and as many subsequent indices as needed
NSInteger previousDocOffset = -1;
const NSInteger queryLength = _queryChars.count;
for (NSInteger i = 0; i < queryLength; i++) {
NSMutableArray *postingList = _postingLists[_queryChars[i]];
NSInteger currentIndex = [_indexes[i] integerValue];
NSInteger docOffset = [postingList[currentIndex] integerValue];
if (previousDocOffset == -1) {
// Force the first letter's index to advance.
previousDocOffset = docOffset;
}
// Search linearly for next index in posting list that gives an offset after
// previousDocOffset. An open-ended binary search would be faster but probably not worth the
// extra complexity in this application.
const NSInteger originalIndex = currentIndex;
const NSInteger postingListCount = postingList.count;
while (docOffset <= previousDocOffset) {
++currentIndex;
if (currentIndex == postingListCount) {
return NO;
}
docOffset = [postingList[currentIndex] integerValue];
}
if (currentIndex == originalIndex) {
// No change made, so we can stop.
break;
}
_indexes[i] = @(currentIndex);
previousDocOffset = docOffset;
}
return YES;
}
- (NSRange)range {
NSInteger firstIndex = [_indexes.firstObject integerValue];
NSInteger start = [_postingLists[_queryChars[0]][firstIndex] integerValue];
NSInteger lastIndex = [_indexes.lastObject integerValue];
NSInteger end = [_postingLists[_queryChars.lastObject][lastIndex] integerValue];
return NSMakeRange(start, end - start);
}
- (NSMutableArray *)initialIndexes {
NSInteger largestDocIndexSoFar = -1;
NSMutableArray *indexes = [NSMutableArray array];
const NSInteger queryLength = _queryChars.count;
for (int i = 0; i < queryLength; i++) {
NSNumber *c = _queryChars[i];
NSInteger index = 0;
while ([_postingLists[c][index] integerValue] <= largestDocIndexSoFar) {
index++;
if (index >= [_postingLists[c] count]) {
// Not enough occurrences of query[i] in document for a match.
return nil;
}
}
largestDocIndexSoFar = [_postingLists[c][index] integerValue];
[indexes addObject:@(index)];
}
return indexes;
}
- (NSIndexSet *)indexSetForIndexes:(NSArray *)indexes {
if (!indexes) {
return nil;
}
NSMutableIndexSet *indexSet = [NSMutableIndexSet indexSet];
const NSInteger queryLength = _queryChars.count;
for (int i = 0; i < queryLength; i++) {
NSArray *postingList = _postingLists[_queryChars[i]];
NSInteger index = [indexes[i] integerValue];
NSInteger offset = [postingList[index] integerValue];
[indexSet addIndex:offset];
}
return indexSet;
}
- (NSArray *)bestIndexes {
NSRange bestRange = NSMakeRange(NSNotFound, 0);
NSMutableArray *bestIndexes = [NSMutableArray array];
const NSInteger queryLength = _queryChars.count;
while (1) {
NSRange currentRange = [self range];
if (bestRange.location == NSNotFound || bestRange.length > currentRange.length) {
bestRange = currentRange;
[bestIndexes removeAllObjects];
[bestIndexes addObjectsFromArray:_indexes];
}
if (currentRange.length == queryLength) {
// No need to keep looking.
break;
}
// Move to the next match.
if (![self advanceIndexes]) {
break;
}
}
return bestIndexes;
}
- (NSIndexSet *)indexSetForDocument:(NSString *)document {
[_postingLists release];
_postingLists = [[self postingListsForDocument:document] retain];
if (!_postingLists.count) {
return nil;
}
[_indexes release];
_indexes = [[self initialIndexes] retain];
if (!_indexes) {
return nil;
}
return [self indexSetForIndexes:[self bestIndexes]];
}
@end