-
Notifications
You must be signed in to change notification settings - Fork 302
/
diff_by_n.py
92 lines (72 loc) · 2.52 KB
/
diff_by_n.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
diff_by_n.py
Donald Knuth, Art of Computer Programming, Volume 4 Facsimile 0
Variation on Exercise #28
Find pairs of SGB word vectors that differ by +/-n.
"""
from get_words import get_words
import timeit
def gen_variations(word,fragment,distance,depth,variations):
"""
Recursive backtracking method to assemble strings
differing by +/-distance at each position
"""
if depth==len(word):
variations.add(fragment)
else:
for d in range(1,distance+1):
fragment_pd = fragment + chr(ord(word[depth])+d)
fragment_md = fragment + chr(ord(word[depth])-d)
for new_fragment in [fragment_pd,fragment_md]:
gen_variations(word,new_fragment,distance,depth+1,variations)
def get_all_variations(word, distance):
"""
Return all possible words that differ
from `word` by +/-distance in each index.
This does not include `word` in the
variations.
"""
word_variations = set()
gen_variations(word,'',distance,0,word_variations)
word_variations = list(word_variations)
return word_variations
def main():
"""
Find pairs of SGB word vectors that differ by
+/-distance in each component.
To do this, iterate through each word,
generate the possible candidate matchings,
and if they exist, add the pair to a set.
"""
words = get_words()
#words = words[:1000]
words = set(get_words())
for d in [1,2,3]:
tic = timeit.default_timer()
# List of string tuples
off_by_n = set()
# Iterate over every word
for iw,word in enumerate(words):
# Generate all possible candidate matches
# distance +/-d from this word at each
# position
all_vars = get_all_variations(word,d)
for word_var in all_vars:
if word_var in words:
# Found a new (unordered) pair
if word<word_var:
left=word
right=word_var
else:
left=word_var
right=word
off_by_n.add((left,right))
off_by_n = list(off_by_n)
off_by_n.sort()
toc = timeit.default_timer()
for o in off_by_n[:10]:
print("{:s} {:s}".format(o[0],o[1]))
print("Found {0:d} pairs of words that differ by +/-{1:d} in each component.".format(len(off_by_n),d))
print("Time: %0.4f"%(toc-tic))
if __name__=="__main__":
main()