-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
137 lines (104 loc) · 4.59 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from _collections import defaultdict
import random
from classifyReview import*
# Calls into helpers that arrive via the star-import from classifyReview.
# Their behavior is defined in that module and is not visible in this file;
# the results are only inspected through the commented-out prints below.
a = format_sentence("Hello my name is Sidharth.")
#print(a)
# NOTE(review): presumably splits the list into train/test parts using the
# 0.25 ratio -- confirm against classifyReview.splitTrainTest.
b,c = splitTrainTest(["A", "B", "C", "D"], 0.25)
#print(f'{b} \n {c}')
# NOTE(review): presumably tags each review with the "pos" label -- confirm
# against classifyReview.formatForClassifier.
d = formatForClassifier(["A good one", "The best!"], "pos")
#print(d)
# Runs at import time, every time this script is executed.
classifyReviews()
#---------------------#
# Small inline sample text; not referenced anywhere else in this file.
trainingData = "Yeah baby I like it like that You gotta believe me when I tell you I said I like it like that"
# Path of the training corpus, resolved relative to the working directory.
filepath = 'harrypotter.txt'
# Read the whole corpus; the with-block guarantees the handle is closed
# even if the read fails.
with open(filepath,'r') as file:
    fileString = file.read()
# Spaces are deliberately left in the text: train() splits on whitespace,
# so stripping them would merge the whole corpus into a single word.
#print(fileString)
def train(string):
    '''Build a word-successor table from a piece of text.

    The text is split on whitespace. Each word that is directly followed by
    another word becomes a key; its value is the list of words that came
    right after it, in order of appearance and with repeats kept. The final
    word of the text is only ever recorded as a successor.

    Returns a plain dict mapping word -> list of following words.
    '''
    words = string.split()
    followers = {}
    # Pair every word with the one right after it.
    for current, following in zip(words, words[1:]):
        followers.setdefault(current, []).append(following)
    return followers
# Disabled draft of a recursive generate() function. It sits inside a bare
# string literal, so nothing in it is ever executed; generateSen is the
# generator this script actually calls.
'''
def generate(model, firstWords, sens):
generates specified number of sentences by drawing from random values of each key, starting with the firstWord as the first key.
# model = dictionary
# the key
# numwords number of total words
if sens == -1:
return "."
else:
vals = model[firstWords[sens-1]] # a list of values (possible continuing words) based on firstWord
valsLength = len(vals) # length of vals in order to find random index
if valsLength > 1:
ind = random.randint(0, valsLength - 1) # using random method to find a random index
elif valsLength == 1:
ind = 0
word = vals[ind]
if valsLength == 0:
word = '.'
# print(word)
return word + ' ' + generate(model, word,sens-1) + ' '
elif ind == -1:
# firstWords.remove(firstWords[0])
return word + '.'+ generate(model, word, sens-1)
'''
def generateWords(model, firstWord, numWords):
    '''Generate a run of up to numWords words by walking the successor table.

    model     -- mapping of word -> list of words that may follow it
                 (the structure returned by train()).
    firstWord -- word the run starts with; it is always the first word of
                 the output when numWords is at least 1.
    numWords  -- maximum number of words to emit; zero or a negative value
                 yields an empty string.

    Returns the words joined by single spaces. The run stops early when it
    reaches a word that has no recorded successors, instead of raising.
    '''
    words = []
    current = firstWord
    # Iterative walk: the run length is not limited by the recursion depth.
    for _ in range(numWords):
        words.append(current)
        # .get() avoids a KeyError on dead-end words and never inserts keys
        # if the caller happens to pass a defaultdict.
        successors = model.get(current)
        if not successors:
            break
        current = random.choice(successors)
    return " ".join(words)
def generateSen(model, firstWords, result = "", maxWords = 10000):
    '''Generate one sentence per starter word by walking the successor table.

    model      -- mapping of word -> list of words that may follow it
                  (the structure returned by train()).
    firstWords -- sequence of words that open each sentence, used in order.
                  The sequence is read only; it is not modified.
    result     -- text generated so far; generation continues from its last
                  whitespace-delimited word. Leave empty to start fresh.
    maxWords   -- upper bound on the number of randomly chosen words, so a
                  model with no reachable sentence-ending word cannot loop
                  forever.

    A chosen word that contains '.' ends the current sentence; the next
    starter (if any) is then appended and generation continues from it.

    Returns the accumulated text, each word followed by a single space.
    Generation stops when a sentence ends and no starters remain, when the
    current word has no recorded successors, or when maxWords is reached.
    '''
    # Iterate over a snapshot so the caller's list is left untouched.
    starters = iter(tuple(firstWords))
    generated = 0
    while generated < maxWords:
        tokens = result.rsplit(None, 1)
        if not tokens:
            # Nothing to continue from yet: seed the text with a starter.
            starter = next(starters, None)
            if starter is None:
                break
            result += starter + " "
            continue
        # .get() turns an unknown or dead-end word into a clean stop.
        successors = model.get(tokens[-1])
        if not successors:
            break
        word = random.choice(successors)
        generated += 1
        result += word + " "
        if '.' in word:
            # Sentence finished: open the next one, or stop if none remain.
            starter = next(starters, None)
            if starter is None:
                break
            result += starter + " "
    return result
# Build the successor table from the text read from harrypotter.txt.
e = train(fileString)
# Generate text with one sentence per starter word. This rebinds `a`, which
# earlier in the script held the format_sentence() result.
a = generateSen(e, ["Dursley","Potter"])
#b = generateWords(e,"Barry", 500)
# generateSen and generateWords take different inputs: generateWords is driven
# by a word count, generateSen by a list of sentence starters (a sentence ends
# at a chosen word containing ".").
#print(a)