Skip to content

Commit 98705e6

Browse files
author
Pal Majoros
committed
modify branch
1 parent f19bbf5 commit 98705e6

File tree

1 file changed

+1
-36
lines changed

1 file changed

+1
-36
lines changed

Diff for: seq.py

+1-36
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
# ### 4.1.1
1818
# Define a function that splits a text into sentences (on ".", "!", "?", etc.)
1919

20-
# In[ ]:
2120

2221
def to_sentence(szoveg):
2322
mondatok = []
@@ -29,7 +28,6 @@ def to_sentence(szoveg):
2928
return mondatok
3029

3130

32-
# In[9]:
3331

3432
def mondatokra(szoveg):
3533
mondatok = []
@@ -41,19 +39,16 @@ def mondatokra(szoveg):
4139
return mondatok
4240

4341

44-
# In[ ]:
4542

4643
szoveg = open('data/sample_text.txt').read()
4744

4845

49-
# In[ ]:
5046

5147
mondatokra(szoveg)[-3:]
5248

5349

5450
# Define a function that splits sentences into words, and strips punctuation marks (",", ";", etc.) from edges of words.
5551

56-
# In[11]:
5752

5853
def to_words(mondat):
5954
szavak = mondat.split()
@@ -65,7 +60,6 @@ def to_words(mondat):
6560

6661
# Use the last two functions in one that takes a filename as its argument and returns the text in the file as a list of lists. Test it on the file "data/sample_text.txt"
6762

68-
# In[4]:
6963

7064
def process(fajl):
7165
kimenet = []
@@ -76,20 +70,16 @@ def process(fajl):
7670
return kimenet
7771

7872

79-
# In[ ]:
8073

8174
adat = feldolgoz('Sequences/data/sample_text.txt')
8275

8376

84-
# In[ ]:
8577

86-
adat[:3]
8778

8879

8980
# ### 4.1.2
9081
# Use the functions defined in __4.1.1__ and define a function that goes through a text and replaces all proper names (capitalized words not at the beginning of a sentence) with "Joe". Print the first few sentences to test your solution.
9182

92-
# In[5]:
9383

9484
def joe(fajl):
9585
kimenet = []
@@ -107,15 +97,13 @@ def joe(fajl):
10797

10898

10999

110-
# In[12]:
111100

112101
joe('Sequences/data/sample_text.txt')[-3:]
113102

114103

115104
# ### 4.1.3
116105
# Load the sample text using your function from __4.1.1__ and create a game where the user is shown a half of a word in a small context (e.g. "_Many solu\*\*\*\*\* were suggested_") and has to guess the full word (don't worry about randomization, your solution can come up with the same questions every time).
117106

118-
# In[ ]:
119107

120108

121109

@@ -126,7 +114,6 @@ def joe(fajl):
126114
# ### 4.2.1
127115
# Define a function that takes as its input a list of $n$ lists of $n$ numbers (a square matrix) and decides if it is symmetric (i.e. $A[i,j] == A[j,i]$ for all $i, j$).
128116

129-
# In[ ]:
130117

131118
def is_symmetric(matrix):
132119
n = len(matrix)
@@ -136,7 +123,6 @@ def is_symmetric(matrix):
136123
...
137124

138125

139-
# In[ ]:
140126

141127
test_matrix1 = [[1,2], [3,4]]
142128
test_matrix2 = [[1,2], [2,1]]
@@ -147,7 +133,6 @@ def is_symmetric(matrix):
147133
# ### 4.2.2
148134
# Define a function that takes a list containing lists of equal length (i.e. a table of size $n\times k$) and "transposes" it, creating a table of size $k\times n$.
149135

150-
# In[ ]:
151136

152137
def transpose(matrix):
153138
n = len(matrix)
@@ -165,20 +150,17 @@ def transpose(matrix):
165150
# ### 4.2.3
166151
# Redo 4.2.2 using a nested list comprehension!
167152

168-
# In[ ]:
169153

170154
def transpose(matrix):
    """Return the transpose of a rectangular table given as a list of rows.

    An n x k table (n rows of k items each) becomes a k x n table whose
    j-th row holds the j-th item of every input row, in the original row
    order.  The input is not modified.

    Args:
        matrix: list of equal-length lists.

    Returns:
        A new list of lists; ``[]`` when ``matrix`` has no rows.
    """
    # Without this guard, matrix[0] raises IndexError on an empty table.
    if not matrix:
        return []
    width = len(matrix[0])
    # Nested list comprehension: outer loop over columns, inner over rows.
    return [[row[col] for row in matrix] for col in range(width)]
174158

175159

176-
# In[ ]:
177160

178161
test_matrix = [[1,2,3], [4,5,6]]
179162

180163

181-
# In[ ]:
182164

183165
transpose(test_matrix)
184166

@@ -187,9 +169,6 @@ def transpose(matrix):
187169

188170
# Define a function that takes a list and a string, then returns all elements that start with the string, along with their indices in the list.
189171

190-
# In[ ]:
191-
192-
193172

194173

195174
# ## 4.3 Dictionaries
@@ -198,7 +177,6 @@ def transpose(matrix):
198177
# ### 4.3.1
199178
# Use a dictionary to count words in our sample text (use your text processing functions!). Then print the most common words, along with their frequencies!
200179

201-
# In[ ]:
202180

203181

204182

@@ -207,15 +185,13 @@ def transpose(matrix):
207185

208186
# Define a function that performs the factorial operation ($n!$) but caches all results so that each call requires the least possible number of multiplications.
209187

210-
# In[ ]:
211188

212189

213190

214191

215192
# ### 4.3.3
216193
# Read the dataset in "data/movies.tsv" and store it in a dictionary whose keys are genres and whose values are lists of (title, year) tuples.
217194

218-
# In[ ]:
219195

220196
def process_data(fn):
221197
data = {}
@@ -232,28 +208,23 @@ def process_data(fn):
232208
return data
233209

234210

235-
# In[ ]:
236211

237212
data = process_data("data/movies.tsv")
238213

239214

240-
# In[ ]:
241-
242215
data['horror'][:5]
243216

244217

245218
# ### 4.3.4
246219
# Process the movies dataset (the original file or the dictionary built in __4.3.3__) and build a dictionary that indexes movies by the first letter of the title. Then create a small interface for querying (using the input function)
247220

248-
# In[ ]:
249221

250222

251223

252224

253225
# ### 4.3.5
254226
# Build an incremental search of movie titles: users should be able to narrow the set of movies with every character they type. You may create deeply nested dictionaries beforehand or process the data on-the-fly.
255227

256-
# In[ ]:
257228

258229
def build_index(data):
259230
letter_index = {}
@@ -286,7 +257,6 @@ def search(fn):
286257
print letter_index[letter1][letter2][letter3]
287258

288259

289-
# In[ ]:
290260

291261
def unify_dicts(dict1, dict2):
292262
dict3 = {}
@@ -331,13 +301,12 @@ def search(fn):
331301
else:
332302
letter = raw_input()
333303
if letter not in curr_dict:
334-
print 'not found :('
304+
print 'not found('
335305
break
336306
curr_dict = curr_dict[letter]
337307

338308

339309

340-
# In[ ]:
341310

342311
search("data/movies.tsv")
343312

@@ -348,23 +317,20 @@ def search(fn):
348317
# ### 4.4.1
349318
# Modify the word counter in __4.3.1__ so that it uses a defaultdict.
350319

351-
# In[ ]:
352320

353321

354322

355323

356324
# ### 4.4.2
357325
# Modify the word counter in __4.4.1__ so that it uses a Counter.
358326

359-
# In[ ]:
360327

361328

362329

363330

364331
# ### 4.4.3
365332
# Define a function that queries users for their last name, first name, year of birth, and hobby, and populates an OrderedDict whose keys are the last names and values are dictionaries with four keys each. If a second person with the same last name is encountered, both should now have keys of the form "lastname_firstname". If the same person is encountered multiple times, his/her data should be updated. Then test the solution of someone else and ask her to test yours.
366333

367-
# In[ ]:
368334

369335
def query():
370336
last_name = raw_input()
@@ -396,7 +362,6 @@ def query():
396362
# ### 4.4.4
397363
# Convert the database built in __4.4.3__ into a list of namedtuples.
398364

399-
# In[ ]:
400365

401366

402367

0 commit comments

Comments
 (0)