You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: seq.py
+1-36
Original file line number
Diff line number
Diff line change
@@ -17,7 +17,6 @@
17
17
# ### 4.1.1
18
18
# Define a function that splits a text into sentences (on ".", "!", "?", etc.)
19
19
20
-
# In[ ]:
21
20
22
21
defto_sentence(szoveg):
23
22
mondatok= []
@@ -29,7 +28,6 @@ def to_sentence(szoveg):
29
28
returnmondatok
30
29
31
30
32
-
# In[9]:
33
31
34
32
defmondatokra(szoveg):
35
33
mondatok= []
@@ -41,19 +39,16 @@ def mondatokra(szoveg):
41
39
returnmondatok
42
40
43
41
44
-
# In[ ]:
45
42
46
43
szoveg=open('data/sample_text.txt').read()
47
44
48
45
49
-
# In[ ]:
50
46
51
47
mondatokra(szoveg)[-3:]
52
48
53
49
54
50
# Define a function that splits a sentence into words and strips punctuation marks (",", ";", etc.) from the edges of each word.
55
51
56
-
# In[11]:
57
52
58
53
defto_words(mondat):
59
54
szavak=mondat.split()
@@ -65,7 +60,6 @@ def to_words(mondat):
65
60
66
61
# Combine the previous two functions into one that takes a filename as its argument and returns the text in the file as a list of lists. Test it on the file "data/sample_text.txt".
67
62
68
-
# In[4]:
69
63
70
64
defprocess(fajl):
71
65
kimenet= []
@@ -76,20 +70,16 @@ def process(fajl):
76
70
returnkimenet
77
71
78
72
79
-
# In[ ]:
80
73
81
74
adat=feldolgoz('Sequences/data/sample_text.txt')
82
75
83
76
84
-
# In[ ]:
85
77
86
-
adat[:3]
87
78
88
79
89
80
# ### 4.1.2
90
81
# Use the functions defined in __4.1.1__ and define a function that goes through a text and replaces all proper names (capitalized words not at the beginning of a sentence) with "Joe". Print the first few sentences to test your solution.
91
82
92
-
# In[5]:
93
83
94
84
defjoe(fajl):
95
85
kimenet= []
@@ -107,15 +97,13 @@ def joe(fajl):
107
97
108
98
109
99
110
-
# In[12]:
111
100
112
101
joe('Sequences/data/sample_text.txt')[-3:]
113
102
114
103
115
104
# ### 4.1.3
116
105
# Load the sample text using your function from __4.1.1__ and create a game where the user is shown half of a word in a small context (e.g. "_Many solu\*\*\*\*\* were suggested_") and has to guess the full word (don't worry about randomization; your solution can come up with the same questions every time).
117
106
118
-
# In[ ]:
119
107
120
108
121
109
@@ -126,7 +114,6 @@ def joe(fajl):
126
114
# ### 4.2.1
127
115
# Define a function that takes as its input a list of $n$ lists of $n$ numbers (a square matrix) and decides if it is symmetric (i.e. $A[i,j] == A[j,i]$ for all $i, j$).
128
116
129
-
# In[ ]:
130
117
131
118
defis_symmetric(matrix):
132
119
n=len(matrix)
@@ -136,7 +123,6 @@ def is_symmetric(matrix):
136
123
...
137
124
138
125
139
-
# In[ ]:
140
126
141
127
test_matrix1= [[1,2], [3,4]]
142
128
test_matrix2= [[1,2], [2,1]]
@@ -147,7 +133,6 @@ def is_symmetric(matrix):
147
133
# ### 4.2.2
148
134
# Define a function that takes a list containing lists of equal length (i.e. a table of size $n\times k$) and "transposes" it, creating a table of size $k\times n$.
# Define a function that takes a list and string, then returns all elements that start with the string, along with their indices in the list.
189
171
190
-
# In[ ]:
191
-
192
-
193
172
194
173
195
174
# ## 4.3 Dictionaries
@@ -198,7 +177,6 @@ def transpose(matrix):
198
177
# ### 4.3.1
199
178
# Use a dictionary to count words in our sample text (use your text processing functions!). Then print the most common words, along with their frequencies!
200
179
201
-
# In[ ]:
202
180
203
181
204
182
@@ -207,15 +185,13 @@ def transpose(matrix):
207
185
208
186
# Define a function that performs the factorial operation ($n!$) but caches all results so that each call requires the fewest possible multiplications.
209
187
210
-
# In[ ]:
211
188
212
189
213
190
214
191
215
192
# ### 4.3.3
216
193
# Read the dataset in "data/movies.tsv" and store it in a dictionary whose keys are genres and whose values are lists of (title, year) tuples.
217
194
218
-
# In[ ]:
219
195
220
196
defprocess_data(fn):
221
197
data= {}
@@ -232,28 +208,23 @@ def process_data(fn):
232
208
returndata
233
209
234
210
235
-
# In[ ]:
236
211
237
212
data=process_data("data/movies.tsv")
238
213
239
214
240
-
# In[ ]:
241
-
242
215
data['horror'][:5]
243
216
244
217
245
218
# ### 4.3.4
246
219
# Process the movies dataset (the original file or the dictionary built in __4.3.3__) and build a dictionary that indexes movies by the first letter of the title. Then create a small interface for querying (using the input function)
247
220
248
-
# In[ ]:
249
221
250
222
251
223
252
224
253
225
# ### 4.3.5
254
226
# Build an incremental search of movie titles: users should be able to narrow the set of movies with every character they type. You may create deeply nested dictionaries beforehand or process the data on-the-fly.
255
227
256
-
# In[ ]:
257
228
258
229
defbuild_index(data):
259
230
letter_index= {}
@@ -286,7 +257,6 @@ def search(fn):
286
257
printletter_index[letter1][letter2][letter3]
287
258
288
259
289
-
# In[ ]:
290
260
291
261
defunify_dicts(dict1, dict2):
292
262
dict3= {}
@@ -331,13 +301,12 @@ def search(fn):
331
301
else:
332
302
letter=raw_input()
333
303
ifletternotincurr_dict:
334
-
print'not found :('
304
+
print'not found('
335
305
break
336
306
curr_dict=curr_dict[letter]
337
307
338
308
339
309
340
-
# In[ ]:
341
310
342
311
search("data/movies.tsv")
343
312
@@ -348,23 +317,20 @@ def search(fn):
348
317
# ### 4.4.1
349
318
# Modify the word counter in __4.3.1__ so that it uses a defaultdict.
350
319
351
-
# In[ ]:
352
320
353
321
354
322
355
323
356
324
# ### 4.4.2
357
325
# Modify the word counter in __4.4.1__ so that it uses a Counter.
358
326
359
-
# In[ ]:
360
327
361
328
362
329
363
330
364
331
# ### 4.4.3
365
332
# Define a function that queries users for their last name, first name, year of birth, and hobby, and populates an OrderedDict whose keys are the last names and values are dictionaries with four keys each. If a second person with the same last name is encountered, both should now have keys of the form "lastname_firstname". If the same person is encountered multiple times, their data should be updated. Then test someone else's solution and ask them to test yours.
366
333
367
-
# In[ ]:
368
334
369
335
defquery():
370
336
last_name=raw_input()
@@ -396,7 +362,6 @@ def query():
396
362
# ### 4.4.4
397
363
# Convert the database built in __4.4.3__ into a list of namedtuples.
0 commit comments