modify branch

Pal Majoros · Pal Majoros · commit 98705e6bb5ae · 2016-11-28T10:30:06.000Z
diff --git a/seq.py b/seq.py
@@ -17,7 +17,6 @@
 # ### 4.1.1
 # Define a function that splits a text into sentences (on ".", "!", "?", etc.)
 
-# In[ ]:
 
 def to_sentence(szoveg):
     mondatok = []
@@ -29,7 +28,6 @@ def to_sentence(szoveg):
     return mondatok
 
 
-# In[9]:
 
 def mondatokra(szoveg):
     mondatok = []
@@ -41,19 +39,16 @@ def mondatokra(szoveg):
     return mondatok
 
 
-# In[ ]:
 
 szoveg = open('data/sample_text.txt').read()
 
 
-# In[ ]:
 
 mondatokra(szoveg)[-3:]
 
 
 # Define a function that splits sentences into words, and strips punctuation marks (",", ";", etc.) from edges of words.
 
-# In[11]:
 
 def to_words(mondat):
     szavak = mondat.split()
@@ -65,7 +60,6 @@ def to_words(mondat):
 
 # Combine the last two functions into one that takes a filename as its argument and returns the text in the file as a list of lists. Test it on the file "data/sample_text.txt".
 
-# In[4]:
 
 def process(fajl):
     kimenet = []
@@ -76,20 +70,16 @@ def process(fajl):
     return kimenet
 
 
-# In[ ]:
 
 # Build the list-of-lists representation of the sample text using the
 # file-level `process` function defined just above.
 adat = process('Sequences/data/sample_text.txt')
 
 
-# In[ ]:
 
-adat[:3]
 
 
 # ### 4.1.2
 # Use the functions defined in __4.1.1__ and define a function that goes through a text and replaces all proper names (capitalized words not at the beginning of a sentence) with "Joe". Print the first few sentences to test your solution.
 
-# In[5]:
 
 def joe(fajl):
     kimenet = []
@@ -107,15 +97,13 @@ def joe(fajl):
 
 
 
-# In[12]:
 
 joe('Sequences/data/sample_text.txt')[-3:]
 
 
 # ### 4.1.3
 # Load the sample text using your function from __4.1.1__ and create a game where the user is shown a half of a word in a small context (e.g. "_Many solu\*\*\*\*\* were suggested_") and has to guess the full word (don't worry about randomization, your solution can come up with the same questions every time).
 
-# In[ ]:
 
 
 
@@ -126,7 +114,6 @@ def joe(fajl):
 # ### 4.2.1
 # Define a function that takes as its input a list of $n$ lists of $n$ numbers (a square matrix) and decides if it is symmetric (i.e. $A[i,j] == A[j,i]$ for all $i, j$).
 
-# In[ ]:
 
 def is_symmetric(matrix):
     n = len(matrix)
@@ -136,7 +123,6 @@ def is_symmetric(matrix):
             ...
 
 
-# In[ ]:
 
 test_matrix1 = [[1,2], [3,4]]
 test_matrix2 = [[1,2], [2,1]]
@@ -147,7 +133,6 @@ def is_symmetric(matrix):
 # ### 4.2.2
 # Define a function that takes a list containing lists of equal length (i.e. a table of size $n\times k$) and "transposes" it, creating a table of size $k\times n$.
 
-# In[ ]:
 
 def transpose(matrix):
     n = len(matrix)
@@ -165,20 +150,17 @@ def transpose(matrix):
 # ### 4.2.3
 # Redo 4.2.2 using nested list comprehension!
 
-# In[ ]:
 
 def transpose(matrix):
     """Return the transpose of a table given as a list of equal-length rows.

     Args:
         matrix: A list of n rows, each a list of k elements.

     Returns:
         A new list of k rows with n elements each, where element [j][i]
         of the result is element [i][j] of the input. An empty input
         yields an empty list.
     """
     if not matrix:
         # No first row exists to measure the width from, so there is
         # nothing to transpose.
         return []
     # The width is taken from the first row; each output row gathers the
     # j-th element of every input row.
     return [[row[j] for row in matrix] for j in range(len(matrix[0]))]
 
 
-# In[ ]:
 
 test_matrix = [[1,2,3], [4,5,6]]
 
 
-# In[ ]:
 
 transpose(test_matrix)
 
@@ -187,9 +169,6 @@ def transpose(matrix):
 
 # Define a function that takes a list and a string, then returns all elements that start with the string, along with their indices in the list.
 
-# In[ ]:
-
-
 
 
 # ## 4.3 Dictionaries
@@ -198,7 +177,6 @@ def transpose(matrix):
 # ### 4.3.1
 # Use a dictionary to count words in our sample text (use your text processing functions!). Then print the most common words, along with their frequencies!
 
-# In[ ]:
 
 
 
@@ -207,15 +185,13 @@ def transpose(matrix):
 
 # Define a function that performs the factorial operation ($n!$) but caches all results so that each call requires the least possible number of multiplications.
 
-# In[ ]:
 
 
 
 
 # ### 4.3.3
 # Read the dataset in "data/movies.tsv" and store it in a dictionary whose keys are genres and whose values are lists of (title, year) tuples.
 
-# In[ ]:
 
 def process_data(fn):
     data = {}
@@ -232,28 +208,23 @@ def process_data(fn):
     return data
 
 
-# In[ ]:
 
 data = process_data("data/movies.tsv")
 
 
-# In[ ]:
-
 data['horror'][:5]
 
 
 # ### 4.3.4
 # Process the movies dataset (the original file or the dictionary built in __4.3.3__) and build a dictionary that indexes movies by the first letter of the title. Then create a small interface for querying (using the input function)
 
-# In[ ]:
 
 
 
 
 # ### 4.3.5
 # Build an incremental search of movie titles: users should be able to narrow the set of movies with every character they type. You may create deeply nested dictionaries beforehand or process the data on-the-fly.
 
-# In[ ]:
 
 def build_index(data):
     letter_index = {}
@@ -286,7 +257,6 @@ def search(fn):
     print letter_index[letter1][letter2][letter3]
 
 
-# In[ ]:
 
 def unify_dicts(dict1, dict2):
     dict3 = {}
@@ -331,13 +301,12 @@ def search(fn):
         else:
             letter = raw_input()
             if letter not in curr_dict:
-                print 'not found :('
+                print 'not found('
                 break
             curr_dict = curr_dict[letter]
 
 
 
-# In[ ]:
 
 search("data/movies.tsv")
 
@@ -348,23 +317,20 @@ def search(fn):
 # ### 4.4.1
 # Modify the word counter in __4.3.1__ so that it uses a defaultdict.
 
-# In[ ]:
 
 
 
 
 # ### 4.4.2
 # Modify the word counter in __4.4.1__ so that it uses a Counter.
 
-# In[ ]:
 
 
 
 
 # ### 4.4.3
 # Define a function that queries users for their last name, first name, year of birth, and hobby, and populates an OrderedDict whose keys are the last names and values are dictionaries with four keys each. If a second person with the same last name is encountered, both should now have keys of the form "lastname_firstname". If the same person is encountered multiple times, his/her data should be updated. Then test the solution of someone else and ask her to test yours.
 
-# In[ ]:
 
 def query():
     last_name = raw_input()
@@ -396,7 +362,6 @@ def query():
 # ### 4.4.4
 # Convert the database built in __4.4.3__ into a list of namedtuples.
 
-# In[ ]: