File tree 1 file changed +33
-0
lines changed
1 file changed +33
-0
lines changed Original file line number Diff line number Diff line change
1
+ # -*- coding: utf-8 -*-
2
+ """mapper.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1yCwGyMXJT2qt3_58aLOOiJXO0GIaPcJd
8
+ """
9
+
10
+
11
+
12
+ import sys
13
+ import io
14
+ import re
15
+ import nltk
16
+ nltk .download ('stopwords' ,quiet = True )
17
+ from nltk .corpus import stopwords
# Characters that are turned into spaces (word separators) during
# tokenization.  Written as a raw string so the backslash is a literal
# backslash; the previous non-raw literal relied on the invalid escape
# sequence "\,", which newer Python versions warn about.
punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''

# Compiled once instead of being looked up again for every input line.
_NON_WORD_RE = re.compile(r'[^\w\s]')

# Translation table mapping every punctuation character to a single space,
# so separators are replaced in one pass rather than one replace() per hit.
_PUNCT_TO_SPACE = str.maketrans(punctuations, ' ' * len(punctuations))


def tokenize(line):
    """Split one raw input line into lowercase word tokens.

    The line is stripped, every character that is neither a word character
    nor whitespace is deleted, the result is lowercased, any remaining
    character listed in ``punctuations`` becomes a space, and the line is
    split on whitespace.

    NOTE: because the regex deletes (rather than spaces out) non-word
    characters, "well-known" yields the single token "wellknown".  The
    underscore is the only ``punctuations`` character that ``\\w`` matches,
    so it is the only one that actually reaches the translation step and
    acts as a separator.

    Args:
        line: One line of input text, with or without a trailing newline.

    Returns:
        A list of lowercase tokens; empty for blank input.
    """
    cleaned = _NON_WORD_RE.sub('', line.strip()).lower()
    return cleaned.translate(_PUNCT_TO_SPACE).split()


def map_lines(lines, stop_words):
    """Yield one output record per token that is not a stop word.

    Args:
        lines: Iterable of input text lines.
        stop_words: Container of lowercase words to drop from the output.

    Yields:
        Records of the form ``"<word>\\t 1"``.
    """
    for line in lines:
        for word in tokenize(line):
            if word not in stop_words:
                # NOTE(review): the separator is a tab followed by a space,
                # kept exactly as before in case the consumer of this
                # output depends on it - confirm before tightening.
                yield '%s\t %s' % (word, 1)


def main():
    """Read text from stdin and print one record per non-stop-word token."""
    stop_words = set(stopwords.words('english'))
    # stdin is decoded as latin1, under which every byte value is a valid
    # character, so decoding cannot raise on arbitrary input bytes.
    input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='latin1')
    for record in map_lines(input_stream, stop_words):
        print(record)


if __name__ == '__main__':
    main()
You can’t perform that action at this time.
0 commit comments