Skip to content

Commit 5297715

Browse files
authored
Add files via upload
1 parent fe9760f commit 5297715

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed

mapper.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# -*- coding: utf-8 -*-
2+
"""mapper.ipynb
3+
4+
Automatically generated by Colaboratory.
5+
6+
Original file is located at
7+
https://colab.research.google.com/drive/1yCwGyMXJT2qt3_58aLOOiJXO0GIaPcJd
8+
"""
9+
10+
11+
12+
import sys
13+
import io
14+
import re
15+
import nltk
16+
nltk.download('stopwords',quiet=True)
17+
from nltk.corpus import stopwords
18+
# Characters that are turned into word separators after the regex clean-up.
# Raw string: the original non-raw literal contained the invalid escape
# sequence "\,", which newer Python versions warn about. The value is unchanged.
punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''

# Compiled once instead of being looked up for every input line.
_NON_WORD_RE = re.compile(r'[^\w\s]')

# One-pass replacement table: every punctuation character becomes a space.
_PUNCT_TO_SPACE = str.maketrans({ch: ' ' for ch in punctuations})


def tokenize(line):
    """Return the lower-cased words found in *line*.

    Steps, in order: strip surrounding whitespace, delete every character
    that is neither a word character nor whitespace, lower-case the text,
    replace any remaining character listed in ``punctuations`` with a space,
    and split on whitespace.

    Because the regex deletes (rather than spaces out) punctuation, text
    such as ``"a,b"`` yields the single token ``"ab"``. The underscore is a
    word character for the regex, so it survives that step and is then
    turned into a separator by the punctuation table.
    """
    cleaned = _NON_WORD_RE.sub('', line.strip()).lower()
    # NOTE(review): apostrophes are deleted before the stop-word check, so a
    # contraction like "don't" becomes "dont" -- confirm whether the
    # stop-word list is expected to match such words.
    return cleaned.translate(_PUNCT_TO_SPACE).split()


def map_line(line, stop_words):
    """Yield one tab-separated ``word<TAB>1`` record per kept word in *line*.

    Words contained in *stop_words* are skipped.
    """
    for word in tokenize(line):
        if word not in stop_words:
            yield '%s\t%s' % (word, 1)


def main():
    """Read lines from standard input and print a record for each kept word."""
    stop_words = set(stopwords.words('english'))
    # Decode stdin as latin1, as the original script did.
    input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='latin1')
    for line in input_stream:
        for record in map_line(line, stop_words):
            print(record)


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)