Skip to content

Commit 66f9f16

Browse files
committed
added Tamil stopwords; resolves #199
1 parent 64d6b8e commit 66f9f16

File tree

2 files changed

+1
-1
lines changed

2 files changed

+1
-1
lines changed

index.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@
94 94
<package id="snowball_data" name="Snowball Data" languages="Danish, Dutch, English, Finnish, French, German, Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, Spanish, Swedish, Turkish" webpage="https://github.com/snowballstem/snowball-data" unzip="0" unzipped_size="36360836" size="6785405" checksum="cba1cf17b887789e6df5f2c87c6e56fb" subdir="stemmers" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/stemmers/snowball_data.zip" />
95 95
<package id="spanish_grammars" name="Grammars for Spanish" author="Kepa Sarasola" languages="Spanish" unzip="1" unzipped_size="3980" size="4047" checksum="12f66b8e22beadd6ed202e95453465af" subdir="grammars" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/grammars/spanish_grammars.zip" />
96 96
<package id="state_union" name="C-Span State of the Union Address Corpus" webpage="http://www.c-span.org/executive/stateoftheunion.asp" copyright="public domain" license="public domain" unzip="1" unzipped_size="2073917" size="808757" checksum="044f2d20c592b17a26ac0102111833c9" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/state_union.zip" />
97-
<package id="stopwords" name="Stopwords Corpus" webpage="ftp://ftp.cs.cornell.edu/pub/smart/english.stop and http://snowball.tartarus.org/ and others" unzip="1" unzipped_size="85125" size="36150" checksum="d3a562d497c792147963bf9be8dcb945" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" />
97+
<package id="stopwords" name="Stopwords Corpus" webpage="ftp://ftp.cs.cornell.edu/pub/smart/english.stop and http://snowball.tartarus.org/ and others" unzip="1" unzipped_size="87088" size="36779" checksum="0bba32b6547378c1ed3c0a345ec22895" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip" />
98 98
<package id="subjectivity" name="Subjectivity Dataset v1.0" author="Bo Pang and Lillian Lee" copyright="Copyright (C) 2004 Bo Pang and Lillian Lee" license="Creative Commons Attribution 4.0 International" licenseurl="http://creativecommons.org/licenses/by/4.0/" webpage=" http://www.cs.cornell.edu/People/pabo/people/pabo/movie-review-data" unzip="1" unzipped_size="1303352" size="521628" checksum="a81a44513903ba6bb86f85aeff149561" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/subjectivity.zip" />
99 99
<package id="swadesh" name="Swadesh Wordlists" webpage="http://en.wiktionary.org/wiki/Appendix:Swadesh_list" license="GNU Free Documentation License" unzip="1" unzipped_size="39998" size="22828" checksum="6612ccb71f327e85780dc7813dee40f6" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/swadesh.zip" />
100 100
<package id="switchboard" name="Switchboard Corpus Sample" sample="True" license="Permission is granted for use of this material in accordance with the Open Content License [http://opencontent.org/opl.shtml]. This corpus contains transcripts and annotations for 36 calls from the Switchboard Corpus [http://www.ldc.upenn.edu/Catalog/LDC93S7.html]." unzip="1" unzipped_size="2541179" size="791161" checksum="878df010a9f2c2d0a6546a8365f10595" subdir="corpora" url="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/switchboard.zip" />

packages/corpora/stopwords.zip

629 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)