Commit e7fedc1

Initial commit
1 parent d73fc58 commit e7fedc1

21 files changed: +475 -0 lines changed

Diff for: Readme.md

+16
@@ -0,0 +1,16 @@
cr-nlp provides a set of tools for natural language processing (NLP), including text tokenization, sentiment analysis, word lemmatization, and stemming. Built on top of popular NLP libraries such as NLTK and Hugging Face's Transformers, it simplifies common NLP tasks for developers and researchers.

Features

Tokenize Text: Break down text into tokens using pre-trained models from Hugging Face's Transformers library.
Analyze Sentiment: Determine the sentiment of text using both Transformers and NLTK's VADER model.
Lemmatize Words: Convert words to their base form based on their part of speech.
Stem Words: Reduce words to their root form using the Porter Stemming algorithm.

Installation

To install cr-nlp, run the following command:

pip install cr-nlp

Dependencies:

Python 3.6+
nltk
transformers
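
As a quick sanity check, here is a usage sketch for the Transformers-backed helpers. The import path is an assumption inferred from the repository layout (myfunctions.py at the package root); the README itself does not show one, and the example label reflects the default nlptown model's star-rating output.

# Hypothetical quick start; the import path is assumed from the repo layout.
from myfunctions import tokenize_text, analyze_sentiment

print(tokenize_text("Transformers make tokenization easy."))
# prints a list of WordPiece tokens from bert-base-uncased

sentiment, confidence = analyze_sentiment("I love this library!")
print(sentiment, confidence)
# e.g. a label such as '5 stars' and a confidence score with the default model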

Diff for: __init__.py

Whitespace-only changes.

Diff for: __pycache__/myfunctions.cpython-311.pyc

5.65 KB
Binary file not shown.

Diff for: build/lib/__init__.py

Whitespace-only changes.

Diff for: build/lib/cr_nlp.egg-info/PKG-INFO

+21
@@ -0,0 +1,21 @@
Metadata-Version: 2.1
Name: cr_nlp
Version: 0.1.6
Summary: Library for NLP tasks by Aryan Oberoi
Home-page: https://github.com/aryanoberoi/IITD-work/tree/main/library/test_library
Author: Aryan Oberoi
Author-email: [email protected]
License: MIT
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Operating System :: OS Independent
Description-Content-Type: text/markdown
Requires-Dist: transformers
Requires-Dist: nltk

Library for NLP tasks by Aryan Oberoi
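
SOURCES.txt below lists a setup.py that this commit view does not show. A minimal sketch of a setup() call that would produce the metadata above is given here; every field is inferred from PKG-INFO, and none of it is taken from the actual file.

# Hypothetical reconstruction from PKG-INFO; the real setup.py is not shown in this diff.
from setuptools import setup

setup(
    name="cr_nlp",
    version="0.1.6",
    description="Library for NLP tasks by Aryan Oberoi",
    long_description=open("Readme.md").read(),
    long_description_content_type="text/markdown",
    author="Aryan Oberoi",
    url="https://github.com/aryanoberoi/IITD-work/tree/main/library/test_library",
    license="MIT",
    py_modules=["myfunctions"],  # assumed from the flat module layout
    install_requires=["transformers", "nltk"],
    classifiers=[
        "Intended Audience :: Developers",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
)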

Diff for: build/lib/cr_nlp.egg-info/SOURCES.txt

+9
@@ -0,0 +1,9 @@
__init__.py
myfunctions.py
setup.py
test_myfunctions.py
cr_nlp.egg-info/PKG-INFO
cr_nlp.egg-info/SOURCES.txt
cr_nlp.egg-info/dependency_links.txt
cr_nlp.egg-info/requires.txt
cr_nlp.egg-info/top_level.txt

Diff for: build/lib/cr_nlp.egg-info/dependency_links.txt

+1
@@ -0,0 +1 @@

Diff for: build/lib/cr_nlp.egg-info/requires.txt

+2
@@ -0,0 +1,2 @@
transformers
nltk

Diff for: build/lib/cr_nlp.egg-info/top_level.txt

+1
@@ -0,0 +1 @@

Diff for: build/lib/myfunctions.py

+153
@@ -0,0 +1,153 @@
from transformers import AutoTokenizer, pipeline
import nltk
from nltk import pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.sentiment.vader import SentimentIntensityAnalyzer


def tokenize_text(text, model_name="bert-base-uncased"):
    """
    Tokenize a given text using the Hugging Face Transformers library.

    Parameters:
    - text (str): The input text to tokenize.
    - model_name (str): The name of the pre-trained model to use for tokenization.
      Default is "bert-base-uncased".

    Returns:
    - tokens (list): List of tokens obtained by tokenizing the input text.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokens = tokenizer.tokenize(text)
    return tokens


def analyze_sentiment(text, model_name="nlptown/bert-base-multilingual-uncased-sentiment"):
    """
    Analyze sentiment of a given text using a pre-trained sentiment analysis model.

    Parameters:
    - text (str): The input text for sentiment analysis.
    - model_name (str): The name of the pre-trained sentiment analysis model.
      Default is "nlptown/bert-base-multilingual-uncased-sentiment".

    Returns:
    - sentiment (str): The predicted label. For the default model this is a star
      rating from "1 star" to "5 stars"; other models may use labels such as
      "POSITIVE" or "NEGATIVE".
    - confidence (float): The confidence score associated with the predicted sentiment.
    """
    sentiment_analyzer = pipeline('sentiment-analysis', model=model_name)
    result = sentiment_analyzer(text)

    sentiment = result[0]['label']
    confidence = result[0]['score']
    return sentiment, confidence


def get_wordnet_pos(tag):
    """Convert an NLTK (Penn Treebank) POS tag to a WordNet POS constant."""
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return None


def lemmatize_words(words):
    """
    Lemmatize a list of words.

    This function takes a list of words, determines the part of speech for each
    word, and then lemmatizes it (converts it to its base or dictionary form)
    according to its part of speech. It uses NLTK's WordNetLemmatizer together
    with part-of-speech tagging to lemmatize each word accurately.

    Parameters:
    - words: A list of words (strings) to lemmatize.

    Returns:
    - A list of lemmatized words.

    Note: This function requires NLTK's WordNetLemmatizer, pos_tag, and the
    wordnet corpus, plus get_wordnet_pos(tag) to convert between the NLTK and
    WordNet part-of-speech tagging conventions.
    """
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = []

    # Get a POS tag for each word
    pos_tagged = pos_tag(words)

    for word, tag in pos_tagged:
        # Fall back to treating the word as a noun when the tag has no WordNet equivalent
        wordnet_pos = get_wordnet_pos(tag) or wordnet.NOUN
        lemmatized_word = lemmatizer.lemmatize(word, pos=wordnet_pos)
        lemmatized_words.append(lemmatized_word)

    return lemmatized_words


def analyze_sentiment_vader(text):
    """
    Analyzes the sentiment of a given text using VADER sentiment analysis.

    Parameters:
    - text: A string containing the text to analyze.

    Returns:
    - A dictionary containing the scores for negative, neutral, positive, and
      compound sentiments.
    """
    # Make sure the VADER lexicon is available (downloads on first call)
    nltk.download('vader_lexicon')
    sid = SentimentIntensityAnalyzer()
    sentiment_scores = sid.polarity_scores(text)
    return sentiment_scores


def stem_words(words):
    """
    Stems a list of words.

    This function applies the Porter Stemming algorithm to a list of words,
    reducing each word to its root or stem form. It's particularly useful in
    natural language processing and search applications where the exact form of
    a word is less important than its root meaning.

    Parameters:
    - words: A list of words (strings) to be stemmed.

    Returns:
    - A list containing the stemmed version of each input word.

    Example:
    >>> stem_words(["running", "jumps", "easily"])
    ['run', 'jump', 'easili']

    Note: This function requires NLTK's PorterStemmer.
    """
    # Initialize the Porter Stemmer
    stemmer = PorterStemmer()

    # Stem each word in the list
    stemmed_words = [stemmer.stem(word) for word in words]
    return stemmed_words

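The NLTK-backed helpers need corpus data that the module does not fetch itself (only vader_lexicon is downloaded, inside analyze_sentiment_vader). A sketch of the assumed setup and expected behavior:

import nltk
# Assumed setup step: pos_tag and WordNetLemmatizer need these data packages,
# and the library does not download them on its own.
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('omw-1.4')  # needed by some NLTK versions for the lemmatizer

from myfunctions import lemmatize_words, stem_words, analyze_sentiment_vader

print(lemmatize_words(["running", "geese"]))
# e.g. ['run', 'goose'], depending on the tags pos_tag assigns
print(stem_words(["running", "jumps", "easily"]))
# ['run', 'jump', 'easili'] (the docstring's own example)
print(analyze_sentiment_vader("This library is great!"))
# dict with 'neg', 'neu', 'pos', and 'compound' scores
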
Diff for: build/lib/test_myfunctions.py

+5
@@ -0,0 +1,5 @@
from myfunctions import analyze_sentiment

# Smoke test: prompt for text and print the (label, confidence) pair
text = input("Enter text to analyze: ")
print(analyze_sentiment(text))
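
The file name suggests a unit test, though as committed it is an interactive script. A non-interactive variant that could run under pytest might look like the sketch below; the assertions only check the return types, since the exact label format depends on the model.

# Hypothetical pytest-style rewrite; downloads the default model on first run.
from myfunctions import analyze_sentiment

def test_analyze_sentiment_returns_label_and_score():
    sentiment, confidence = analyze_sentiment("I love this library!")
    assert isinstance(sentiment, str)
    assert 0.0 <= confidence <= 1.0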

Diff for: cr_nlp.egg-info/PKG-INFO

+21
@@ -0,0 +1,21 @@
Metadata-Version: 2.1
Name: cr_nlp
Version: 0.1.6
Summary: Library for NLP tasks by Aryan Oberoi
Home-page: https://github.com/aryanoberoi/IITD-work/tree/main/library/test_library
Author: Aryan Oberoi
Author-email: [email protected]
License: MIT
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Operating System :: OS Independent
Description-Content-Type: text/markdown
Requires-Dist: transformers
Requires-Dist: nltk

Library for NLP tasks by Aryan Oberoi

Diff for: cr_nlp.egg-info/SOURCES.txt

+9
@@ -0,0 +1,9 @@
__init__.py
myfunctions.py
setup.py
test_myfunctions.py
cr_nlp.egg-info/PKG-INFO
cr_nlp.egg-info/SOURCES.txt
cr_nlp.egg-info/dependency_links.txt
cr_nlp.egg-info/requires.txt
cr_nlp.egg-info/top_level.txt

Diff for: cr_nlp.egg-info/dependency_links.txt

+1
@@ -0,0 +1 @@

Diff for: cr_nlp.egg-info/requires.txt

+2
@@ -0,0 +1,2 @@
transformers
nltk

Diff for: cr_nlp.egg-info/top_level.txt

+1
@@ -0,0 +1 @@

Diff for: dist/cr_nlp-0.1.6-py3-none-any.whl

4.45 KB
Binary file not shown.
