diff --git a/article_to_csv.py b/article_to_csv.py
deleted file mode 100644
index f415e65..0000000
--- a/article_to_csv.py
+++ /dev/null
@@ -1,98 +0,0 @@
-## Carter Merenstein
-## Middlebury College
-''' Reads in text files downloaded from factiva, filters out common words
-stems words, and then outputs a csv of article, date, and frequency of words.'''
-
-
-import csv
-import os
-import re
-from nltk import PorterStemmer
-
-pattern = re.compile('\W|\d') # not an alphanumeric
-
-articles = {} #articles with words
-article_dates = {}
-all_words = {}
-article_sources = {} #document number corresponds to source
-
-txt_paths = "C:\\CDocs\\Annas_thesis\\txtgroups"
-
-def isNewArticle(line_arr):
-    ''' the easiest identifier for a new article is that they start with a wordcount, e.g. "999 words"
-    takes in a line already split into a list'''
-    try:
-        condition2 = (line_arr[1].strip('\n') == 'words') ## needs to be separate
-    except:
-        return False # an error occurs if there is < 2 words. In this case we know it's not a new article
-    return (re.match('[0-9]', line_arr[0]) != None and condition2)
-
-for txt in os.listdir(txt_paths):
-    started = False #can't classify things before the title
-    new_article = False #used to pull name, source, date from articles
-    filename = txt_paths + '\\' + txt
-    with open(filename, 'r', encoding = "utf8") as group:
-        i = 0
-        article_title = '' #separate out by article
-        two_lines_ago = '' #always hold this to be able to snatch the title
-        last_line = ''
-        date = '' #always after wordcount
-        source = txt.strip('.txt') # just a number, Anna can sort later
-        for line in group:
-            line_arr = line.split(' ')
-            if isNewArticle(line_arr):
-                article_title = two_lines_ago
-                articles[article_title] = {}
-                i += 1
-                started = True # so we don't get an error on the first few lines
-                new_article = True
-            elif started: #just if we're past the first 2 lines of the document
-                for word in line_arr:
-                    if new_article:
-                        date = line
-                        article_dates[article_title] = date
-                        new_article = False
-                        article_sources[article_title] = source
-                    else:
-                        word = re.sub(pattern, '', word) #get rid of commas and stuff
-                        word = word.lower()
-                        word = PorterStemmer().stem_word(word) #get root of word
-                        try:
-                            articles[article_title][word] += 1
-                            all_words[word] += 1
-                        except:
-                            articles[article_title][word] = 1
-                            try:
-                                all_words[word] += 1
-                            except:
-                                all_words[word] = 0
-            two_lines_ago = last_line
-            last_line = line
-
-
-with open('stemmedWordFreq.csv ', 'w', newline='', encoding="utf8") as out:
-    w = csv.writer(out)
-    article_names = []
-    header = ['']
-    for article in articles.keys():
-        article_names.append(article)
-        header.append(article)
-    w.writerow(header)
-    article_dates_row = [''] #another row for the dates
-    for article in article_names:
-        article_dates_row.append(article_dates[article]) #put the date in the row below the article
-    w.writerow(article_dates_row)
-    article_source_row = ['']
-    for article in article_names:
-        article_source_row.append(article_sources[article])
-    w.writerow(article_source_row)
-    for word in all_words.keys():
-        if all_words[word] >= 100:
-            line = [word]
-            for article in article_names:
-                try:
-                    line.append(articles[article][word])
-
-                except KeyError:
-                    line.append(0)
-w.writerow(line)
\ No newline at end of file
diff --git a/main.py b/main.py
index 3d5720e..b2a015c 100644
--- a/main.py
+++ b/main.py
@@ -152,10 +152,12 @@ def main(self, path):
 
                     continue
                 else:
+                    """
                     degrees = self.get_rotation_info(image_file_name)
                     if degrees:
                         self.fix_dpi_and_rotation(image_file_name,
                                                   degrees, ext)
+                    """
                     call(["tesseract", image_file_name, text_file_path],
                          stdout=FNULL)
                     #Fetch tesseract with FNULL in write mode
diff --git a/ocr.py b/ocr.py
deleted file mode 100644
index d6f8819..0000000
--- a/ocr.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# import the necessary packages
-import argparse
-import os
-
-import pytesseract
-from PIL import Image
-
-import cv2
-
-# construct the argument parse and parse the arguments
-ap = argparse.ArgumentParser()
-ap.add_argument("-i", "--image", required=True,
-    help="path to input image to be OCR'd")
-ap.add_argument("-p", "--preprocess", type=str, default="thresh",
-    help="type of preprocessing to be done")
-args = vars(ap.parse_args())
-
-# load the example image and convert it to grayscale
-image = cv2.imread(args["image"])
-gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-
-# check to see if we should apply thresholding to preprocess the
-# image
-if args["preprocess"] == "thresh":
-    gray = cv2.threshold(gray, 0, 255,
-        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
-
-# make a check to see if median blurring should be done to remove
-# noise
-elif args["preprocess"] == "blur":
-    gray = cv2.medianBlur(gray, 3)
-
-# write the grayscale image to disk as a temporary file so we can
-# apply OCR to it
-filename = "{}.png".format(os.getpid())
-cv2.imwrite(filename, gray)
-
-# load the image as a PIL/Pillow image, apply OCR, and then delete
-# the temporary file
-text = pytesseract.image_to_string(Image.open(filename))
-os.remove(filename)
-print(text)
-
-# show the output images
-cv2.imshow("Image", image)
-cv2.imshow("Output", gray)
-cv2.waitKey(0)
diff --git a/rotation.py b/rotation.py
deleted file mode 100644
index c721ad1..0000000
--- a/rotation.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import os
-import subprocess
-import PIL.Image as Image
-
-from glob import glob
-
-command = 'c:\\Share\\tesseract.exe'
-image = '337.jpg'
-DPI = 300
-arguments = ' %s - -psm 0'
-
-
-def get_rotation_info(filename):
-    stdoutdata = subprocess.getoutput(command + arguments % filename)
-    degrees = None
-    for line in stdoutdata.splitlines():
-        info = 'Orientation in degrees: '
-        if info in line:
-            degrees = -float(line.replace(info, '').strip())
-            #print("Found rotation: %.2f" % degrees)
-    return degrees
-
-def fix_dpi_and_rotation(filename, degrees, dpi_info):
-    im1 = Image.open(filename)
-    print('Fixing rotation %.2f in %s...' % (degrees, filename))
-    im1.rotate(degrees).save('../%s' % filename,
-                             'JPEG', quality=97, dpi = (dpi_info, dpi_info))
-
-filenames = sorted(glob('*.jpg'))
-for filename in filenames:
-    print('Checking %s...' % filename)
-    degrees = get_rotation_info(filename)
-    if degrees:
-        fix_dpi_and_rotation(filename, degrees, DPI)
\ No newline at end of file
diff --git a/similarity.py b/similarity.py
deleted file mode 100644
index 7814da9..0000000
--- a/similarity.py
+++ /dev/null
@@ -1,169 +0,0 @@
-from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.corpus import stopwords,wordnet
-from nltk.stem import WordNetLemmatizer
-from itertools import product
-import numpy
-
-# str1 = "Abhishek is a good boy."
-# str2 = "Abhishek is not a bad boy."
-# str1 = "Cat is drinking water."
-# str2 = "Lions eat flesh."
-# str1 = "He loves to play football."
-# str2 = "Football is his favourite sport."
-# str1 = "Many consider Maradona as the best player in soccer history."
-# str2 = "Maradona is one of the best soccer player."
-
-str1 = "I was given a card by her in the garden."
-str2 = "In the garden, she gave me a card."
-
-# str1 = "Ballmer has been vocal in the past warning that Linux is a threat to Microsoft."
-# str2 = "In the memo, Ballmer reiterated the open-source threat to Microsoft."
-# str1 = "The boy is fetching water from the well."
-# str2 = "The lion is running in the forest."
-# str1 = "A school is a place where kids go to study."
-# str2 = "School is an institution for children who want to study."
-# str1 = "The world knows it has lost a heroic champion of justice and freedom."
-# str2 = "The earth recognizes the loss of a valiant champion of independence and justice."
-# str1 = "A cemetery is a place where dead people's bodies or their ashes are buried."
-# str2 = "A graveyard is an area of land ,sometimes near a church, where dead people are buried."
-
-##---------------Defining stopwords for English Language---------------##
-stop_words = set(stopwords.words("english"))
-
-##---------------Initialising Lists---------------##
-filtered_sentence1 = []
-filtered_sentence2 = []
-lemm_sentence1 = []
-lemm_sentence2 = []
-sims = []
-temp1 = []
-temp2 = []
-simi = []
-final = []
-same_sent1 = []
-same_sent2 = []
-#ps = PorterStemmer()
-
-##---------------Defining WordNet Lematizer for English Language---------------##
-lemmatizer = WordNetLemmatizer()
-
-#myfile = open('Text1.txt', 'r')
-#data=myfile.read().replace('\n', '')
-##print(sent_tokenize(example_text))
-##
-##print(word_tokenize(example_text))
-
-##---------------Tokenizing and removing the Stopwords---------------##
-
-for words1 in word_tokenize(str1):
-    if words1 not in stop_words:
-        if words1.isalnum():
-            filtered_sentence1.append(words1)
-
-##---------------Lemmatizing: Root Words---------------##
-
-for i in filtered_sentence1:
-    lemm_sentence1.append(lemmatizer.lemmatize(i))
-
-#print(lemm_sentence1)
-
-
-##---------------Tokenizing and removing the Stopwords---------------##
-
-for words2 in word_tokenize(str2):
-    if words2 not in stop_words:
-        if words2.isalnum():
-            filtered_sentence2.append(words2)
-
-##---------------Lemmatizing: Root Words---------------##
-
-for i in filtered_sentence2:
-    lemm_sentence2.append(lemmatizer.lemmatize(i))
-
-#print(lemm_sentence2)
-
-##---------------Removing the same words from the tokens----------------##
-##for word1 in lemm_sentence1:
-##    for word2 in lemm_sentence2:
-##        if word1 == word2:
-##            same_sent1.append(word1)
-##            same_sent2.append(word2)
-##
-##if same_sent1 != []:
-##    for word1 in same_sent1:
-##        lemm_sentence1.remove(word1)
-##if same_sent2 != []:
-##    for word2 in same_sent2:
-##        lemm_sentence2.remove(word2)
-##
-##print(lemm_sentence1)
-##print(lemm_sentence2)
-
-##---------------Similarity index calculation for each word---------------##
-for word1 in lemm_sentence1:
-    simi =[]
-    for word2 in lemm_sentence2:
-        sims = []
-        # print(word1)
-        #print(word2)
-        syns1 = wordnet.synsets(word1)
-        #print(syns1)
-        #print(wordFromList1[0])
-        syns2 = wordnet.synsets(word2)
-        #print(wordFromList2[0])
-        for sense1, sense2 in product(syns1, syns2):
-            d = wordnet.wup_similarity(sense1, sense2)
-            if d != None:
-                sims.append(d)
-
-        #print(sims)
-        #print(max(sims))
-        if sims != []:
-            max_sim = max(sims)
-            #print(max_sim)
-            simi.append(max_sim)
-
-    if simi != []:
-        max_final = max(simi)
-        final.append(max_final)
-
-#print(final)
-
-# if max_sim >= 0.7:
-#     print(word1)
-#     print(word2)
-#     print('\n')
-
-# if word1 not in temp1:
-#     temp1.append(word1)
-# if word2 not in temp2:
-#     temp2.append(word2)
-        #lemm_sentence1.remove(word1)
-        #lemm_sentence2.remove(word2)
-    #if wordFromList1 and wordFromList2: #Thanks to @alexis' note
-    #    s = wordFromList1[0].wup_similarity(wordFromList2[0])
-    #    list.append(s)
-#for word1 in temp1:
-#    lemm_sentence1.remove(word1)
-
-#for word2 in temp2:
-#    lemm_sentence2.remove(word2)
-
-#print(lemm_sentence1)
-#print(lemm_sentence2)
-
-
-##---------------Final Output---------------##
-
-similarity_index = numpy.mean(final)
-similarity_index = round(similarity_index , 2)
-print("Sentence 1: ",str1)
-print("Sentence 2: ",str2)
-print("Similarity index value : ", similarity_index)
-
-if similarity_index>0.8:
-    print("Similar")
-elif similarity_index>=0.6:
-    print("Somewhat Similar")
-else:
-    print("Not Similar")
\ No newline at end of file