Skip to content

Latest commit

 

History

History
99 lines (69 loc) · 3.76 KB

README.md

File metadata and controls

99 lines (69 loc) · 3.76 KB

Visit QuantNet

Visit QuantNet Countword Visit QuantNet 2.0

Name of QuantLet:  'Countword'

Published in:       

Description:       'Counts positive and negative words using the lexicon by Bing Liu'

Keywords:          'text, nlp, tokenization, opinion mining, media news, sentiment'

See also:           

Author:            'Guo Li'

Submitted:         'Mon, September 5 2015 by Guo Li'

Datafile:          'positive-words.txt negative-words.txt example.txt'

Input:  

Output:           

Example:            

Picture1

# Relative file names below are resolved against this working directory;
# the path looks like a placeholder -- point it at the folder with the data.
setwd("C:~")
library("methods")

# Text to be scored, one vector element per line of the file.
a <- readLines("example.txt")

# Opinion lexicons, one entry per line: negative terms, then positive terms.
neg <- readLines("negative-words.txt")
pos <- readLines("positive-words.txt")

# score.sentiment: count positive and negative lexicon hits in a text.
#
# Arguments:
#   sentence   character vector of text (e.g. the lines from readLines());
#              all elements are pooled into one bag of words.
#   pos.words  character vector of positive lexicon terms.
#   neg.words  character vector of negative lexicon terms.
#
# Returns a one-row data.frame with integer columns:
#   pos         number of tokens found in pos.words
#   neg         number of tokens found in neg.words
#   opt         pos - neg
#   totalwords  number of non-empty tokens in the text
#
# Empty input (character(0) or only blank strings) yields a row of zeros.
score.sentiment <- function(sentence, pos.words, neg.words) {
  # Remove punctuation.
  sentence <- gsub("[[:punct:]]", "", sentence)
  # Control characters include tab, which separates words; replace them with
  # a space instead of deleting them so neighbouring words are not fused.
  sentence <- gsub("[[:cntrl:]]", " ", sentence)
  # Remove digits.
  sentence <- gsub("\\d+", "", sentence)

  # Lower-case each element on its own; an element that tolower() cannot
  # process becomes NA instead of aborting the whole run.
  safe_lower <- function(x) {
    tryCatch(tolower(x), error = function(e) NA_character_)
  }
  # vapply (unlike sapply) always returns a character vector, even for
  # zero-length input, so strsplit() below never receives a list.
  sentence <- vapply(sentence, safe_lower, character(1), USE.NAMES = FALSE)

  # Split on runs of whitespace (base strsplit) and pool all tokens.
  words <- unlist(strsplit(sentence, "\\s+"), use.names = FALSE)

  # Leading whitespace produces an empty token, and failed lower-casing
  # produces NA. Neither is a word: both would inflate totalwords, and ""
  # would also match blank lines present in a lexicon read with readLines().
  words <- words[!is.na(words) & nzchar(words)]

  # Membership test against each lexicon (TRUE/FALSE per token).
  is_pos <- words %in% pos.words
  is_neg <- words %in% neg.words

  n_pos <- sum(is_pos)
  n_neg <- sum(is_neg)

  data.frame(
    pos        = n_pos,
    neg        = n_neg,
    opt        = n_pos - n_neg,
    totalwords = length(words)
  )
}

# Score the example text against the two lexicons.
# (disabled) sent = paste(rep(sentences,3),collapse="")
score.sentiment(sentence = a, pos.words = pos, neg.words = neg)