Skip to content

Commit

Permalink
minor style fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
rssdev10 committed Oct 24, 2023
1 parent b2e7bfc commit 1cc64a7
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 14 deletions.
19 changes: 7 additions & 12 deletions src/LM/langmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -99,20 +99,18 @@ To get probability of word given that context
In other words, for a given context, calculate the frequency distribution of the word.
"""
function prob(m::Langmodel, templ_lm::DefaultDict, word, context=nothing)
if context == nothing || context == ""
return(1/float(length(templ_lm))) #provide distribution
else
accum = templ_lm[context]
end
function prob(m::Langmodel, templ_lm::DefaultDict, word, context=nothing)::Float64
(isnothing(context) || isempty(context)) && return 1.0/length(templ_lm) #provide distribution

accum = templ_lm[context]
s = float(sum(accum))
for (text, count) in accum
if text == word
return(float(count) / s)
end
end
if context in keys(m.vocab.vocab)
return(0)
return 0.0
end
return(Inf)
end
Expand Down Expand Up @@ -186,9 +184,8 @@ depending upon the sub-Type
"""
function score(m::InterpolatedLanguageModel, temp_lm::DefaultDict, word, context=nothing)
if context == nothing || context == ""
return prob(m, temp_lm, word, context)
end
(isnothing(context) || isempty(context)) && return prob(m, temp_lm, word)

if context in keys(temp_lm)
alpha,gamma = alpha_gammma(m, temp_lm, word, context)
return (alpha + gamma*score(m, temp_lm, word, context_reduce(context)))
Expand Down Expand Up @@ -242,5 +239,3 @@ function alpha_gammma(m::KneserNeyInterpolated, templ_lm::DefaultDict, word, con
gamma = (m.discount * count_non_zero_vals(accum) /s)
return alpha, gamma
end


2 changes: 1 addition & 1 deletion src/deprecations.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

## Deprecations for Languages

function WordTokenizers.tokenize(::Type{S}, s::T) where {S <: Language, T <: AbstractString}
function tokenize(::Type{S}, s::T) where {S <: Language, T <: AbstractString}
depwarn("Use of Languages as types is deprecated. Use instances.", Symbol(S))
tokenize(S(), s)
end
Expand Down
2 changes: 1 addition & 1 deletion src/tokenizer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ julia> tokenize(Languages.English(), "Too foo words!")
See also: [`sentence_tokenize`](@ref)
"""
WordTokenizers.tokenize(lang::S, s::T) where {S <: Language, T <: AbstractString} = WordTokenizers.tokenize(s)
tokenize(lang::S, s::T) where {S <: Language, T <: AbstractString} = WordTokenizers.tokenize(s)


"""
Expand Down

0 comments on commit 1cc64a7

Please sign in to comment.