Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement + Fixes for Hebrew Language - Duration+TimeGrain+Time+Amount #448

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Duckling/AmountOfMoney/HE/Corpus.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ allExamples = concat
, "עשרה שקלים"
, "עשר ש״ח"
, "עשר שח"
,"10 ש\"ח"
, "10₪"
]
, examples (simple ILS 10000)
Expand Down
87 changes: 77 additions & 10 deletions Duckling/AmountOfMoney/HE/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ ruleUnitAmount = Rule
ruleOneShekel :: Rule
ruleOneShekel = Rule
{ name = "שקל"
, pattern = [regex "שקל( אחד| בודד)?"]
, pattern = [regex " שקל( אחד| בודד)?"]
, prod = \_ -> Just . Token AmountOfMoney $ withValue 1 $ currencyOnly ILS
}
ruleTwoShekel :: Rule
Expand All @@ -66,7 +66,7 @@ ruleOneAgura = Rule
ruleShekel :: Rule
ruleShekel = Rule
{ name = "שקל"
, pattern = [regex "שקל(ים)?|ש״?ח"]
, pattern = [regex " שקל(ים)?|ש״?ח|ש(\")?ח|₪"]
, prod = \_ -> Just . Token AmountOfMoney $ currencyOnly ILS
}

Expand Down Expand Up @@ -196,6 +196,30 @@ ruleGBP = Rule
, prod = \_ -> Just . Token AmountOfMoney $ currencyOnly GBP
}

-- | A numeral introduced by an "amount of" phrase, e.g. "בסכום של 100" or
-- "על סך 100".
--
-- The numeral must satisfy 'isPositive'. No currency word is part of the
-- pattern; the resulting amount is always tagged as ILS.
ruleExactAmountDescription :: Rule
ruleExactAmountDescription = Rule
  { name = "amount of|amount <amount-of-money>"
  , pattern =
    [ regex "(ב)?(גובה|סכום|על סך|שווי)( של)?"
    , Predicate isPositive
    ]
  , prod = \tokens -> case tokens of
      -- First token is the introductory phrase; only the numeral matters.
      (_:Token Numeral NumeralData{TNumeral.value = amount}:_) ->
        Just (Token AmountOfMoney (withValue amount (currencyOnly ILS)))
      _ -> Nothing
  }

-- | A numeral introduced by the Hebrew prefix "ב", optionally joined to the
-- number with a dash: "ב 10", "ב-10", "ב- 10", "ב -10", "ב - 10".
--
-- The numeral must satisfy 'isPositive'. No currency word is part of the
-- pattern; the resulting amount is always tagged as ILS.
ruleAtAmountDescription :: Rule
ruleAtAmountDescription = Rule
  { name = "at <amount-of-money>"
  , pattern =
    -- Written as one optional group rather than an alternation that starts
    -- with a bare "ב": PCRE takes the first alternative that matches, so a
    -- leading bare "ב" branch would shadow every dash form and "ב-10" would
    -- never parse (a dash is not an allowed separator between tokens).
    -- A plain space after "ב" needs no branch of its own.
    [ regex "ב( ?- ?)?"
    , Predicate isPositive
    ]
  , prod = \case
      -- First token is the prefix; only the numeral matters.
      (_:Token Numeral NumeralData{TNumeral.value = v}:_) ->
        Just . Token AmountOfMoney $ withValue v $ currencyOnly ILS
      _ -> Nothing
  }

rulePrecision :: Rule
rulePrecision = Rule
{ name = "about|exactly <amount-of-money>"
Expand All @@ -214,9 +238,9 @@ ruleIntervalBetweenNumeral :: Rule
ruleIntervalBetweenNumeral = Rule
{ name = "between|from <numeral> to|and <amount-of-money>"
, pattern =
[ regex "מ?|בין "
[ regex "(מ(ה)?|בין (ה )?|ה)(סכום)?( של)?"
, Predicate isPositive
, regex "עד |ל"
, regex "((ו)?עד( ל| ה)?|[\\-ול](בין )?)(סכום)?( של)?"
, Predicate isSimpleAmountOfMoney
]
, prod = \case
Expand All @@ -229,14 +253,32 @@ ruleIntervalBetweenNumeral = Rule
Just . Token AmountOfMoney . withInterval (from, to) $ currencyOnly c
_ -> Nothing
}

-- | Interval written as a bare numeral, a "to"/"and" connector, and a simple
-- amount of money (no leading "between"/"from" word).
--
-- The currency is taken from the amount on the right. An interval is only
-- produced when the numeral is strictly smaller than the amount's value.
ruleNumeralToAmountInterval :: Rule
ruleNumeralToAmountInterval = Rule
  { name = "<numeral> to|and <amount-of-money>"
  , pattern =
    [ Predicate isPositive
    , regex "((ו)?עד( ל| ה)?|[\\-ול](בין )?)(סכום)?( של)?"
    , Predicate isSimpleAmountOfMoney
    ]
  , prod = \tokens -> case tokens of
      ( Token Numeral NumeralData{TNumeral.value = lower}
        : _
        : Token AmountOfMoney AmountOfMoneyData
            { TAmountOfMoney.value = Just upper
            , TAmountOfMoney.currency = cur
            }
        : _
        ) | lower < upper ->
          Just (Token AmountOfMoney (withInterval (lower, upper) (currencyOnly cur)))
      _ -> Nothing
  }

ruleIntervalBetween :: Rule
ruleIntervalBetween = Rule
{ name = "between|from <amount-of-money> to|and <amount-of-money>"
, pattern =
[ regex "מ?|בין "
[ regex "(מ(ה)?|בין (ה )?|ה)(סכום)?( של)?"
, Predicate isSimpleAmountOfMoney
, regex "עד |ל"
, regex "((ו)?עד( ל| ה)?|[\\-ול](בין )?)(סכום)?( של)?"
, Predicate isSimpleAmountOfMoney
]
, prod = \case
Expand All @@ -251,13 +293,33 @@ ruleIntervalBetween = Rule
currencyOnly c1
_ -> Nothing
}

-- | Interval written as two simple amounts of money joined by a "to"/"and"
-- connector (no leading "between"/"from" word).
--
-- An interval is only produced when both amounts share the same currency and
-- the left value is strictly smaller than the right one.
ruleAmountToAmountInterval :: Rule
ruleAmountToAmountInterval = Rule
  { name = "<amount-of-money> to|and <amount-of-money>"
  , pattern =
    [ Predicate isSimpleAmountOfMoney
    , regex "((ו)?עד( ל| ה)?|[\\-ול](בין )?)(סכום)?( של)?"
    , Predicate isSimpleAmountOfMoney
    ]
  , prod = \tokens -> case tokens of
      ( Token AmountOfMoney AmountOfMoneyData
            { TAmountOfMoney.value = Just lower
            , TAmountOfMoney.currency = curLower
            }
        : _
        : Token AmountOfMoney AmountOfMoneyData
            { TAmountOfMoney.value = Just upper
            , TAmountOfMoney.currency = curUpper
            }
        : _
        ) | lower < upper, curLower == curUpper ->
          Just (Token AmountOfMoney (withInterval (lower, upper) (currencyOnly curLower)))
      _ -> Nothing
  }

ruleIntervalNumeralDash :: Rule
ruleIntervalNumeralDash = Rule
{ name = "<numeral> - <amount-of-money>"
, pattern =
[ Predicate isPositive
, regex "-"
, regex "-| -|- | - "
, Predicate isSimpleAmountOfMoney
]
, prod = \case
Expand All @@ -275,7 +337,7 @@ ruleIntervalDash = Rule
{ name = "<amount-of-money> - <amount-of-money>"
, pattern =
[ Predicate isSimpleAmountOfMoney
, regex "-"
, regex "-| -|- | - "
, Predicate isSimpleAmountOfMoney
]
, prod = \case
Expand All @@ -294,7 +356,7 @@ ruleIntervalMax :: Rule
ruleIntervalMax = Rule
{ name = "under/less/lower/no more than <amount-of-money>"
, pattern =
[ regex "פחות מ|עד|לא יותר מ|מתחת ל?|לא מעל"
[ regex "פחות מ-?( -)?|עד ל?-?( -)?|לא יותר מ-?( -)?|מחת ל?-?( -)?|מתחת ל?-?( -)?|לא מעל ל?-?( -)?"
, Predicate isSimpleAmountOfMoney
]
, prod = \case
Expand All @@ -309,7 +371,7 @@ ruleIntervalMin :: Rule
ruleIntervalMin = Rule
{ name = "over/above/at least/more than <amount-of-money>"
, pattern =
[ regex "יותר מ|מעל|לא פחות מ|לא מתחת ל"
[ regex "יותר מ-?( -)?|מעל ל?-?( -)?|מ-?( -)?|לא פחות מ-?( -)?|לא מתחת ל?-?( -)?"
, Predicate isSimpleAmountOfMoney
]
, prod = \case
Expand All @@ -319,6 +381,7 @@ ruleIntervalMin = Rule
_) -> Just . Token AmountOfMoney . withMin to $ currencyOnly c
_ -> Nothing
}


rules :: [Rule]
rules =
Expand All @@ -327,14 +390,18 @@ rules =
, ruleEUR
, ruleIntersectAndXCents
, ruleIntervalBetweenNumeral
, ruleNumeralToAmountInterval
, ruleIntervalBetween
, ruleAmountToAmountInterval
, ruleAtAmountDescription
, ruleIntervalMax
, ruleIntervalMin
, ruleIntervalNumeralDash
, ruleIntervalDash
, rulePounds
, ruleOneGBP
, ruleGBP
, ruleExactAmountDescription
, rulePrecision
, ruleIntersect
, ruleAgura
Expand Down
60 changes: 59 additions & 1 deletion Duckling/Duration/HE/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,72 @@ ruleExactlyDuration = Rule
(_:token:_) -> Just token
_ -> Nothing
}


-- | "חצי שנה" / "1/2 שנה" (half a year), expressed as a duration of six
-- months.
ruleHalfAYear :: Rule
ruleHalfAYear = Rule
  { name = "half a year"
  , pattern =
    [ regex "(1/2 שנה|חצי שנה)"
    ]
  -- The matched text carries no extra information, so the result is constant.
  , prod = const (Just (Token Duration (duration TG.Month 6)))
  }

-- | Dual form built from a time grain followed by the suffix "יים": yields a
-- duration of two units of that grain.
--
-- Meant for week and month; the pattern itself accepts any 'TimeGrain'
-- token.
ruleDualUnitofduration :: Rule
ruleDualUnitofduration = Rule
  { name = "dual <unit-of-duration>"
  , pattern =
    [ dimension TimeGrain
    , regex "(יים)"
    ]
  , prod = \matched -> case matched of
      -- Only the grain is needed; the suffix token is ignored.
      (Token TimeGrain unit:_) -> Just (Token Duration (duration unit 2))
      _ -> Nothing
  }

-- | "יומיים": the dual form of "day", a duration of two days.
ruleTwoDays :: Rule
ruleTwoDays = Rule
  { name = "dual days"
  , pattern =
    [ regex "יומיים"
    ]
  -- Fixed word, fixed result.
  , prod = const (Just (Token Duration (duration TG.Day 2)))
  }

-- | "שנתיים": the dual form of "year", a duration of two years.
ruleTwoYears :: Rule
ruleTwoYears = Rule
  { name = "dual years"
  , pattern =
    [ regex "שנתיים"
    ]
  -- Fixed word, fixed result.
  , prod = const (Just (Token Duration (duration TG.Year 2)))
  }

-- | A bare time grain read as a duration of one unit.
--
-- Hebrew has no word for the indefinite article "a" (and needs none), so the
-- grain on its own already stands for a single-unit duration.
ruleSingleUnitofduration :: Rule
ruleSingleUnitofduration = Rule
  { name = "single <unit-of-duration>"
  , pattern =
    [ dimension TimeGrain
    ]
  , prod = \matched -> case matched of
      (Token TimeGrain unit:_) -> Just (Token Duration (duration unit 1))
      _ -> Nothing
  }

rules :: [Rule]
rules =
[ ruleAboutDuration
[ ruleDualUnitofduration
, ruleTwoDays
, ruleTwoYears
, ruleHalfAYear
, ruleAboutDuration
, ruleExactlyDuration
, ruleHalfAnHour
, ruleIntegerAndAnHalfHours
, ruleNumbernumberHours
, ruleQuarterOfAnHour
, ruleThreequartersOfAnHour
, ruleSingleUnitofduration
]
7 changes: 5 additions & 2 deletions Duckling/Numeral/HE/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
{ name = "numbers prefix with -, negative or minus"
, pattern =
[ regex "-|מינוס"
[ regex "מינוס"
, Predicate isPositive
]
, prod = \tokens -> case tokens of
Expand All @@ -118,7 +118,7 @@ ruleInteger15 :: Rule
ruleInteger15 = Rule
{ name = "integer (20..90)"
, pattern =
[ regex "(עשרים|שלושים|ארבעים|חמישים|שישים|שבעים|שמונים|תשעים)"
[ regex "(עשרים|שלושים|ארבעים|חמישים|שישים|שבעים|שמונים|תשעים|מאתיים)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case match of
Expand All @@ -130,6 +130,7 @@ ruleInteger15 = Rule
"שבעים" -> integer 70
"שמונים" -> integer 80
"תשעים" -> integer 90
"מאתיים" -> integer 200
_ -> Nothing
_ -> Nothing
}
Expand Down Expand Up @@ -203,6 +204,8 @@ rulePowersOfTen = Rule
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"מאה" ->
double 1e2 >>= withGrain 2 >>= withMultipliable
"מאתיים" ->
double 2e2 >>= withGrain 2 >>= withMultipliable
"מאות" ->
double 1e2 >>= withGrain 2 >>= withMultipliable
"אלף" ->
Expand Down
2 changes: 1 addition & 1 deletion Duckling/Ranking/Classifiers/AF_XX.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types

classifiers :: Classifiers
classifiers = HashMap.fromList []
classifiers = HashMap.fromList []
19 changes: 11 additions & 8 deletions Duckling/Ranking/Classifiers/EN_AU.hs
Original file line number Diff line number Diff line change
Expand Up @@ -864,12 +864,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("hour (grain)",
Classifier{okData =
ClassData{prior = -1.4294665329850993,
unseen = -2.9444389791664407,
likelihoods = HashMap.fromList [("", 0.0)], n = 17},
ClassData{prior = -1.3723081191451507, unseen = -2.995732273553991,
likelihoods = HashMap.fromList [("", 0.0)], n = 18},
koData =
ClassData{prior = -0.27369583047704105, unseen = -4.02535169073515,
likelihoods = HashMap.fromList [("", 0.0)], n = 54}}),
ClassData{prior = -0.2923879634891936, unseen = -4.007333185232471,
likelihoods = HashMap.fromList [("", 0.0)], n = 53}}),
("Parsi New Year",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
Expand Down Expand Up @@ -1588,10 +1587,14 @@ classifiers
n = 1}}),
("number.number hours",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods =
HashMap.fromList
[("hour (grain)", -0.6931471805599453),
("hour", -0.6931471805599453)],
n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("from <time-of-day> - <time-of-day> (interval)",
Classifier{okData =
Expand Down
19 changes: 11 additions & 8 deletions Duckling/Ranking/Classifiers/EN_BZ.hs
Original file line number Diff line number Diff line change
Expand Up @@ -864,12 +864,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("hour (grain)",
Classifier{okData =
ClassData{prior = -1.4294665329850993,
unseen = -2.9444389791664407,
likelihoods = HashMap.fromList [("", 0.0)], n = 17},
ClassData{prior = -1.3723081191451507, unseen = -2.995732273553991,
likelihoods = HashMap.fromList [("", 0.0)], n = 18},
koData =
ClassData{prior = -0.27369583047704105, unseen = -4.02535169073515,
likelihoods = HashMap.fromList [("", 0.0)], n = 54}}),
ClassData{prior = -0.2923879634891936, unseen = -4.007333185232471,
likelihoods = HashMap.fromList [("", 0.0)], n = 53}}),
("Parsi New Year",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
Expand Down Expand Up @@ -1588,10 +1587,14 @@ classifiers
n = 1}}),
("number.number hours",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods =
HashMap.fromList
[("hour (grain)", -0.6931471805599453),
("hour", -0.6931471805599453)],
n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("from <time-of-day> - <time-of-day> (interval)",
Classifier{okData =
Expand Down
Loading