diff --git a/CHANGELOG.md b/CHANGELOG.md index 473615f..ac4bc9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,14 @@ # Changelog All notable changes to this project will be documented in this file. -## [Unreleased] +## [0.54.2] - 2018-04-10 + +### Added +- Examples for builtin entities in all languages +- Japanese support for all builtin entities + +### Fixed +- Issue with the entity kinds order used in BuiltinEntityParser ## [0.54.1] - 2018-04-03 @@ -15,6 +22,5 @@ All notable changes to this project will be documented in this file. ### Modified - Updated Rustling ontoloty to `0.16.4` - -[Unreleased]: https://github.com/snipsco/snips-nlu-ontology/compare/0.54.1...HEAD -[0.54.1]: https://github.com/snipsco/snips-nlu-ontology/compare/0.54.0...0.54.1 \ No newline at end of file +[0.54.2]: https://github.com/snipsco/snips-nlu-ontology/compare/0.54.1...0.54.2 +[0.54.1]: https://github.com/snipsco/snips-nlu-ontology/compare/0.54.0...0.54.1 diff --git a/README.rst b/README.rst index d3d71b6..911f93e 100644 --- a/README.rst +++ b/README.rst @@ -23,10 +23,10 @@ Supported languages +----------+------------+ | French | fr | +----------+------------+ -| Korean | ko | -+----------+------------+ | Japanese | ja | +----------+------------+ +| Korean | ko | ++----------+------------+ Supported builtin entities -------------------------- @@ -34,46 +34,53 @@ Supported builtin entities +---------------+---------------------+---------------------+ | Entity | Identifier | Supported languages | +===============+=====================+=====================+ -| AmountOfMoney | snips/amountOfMoney | | English | -| | | | French | -| | | | German | +| AmountOfMoney | snips/amountOfMoney | | German | +| | | | English | | | | | Spanish | +| | | | French | +| | | | Japanese | | | | | Korean | +---------------+---------------------+---------------------+ -| Time | snips/datetime | | English | +| Time | snips/datetime | | German | +| | | | English | | | | | Spanish | | | | | French | +| | | | Japanese | | | | | Korean | -| | | | German | +---------------+---------------------+---------------------+ -| Duration | snips/duration | | English | +| Duration | snips/duration | | German | +| | | | English | | | | | Spanish | | | | | French | +| | | | Japanese | | | | | Korean | -| | | | German | +---------------+---------------------+---------------------+ -| Number | snips/number | | English | +| Number | snips/number | | German | +| | | | English | | | | | Spanish | | | | | French | +| | | | Japanese | | | | | Korean | -| | | | German | +---------------+---------------------+---------------------+ -| Ordinal | snips/ordinal | | English | +| Ordinal | snips/ordinal | | German | +| | | | English | | | | | Spanish | | | | | French | +| | | | Japanese | | | | | Korean | -| | | | German | +---------------+---------------------+---------------------+ -| Percentage | snips/percentage | | English | +| Percentage | snips/percentage | | German | +| | | | English | | | | | Spanish | | | | | French | -| | | | German | +| | | | Japanese | +---------------+---------------------+---------------------+ -| Temperature | snips/temperature | | English | +| Temperature | snips/temperature | | German | +| | | | English | | | | | Spanish | | | | | French | +| | | | Japanese | | | | | Korean | -| | | | German | +---------------+---------------------+---------------------+ Results Examples diff --git a/platforms/snips-nlu-ontology-kotlin/build.gradle b/platforms/snips-nlu-ontology-kotlin/build.gradle index fecc240..4db0d90 100644 --- a/platforms/snips-nlu-ontology-kotlin/build.gradle +++ b/platforms/snips-nlu-ontology-kotlin/build.gradle @@ -8,7 +8,7 @@ buildscript { } } -version = "0.54.1" +version = "0.54.2" group = "ai.snips" diff --git a/platforms/snips-nlu-ontology-python/.gitignore b/platforms/snips-nlu-ontology-python/.gitignore index 52fabe3..49ad4c9 100644 --- a/platforms/snips-nlu-ontology-python/.gitignore +++ b/platforms/snips-nlu-ontology-python/.gitignore @@ -1,4 +1,5 @@ venv/ +venv2/ venv3/ venv34/ venv35/ diff --git a/platforms/snips-nlu-ontology-python/snips-nlu-ontology-rs/Cargo.toml b/platforms/snips-nlu-ontology-python/snips-nlu-ontology-rs/Cargo.toml index 2c18470..7c7a1f5 100644 --- a/platforms/snips-nlu-ontology-python/snips-nlu-ontology-rs/Cargo.toml +++ b/platforms/snips-nlu-ontology-python/snips-nlu-ontology-rs/Cargo.toml @@ -1,13 +1,13 @@ [package] name = "snips-nlu-ontology-rs" -version = "0.54.1" +version = "0.54.2" authors = ["Adrien Ball "] [dependencies] libc = "0.2" -snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.54.1" } -snips-nlu-ontology-ffi-macros = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.54.1" } -snips-nlu-ontology-parsers-ffi-macros = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.54.1" } +snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.54.2" } +snips-nlu-ontology-ffi-macros = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.54.2" } +snips-nlu-ontology-parsers-ffi-macros = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.54.2" } [lib] name = "snips_nlu_ontology_rs" diff --git a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__init__.py b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__init__.py index 903889e..99ef7fc 100644 --- a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__init__.py +++ b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__init__.py @@ -2,4 +2,4 @@ from snips_nlu_ontology.builtin_entities import ( BuiltinEntityParser, get_all_languages, get_all_builtin_entities, - get_supported_entities, get_ontology_version) + get_supported_entities, get_ontology_version, get_builtin_entity_examples) diff --git a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__version__ b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__version__ index 1942d77..71e0bca 100644 --- a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__version__ +++ b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/__version__ @@ -1 +1 @@ -0.54.1 +0.54.2 diff --git a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/builtin_entities.py b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/builtin_entities.py index afcabc3..6499bc1 100644 --- a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/builtin_entities.py +++ b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/builtin_entities.py @@ -14,6 +14,7 @@ _ALL_LANGUAGES = None _SUPPORTED_ENTITIES = dict() +_ENTITIES_EXAMPLES = dict() _ALL_BUILTIN_ENTITIES = None _ONTOLOGY_VERSION = None @@ -75,6 +76,35 @@ def get_supported_entities(language): return _SUPPORTED_ENTITIES[language] +def get_builtin_entity_examples(builtin_entity_kind, language): + """Provides some examples of the builtin entity in the specified language + """ + global _ENTITIES_EXAMPLES + + if not isinstance(builtin_entity_kind, str): + raise TypeError("Expected `builtin_entity_kind` to be of type 'str' " + "but found: %s" % type(builtin_entity_kind)) + if not isinstance(language, str): + raise TypeError("Expected `language` to be of type 'str' but found: %s" + % type(language)) + + if builtin_entity_kind not in _ENTITIES_EXAMPLES: + _ENTITIES_EXAMPLES[builtin_entity_kind] = dict() + + if language not in _ENTITIES_EXAMPLES[builtin_entity_kind]: + with string_array_pointer(pointer(CStringArray())) as ptr: + exit_code = lib.nlu_ontology_builtin_entity_examples( + builtin_entity_kind.encode("utf8"), + language.encode("utf8"), byref(ptr)) + if exit_code: + raise ValueError("Something wrong happened while retrieving " + "builtin entity examples. See stderr.") + array = ptr.contents + _ENTITIES_EXAMPLES[builtin_entity_kind][language] = list( + array.data[i].decode("utf8") for i in range(array.size)) + return _ENTITIES_EXAMPLES[builtin_entity_kind][language] + + class BuiltinEntityParser(object): """Extract builtin entities diff --git a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entities.py b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entities.py index 6bc4127..ded4a38 100644 --- a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entities.py +++ b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entities.py @@ -2,8 +2,8 @@ from builtins import str from snips_nlu_ontology.builtin_entities import ( - get_all_languages, get_all_builtin_entities, get_supported_entities, - get_ontology_version) + get_all_languages, get_all_builtin_entities, get_builtin_entity_examples, + get_supported_entities, get_ontology_version) class TestBuiltinEntities(unittest.TestCase): @@ -39,3 +39,10 @@ def test_should_get_supported_builtin_entities(self): def test_should_get_ontology_version(self): get_ontology_version() + + def test_should_get_builtin_entity_examples(self): + for language in get_all_languages(): + for builtin_entity in get_supported_entities(language): + examples = get_builtin_entity_examples(builtin_entity, + language) + self.assertGreaterEqual(len(examples), 1) diff --git a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entity_parser.py b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entity_parser.py index a5a19c3..d20033e 100644 --- a/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entity_parser.py +++ b/platforms/snips-nlu-ontology-python/snips_nlu_ontology/tests/test_builtin_entity_parser.py @@ -32,7 +32,7 @@ def test_should_parse_without_scope(self): def test_should_parse_with_scope(self): # Given parser = BuiltinEntityParser("en") - scope = ["snips/temperature", "snips/number"] + scope = ["snips/duration", "snips/temperature"] # When res = parser.parse("Raise to sixty two", scope) diff --git a/snips-nlu-ontology-doc/Cargo.toml b/snips-nlu-ontology-doc/Cargo.toml index 6d7e38c..0622658 100644 --- a/snips-nlu-ontology-doc/Cargo.toml +++ b/snips-nlu-ontology-doc/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ontology-doc" -version = "0.54.1" +version = "0.54.2" authors = ["Adrien Ball "] [build-dependencies] diff --git a/snips-nlu-ontology-ffi-macros/Cargo.toml b/snips-nlu-ontology-ffi-macros/Cargo.toml index 6cd2686..5539827 100644 --- a/snips-nlu-ontology-ffi-macros/Cargo.toml +++ b/snips-nlu-ontology-ffi-macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ontology-ffi-macros" -version = "0.54.1" +version = "0.54.2" authors = [ "Kevin Lefevre ", "Thibaut Lorrain ", diff --git a/snips-nlu-ontology-ffi-macros/src/builtin_entity.rs b/snips-nlu-ontology-ffi-macros/src/builtin_entity.rs index 9436831..78f04c7 100644 --- a/snips-nlu-ontology-ffi-macros/src/builtin_entity.rs +++ b/snips-nlu-ontology-ffi-macros/src/builtin_entity.rs @@ -110,3 +110,23 @@ pub fn get_supported_builtin_entities( } Ok(()) } + +pub fn get_builtin_entity_examples( + builtin_entity_kind: *const libc::c_char, + language: *const libc::c_char, + results: *mut *const CStringArray, +) -> Result<()> { + let entity_kind_str = unsafe { CStr::from_ptr(builtin_entity_kind) }.to_str()?; + let entity_kind = BuiltinEntityKind::from_identifier(&*entity_kind_str)?; + let language_str = unsafe { CStr::from_ptr(language) }.to_str()?; + let language = Language::from_str(&*language_str.to_uppercase())?; + let examples = entity_kind.examples(language) + .iter() + .map(|example| example.to_string()) + .collect::>(); + let c_examples = CStringArray::from(examples); + unsafe { + *results = Box::into_raw(Box::new(c_examples)); + } + Ok(()) +} diff --git a/snips-nlu-ontology-ffi-macros/src/lib.rs b/snips-nlu-ontology-ffi-macros/src/lib.rs index 1490269..60c02ed 100644 --- a/snips-nlu-ontology-ffi-macros/src/lib.rs +++ b/snips-nlu-ontology-ffi-macros/src/lib.rs @@ -64,5 +64,14 @@ macro_rules! export_nlu_ontology_c_symbols { ) -> $crate::ffi_utils::CResult { wrap!($crate::get_supported_builtin_entities(language, results)) } + + #[no_mangle] + pub extern "C" fn nlu_ontology_builtin_entity_examples( + builtin_entity_kind: *const libc::c_char, + language: *const libc::c_char, + results: *mut *const $crate::CStringArray, + ) -> $crate::ffi_utils::CResult { + wrap!($crate::get_builtin_entity_examples(builtin_entity_kind, language, results)) + } }; } diff --git a/snips-nlu-ontology-ffi-with-parsers/Cargo.toml b/snips-nlu-ontology-ffi-with-parsers/Cargo.toml index d85c86d..2926e7c 100644 --- a/snips-nlu-ontology-ffi-with-parsers/Cargo.toml +++ b/snips-nlu-ontology-ffi-with-parsers/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ontology-ffi-with-parsers" -version = "0.54.1" +version = "0.54.2" authors = ["Kevin Lefevre "] [dependencies] diff --git a/snips-nlu-ontology-ffi/Cargo.toml b/snips-nlu-ontology-ffi/Cargo.toml index 2d7664a..6301e05 100644 --- a/snips-nlu-ontology-ffi/Cargo.toml +++ b/snips-nlu-ontology-ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ontology-ffi" -version = "0.54.1" +version = "0.54.2" authors = ["Kevin Lefevre "] [dependencies] diff --git a/snips-nlu-ontology-parsers-ffi-macros/Cargo.toml b/snips-nlu-ontology-parsers-ffi-macros/Cargo.toml index c8ec207..7273369 100644 --- a/snips-nlu-ontology-parsers-ffi-macros/Cargo.toml +++ b/snips-nlu-ontology-parsers-ffi-macros/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ontology-parsers-ffi-macros" -version = "0.54.1" +version = "0.54.2" authors = ["Kevin Lefevre "] [dependencies] diff --git a/snips-nlu-ontology-parsers/Cargo.toml b/snips-nlu-ontology-parsers/Cargo.toml index 8267db3..f546dab 100644 --- a/snips-nlu-ontology-parsers/Cargo.toml +++ b/snips-nlu-ontology-parsers/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ontology-parsers" -version = "0.54.1" +version = "0.54.2" authors = ["Kevin Lefevre "] [dependencies] @@ -9,5 +9,5 @@ lazy_static = "1.0" maplit = "1.0" snips-nlu-utils = { git = "https://github.com/snipsco/snips-nlu-utils", tag = "0.6.0" } regex = "0.2" -rustling-ontology = { git = "https://github.com/snipsco/rustling-ontology", tag = "0.16.4" } +rustling-ontology = { git = "https://github.com/snipsco/rustling-ontology", tag = "0.16.5" } snips-nlu-ontology = { path = "../snips-nlu-ontology" } diff --git a/snips-nlu-ontology-parsers/src/builtin_entity_parser.rs b/snips-nlu-ontology-parsers/src/builtin_entity_parser.rs index 24dfa7f..1d05319 100644 --- a/snips-nlu-ontology-parsers/src/builtin_entity_parser.rs +++ b/snips-nlu-ontology-parsers/src/builtin_entity_parser.rs @@ -15,6 +15,7 @@ use rustling_ontology::{build_parser, OutputKind, Parser, ResolverContext}; pub struct BuiltinEntityParser { parser: Parser, lang: Language, + supported_entity_kinds: Vec, } lazy_static! { @@ -37,9 +38,17 @@ impl BuiltinEntityParser { .unwrap() .entry(lang.to_string()) .or_insert_with(|| { + let supported_entity_kinds = BuiltinEntityKind::supported_entity_kinds(lang); + let ordered_entity_kinds = OutputKind::all() + .iter() + .map(|output_kind| output_kind.into_builtin()) + .filter(|builtin_entity_kind| supported_entity_kinds.contains(&builtin_entity_kind)) + .collect(); + Arc::new(BuiltinEntityParser { parser: build_parser(lang.into_builtin()).unwrap(), lang, + supported_entity_kinds: ordered_entity_kinds, }) }) .clone() @@ -118,47 +127,38 @@ impl BuiltinEntityParser { let key = CacheKey { lang: self.lang.to_string(), input: sentence.into(), - kinds: filter_entity_kinds.map(|kinds| kinds.to_vec()), + kinds: filter_entity_kinds + .map(|kinds| + self.supported_entity_kinds.clone() + .into_iter() + .filter(|entity_kind| kinds.contains(&entity_kind)) + .collect()) + .unwrap_or_else(|| self.supported_entity_kinds.clone()), }; CACHED_ENTITY .lock() .unwrap() .cache(&key, |key| { let context = ResolverContext::default(); - if let Some(kinds) = key.kinds.as_ref() { - let kind_order = kinds - .iter() - .map(|kind| kind.into_builtin()) - .collect::>(); - self.parser - .parse_with_kind_order(&sentence.to_lowercase(), &context, &kind_order) - .unwrap_or(Vec::new()) - .iter() - .filter_map(|m| { - let entity_kind = BuiltinEntityKind::from_rustling(&m.value); - kinds.iter().find(|kind| **kind == entity_kind).map(|kind| { - BuiltinEntity { - value: sentence[m.byte_range.0..m.byte_range.1].into(), - range: m.char_range.0..m.char_range.1, - entity: m.value.clone().into_builtin(), - entity_kind: kind.clone(), - } - }) - }) - .sorted_by(|a, b| Ord::cmp(&a.range.start, &b.range.start)) - } else { - self.parser - .parse(&sentence.to_lowercase(), &context) - .unwrap_or(Vec::new()) - .iter() - .map(|entity| BuiltinEntity { - value: sentence[entity.byte_range.0..entity.byte_range.1].into(), - range: entity.char_range.0..entity.char_range.1, - entity: entity.value.clone().into_builtin(), - entity_kind: BuiltinEntityKind::from_rustling(&entity.value), + let kind_order = key.kinds.iter() + .map(|kind| kind.into_builtin()) + .collect::>(); + self.parser + .parse_with_kind_order(&sentence.to_lowercase(), &context, &kind_order) + .unwrap_or_else(|_| vec![]) + .iter() + .filter_map(|m| { + let entity_kind = BuiltinEntityKind::from_rustling(&m.value); + key.kinds.iter().find(|kind| **kind == entity_kind).map(|kind| { + BuiltinEntity { + value: sentence[m.byte_range.0..m.byte_range.1].into(), + range: m.char_range.0..m.char_range.1, + entity: m.value.clone().into_builtin(), + entity_kind: kind.clone(), + } }) - .sorted_by(|a, b| Ord::cmp(&a.range.start, &b.range.start)) - } + }) + .sorted_by(|a, b| Ord::cmp(&a.range.start, &b.range.start)) }) .entities } @@ -225,7 +225,7 @@ impl EntityCache { struct CacheKey { lang: String, input: String, - kinds: Option>, + kinds: Vec, } #[derive(Debug, Clone)] @@ -253,6 +253,7 @@ mod test { use itertools::Itertools; use nlu_ontology::SlotValue::InstantTime; + use nlu_ontology::language::Language; #[test] fn test_entities_extraction() { @@ -289,7 +290,7 @@ mod test { parser .extract_entities( "I would like to do a bank transfer of ten euros for my friends", - None + None, ) .iter() .map(|e| e.entity_kind) @@ -361,7 +362,7 @@ mod test { let key = CacheKey { lang: "en".into(), input: "test".into(), - kinds: None, + kinds: vec![], }; let mut cache = EntityCache::new(10); // caching for 10s @@ -376,4 +377,21 @@ mod test { cache.cache(&key, parse).instant ); } + + #[test] + fn test_entity_examples_should_be_parsed() { + for language in Language::all() { + let parser = BuiltinEntityParser::get(*language); + for entity_kind in BuiltinEntityKind::all() { + for example in entity_kind.examples(*language) { + let results = parser.extract_entities(example, Some(&[*entity_kind])); + assert_eq!( + 1, results.len(), + "Expected 1 result for entity kind '{:?}' in language '{:?}' for example \ + {:?}, but found: {:?}", entity_kind, language, example, results); + assert_eq!(example.to_string(), results[0].value); + } + } + } + } } diff --git a/snips-nlu-ontology-parsers/src/rustling_converters.rs b/snips-nlu-ontology-parsers/src/rustling_converters.rs index f903e1f..c36190b 100644 --- a/snips-nlu-ontology-parsers/src/rustling_converters.rs +++ b/snips-nlu-ontology-parsers/src/rustling_converters.rs @@ -208,6 +208,20 @@ impl<'a> FromRustling<&'a Output> for BuiltinEntityKind { } } +impl<'a> FromRustling<&'a OutputKind> for BuiltinEntityKind { + fn from_rustling(v: &OutputKind) -> Self { + match *v { + OutputKind::AmountOfMoney => BuiltinEntityKind::AmountOfMoney, + OutputKind::Duration => BuiltinEntityKind::Duration, + OutputKind::Number => BuiltinEntityKind::Number, + OutputKind::Ordinal => BuiltinEntityKind::Ordinal, + OutputKind::Temperature => BuiltinEntityKind::Temperature, + OutputKind::Time => BuiltinEntityKind::Time, + OutputKind::Percentage => BuiltinEntityKind::Percentage, + } + } +} + impl<'a> FromRustling<&'a BuiltinEntityKind> for OutputKind { fn from_rustling(v: &BuiltinEntityKind) -> Self { match *v { diff --git a/snips-nlu-ontology/Cargo.toml b/snips-nlu-ontology/Cargo.toml index d969947..b698433 100644 --- a/snips-nlu-ontology/Cargo.toml +++ b/snips-nlu-ontology/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "snips-nlu-ontology" -version = "0.54.1" +version = "0.54.2" authors = [ "Adrien Ball ", "Thibaut Lorrain ", diff --git a/snips-nlu-ontology/src/builtin_entity.rs b/snips-nlu-ontology/src/builtin_entity.rs index c82fd44..442b741 100644 --- a/snips-nlu-ontology/src/builtin_entity.rs +++ b/snips-nlu-ontology/src/builtin_entity.rs @@ -86,22 +86,269 @@ impl BuiltinEntityKind { } impl BuiltinEntityKind { - pub fn examples(&self) -> &[&str] { + pub fn examples(&self, language: Language) -> &[&str] { + match language { + Language::DE => self.de_examples(), + Language::EN => self.en_examples(), + Language::ES => self.es_examples(), + Language::FR => self.fr_examples(), + Language::JA => self.ja_examples(), + Language::KO => self.ko_examples(), + } + } + + fn de_examples(&self) -> &[&str] { + match *self { + BuiltinEntityKind::AmountOfMoney => &[ + "10$", + "ungefähr 5€", + "zwei tausend Dollar", + ], + BuiltinEntityKind::Duration => &[ + "2stdn", + "drei monate", + "ein halbe Stunde", + "8 Jahre und zwei Tage", + ], + BuiltinEntityKind::Number => &[ + "2001", + "einundzwanzig", + "zwei tausend", + "zwei tausend und drei" + ], + BuiltinEntityKind::Ordinal => &[ + "Erste", + "der zweite", + "zwei und zwanzigster" + ], + BuiltinEntityKind::Temperature => &[ + "70K", + "3°C", + "Dreiundzwanzig Grad", + "zweiunddreißig Grad Fahrenheit", + ], + BuiltinEntityKind::Time => &[ + "Heute", + "16.30 Uhr", + "in 1 Stunde", + "dritter Dienstag im Juni", + ], + BuiltinEntityKind::Percentage => &[ + "25%", + "zwanzig Prozent", + "zwei tausend und fünfzig Prozent", + ], + } + } + + fn en_examples(&self) -> &[&str] { match *self { - BuiltinEntityKind::AmountOfMoney => &["ten dollars and five cents", "around 5€"], - BuiltinEntityKind::Duration => &["3 month", "4 seconds", "8 years"], - BuiltinEntityKind::Number => &["twenty-two", "1.2"], - BuiltinEntityKind::Ordinal => &["the second"], - BuiltinEntityKind::Temperature => &["Twenty three degrees celsius", "3°C"], + BuiltinEntityKind::AmountOfMoney => &[ + "10$", + "around 5€", + "ten dollars and five cents", + ], + BuiltinEntityKind::Duration => &[ + "1h", + "3 months", + "half an hour", + "8 years and two days", + ], + BuiltinEntityKind::Number => &[ + "2001", + "twenty one", + "three hundred and four", + ], + BuiltinEntityKind::Ordinal => &[ + "1st", + "the second", + "the twenty third", + ], + BuiltinEntityKind::Temperature => &[ + "70K", + "3°C", + "Twenty three degrees", + "one hundred degrees fahrenheit", + ], BuiltinEntityKind::Time => &[ "Today", "4:30 pm", - "next monday at 8p.m.", - "yesterday morning", + "in 1 hour", "3rd tuesday of June", - "June 2nd at 9 pm", ], - BuiltinEntityKind::Percentage => &["twenty percent", "25%"], + BuiltinEntityKind::Percentage => &[ + "25%", + "twenty percent", + "two hundred and fifty percents", + ], + } + } + + fn es_examples(&self) -> &[&str] { + match *self { + BuiltinEntityKind::AmountOfMoney => &[ + "10$", + "cinco euros", + "diez dólares y cinco centavos", + ], + BuiltinEntityKind::Duration => &[ + "1h", + "3 meses", + // TODO: Add these examples when they are supported by the BuiltinEntityParser + // "ciento dos minutos", + // "8 años y dos dias", + ], + BuiltinEntityKind::Number => &[ + "2001", + "diez y ocho", + // TODO: Add these examples when they are supported by the BuiltinEntityParser + // "ciento dos", + // "tres mil nueve", + // "ciento cuarenta y nueve", + ], + BuiltinEntityKind::Ordinal => &[ + "primer", + // TODO: Add these examples when they are supported by the BuiltinEntityParser + // "vigésimo primero", + ], + BuiltinEntityKind::Temperature => &[ + "70K", + "3°C", + "veintitrés grados", + // TODO: Add these examples when they are supported by the BuiltinEntityParser + // "tres mil grados Fahrenheit", + ], + BuiltinEntityKind::Time => &[ + "hoy", + "esta noche", + "a la 1:30", + "el primer jueves de junio", + ], + BuiltinEntityKind::Percentage => &[ + "25%", + "quince porcientos", + "20 por ciento", + // TODO: Add these examples when they are supported by the BuiltinEntityParser + // "tres mil por ciento", + ], + } + } + + fn fr_examples(&self) -> &[&str] { + match *self { + BuiltinEntityKind::AmountOfMoney => &[ + "10$", + "environ 5€", + "dix dollars et cinq centimes", + ], + BuiltinEntityKind::Duration => &[ + "1h", + "3 mois", + "une demi heure", + "8 ans et deux jours", + ], + BuiltinEntityKind::Number => &[ + "2001", + "vingt deux", + "deux cent trois", + "quatre vingt dix neuf" + ], + BuiltinEntityKind::Ordinal => &[ + "1er", + "le deuxième", + "vingt et unieme", + ], + BuiltinEntityKind::Temperature => &[ + "70K", + "3°C", + "vingt trois degrés", + "deux cent degrés Fahrenheit", + ], + BuiltinEntityKind::Time => &[ + "Aujourd'hui", + "à 14:30", + "dans 1 heure", + "le premier jeudi de Juin", + ], + BuiltinEntityKind::Percentage => &[ + "25%", + "20 pourcents", + "quatre vingt dix pourcents", + ], + } + } + + fn ja_examples(&self) -> &[&str] { + match *self { + BuiltinEntityKind::AmountOfMoney => &[ + "八ドル", + "五十二アメリカドル", + ], + BuiltinEntityKind::Duration => &[ + "一秒間", + "五日間", + "十ヶ月間", + ], + BuiltinEntityKind::Number => &[ + "十二", + "二千五", + "四千三百二", + ], + BuiltinEntityKind::Ordinal => &[ + "十一番目", + "九十一番目", + ], + BuiltinEntityKind::Temperature => &[ + "五度", + "二十五度", + "マイナス十度", + ], + BuiltinEntityKind::Time => &[ + "一昨日", + "次の水曜日", + "十三時三十分", + "二千十三年十二月二十三日", + ], + BuiltinEntityKind::Percentage => &[ + "十五%", + "五パーセント", + ], + } + } + + fn ko_examples(&self) -> &[&str] { + match *self { + BuiltinEntityKind::AmountOfMoney => &[ + "10$", + "약 5 유로", + "10 달러 5 센트", + ], + BuiltinEntityKind::Duration => &[ + "양일", + "1시간", + "3 개월", + ], + BuiltinEntityKind::Number => &[ + "2001", + "삼천", + "스물 둘", + "천 아흔 아홉", + ], + BuiltinEntityKind::Ordinal => &[ + "첫", + "첫번째" + ], + BuiltinEntityKind::Temperature => &[ + "5도", + "섭씨 20도", + "화씨 백 도", + ], + BuiltinEntityKind::Time => &[ + "오늘", + "14시 30 분에", + "5 월 첫째 목요일", + ], + BuiltinEntityKind::Percentage => &[], } } } @@ -167,54 +414,74 @@ impl BuiltinEntityKind { pub fn supported_languages(&self) -> &[Language] { match *self { BuiltinEntityKind::AmountOfMoney => &[ - Language::EN, - Language::FR, Language::DE, + Language::EN, Language::ES, + Language::FR, + Language::JA, Language::KO, ], BuiltinEntityKind::Duration => &[ + Language::DE, Language::EN, Language::ES, Language::FR, + Language::JA, Language::KO, - Language::DE, ], BuiltinEntityKind::Number => &[ + Language::DE, Language::EN, Language::ES, Language::FR, + Language::JA, Language::KO, - Language::DE, ], BuiltinEntityKind::Ordinal => &[ + Language::DE, Language::EN, Language::ES, Language::FR, + Language::JA, Language::KO, - Language::DE, ], BuiltinEntityKind::Temperature => &[ + Language::DE, Language::EN, Language::ES, Language::FR, + Language::JA, Language::KO, - Language::DE, ], BuiltinEntityKind::Time => &[ + Language::DE, Language::EN, Language::ES, Language::FR, + Language::JA, Language::KO, - Language::DE, ], - BuiltinEntityKind::Percentage => { - &[Language::EN, Language::ES, Language::FR, Language::DE] - } + BuiltinEntityKind::Percentage => &[ + Language::DE, + Language::EN, + Language::ES, + Language::FR, + Language::JA, + ] } } } +impl BuiltinEntityKind { + pub fn supported_entity_kinds(language: Language) -> Vec { + Self::all() + .to_vec() + .into_iter() + .filter(|e| e.supported_languages().contains(&language)) + .collect() + } +} + #[cfg(test)] mod tests { use super::*; @@ -231,6 +498,18 @@ mod tests { assert_eq!(expected_description, description); } + #[test] + fn test_entity_examples_should_be_provided_for_all_supported_languages() { + for entity_kind in BuiltinEntityKind::all() { + for language in entity_kind.supported_languages() { + let examples = entity_kind.examples(*language); + assert!(examples.len() >= 1, + "No examples provided for entity '{:?}' in language '{:?}'", entity_kind, + language) + } + } + } + #[test] fn test_builtin_entity_ser_de() { let entity = BuiltinEntity { diff --git a/snips-nlu-ontology/src/language.rs b/snips-nlu-ontology/src/language.rs index 7c72e9b..66a8079 100644 --- a/snips-nlu-ontology/src/language.rs +++ b/snips-nlu-ontology/src/language.rs @@ -36,7 +36,7 @@ macro_rules! language_enum { } } -language_enum!([DE, EN, ES, FR, KO, JA]); +language_enum!([DE, EN, ES, FR, JA, KO]); impl Language { pub fn full_name(&self) -> &'static str {