From cc345d919bdf9b864ca0a4144243b4f261f4f1f9 Mon Sep 17 00:00:00 2001 From: aStereoID <43568056+aStereoID@users.noreply.github.com> Date: Thu, 29 Aug 2024 13:37:15 +0200 Subject: [PATCH 1/8] Update formatRules.txt - number expansion as year --- .../marytts/language/hsb/formatRules.txt | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/main/resources/marytts/language/hsb/formatRules.txt b/src/main/resources/marytts/language/hsb/formatRules.txt index 57e6da6..19b269f 100644 --- a/src/main/resources/marytts/language/hsb/formatRules.txt +++ b/src/main/resources/marytts/language/hsb/formatRules.txt @@ -35,6 +35,7 @@ x.x: << koma >>; 400: štyrista[>>]; 500: <>]; 1000: tysac[>>]; +1100-1999: <%%alt-hundreds<[ >>]; 2000: <>]; 1000000: jedyn milion[ >>]; 2000000: dwaj milionaj[ >>]; @@ -173,13 +174,22 @@ x.x: << koma >>; 80: [>>a]wosomdźesate; 90: [>>a]dźewjećdźesate; 100: stote; -%alt-teens: -=%spellout-numbering=; -1000>: <%%alt-hundreds<[ >>]; -2000: =%spellout-numbering=; %%alt-hundreds: -0: SHOULD NEVER GET HERE!; -10: <%spellout-numbering< tysac; -11: =%spellout-numbering= stow>%%empty>; -%%empty: -0:; +1100: jědnaćestow; +1101-1199: jědnaćestow[ >>]; +1200: dwanaćestow; +1201-1299: dwanaćestow[ >>]; +1300: třinaćestow; +1301-1399: třinaćestow[ >>]; +1400: štyrnaćestow; +1401-1499: štyrnaćestow[ >>]; +1500: pjatnaćestow; +1501-1599: pjatnaćestow[ >>]; +1600: šěsnaćestow; +1601-1699: šěsnaćestow[ >>]; +1700: sydomnaćestow; +1701-1799: sydomnaćestow[ >>]; +1800: wosomnaćestow; +1801-1899: wosomnaćestow[ >>]; +1900: dźewjatnaćestow; +1901-1999: dźewjatnaćestow[ >>]; From d7e00ee1ba9dd1d16e6ed1b57d9aacf0fcd30bd4 Mon Sep 17 00:00:00 2001 From: aStereoID <43568056+aStereoID@users.noreply.github.com> Date: Thu, 29 Aug 2024 14:02:22 +0200 Subject: [PATCH 2/8] Update numbers.csv - annual figures adjusted --- src/test/resources/marytts/language/hsb/numbers.csv | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/test/resources/marytts/language/hsb/numbers.csv b/src/test/resources/marytts/language/hsb/numbers.csv index 8c96cb1..ba30328 100644 --- a/src/test/resources/marytts/language/hsb/numbers.csv +++ b/src/test/resources/marytts/language/hsb/numbers.csv @@ -17,8 +17,11 @@ 665,šěsćstowpjećašěsćdźesat 827,wosomstowsydomadwaceći 1033,tysactřiatřiceći -1500,tysacpjećstow -1984,tysacdźewjećstowštyriawosomdźesat +1217,dwanaćestow sydomnaće +1500,pjatnaćestow +1601,šěsnaćestow jedyn +1854,wosomnaćestow štyriapołsta +1984,dźewjatnaćestow štyriawosomdźesat 5022,pjećtysacdwajadwaceći 10101,dźesaćtysacstojedyn 42099,dwajaštyrcećitysacdźewjećadźewjećdźesat @@ -31,4 +34,4 @@ 375730163855,"třistapjećasydomdźesat miliardow sydomstowtřiceći milionow stotřiašěsćdźesattysacwosomstowpjećapołsta" 2455353628018,"dwaj bilionaj štyristapjećapołsta miliardow třistatřiapołsta milionow šěsćstowwosomadwacećitysacwosomnaće" 1001001001001001,"jedna biliarda jedyn bilion jedna miliarda jedyn milion tysacjedyn" -3.14159,"tři koma jedyn štyri jedyn pjeć dźewjeć" \ No newline at end of file +3.14159,"tři koma jedyn štyri jedyn pjeć dźewjeć" From e2116cb2a9ad4db2fd4be9c7743c977b929ac959 Mon Sep 17 00:00:00 2001 From: aStereoID <43568056+aStereoID@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:20:41 +0200 Subject: [PATCH 3/8] Update formatRules.txt --- .../marytts/language/hsb/formatRules.txt | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/main/resources/marytts/language/hsb/formatRules.txt b/src/main/resources/marytts/language/hsb/formatRules.txt index 19b269f..e1811c3 100644 --- a/src/main/resources/marytts/language/hsb/formatRules.txt +++ b/src/main/resources/marytts/language/hsb/formatRules.txt @@ -35,7 +35,7 @@ x.x: << koma >>; 400: štyrista[>>]; 500: <>]; 1000: tysac[>>]; -1100-1999: <%%alt-hundreds<[ >>]; +1100/100: <%spellout-numbering>]; 2000: <>]; 1000000: jedyn milion[ >>]; 2000000: dwaj milionaj[ >>]; @@ -174,22 +174,6 @@ x.x: << koma >>; 80: [>>a]wosomdźesate; 90: [>>a]dźewjećdźesate; 100: stote; -%%alt-hundreds: -1100: jědnaćestow; -1101-1199: jědnaćestow[ >>]; -1200: dwanaćestow; -1201-1299: dwanaćestow[ >>]; -1300: třinaćestow; -1301-1399: třinaćestow[ >>]; -1400: štyrnaćestow; -1401-1499: štyrnaćestow[ >>]; -1500: pjatnaćestow; -1501-1599: pjatnaćestow[ >>]; -1600: šěsnaćestow; -1601-1699: šěsnaćestow[ >>]; -1700: sydomnaćestow; -1701-1799: sydomnaćestow[ >>]; -1800: wosomnaćestow; 1801-1899: wosomnaćestow[ >>]; 1900: dźewjatnaćestow; 1901-1999: dźewjatnaćestow[ >>]; From aaee9f6b8d6e1543e5aba7a3e0f22966890483b7 Mon Sep 17 00:00:00 2001 From: aStereoID <43568056+aStereoID@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:44:03 +0200 Subject: [PATCH 4/8] Update formatRules.txt formal correction --- src/main/resources/marytts/language/hsb/formatRules.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/resources/marytts/language/hsb/formatRules.txt b/src/main/resources/marytts/language/hsb/formatRules.txt index e1811c3..aa051da 100644 --- a/src/main/resources/marytts/language/hsb/formatRules.txt +++ b/src/main/resources/marytts/language/hsb/formatRules.txt @@ -174,6 +174,3 @@ x.x: << koma >>; 80: [>>a]wosomdźesate; 90: [>>a]dźewjećdźesate; 100: stote; -1801-1899: wosomnaćestow[ >>]; -1900: dźewjatnaćestow; -1901-1999: dźewjatnaćestow[ >>]; From a70b2dcb94590371aa3b12c2b86e064997fb4d79 Mon Sep 17 00:00:00 2001 From: aStereoID <43568056+aStereoID@users.noreply.github.com> Date: Fri, 25 Oct 2024 15:27:38 +0200 Subject: [PATCH 5/8] Update abbreviations.csv Adding abbreviations --- src/main/resources/marytts/language/hsb/abbreviations.csv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/resources/marytts/language/hsb/abbreviations.csv b/src/main/resources/marytts/language/hsb/abbreviations.csv index fcbfeec..4de5582 100644 --- a/src/main/resources/marytts/language/hsb/abbreviations.csv +++ b/src/main/resources/marytts/language/hsb/abbreviations.csv @@ -1,3 +1,5 @@ +"ca.","circa" +"dr.","doktor" "kwart.","kwartal" "kw.","kwartal" "měs.","měsac" From 799ac9d0b7f951e629f0f7a4316d8b7efed5bdc7 Mon Sep 17 00:00:00 2001 From: aStereoID <43568056+aStereoID@users.noreply.github.com> Date: Mon, 4 Nov 2024 10:31:28 +0100 Subject: [PATCH 6/8] header row added --- src/main/resources/marytts/language/hsb/abbreviations.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/marytts/language/hsb/abbreviations.csv b/src/main/resources/marytts/language/hsb/abbreviations.csv index 4de5582..52e9f5a 100644 --- a/src/main/resources/marytts/language/hsb/abbreviations.csv +++ b/src/main/resources/marytts/language/hsb/abbreviations.csv @@ -1,3 +1,4 @@ +"abbreviation","expansion" "ca.","circa" "dr.","doktor" "kwart.","kwartal" From 11b69b45fd9310399057250c7154e53e87eea011 Mon Sep 17 00:00:00 2001 From: aStereoID Date: Mon, 4 Nov 2024 13:45:35 +0100 Subject: [PATCH 7/8] Add further abbreviations --- .../resources/marytts/language/hsb/abbreviations.csv | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/resources/marytts/language/hsb/abbreviations.csv b/src/main/resources/marytts/language/hsb/abbreviations.csv index 52e9f5a..6031c09 100644 --- a/src/main/resources/marytts/language/hsb/abbreviations.csv +++ b/src/main/resources/marytts/language/hsb/abbreviations.csv @@ -1,4 +1,13 @@ "abbreviation","expansion" +"A0","a nula" +"A1","a jedyn" +"A2","a dwaj" +"A3","a tři" +"A4","a štyri" +"A5","a pjeć" +"A6","a šěsć" +"A7","a sydom" +"1989/90","dźewjatnaćestow dźewjećawosomdźesat dźewjećdźesat" "ca.","circa" "dr.","doktor" "kwart.","kwartal" From 51c3f1547d855846ff4028cef20fad4cada72431 Mon Sep 17 00:00:00 2001 From: Ingmar Steiner Date: Thu, 7 Nov 2024 18:35:18 +0100 Subject: [PATCH 8/8] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 049089f..76b4d44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ Upper Sorbian language component for MaryTTS - Abbreviation expansion in preprocessing +### Fixed + +- Number expansion for years 1100-1999 + [v0.2.0] - 2024-05-12 ---------------------