Skip to content

Commit c33489a

Browse files
committed
Put casing of &ADDED word on the first non-added word to its right
Also changes withCasing to apply lowering where casing==lower – this should have no effect unless we're changing casing as described above, since getCasing only returns lower if there are no uppercase letters. Closes #77
1 parent 9496db4 commit c33489a

File tree

4 files changed

+31
-17
lines changed

4 files changed

+31
-17
lines changed

src/suggest.cpp

+18-12
Original file line numberDiff line numberDiff line change
@@ -576,22 +576,11 @@ if(verbose) std::cerr << "\033[1;33mright=\t" << i_right << "\033[0m" << std::en
576576
UStringVector reps = {u""};
577577
UStringVector reps_suggestwf = {}; // If we're doing SUGGESTWF, we ignore reps
578578
string prev_added_before_blank = "";
579+
std::optional<Casing> addedcasing = std::nullopt;
579580
for (size_t i = i_left; i <= i_right; ++i) {
580581
const auto& trg = sentence.cohorts[i];
581582
Casing casing = getCasing(toUtf8(trg.form));
582583

583-
// std::cerr << "\033[0;35mtrg.added=\t" << trg.added << " i=" << i << "i_left" << i_left<< "\033[0m" << std::endl;
584-
if(trg.added) {
585-
for(size_t j = i; j <= i_right; j++) {
586-
const auto& right_of_trg = sentence.cohorts[j];
587-
if(!right_of_trg.added) {
588-
// std::cerr << "\033[1;35mright_of_added=\t" << toUtf8(right_of_trg.form) << " j=" << j << "\033[0m" << std::endl;
589-
casing = getCasing(toUtf8(right_of_trg.form));
590-
break;
591-
}
592-
}
593-
}
594-
595584
if(verbose) std::cerr << "\033[1;34mi=\t" << i << "\033[0m" << std::endl;
596585
if(verbose) std::cerr << "\033[1;34mtrg.form=\t'" << toUtf8(trg.form) << "'\033[0m" << std::endl;
597586
if(verbose) std::cerr << "\033[1;34mtrg.id=\t" << trg.id << "\033[0m" << std::endl;
@@ -604,6 +593,23 @@ if(verbose) std::cerr << "\033[1;35mtrg.raw_pre_blank=\t'" << trg.raw_pre_blank
604593
if(verbose) std::cerr << "\t\t\033[1;36mdelete=\t" << toUtf8(trg.form) << "\033[0m" << std::endl;
605594
}
606595

596+
if(trg.added) {
597+
// This word was added, get casing from a non-added word to the right:
598+
for(size_t j = i; j <= i_right; j++) {
599+
const auto& right_of_trg = sentence.cohorts[j];
600+
if(!right_of_trg.added) {
601+
addedcasing = casing;
602+
casing = getCasing(toUtf8(right_of_trg.form));
603+
break;
604+
}
605+
}
606+
}
607+
else if(addedcasing.has_value() && !del) {
608+
// This word was not &ADDED, but is preceded by an added word:
609+
casing = addedcasing.value();
610+
addedcasing = std::nullopt;
611+
}
612+
607613
bool added_before_blank = false;
608614
bool fixedcase = false;
609615
bool applies_deletion = trg.id == src.id && src_applies_deletion;

src/suggest.hpp

+11-3
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,12 @@
4242
# include <hfst/HfstTransducer.h>
4343
// variants:
4444
# include <variant>
45+
# include <optional>
4546

4647
namespace divvun {
4748

4849
using std::variant;
50+
using std::optional;
4951
using std::pair;
5052
using std::string;
5153
using std::stringstream;
@@ -144,10 +146,16 @@ inline std::string totitle(const string& input) {
144146
std::transform(w.begin(), w.begin() + 1, w.begin(), std::towupper);
145147
return wideToUtf8(w);
146148
}
149+
150+
inline std::string tolower(const string& input) {
151+
std::wstring w = wideFromUtf8(input);
152+
setlocale(LC_ALL, "");
153+
std::transform(w.begin(), w.begin() + 1, w.begin(), std::towlower);
154+
return wideToUtf8(w);
155+
}
147156
// #endif
148157

149-
inline std::string withCasing(
150-
bool fixedcase, const Casing& inputCasing, const string& input) {
158+
inline std::string withCasing(bool fixedcase, const Casing& inputCasing, const string& input) {
151159
if (fixedcase) {
152160
return input;
153161
}
@@ -159,7 +167,7 @@ inline std::string withCasing(
159167
case mIxed:
160168
return input;
161169
case lower:
162-
return input;
170+
return tolower(input);
163171
}
164172
// should never get to this point
165173
return input;

test/suggest/expected.move-after.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"errs":[["Nrel mellom nabs",4,20,"syn-abs-wordorder","syn-abs-wordorder",["Nabs Nrel mellom"],"syn-abs-wordorder"]],"text":"før Nrel mellom nabs"}
1+
{"errs":[["Nrel mellom nabs",4,20,"syn-abs-wordorder","syn-abs-wordorder",["Nabs nrel mellom"],"syn-abs-wordorder"]],"text":"før Nrel mellom nabs"}

test/suggest/expected.move.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"errs":[["Nrel mellom nabs",0,16,"syn-abs-wordorder","syn-abs-wordorder",["Nabs Nrel mellom"],"syn-abs-wordorder"]],"text":"Nrel mellom nabs"}
1+
{"errs":[["Nrel mellom nabs",0,16,"syn-abs-wordorder","syn-abs-wordorder",["Nabs nrel mellom"],"syn-abs-wordorder"]],"text":"Nrel mellom nabs"}

0 commit comments

Comments
 (0)