Skip to content

Commit

Permalink
✨ Put <description> in fifth field of json output, <title> in seventh
Browse files Browse the repository at this point in the history
We're now actually parsing <description> from XML :)

Also, allow <description> alongside <label> in pipespec
<option> elements (under prefs)

+ add a schemas.xml for editor
  • Loading branch information
unhammer committed Feb 27, 2019
1 parent fe5c34c commit a73e9e5
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 30 deletions.
2 changes: 1 addition & 1 deletion src/checkertypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ namespace divvun {
*/

typedef std::string Lang;
typedef std::u16string Msg;
typedef std::pair<std::u16string, std::u16string> Msg;
typedef std::u16string ErrId;
typedef std::basic_regex<char> ErrRe;

Expand Down
2 changes: 1 addition & 1 deletion src/errors.dtd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<!-- Copyright (C) 2016-2017, Kevin Brubeck Unhammer <[email protected]> -->
<!-- Copyright (C) 2016-2019, Kevin Brubeck Unhammer <[email protected]> -->

<!-- This program is free software: you can redistribute it and/or modify -->
<!-- it under the terms of the GNU General Public License as published by -->
Expand Down
3 changes: 2 additions & 1 deletion src/errors.rnc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (C) 2016-2017, Kevin Brubeck Unhammer <[email protected]>
# THIS IS A GENERATED FILE, DO NOT EDIT!
# Copyright (C) 2016-2019, Kevin Brubeck Unhammer <[email protected]>

# This program is free software: you can redistribute it and/or modify

Expand Down
6 changes: 3 additions & 3 deletions src/main_checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,16 @@ void printPrefs(const Pipeline& pipeline) {
const Prefs& prefs = lp.second;
std::cout << "==== Toggles: ====" << std::endl;
for(const auto& id : prefs.toggleIds) {
std::cout << "- [ ] " << toUtf8(id.first) << " \t" << toUtf8(id.second) << std::endl;
std::cout << "- [ ] " << toUtf8(id.first) << " \t" << toUtf8(id.second.first) << std::endl;
}
for(const auto& re : prefs.toggleRes) {
std::cout << "- [ ] [regex] \t" << toUtf8(re.second) << std::endl;
std::cout << "- [ ] [regex] \t" << toUtf8(re.second.first) << std::endl;
}
std::cout << "==== Options: ====" << std::endl;
for(const Option& o : prefs.options) {
std::cout << "- " << o.name << " (" << o.type << "):" << std::endl;
for(const auto& c : o.choices) {
std::cout << "- ( ) " << toUtf8(c.first) << " \t" << toUtf8(c.second) << std::endl;
std::cout << "- ( ) " << toUtf8(c.first) << " \t" << toUtf8(c.second.first) << std::endl;
}
}
}
Expand Down
16 changes: 14 additions & 2 deletions src/pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,23 @@ inline void parsePrefs(LocalisedPrefs& prefs, const pugi::xml_node& cmd) {
unordered_map<Lang, unordered_map<ErrId, Msg>> lems;
for (const pugi::xml_node& option: pref.children()) {
const auto errId = fromUtf8(option.attribute("err-id").value());
for (const pugi::xml_node& label: option.children()) {
for (const pugi::xml_node& label: option.children("label")) {
const auto lang = label.attribute("xml:lang").value();
const auto msg = fromUtf8(label.text().get()); // or xml_raw_cdata(label);
lems[lang][errId] = msg;
// Let <description> default to <label> first:
lems[lang][errId] = std::make_pair(msg, msg);
}
for (const pugi::xml_node& description: option.children("description")) {
const auto lang = description.attribute("xml:lang").value();
const auto msg = fromUtf8(description.text().get());
if(lems[lang].find(errId) != lems[lang].end()) {
lems[lang][errId].second = msg;
}
else {
// No <label> for this language, fallback to <description>:
lems[lang][errId] = std::make_pair(msg, msg);
}
}
}
for(const auto& lem : lems) {
const Lang& lang = lem.first;
Expand Down
9 changes: 7 additions & 2 deletions src/pipespec.dtd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<!-- Copyright (C) 2017, Kevin Brubeck Unhammer <[email protected]> -->
<!-- Copyright (C) 2017–2019, Kevin Brubeck Unhammer <[email protected]> -->

<!-- This program is free software: you can redistribute it and/or modify -->
<!-- it under the terms of the GNU General Public License as published by -->
Expand Down Expand Up @@ -57,7 +57,7 @@
<!-- type and name are *not* unique here; several pipelines may have a pref for e.g. "Oxford comma" -->


<!ELEMENT option (label+)>
<!ELEMENT option ((label|description)+)>
<!ATTLIST option
err-id CDATA #REQUIRED
>
Expand All @@ -69,6 +69,11 @@
xml:lang CDATA #REQUIRED
>

<!ELEMENT description %Text;>
<!ATTLIST description
xml:lang CDATA #REQUIRED
>

<!-- General "system" command – pipelines with this can only be used
in settings where we can open processes: -->
<!ELEMENT sh (arg*)> <!-- NOT IMPLEMENTED YET -->
Expand Down
7 changes: 5 additions & 2 deletions src/pipespec.rnc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# THIS IS A GENERATED FILE, DO NOT EDIT!
# Copyright (C) 2017, Kevin Brubeck Unhammer <[email protected]>
# Copyright (C) 2017–2019, Kevin Brubeck Unhammer <[email protected]>

# This program is free software: you can redistribute it and/or modify

Expand Down Expand Up @@ -72,13 +72,16 @@ attlist.pref &=
attribute type { text },
attribute name { text }
# type and name are *not* unique here; several pipelines may have a pref for e.g. "Oxford comma"
option = element option { attlist.option, label+ }
option = element option { attlist.option, (label | description)+ }
attlist.option &= attribute err-id { text }

Text = (text | em)*
label = element label { attlist.label, Text }
attlist.label &= attribute xml:lang { text }

description = element description { attlist.description, Text }
attlist.description &= attribute xml:lang { text }

# General "system" command – pipelines with this can only be used
# in settings where we can open processes:
sh = element sh { attlist.sh, arg* }
Expand Down
72 changes: 54 additions & 18 deletions src/suggest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,36 +89,66 @@ const MsgMap readMessagesXml(pugi::xml_document& doc, pugi::xml_parse_result& re
MsgMap msgs;

if (result) {
// <default>'s:
for (pugi::xml_node def: doc.child("errors").child("defaults").children("default")) {
// std::cerr << "defaults" << std::endl;
// For all <title>'s and <description>'s, add all their parent <id>/<re>'s:
for (pugi::xml_node child: def.child("header").children("title")) {
const auto& msg = fromUtf8(xml_raw_cdata(child));
const auto& lang = child.attribute("xml:lang").value();
for (pugi::xml_node e: def.child("ids").children("e")) {
// e_value assumes we only ever have one PCDATA element here:
const auto& errtype = fromUtf8(e.attribute("id").value());
// std::cerr << toUtf8(errtype) << std::endl;
if(msgs[lang].first.count(errtype) != 0) {
std::cerr << "divvun-suggest: WARNING: Duplicate titles for " << e.attribute("id").value() << std::endl;
}
msgs[lang].first[errtype] = msg;
// Default to <title> as <description>, may be overridden below:
msgs[lang].first[errtype] = make_pair(msg, msg);
}
for (pugi::xml_node re: def.child("ids").children("re")) {
std::basic_regex<char> r(re.attribute("v").value());
msgs[lang].second.push_back(std::make_pair(r, msg));
msgs[lang].second.push_back(std::make_pair(r, make_pair(msg, msg)));
}
}
for (pugi::xml_node child: def.child("body").children("description")) {
const auto& msg = fromUtf8(xml_raw_cdata(child));
const auto& lang = child.attribute("xml:lang").value();
for (pugi::xml_node e: def.child("ids").children("e")) {
const auto& errtype = fromUtf8(e.attribute("id").value());
auto &langmsgs = msgs[lang].first;
if (langmsgs.find(errtype) != langmsgs.end()) {
langmsgs[errtype].second = msg;
} else {
// No <title> for this language, fallback to <description>:
langmsgs[errtype] = std::make_pair(msg, msg);
}
}
}
}
// <error>'s
for (pugi::xml_node error: doc.child("errors").children("error")) {
const auto& errtype = fromUtf8(error.attribute("id").value());
// For all <title>'s and <description>'s, add the <error id> attribute:
for (pugi::xml_node child: error.child("header").children("title")) {
// child_value assumes we only ever have one PCDATA element here:
const auto& errtype = fromUtf8(error.attribute("id").value());
const auto& msg = fromUtf8(xml_raw_cdata(child));
const auto& lang = child.attribute("xml:lang").value();
if(msgs[lang].first.count(errtype) != 0) {
std::cerr << "divvun-suggest: WARNING: Duplicate titles for " << error.attribute("id").value() << std::endl;
auto& langmsgs = msgs[lang].first;
if(langmsgs.count(errtype) != 0) {
std::cerr << "divvun-suggest: WARNING: Duplicate <title>'s for " << error.attribute("id").value() << std::endl;
}
langmsgs[errtype] = make_pair(msg, msg);
}
for (pugi::xml_node child: error.child("body").children("description")) {
const auto& msg = fromUtf8(xml_raw_cdata(child));
const auto& lang = child.attribute("xml:lang").value();
auto& langmsgs = msgs[lang].first;
if(langmsgs.find(errtype) != langmsgs.end()) {
langmsgs[errtype].second = msg;
}
else {
// No <title> for this language, fallback to <description>:
langmsgs[errtype] = std::make_pair(msg, msg);
}
msgs[lang].first[errtype] = msg;
}
}
}
Expand Down Expand Up @@ -397,10 +427,10 @@ variant<Nothing, Err> Suggest::cohort_errs(const ErrId& err_id,
if(cohort_empty(c) || c.added || ignores.find(err_id) != ignores.end()) {
return Nothing();
}
u16string msg;
Msg msg;
for(const auto& mlang : sortedmsglangs) {
if(msg.empty() && mlang != locale) {
std::cerr << "divvun-suggest: WARNING: No message for " << json::str(err_id) << " in xml:lang '" << locale << "', trying '" << mlang << "'" << std::endl;
if(msg.second.empty() && mlang != locale) {
std::cerr << "divvun-suggest: WARNING: No <description> for " << json::str(err_id) << " in xml:lang '" << locale << "', trying '" << mlang << "'" << std::endl;
}
const auto& lmsgs = msgs.at(mlang);
if(lmsgs.first.count(err_id) != 0) {
Expand All @@ -421,23 +451,28 @@ variant<Nothing, Err> Suggest::cohort_errs(const ErrId& err_id,
}
}
}
if(!msg.empty()) {
if(!msg.second.empty()) {
break;
}
}
if(msg.empty()) {
std::cerr << "divvun-suggest: WARNING: No message for " << json::str(err_id) << " in any xml:lang" << std::endl;
msg = err_id;
if(msg.second.empty()) {
std::cerr << "divvun-suggest: WARNING: No <description> for " << json::str(err_id) << " in any xml:lang" << std::endl;
msg.second = err_id;
}
if(msg.first.empty()) {
msg.first = err_id;
}
// TODO: Make suitable structure on creating MsgMap instead?
replaceAll(msg, u"$1", c.form);
replaceAll(msg.first, u"$1", c.form);
replaceAll(msg.second, u"$1", c.form);
for(const auto& r: c.readings) {
if((!r.errtype.empty()) && err_id != r.errtype) {
continue;
}
rel_on_match(r.rels, MSG_TEMPLATE_REL, sentence,
[&] (const string& relname, size_t i_t, const Cohort& trg) {
replaceAll(msg, fromUtf8(relname.c_str()), trg.form);
replaceAll(msg.first, fromUtf8(relname.c_str()), trg.form);
replaceAll(msg.second, fromUtf8(relname.c_str()), trg.form);
});
}
auto beg = c.pos;
Expand Down Expand Up @@ -770,8 +805,9 @@ RunState Suggest::run_json(std::istream& is, std::ostream& os)
<< "," << std::to_string(e.beg)
<< "," << std::to_string(e.end)
<< "," << json::str(e.err)
<< "," << json::str(e.msg)
<< "," << json::str(e.msg.second)
<< "," << json::str_arr(e.rep)
<< "," << json::str(e.msg.first)
<< "]";
wantsep = true;
}
Expand Down
1 change: 1 addition & 0 deletions test/checker/pipespec.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<pref type="Punctuation" name="Tusenskilje">
<option err-id="tusen-mellom">
<label xml:lang="nn">Eg vil ha mellomrom mellom 000</label>
<description xml:lang="nn">Det er lov med anten mellomrom eller punktum som skiljeteikn for tal over tusen.</description>
</option>
<option err-id="tusen-punktum">
<label xml:lang="nn">Eg vil ha punktum mellom 000</label>
Expand Down
19 changes: 19 additions & 0 deletions test/checker/schemas.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0"?>
<locatingRules xmlns="http://thaiopensource.com/ns/locating-rules/1.0">

<!--
Keeping this file in the same directory as your XML files lets Emacs
(nxml-mode) locate the schemas listed below and show validation errors.
More information at:
https://www.gnu.org/software/emacs/manual/html_node/nxml-mode/Schema-locating-files.html#Schema-locating-files
-->

<typeId id="errors" uri="../../src/errors.rnc"/>
<documentElement localName="errors" typeId="errors"/>
<uri pattern="errors.xml" typeId="errors"/>

<typeId id="pipespec" uri="../../src/pipespec.rnc"/>
<documentElement localName="pipespec" typeId="pipespec"/>
<uri pattern="pipespec.xml" typeId="pipespec"/>

</locatingRules>

0 comments on commit a73e9e5

Please sign in to comment.