Skip to content

Commit

Permalink
fix further encoding issues
Browse files: browse the repository at this point in the history
  • Loading branch information
andreaspacher committed Mar 18, 2021
1 parent 7399238 commit 4bb4051
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions Script/clean-final-data.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -69,7 +69,15 @@ WRONG <- c("<U+0096>", "<U+0092>", "<U+0097>", "<U+00A0>", "<U+009A>",
"<U+00AD>", "<U+00AE>", "<U+00AB>", "<U+00BB>", "<U+0099>",
"<U+0203>", "<U+0392>", "<U+0421>", "<U+041C>", "<U+202A>",
"<U+039A>", "<U+0441>", "<U+0410>", "<U+00A9>", "<U+200E>",
"<U+00B3>", "<U+02BD>", "<U+1EA1>", "<U+1ECD>", "<U+1ED9>")
"<U+00B3>", "<U+02BD>", "<U+1EA1>", "<U+1ECD>", "<U+1ED9>",
"<U+04E7>", "<U+1ECC>", "<U+00A4>", "<U+0087>", "<U+E524>",
"<U+00A2>", "<U+1EE7>", "<U+1EE9>", "<U+009C>", "<U+00BA>",
"<U+0084>", "<U+00B2>", "<U+00B0>", "<U+021A>", "<U+03A4>",
"<U+0391>", "<U+041E>", "<U+041F>", "<U+0413>", "<U+0422>",
"<U+0423>", "<U+03C1>", "<U+039F>", "<U+0095>", "<U+03B3>",
"<U+0384>", "<U+03BF>", "<U+03BD>", "<U+03C5>", "<U+2009>",
"<U+00AC>", "<U+03CE>", "<U+03BA>", "<U+03B9>", "<U+202F>",
"<U+00B8>", "<U+0412>", "<U+0430>")

RIGHT <- c("", "'", "", " ", "š",
"ʻ", "ž", "", "", "",
Expand All @@ -80,7 +88,15 @@ RIGHT <- c("–", "'", "—", " ", "š",
"-", "®", "«", "»", "",
"ȃ", "B", "C", "M", "",
"K", "c", "A", "©", "",
"³", "ʽ", "", "", "")
"³", "ʽ", "", "", "",
"ö", "", "¤", "", " ",
"¢", "", "", "œ", "°",
" ", "²", "°", "Ț", "T",
"A", "О", "П", "Г", "Т",
"У", "ρ", "Ο", "", "γ",
"´", "ο", "ν", "υ", " ",
"¬", "ώ", "κ", "ι", " ",
"¸", "B", "a")

editors$editor <- stringi::stri_replace_all_fixed(
editors$editor,
Expand Down

0 comments on commit 4bb4051

Please sign in to comment.