Settings: Add Settings - Measures - Readability - Flesch Reading Ease; Work Area: Remove Profiler - Fernández Huerta's Readability Score / Szigriszt's Perspicuity Index
BLKSerene · Jul 24, 2023 · 641b643 · 641b643
1 parent 54d0e1c
commit 641b643
Show file tree

Hide file tree

Showing 11 changed files with 849 additions and 947 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@
 
 ## [3.3.0](https://github.com/BLKSerene/Wordless/releases/tag/3.3.0) - ??/??/2023
 ### 🎉 New Features
+- Settings: Add Settings - Measures - Readability - Flesch Reading Ease
 - Utils: Add khmer-nltk's Khmer sentence tokenizer, word tokenizer, and part-of-speech tagger
 - Utils: Add PyThaiNLP's perceptron part-of-speech tagger (Blackboard)
 - Utils: Add spaCy's Korean sentence recognizer, word tokenizer, part-of-speech tagger, lemmatizer, and dependency parser
@@ -35,6 +36,7 @@
 
 ### ❌ Removals
 - Utils: Remove PyThaiNLP's perceptron part-of-speech tagger (LST20)
+- Work Area: Remove Profiler - Fernández Huerta's Readability Score / Szigriszt's Perspicuity Index
 
 ### ⏫ Dependency Changes
 - Dependencies: Add khmer-nltk

diff --git a/doc/doc_eng.md b/doc/doc_eng.md
diff --git a/doc/measures/readability/fernandez_huertas_readability_score.svg b/doc/measures/readability/fernandez_huertas_readability_score.svg
diff --git a/doc/measures/readability/re.svg b/doc/measures/readability/re.svg
diff --git a/doc/measures/readability/wstf.svg b/doc/measures/readability/wstf.svg
diff --git a/tests/wl_tests_measures/test_measures_readability.py b/tests/wl_tests_measures/test_measures_readability.py
diff --git a/tests/wl_tests_work_area/test_profiler.py b/tests/wl_tests_work_area/test_profiler.py
@@ -95,7 +95,7 @@ def update_gui(err_msg, texts_stats_files):
         count_tokens_lens_syls.append(collections.Counter(len_tokens_syls))
         count_tokens_lens_chars.append(collections.Counter(len_tokens_chars))
 
-        assert len(readability_statistics) == 24
+        assert len(readability_statistics) == 22
 
         # Counts
         assert count_paras

diff --git a/wordless/wl_measures/wl_measures_readability.py b/wordless/wl_measures/wl_measures_readability.py
@@ -216,27 +216,6 @@ def devereux_readability_index(main, text):
 
     return grade_placement
 
-# Fernández Huerta's Readability Score
-# References:
-#     Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. Consigna, 214, 29–32.
-#     Law, Gwillim. (2011, May 27). Error in the Fernandez Huerta readability formula. LINGUIST List. https://linguistlist.org/issues/22/22-2332/
-def fernandez_huertas_readability_score(main, text):
-    if text.lang == 'spa' and text.lang in main.settings_global['syl_tokenizers']:
-        text = get_counts(main, text)
-
-        if text.count_words and text.count_sentences:
-            p = (
-                206.84
-                - 60 * (text.count_syls / text.count_words)
-                - 102 * (text.count_sentences / text.count_words)
-            )
-        else:
-            p = 'text_too_short'
-    else:
-        p = 'no_support'
-
-    return p
-
 # Flesch-Kincaid Grade Level
 # Reference: Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel. Naval Air Station Memphis. https://apps.dtic.mil/sti/pdfs/ADA006655.pdf
 def flesch_kincaid_grade_level(main, text):
@@ -272,22 +251,71 @@ def flesch_kincaid_grade_level(main, text):
 #     Garais, E. (2011). Web applications readability. Journal of Information Systems and Operations Management, 5(1), 117–121. http://www.rebe.rau.ro/RePEc/rau/jisomg/SP11/JISOM-SP11-A13.pdf
 # Russian variant:
 #     Oborneva, I. V. (2006). Автоматизированная оценка сложности учебных текстов на основе статистических параметров [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
+# Spanish variant (Fernández Huerta):
+#     Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. Consigna, 214, 29–32.
+#     Garais, E. (2011). Web applications readability. Journal of Information Systems and Operations Management, 5(1), 117–121. http://www.rebe.rau.ro/RePEc/rau/jisomg/SP11/JISOM-SP11-A13.pdf
+# Spanish variant (Szigriszt Pazos):
+#     Szigriszt Pazos, F. (1993). Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
 def flesch_reading_ease(main, text):
     if text.lang in main.settings_global['syl_tokenizers']:
         text = get_counts(main, text)
 
         if text.count_words and text.count_sentences:
-            flesch_re = (
-                206.835
-                - 0.846 * (text.count_syls / text.count_words * 100)
-                - 1.015 * (text.count_words / text.count_sentences)
-            )
+            if text.lang == 'nld':
+                re = (
+                    206.84
+                    - 77 * (text.count_syls / text.count_words)
+                    - 0.93 * (text.count_words / text.count_sentences)
+                )
+            elif text.lang == 'fra':
+                re = (
+                    207
+                    - 73.6 * (text.count_syls / text.count_words)
+                    - 1.015 * (text.count_words / text.count_sentences)
+                )
+            elif text.lang.startswith('deu_'):
+                re = (
+                    180
+                    - 58.5 * (text.count_syls / text.count_words)
+                    - (text.count_words / text.count_sentences)
+                )
+            elif text.lang == 'ita':
+                re = (
+                    217
+                    - 60 * (text.count_syls / text.count_words)
+                    - 1.3 * (text.count_words / text.count_sentences)
+                )
+            elif text.lang == 'rus':
+                re = (
+                    206.835
+                    - 60.1 * (text.count_syls / text.count_words)
+                    - 1.3 * (text.count_words / text.count_sentences)
+                )
+            elif text.lang == 'spa':
+                if main.settings_custom['measures']['readability']['re']['variant_spa'] == 'Fernández Huerta':
+                    re = (
+                        206.84
+                        - 60 * (text.count_syls / text.count_words)
+                        - 1.02 * (text.count_words / text.count_sentences)
+                    )
+                elif main.settings_custom['measures']['readability']['re']['variant_spa'] == 'Szigriszt Pazos':
+                    re = (
+                        206.84
+                        - 62.3 * (text.count_syls / text.count_words)
+                        - (text.count_words / text.count_sentences)
+                    )
+            else:
+                re = (
+                    206.835
+                    - 0.846 * (text.count_syls / text.count_words * 100)
+                    - 1.015 * (text.count_words / text.count_sentences)
+                )
         else:
-            flesch_re = 'text_too_short'
+            re = 'text_too_short'
     else:
-        flesch_re = 'no_support'
+        re = 'no_support'
 
-    return flesch_re
+    return re
 
 # Flesch Reading Ease (Simplified)
 # Reference: Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. Journal of Applied Psychology, 35(5), 333–337. https://doi.org/10.1037/h0062427
@@ -689,25 +717,6 @@ def spache_grade_level(main, text):
 
     return grade_level
 
-# Szigriszt's Perspicuity Index
-# Reference: Szigriszt Pazos, F. (1993). Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
-def szigriszts_perspicuity_index(main, text):
-    if text.lang == 'spa' and text.lang in main.settings_global['syl_tokenizers']:
-        text = get_counts(main, text)
-
-        if text.count_words and text.count_sentences:
-            p = (
-                207
-                - 62.3 * (text.count_syls / text.count_words)
-                - (text.count_words / text.count_sentences)
-            )
-        else:
-            p = 'text_too_short'
-    else:
-        p = 'no_support'
-
-    return p
-
 # Wiener Sachtextformel
 # References:
 #     Bamberger, R., & Vanecek, E. (1984). Lesen – Verstehen – Lernen – Schreiben. Jugend und Volk.

diff --git a/wordless/wl_profiler.py b/wordless/wl_profiler.py
@@ -362,7 +362,6 @@ def __init__(self, parent):
             _tr('wl_profiler', 'Coleman-Liau Index'),
             _tr('wl_profiler', 'Dale-Chall Readability Score'),
             _tr('wl_profiler', 'Devereaux Readability Index'),
-            _tr('wl_profiler', "Fernández Huerta's Readability Score"),
             _tr('wl_profiler', 'Flesch-Kincaid Grade Level'),
             _tr('wl_profiler', 'Flesch Reading Ease'),
             _tr('wl_profiler', 'Flesch Reading Ease (Simplified)'),
@@ -379,7 +378,6 @@ def __init__(self, parent):
             _tr('wl_profiler', 'Rix'),
             _tr('wl_profiler', 'SMOG Grade'),
             _tr('wl_profiler', 'Spache Grade Level'),
-            _tr('wl_profiler', "Szigriszt's Perspicuity Index"),
             _tr('wl_profiler', 'Wiener Sachtextformel')
         ]
 
@@ -1178,7 +1176,6 @@ def run(self):
                         wl_measures_readability.coleman_liau_index(self.main, text),
                         wl_measures_readability.dale_chall_readability_score(self.main, text),
                         wl_measures_readability.devereux_readability_index(self.main, text),
-                        wl_measures_readability.fernandez_huertas_readability_score(self.main, text),
                         wl_measures_readability.flesch_kincaid_grade_level(self.main, text),
                         wl_measures_readability.flesch_reading_ease(self.main, text),
                         wl_measures_readability.flesch_reading_ease_simplified(self.main, text),
@@ -1195,7 +1192,6 @@ def run(self):
                         wl_measures_readability.rix(self.main, text),
                         wl_measures_readability.smog_grade(self.main, text),
                         wl_measures_readability.spache_grade_level(self.main, text),
-                        wl_measures_readability.szigriszts_perspicuity_index(self.main, text),
                         wl_measures_readability.wiener_sachtextformel(self.main, text)
                     ]
                 else:

diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py
@@ -1741,6 +1741,10 @@ def init_settings_default(main):
         'measures': {
             # Settings - Measures - Readability
             'readability': {
+                're': {
+                    'variant_spa': 'Fernández Huerta'
+                },
+
                 'wstf': {
                     'variant': '1'
                 }

diff --git a/wordless/wl_settings/wl_settings_measures.py b/wordless/wl_settings/wl_settings_measures.py
@@ -31,38 +31,62 @@ def __init__(self, main):
         self.settings_default = self.main.settings_default['measures']['readability']
         self.settings_custom = self.main.settings_custom['measures']['readability']
 
+        # Flesch Reading Ease
+        self.group_box_re = QGroupBox(self.tr('Flesch Reading Ease'), self)
+
+        self.label_re_variant_spa = QLabel(self.tr('Spanish variant:'), self)
+        self.combo_box_re_variant_spa = wl_boxes.Wl_Combo_Box(self)
+
+        self.combo_box_re_variant_spa.addItems([
+            'Fernández Huerta',
+            'Szigriszt Pazos'
+        ])
+
+        self.group_box_re.setLayout(wl_layouts.Wl_Layout())
+        self.group_box_re.layout().addWidget(self.label_re_variant_spa, 0, 0)
+        self.group_box_re.layout().addWidget(self.combo_box_re_variant_spa, 0, 1)
+
+        self.group_box_re.layout().setColumnStretch(2, 1)
+
         # Wiener Sachtextformel
         self.group_box_wstf = QGroupBox(self.tr('Wiener Sachtextformel'), self)
 
-        self.label_variant = QLabel(self.tr('Variant:'), self)
-        self.combo_box_variant = wl_boxes.Wl_Combo_Box(self)
+        self.label_wstf_variant = QLabel(self.tr('Variant:'), self)
+        self.combo_box_wstf_variant = wl_boxes.Wl_Combo_Box(self)
 
-        self.combo_box_variant.addItems(['1', '2', '3', '4'])
+        self.combo_box_wstf_variant.addItems(['1', '2', '3', '4'])
 
         self.group_box_wstf.setLayout(wl_layouts.Wl_Layout())
-        self.group_box_wstf.layout().addWidget(self.label_variant, 0, 0)
-        self.group_box_wstf.layout().addWidget(self.combo_box_variant, 0, 1)
+        self.group_box_wstf.layout().addWidget(self.label_wstf_variant, 0, 0)
+        self.group_box_wstf.layout().addWidget(self.combo_box_wstf_variant, 0, 1)
 
         self.group_box_wstf.layout().setColumnStretch(2, 1)
 
         self.setLayout(wl_layouts.Wl_Layout())
-        self.layout().addWidget(self.group_box_wstf, 0, 0)
+        self.layout().addWidget(self.group_box_re, 0, 0)
+        self.layout().addWidget(self.group_box_wstf, 1, 0)
 
         self.layout().setContentsMargins(6, 4, 6, 4)
-        self.layout().setRowStretch(1, 1)
+        self.layout().setRowStretch(2, 1)
 
     def load_settings(self, defaults = False):
         if defaults:
             settings = copy.deepcopy(self.settings_default)
         else:
             settings = copy.deepcopy(self.settings_custom)
 
+        # Flesch Reading Ease
+        self.combo_box_re_variant_spa.setCurrentText(settings['re']['variant_spa'])
+
         # Wiener Sachtextformel
-        self.combo_box_variant.setCurrentText(settings['wstf']['variant'])
+        self.combo_box_wstf_variant.setCurrentText(settings['wstf']['variant'])
 
     def apply_settings(self):
+        # Flesch Reading Ease
+        self.settings_custom['re']['variant_spa'] = self.combo_box_re_variant_spa.currentText()
+
         # Wiener Sachtextformel
-        self.settings_custom['wstf']['variant'] = self.combo_box_variant.currentText()
+        self.settings_custom['wstf']['variant'] = self.combo_box_wstf_variant.currentText()
 
         return True