From 8ff158d444892022cbbfaa4b94c1b7c9063dc827 Mon Sep 17 00:00:00 2001 From: BLKSerene Date: Tue, 25 Jul 2023 19:03:16 +0800 Subject: [PATCH] Settings: Add Settings - Measures - Readability - Bormuth's Grade Placement; Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement --- CHANGELOG.md | 3 +- doc/doc_eng.md | 193 +++++----- doc/measures/d_f.svg | 12 - doc/measures/f_bar.svg | 11 - .../readability/bormuths_cloze_mean_gp.svg | 331 ++++++++++++++++++ .../readability/spache_grade_level.svg | 113 +++--- .../szigriszts_perspicuity_index.svg | 82 ----- .../readability/{xc50.svg => x_c50.svg} | 133 ++++--- .../test_measures_readability.py | 44 +++ tests/wl_tests_work_area/test_profiler.py | 2 +- .../wl_measures/wl_measures_readability.py | 47 ++- wordless/wl_profiler.py | 4 + wordless/wl_settings/wl_settings_default.py | 4 + wordless/wl_settings/wl_settings_measures.py | 28 +- 14 files changed, 680 insertions(+), 327 deletions(-) delete mode 100644 doc/measures/d_f.svg delete mode 100644 doc/measures/f_bar.svg create mode 100644 doc/measures/readability/bormuths_cloze_mean_gp.svg delete mode 100644 doc/measures/readability/szigriszts_perspicuity_index.svg rename doc/measures/readability/{xc50.svg => x_c50.svg} (84%) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9824162f..ba214f00e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,12 +20,13 @@ ## [3.3.0](https://github.com/BLKSerene/Wordless/releases/tag/3.3.0) - ??/??/2023 ### 🎉 New Features -- Settings: Add Settings - Measures - Readability - Flesch Reading Ease +- Settings: Add Settings - Measures - Readability - Bormuth's Grade Placement / Flesch Reading Ease - Utils: Add khmer-nltk's Khmer sentence tokenizer, word tokenizer, and part-of-speech tagger - Utils: Add PyThaiNLP's perceptron part-of-speech tagger (Blackboard) - Utils: Add spaCy's Korean sentence recognizer, word tokenizer, part-of-speech tagger, lemmatizer, and dependency parser - Utils: Add spaCy's Malay word 
tokenizer - Utils: Add spaCy's Slovenian sentence recognizer, part-of-speech tagger, lemmatizer, and dependency parser +- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement ### ✨ Improvements - Utils: Update Wordless's sentence and sentence segment splitters diff --git a/doc/doc_eng.md b/doc/doc_eng.md index e9201160c..7ff37ec57 100644 --- a/doc/doc_eng.md +++ b/doc/doc_eng.md @@ -521,7 +521,7 @@ You can further filter the results as you see fit by clicking **Filter Results** - **3.7.3 Collocate**
The collocating token. You can specify what should be counted as a "token" via **Token Settings**. -- **3.7.4 Ln, ... , L3, L2, L1, R1, R2, R3, ... , Rn**
+- **3.7.4 Ln, ..., L3, L2, L1, R1, R2, R3, ..., Rn**
The number of co-occurrences of the node and the collocating token with the collocating token at the given position in each file. - **3.7.5 Frequency**
@@ -571,7 +571,7 @@ You can further filter the results as you see fit by clicking **Filter Results** - **3.8.3 Collocate**
The collocating part of speech. You can specify what should be counted as a "token" via **Token Settings**. -- **3.8.4 Ln, ... , L3, L2, L1, R1, R2, R3, ... , Rn**
+- **3.8.4 Ln, ..., L3, L2, L1, R1, R2, R3, ..., Rn**
The number of co-occurrences of the node and the collocating part of speech with the collocating part of speech at the given position in each file. - **3.8.5 Frequency**
@@ -900,11 +900,13 @@ Vietnamese |CP1258 |✔ The readability of a text depends on several variables including the average sentence length, average word length in characters, average word length in syllables, number of monosyllabic words, number of polysyllabic words, number of difficult words, etc. -It should be noted that some readability tests are **language-specific**, or applicable only to files of languages for which *Wordless* have **built-in syllable tokenization support** (check [4.4.1](#doc-4-1) for reference), while others can be applied to files of all languages. +It should be noted that some readability measures are **language-specific**, or applicable only to texts in languages for which *Wordless* has **built-in syllable tokenization support** (check [4.4.1](#doc-4-1) for reference), while others can be applied to texts in all languages. These variables are used in the following formulas:
**NumSentences**: Number of sentences in the text or sample
**NumWords**: Number of words in the text or sample
+**NumWordsDale₇₆₉**: Number of words outside the Dale list of 769 easy words ([Dale, 1931](#ref-dale-1931))
+**NumWordsDale₃₀₀₀**: Number of words outside the Dale list of 3000 easy words ([Dale & Chall, 1948b](#ref-dale-chall-1948b))
**NumWordsMonosyllabic**: Number of monosyllabic words
**NumSyls**: Number of syllable in the text or sample
**NumCharsAll**: Number of characters (including letters, CJK characters, etc., numerals, and punctuation marks) in the text or sample
@@ -918,6 +920,14 @@ Automated Arabic Readability Index: Automated Readability Index: ARI = 0.5 \times \frac{NumWords}{NumSentences} + 4.71 \times \frac{NumCharsAll}{NumWords} - 21.43 +Bormuth's Cloze Mean & Grade Placement: + \begin{align*} + M = \; &0.886593 - 0.083640 \times \frac{NumChars}{NumWords} + 0.161911 \times \left(\frac{NumWordsDale_{3000}}{NumWords}\right)^3 - \\ + &0.021401 \times \frac{NumWords}{NumSentences} + 0.000577 \times \left(\frac{NumWords}{NumSentences}\right)^2 - 0.000005 \times \left(\frac{NumWords}{NumSentences}\right)^3 \\ + GP = \; &4.275 + 12.881 \times M - 34.934 \times M^2 + 20.388 \times M^3 + 26.194 \times C - 2.046 \times C^2 - 11.767 \times C^3 - \\ + &44.285 \times \left(M \times C\right) + 97.620 \times \left(M \times C\right)^2 - 59.538 \times \left(M \times C\right)^3 + \end{align*} + Coleman-Liau Index: \begin{align*} Estimated \; Cloze \; % &= 141.8401 - 0.21459 \times \left(\frac{NumCharsAlphabetic}{NumWords} \times 100\right) + 1.079812 \times \left(\frac{NumSentences}{NumWords} \times 100\right) \\ @@ -925,7 +935,7 @@ Coleman-Liau Index: \end{align*} Dale-Chall Readability Score: - {X_{c50} = 0.1579 \times \frac{NumDifficultWords}{NumWords} + 0.0496 \times \frac{NumWords}{NumSentences} + 3.6365} + X_{c50} = 0.1579 \times \frac{NumWordsDale_{3000}}{NumWords} + 0.0496 \times \frac{NumWords}{NumSentences} + 3.6365 Devereux Readability Index: Grade \; Placement = 1.56 \times \frac{NumCharsAll}{NumWords} + 0.19 \times \frac{NumWords}{NumSentences} - 6.49 @@ -987,7 +997,7 @@ SMOG Grade: g = 3.1291 + 1.043 \times \sqrt{NumWordsPolysyllabic} Spache Grade Level: - {Grade \; Level = 0.141 \times \frac{100}{NumSentences} + 0.086 \times \left(\frac{NumDifficultWords}{100} \times 100\right) + 0.839} + {Grade \; Level = 0.141 \times \frac{100}{NumSentences} + 0.086 \times \left(\frac{NumWordsDale_{769}}{100} \times 100\right) + 0.839} Wiener Sachtextformel: \begin{align*} @@ -1002,28 +1012,29 @@ Wiener Sachtextformel: Measure of 
Readability|Formula ----------------------|------- -Automated Arabic Readability Index
([Al-Tamimi et al., 2013](#ref-altamimi-et-al-2013))|![Formula](/doc/measures/readability/aari.svg)

* This test applies only to **Arabic texts**. +Automated Arabic Readability Index
([Al-Tamimi et al., 2013](#ref-altamimi-et-al-2013))|![Formula](/doc/measures/readability/aari.svg)

* This measure applies only to **Arabic texts**. Automated Readability Index
([Smith & Senter, 1967](#ref-smith-senter-1967))|![Formula](/doc/measures/readability/ari.svg) +Bormuth's Cloze Mean & Grade Placement
([Bormuth, 1969](#ref-bormuth-1969))|![Formula](/doc/measures/readability/bormuths_cloze_mean_gp.svg)
where **C** is the cloze criterion score, whose value could be changed via **Menu → Preferences → Settings → Measures → Readability → Bormuth's Grade Placement → Cloze criterion score**

* This measure applies only to **English texts**. Coleman-Liau Index
([Coleman & Liau, 1975](#ref-coleman-liau-1975))|![Formula](/doc/measures/readability/coleman_liau_index.svg) -Dale-Chall Readability Score
([Dale & Chall, 1948a](#ref-dale-chall-1948a))|![Formula](/doc/measures/readability/xc50.svg)
where **NumDifficultWords** is the number of words outside the Dale list of 3000 easy words ([Dale & Chall, 1948b](#ref-dale-chall-1948b)).

* This test applies only to **English texts**. +Dale-Chall Readability Score
([Dale & Chall, 1948a](#ref-dale-chall-1948a))|![Formula](/doc/measures/readability/x_c50.svg)

* This measure applies only to **English texts**. Devereux Readability Index
([Smith, 1961](#ref-smith-1961))|![Formula](/doc/measures/readability/devereux_readability_index.svg) Flesch-Kincaid Grade Level1
([Kincaid et al., 1975](#ref-kincaid-et-al-1975))|![Formula](/doc/measures/readability/flesch_kincaid_grade_level.svg) -Flesch Reading Ease1
([Flesch, 1948](#ref-flesch-1948)
Dutch: [Douma, 1960](#ref-douma-1960); [Brouwer, 1963](#ref-brouwer-1963)
French: [Kandel & Moles, 1958](#ref-kandel-moles-1958)
German: [Amstad, 1978](#ref-amstad-1978)
Italian: [Franchina & Vacca, 1986](#ref-franchina-vacca-1986)
Russian: [Oborneva, 2006](#ref-oborneva-2006)
Spanish: [Fernández Huerta, 1959](#ref-fernandez-huerta-1959); [Szigriszt Pazos, 1993](#ref-szigrisze-pazos-1993))|![Formula](/doc/measures/readability/re.svg)

* This test has multiple variants for some languages, which you could select via **Menu → Preferences → Settings → Measures → Readability → Flesch Reading Ease**. +Flesch Reading Ease1
([Flesch, 1948](#ref-flesch-1948)
Dutch: [Douma, 1960](#ref-douma-1960); [Brouwer, 1963](#ref-brouwer-1963)
French: [Kandel & Moles, 1958](#ref-kandel-moles-1958)
German: [Amstad, 1978](#ref-amstad-1978)
Italian: [Franchina & Vacca, 1986](#ref-franchina-vacca-1986)
Russian: [Oborneva, 2006](#ref-oborneva-2006)
Spanish: [Fernández Huerta, 1959](#ref-fernandez-huerta-1959); [Szigriszt Pazos, 1993](#ref-szigrisze-pazos-1993))|![Formula](/doc/measures/readability/re.svg)

* This measure has multiple variants for some languages, which you could select via **Menu → Preferences → Settings → Measures → Readability → Flesch Reading Ease**. Flesch Reading Ease (Simplified)1
([Farr et al., 1951](#ref-farr-et-al-1951))|![Formula](/doc/measures/readability/re_simplified.svg) FORCAST Grade Level1
([Caylor et al., 1973](#ref-caylor-et-al-1973))|![Formula](/doc/measures/readability/rgl.svg)

* A sample of 150 words is taken randomly from the text, thus the text should be **at least 150 words long**. -Fórmula de comprensibilidad de Gutiérrez de Polini
([Gutiérrez de Polini, 1972](#ref-gutierrez-de-polini-1972))|![Formula](/doc/measures/readability/cp.svg)

* This test applies only to **Spanish texts**. -Fórmula de Crawford1
([Crawford, 1985](#ref-crawford-1985))|![Formula](/doc/measures/readability/formula_de_crawford.svg)

* This test applies only to **Spanish texts**. -Gulpease Index
([Lucisano & Emanuela Piemontese, 1988](#ref-lucisano-emanuela-piemontese-1988))|![Formula](/doc/measures/readability/gulpease_index.svg)

* This test applies only to **Italian texts**. -Gunning Fog Index1
(English: [Gunning, 1968](#ref-gunning-1968)
Polish: [Pisarek, 1969](#ref-pisarek-1969))|![Formula](/doc/measures/readability/fog_index.svg)
where **NumHardWords** is the number of words with 3 or more syllables excluding all proper nouns and words with 3 syllables ending with *-ed* or *-es* for **English texts**, and the number of words with 4 or more syllables for **Polish texts**.

* This test applies only to **English texts** and **Polish texts**. -Legibilidad µ
([Muñoz Baquedano, 2006](#ref-munoz-baquedano-2006))|![Formula](/doc/measures/readability/mu.svg)
where **LenWordsAvg** is the average word length in letters, and **LenWordsVar** is the variance of word lengths in letters.

* This test applies only to **Spanish texts**.
* The text should be **at least 2 words long**. -Lensear Write1
([O’Hayre, 1966](#ref-o-hayre-1966))|![Formula](/doc/measures/readability/lensear_write.svg)
where **NumWordsMonosyllabic** is the number of monosyllabic words excluding *the*, *is*, *are*, *was*, *were*, and **NumSentences** is the number of sentences to the nearest period.

* This test applies only to **English texts**.
* A sample of 100 words is taken randomly from the text.
If the text is **shorter than 100 words**, **NumWordsMonosyllabic** and **NumSentences** need to be multiplied by 100 and then divided by the number of text. +Fórmula de comprensibilidad de Gutiérrez de Polini
([Gutiérrez de Polini, 1972](#ref-gutierrez-de-polini-1972))|![Formula](/doc/measures/readability/cp.svg)

* This measure applies only to **Spanish texts**. +Fórmula de Crawford1
([Crawford, 1985](#ref-crawford-1985))|![Formula](/doc/measures/readability/formula_de_crawford.svg)

* This measure applies only to **Spanish texts**. +Gulpease Index
([Lucisano & Emanuela Piemontese, 1988](#ref-lucisano-emanuela-piemontese-1988))|![Formula](/doc/measures/readability/gulpease_index.svg)

* This measure applies only to **Italian texts**. +Gunning Fog Index1
(English: [Gunning, 1968](#ref-gunning-1968)
Polish: [Pisarek, 1969](#ref-pisarek-1969))|![Formula](/doc/measures/readability/fog_index.svg)
where **NumHardWords** is the number of words with 3 or more syllables excluding all proper nouns and words with 3 syllables ending with *-ed* or *-es* for **English texts**, and the number of words with 4 or more syllables for **Polish texts**.

* This measure applies only to **English texts** and **Polish texts**. +Legibilidad µ
([Muñoz Baquedano, 2006](#ref-munoz-baquedano-2006))|![Formula](/doc/measures/readability/mu.svg)
where **LenWordsAvg** is the average word length in letters, and **LenWordsVar** is the variance of word lengths in letters.

* This measure applies only to **Spanish texts**.
* The text should be **at least 2 words long**. +Lensear Write1
([O’Hayre, 1966](#ref-o-hayre-1966))|![Formula](/doc/measures/readability/lensear_write.svg)
where **NumWordsMonosyllabic** is the number of monosyllabic words excluding *the*, *is*, *are*, *was*, *were*, and **NumSentences** is the number of sentences to the nearest period.

* This measure applies only to **English texts**.
* A sample of 100 words is taken randomly from the text.
* If the text is **shorter than 100 words**, **NumWordsMonosyllabic** and **NumSentences** need to be multiplied by 100 and then divided by the number of words in the text. Lix
([Björnsson, 1968](#ref-bjornsson-1968))|![Formula](/doc/measures/readability/lix.svg)
where **NumLongWords** is the number of words with 7 or more letters. -McAlpine EFLAW Readability Score
([Nirmaldasan, 2009](#ref-nirmaldasan-2009))|![Formula](/doc/measures/readability/eflaw.svg)

* This test applies only to **English texts**. -OSMAN
([El-Haj & Rayson, 2016](#ref-elhaj-rayson-2016))|![Formula](/doc/measures/readability/osman.svg)
where **NumLongWords** is the number of words with 6 or more letters, **NumComplexWords** is the number of words with 5 or more syllables, and **NumFaseehWords** is the number of complex words containing ء/ئ/ؤ/ذ/ظ or ending with وا/ون.

* This test applies only to **Arabic texts**.
* The number of syllables in each Arabic word is estimated by adding the number of short syllables and twice the number of long and stress syllables. +McAlpine EFLAW Readability Score
([Nirmaldasan, 2009](#ref-nirmaldasan-2009))|![Formula](/doc/measures/readability/eflaw.svg)

* This measure applies only to **English texts**. +OSMAN
([El-Haj & Rayson, 2016](#ref-elhaj-rayson-2016))|![Formula](/doc/measures/readability/osman.svg)
where **NumLongWords** is the number of words with 6 or more letters, **NumComplexWords** is the number of words with 5 or more syllables, and **NumFaseehWords** is the number of complex words containing ء/ئ/ؤ/ذ/ظ or ending with وا/ون.

* This measure applies only to **Arabic texts**.
* The number of syllables in each Arabic word is estimated by adding the number of short syllables and twice the number of long and stress syllables. Rix
([Anderson, 1983](#ref-anderson-1983))|![Formula](/doc/measures/readability/rix.svg)
where **NumLongWords** is the number of words with 7 or more letters. SMOG Grade1
([McLaughlin, 1969](#ref-mclaughlin-1969))|![Formula](/doc/measures/readability/smog_grade.svg)
where **NumWordsPolysyllabic** is the number of words with 3 or more syllables.

* A sample consisting of the first 10 sentences of the text, the last 10 sentences of the text, and 10 sentences at the middle of the text is taken from the text, thus the text should be **at least 30 sentences long**. -Spache Grade Level
([Spache, 1953](#ref-spache-1953))|![Formula](/doc/measures/readability/spache_grade_level.svg)
where **NumDifficultWords** is the number of words outside the Dale list of 769 easy words ([Dale, 1931](#ref-dale-1931)).

* Three samples each of 100 words are taken randomly from the text and the mean of the three scores is calculated, thus the text should be **at least 100 words long**. -Wiener Sachtextformel1
([Bamberger & Vanecek, 1984](#ref-bamberger-vanecek-1984))|![Formula](/doc/measures/readability/wstf.svg)
where **NumWordsPolysyllabic** is the number of words with 3 or more syllables and **NumLongWords** is the numbers of words with 7 or more letters.

* This test applies only to **German texts**.
* This test has 4 variants, which you could select via **Menu → Preferences → Settings → Measures → Readability → Wiener Sachtextformel → Variant**. +Spache Grade Level
([Spache, 1953](#ref-spache-1953))|![Formula](/doc/measures/readability/spache_grade_level.svg)

* Three samples each of 100 words are taken randomly from the text and the mean of the three scores is calculated, thus the text should be **at least 100 words long**. +Wiener Sachtextformel1
([Bamberger & Vanecek, 1984](#ref-bamberger-vanecek-1984))|![Formula](/doc/measures/readability/wstf.svg)
where **NumWordsPolysyllabic** is the number of words with 3 or more syllables and **NumLongWords** is the number of words with 7 or more letters.

* This measure applies only to **German texts**.
* This measure has 4 variants, which you could select via **Menu → Preferences → Settings → Measures → Readability → Wiener Sachtextformel → Variant**. **Notes:** 1. Requires **built-in syllable tokenization support** @@ -1031,9 +1042,9 @@ Measure of Readability|Formula #### [4.4.2 Measures of Dispersion & Adjusted Frequency](#doc) -For parts-based measures, each file is divided into **n** (whose value you could modify via **Menu → Preferences → Settings → Measures → Dispersion / Adjusted Frequency → General Settings → Divide each file into subsections**) sub-sections and the frequency of the word in each part is counted and denoted by **F₁**, **F₂**, **F₃**,... , **Fₙ** respectively. The total frequency of the word in each file is denoted by **F** and the mean value of the frequencies over all sub-sections is denoted by ![f_bar](/doc/measures/f_bar.svg). +For parts-based measures, each file is divided into **n** (whose value you could modify via **Menu → Preferences → Settings → Measures → Dispersion / Adjusted Frequency → General Settings → Divide each file into subsections**) sub-sections and the frequency of the word in each part is counted and denoted by **F₁**, **F₂**, **F₃**, ..., **Fₙ** respectively. The total frequency of the word in each file is denoted by **F** and the mean value of the frequencies over all sub-sections is denoted by **F̅**. -For distance-based measures, the distance between each pair of subsequent occurrences of the word is calculated and denoted by **d₁**, **d₂**, **d₃**,... , ![d_f](/doc/measures/d_f.svg) respectively. The total number of tokens in each file is denoted by **N**. +For distance-based measures, the distance between each pair of subsequent occurrences of the word is calculated and denoted by **d₁**, **d₂**, **d₃**, ..., **dF** respectively. The total number of tokens in each file is denoted by **N**. 
Then, the dispersion and adjusted frequency of the word are calculated as follows: @@ -1055,7 +1066,7 @@ Average Waiting Time: f_{AWT} &= \frac{N^2}{\sum_{i = 1}^F{d_i^2}} \end{align*} -Carroll's D₂/Um: +Carroll's D₂/Uₘ: \begin{align*} H &= \ln F - \frac{\sum_{i = 1}^n \left(F_i \times \ln F_i\right)}{F} \\ D_2 &= \frac{H}{\ln n} \\ @@ -1110,7 +1121,7 @@ Measure of Dispersion (Parts-based)|Measure of Adjusted Frequency (Parts-based)|  |Engwall's FM
([Engwall, 1974](#ref-engwall-1974))|![Formula](/doc/measures/dispersion_adjusted_frequency/engwalls_fm.svg)
where **R** is the number of sub-sections in which the word appears at least once. Gries's DP
([Gries, 2008](#ref-gries-2008); [Lijffijt & Gries, 2012](#ref-lijffijt-gries-2012))||![Formula](/doc/measures/dispersion_adjusted_frequency/griess_dp.svg)

* Normalization is applied by default, which behavior you could change via **Menu → Preferences → Settings → Measures → Dispersion → Gries's DP → Apply normalization**. Juilland's D
([Juilland & Chang-Rodrigues, 1964](#ref-juilland-chang-rodrigues-1964))|Juilland's U
([Juilland & Chang-Rodrigues, 1964](#ref-juilland-chang-rodrigues-1964))|![Formula](/doc/measures/dispersion_adjusted_frequency/juillands_u.svg) - |Kromer's UR
([Kromer, 2003](#ref-kromer-2003))|![Formula](/doc/measures/dispersion_adjusted_frequency/kromers_ur.svg)
where **ψ** is the [digamma function](https://en.wikipedia.org/wiki/Digamma_function), and **C** is the [Euler–Mascheroni constant](https://en.wikipedia.org/wiki/Euler%E2%80%93Mascheroni_constant). + |Kromer's UR
([Kromer, 2003](#ref-kromer-2003))|![Formula](/doc/measures/dispersion_adjusted_frequency/kromers_ur.svg)
where **ψ** is the [digamma function](https://en.wikipedia.org/wiki/Digamma_function), and **C** is the [Euler–Mascheroni constant](https://en.wikipedia.org/wiki/Euler%E2%80%93Mascheroni_constant). Lyne's D₃
([Lyne, 1985](#ref-lyne-1985))||![Formula](/doc/measures/dispersion_adjusted_frequency/lynes_d3.svg) Rosengren's S
([Rosengren, 1971](#ref-rosengren-1971))|Rosengren's KF
([Rosengren, 1971](#ref-rosengren-1971))|![Formula](/doc/measures/dispersion_adjusted_frequency/rosengrens_s.svg) Zhang's Distributional Consistency
([Zhang, 2004](#ref-zhang-2004))||![Formula](/doc/measures/dispersion_adjusted_frequency/zhangs_distributional_consistency.svg) @@ -1212,7 +1223,7 @@ Test of Statistical Significance|Formula Pearson's Chi-squared Test
([Hofland & Johanson, 1982](#ref-hofland-johanson-1982); [Oakes, 1998](#ref-oakes-1998))|![Formula](/doc/measures/statistical_significance/pearsons_chi_squared_test.svg) Student's t-test (1-sample)
([Church et al., 1991](#ref-church-et-al-1991))|![Formula](/doc/measures/statistical_significance/students_t_test_1_sample.svg) Student's t-test (2-sample)
([Paquot & Bestgen, 2009](#ref-paquot-bestgen-2009))|![Formula](/doc/measures/statistical_significance/students_t_test_2_sample.svg) -Welch's t-test|Same as Student's t-test (2-sample), but with different degrees of freedom (hence a different p-value). +Welch's t-test|* Same as Student's t-test (2-sample), but with different degrees of freedom (hence a different p-value). z-score
([Dennis, 1964](#ref-dennis-1964))|![Formula](/doc/measures/statistical_significance/z_score.svg) z-score (Berry-Rogghe)
([Berry-Rogghe, 1973](#ref-berry-rogghe-1973))|![Formula](/doc/measures/statistical_significance/z_score_berry_rogghes.svg)
where **S** is the average span size on both sides of the node word. @@ -1336,7 +1347,7 @@ Cubic Association Ratio
([Daille, 1994](#ref-daille-1994), [1995](#ref-daille Dice's Coefficient
([Smadja et al., 1996](#ref-smadja-et-al-1996))|![Formula](/doc/measures/effect_size/dices_coeff.svg) Difference Coefficient
([Hofland & Johanson, 1982](#ref-hofland-johanson-1982); [Gabrielatos, 2018](#ref-gabrielatos-2018))|![Formula](/doc/measures/effect_size/diff_coeff.svg) Jaccard Index
([Dunning, 1998](#ref-dunning-1998))|![Formula](/doc/measures/effect_size/jaccard_index.svg) -Kilgarriff's Ratio
([Kilgarriff, 2009](#ref-kilgarriff-2009))|![Formula](/doc/measures/effect_size/kilgarriffs_ratio.svg)
where **α** is the smoothing parameter, which is 1 by default.

* You can change the value of **α** via **Menu → Preferences → Settings → Measures → Effect Size → Kilgarriff's Ratio → Smoothing Parameter**. +Kilgarriff's Ratio
([Kilgarriff, 2009](#ref-kilgarriff-2009))|![Formula](/doc/measures/effect_size/kilgarriffs_ratio.svg)
where **α** is the smoothing parameter, whose value could be changed via **Menu → Preferences → Settings → Measures → Effect Size → Kilgarriff's Ratio → Smoothing Parameter**. Log Ratio
([Hardie, 2014](#ref-hardie-2014))|![Formula](/doc/measures/effect_size/log_ratio.svg) Log-Frequency Biased MD
([Thanopoulos et al., 2002](#ref-thanopoulos-et-al-2002))|![Formula](/doc/measures/effect_size/lfmd.svg) logDice
([Rychlý, 2008](#ref-rychly-2008))|![Formula](/doc/measures/effect_size/log_dice.svg) @@ -1362,135 +1373,137 @@ Cubic Association Ratio
([Daille, 1994](#ref-daille-1994), [1995](#ref-daille [4] [**^**](#ref-wstf) Bamberger, R., & Vanecek, E. (1984). *Lesen – Verstehen – Lernen – Schreiben*. Jugend und Volk.
[5] [**^**](#ref-z-score-berry-rogghes) Berry-Rogghe, G. L. M. (1973). The computation of collocations and their relevance in lexical studies. In A. J. Aiken, R. W. Bailey, & N. Hamilton-Smith (Eds.), *The computer and literary studies* (pp. 103–112). Edinburgh University Press.
+ +[6] [**^**](#ref-bormuths-cloze-mean-gp) Bormuth, J. R. (1969). *Development of readability analyses*. U.S. Department of Health, Education, and Welfare. http://files.eric.ed.gov/fulltext/ED029166.pdf
-[6] [**^**](#ref-lix) Björnsson, C.-H. (1968). *Läsbarhet*. Liber.
+[7] [**^**](#ref-lix) Björnsson, C.-H. (1968). *Läsbarhet*. Liber.
-[7] [**^**](#ref-re) Brouwer, R. H. M. (1963). Onderzoek naar de leesmoeilijkheid van Nederlands proza. Paedagogische studiën, 40, 454–464. https://objects.library.uu.nl/reader/index.php?obj=1874-205260&lan=en +[8] [**^**](#ref-re) Brouwer, R. H. M. (1963). Onderzoek naar de leesmoeilijkheid van Nederlands proza. *Paedagogische studiën*, *40*, 454–464. https://objects.library.uu.nl/reader/index.php?obj=1874-205260&lan=en
-[8] [**^**](#ref-carrolls-d2)[**^**](#ref-carrolls-um) Carroll, J. B. (1970). An alternative to Juilland’s usage coefficient for lexical frequencies and a proposal for a standard frequency index. *Computer Studies in the Humanities and Verbal Behaviour*, *3*(2), 61–65. https://doi.org/10.1002/j.2333-8504.1970.tb00778.x
+[9] [**^**](#ref-carrolls-d2)[**^**](#ref-carrolls-um) Carroll, J. B. (1970). An alternative to Juilland’s usage coefficient for lexical frequencies and a proposal for a standard frequency index. *Computer Studies in the Humanities and Verbal Behaviour*, *3*(2), 61–65. https://doi.org/10.1002/j.2333-8504.1970.tb00778.x
-[9] [**^**](#ref-rgl) Caylor, J. S., Sticht, T. G., Fox, L. C., & Ford, J. P. (1973). *Methodologies for determining reading requirements of military occupational specialties*. Human Resource Research Organization. https://files.eric.ed.gov/fulltext/ED074343.pdf
+[10] [**^**](#ref-rgl) Caylor, J. S., Sticht, T. G., Fox, L. C., & Ford, J. P. (1973). *Methodologies for determining reading requirements of military occupational specialties*. Human Resource Research Organization. https://files.eric.ed.gov/fulltext/ED074343.pdf
-[10] [**^**](#ref-squared-phi-coeff) Church, K. W., & Gale, W. A. (1991, September 29–October 1). Concordances for parallel text [Paper presentation]. Using Corpora: Seventh Annual Conference of the UW Centre for the New OED and Text Research, St. Catherine's College, Oxford, United Kingdom.
+[11] [**^**](#ref-squared-phi-coeff) Church, K. W., & Gale, W. A. (1991, September 29–October 1). Concordances for parallel text [Paper presentation]. Using Corpora: Seventh Annual Conference of the UW Centre for the New OED and Text Research, St. Catherine's College, Oxford, United Kingdom.
-[11] [**^**](#ref-students-t-test-1-sample) Church, K., Gale, W., Hanks, P., & Hindle, D. (1991). Using statistics in lexical analysis. In U. Zernik (Ed.), *Lexical acquisition: Exploiting on-line resources to build a lexicon* (pp. 115–164). Psychology Press.
+[12] [**^**](#ref-students-t-test-1-sample) Church, K., Gale, W., Hanks, P., & Hindle, D. (1991). Using statistics in lexical analysis. In U. Zernik (Ed.), *Lexical acquisition: Exploiting on-line resources to build a lexicon* (pp. 115–164). Psychology Press.
-[12] [**^**](#ref-pmi) Church, K. W., & Hanks, P. (1990). Word association norms, mutual information, and lexicography. *Computational Linguistics*, *16*(1), 22–29.
+[13] [**^**](#ref-pmi) Church, K. W., & Hanks, P. (1990). Word association norms, mutual information, and lexicography. *Computational Linguistics*, *16*(1), 22–29.
-[13] [**^**](#ref-coleman-liau-index) Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. *Journal of Applied Psychology*, *60*(2), 283–284. https://doi.org/10.1037/h0076540
+[14] [**^**](#ref-coleman-liau-index) Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. *Journal of Applied Psychology*, *60*(2), 283–284. https://doi.org/10.1037/h0076540
-[14] [**^**](#ref-formula-de-crawford) Crawford, A. N. (1985). Fórmula y gráfico para determinar la comprensibilidad de textos de nivel primario en castellano. *Lectura y Vida*, *6*(4). http://www.lecturayvida.fahce.unlp.edu.ar/numeros/a6n4/06_04_Crawford.pdf
+[15] [**^**](#ref-formula-de-crawford) Crawford, A. N. (1985). Fórmula y gráfico para determinar la comprensibilidad de textos de nivel primario en castellano. *Lectura y Vida*, *6*(4). http://www.lecturayvida.fahce.unlp.edu.ar/numeros/a6n4/06_04_Crawford.pdf
-[15] [**^**](#ref-im3) Daille, B. (1994). *Approche mixte pour l'extraction automatique de terminologie: statistiques lexicales et filtres linguistiques* [Doctoral thesis, Paris Diderot University]. Béatrice Daille. http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=
+[16] [**^**](#ref-im3) Daille, B. (1994). *Approche mixte pour l'extraction automatique de terminologie: statistiques lexicales et filtres linguistiques* [Doctoral thesis, Paris Diderot University]. Béatrice Daille. http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=
-[16] [**^**](#ref-im3) Daille, B. (1995). Combined approach for terminology extraction: Lexical statistics and linguistic filtering. *UCREL technical papers* (Vol. 5). Lancaster University.
+[17] [**^**](#ref-im3) Daille, B. (1995). Combined approach for terminology extraction: Lexical statistics and linguistic filtering. *UCREL technical papers* (Vol. 5). Lancaster University.
-[17] [**^**](#ref-spache-grade-level) Dale, E. (1931). A comparison of two word lists. *Educational Research Bulletin*, *10*(18), 484–489.
+[18] [**^**](#ref-spache-grade-level) Dale, E. (1931). A comparison of two word lists. *Educational Research Bulletin*, *10*(18), 484–489.
-[18] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948a). A formula for predicting readability. *Educational Research Bulletin*, *27*(1), 11–20, 28.
+[19] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948a). A formula for predicting readability. *Educational Research Bulletin*, *27*(1), 11–20, 28.
-[19] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948b). A formula for predicting readability: Instructions. *Educational Research Bulletin*, *27*(2), 37–54.
+[20] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948b). A formula for predicting readability: Instructions. *Educational Research Bulletin*, *27*(2), 37–54.
-[20] [**^**](#ref-z-score) Dennis, S. F. (1964). The construction of a thesaurus automatically from a sample of text. In M. E. Stevens, V. E. Giuliano, & L. B. Heilprin (Eds.), *Proceedings of the symposium on statistical association methods for mechanized documentation* (pp. 61–148). National Bureau of Standards.
+[21] [**^**](#ref-z-score) Dennis, S. F. (1964). The construction of a thesaurus automatically from a sample of text. In M. E. Stevens, V. E. Giuliano, & L. B. Heilprin (Eds.), *Proceedings of the symposium on statistical association methods for mechanized documentation* (pp. 61–148). National Bureau of Standards.
-[21] [**^**](#ref-me) Dias, G., Guilloré, S., & Pereira Lopes, J. G. (1999). Language independent automatic acquisition of rigid multiword units from unrestricted text corpora. In A. Condamines, C. Fabre, & M. Péry-Woodley (Eds.), *TALN'99: 6ème Conférence Annuelle Sur le Traitement Automatique des Langues Naturelles* (pp. 333–339). TALN.
+[22] [**^**](#ref-me) Dias, G., Guilloré, S., & Pereira Lopes, J. G. (1999). Language independent automatic acquisition of rigid multiword units from unrestricted text corpora. In A. Condamines, C. Fabre, & M. Péry-Woodley (Eds.), *TALN'99: 6ème Conférence Annuelle Sur le Traitement Automatique des Langues Naturelles* (pp. 333–339). TALN.
-[22] [**^**](#ref-re) Douma, W. H. (1960). *De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules* [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323
+[23] [**^**](#ref-re) Douma, W. H. (1960). *De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules* [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323
-[23] [**^**](#ref-log-likehood-ratio-test) Dunning, T. E. (1993). Accurate methods for the statistics of surprise and coincidence. *Computational Linguistics*, *19*(1), 61–74.
+[24] [**^**](#ref-log-likehood-ratio-test) Dunning, T. E. (1993). Accurate methods for the statistics of surprise and coincidence. *Computational Linguistics*, *19*(1), 61–74.
-[24] [**^**](#ref-jaccard-index)[**^**](#ref-mi) Dunning, T. E. (1998). *Finding structure in text, genome and other symbolic sequences* [Doctoral dissertation, University of Sheffield]. arXiv. arxiv.org/pdf/1207.1847.pdf
+[25] [**^**](#ref-jaccard-index)[**^**](#ref-mi) Dunning, T. E. (1998). *Finding structure in text, genome and other symbolic sequences* [Doctoral dissertation, University of Sheffield]. arXiv. https://arxiv.org/pdf/1207.1847.pdf
-[25] [**^**](#ref-osman) El-Haj, M., & Rayson, P. (2016). OSMAN: A novel Arabic readability metric. In N. Calzolari, K. Choukri, T. Declerck, S. Goggi, M. Grobelnik, B. Maegaard, J. Mariani, H. Mazo, A. Moreno, J. Odijk, & S. Piperidis (Eds.), *Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)* (pp. 250–255). European Language Resources Association. http://www.lrec-conf.org/proceedings/lrec2016/index.html
+[26] [**^**](#ref-osman) El-Haj, M., & Rayson, P. (2016). OSMAN: A novel Arabic readability metric. In N. Calzolari, K. Choukri, T. Declerck, S. Goggi, M. Grobelnik, B. Maegaard, J. Mariani, H. Mazo, A. Moreno, J. Odijk, & S. Piperidis (Eds.), *Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)* (pp. 250–255). European Language Resources Association. http://www.lrec-conf.org/proceedings/lrec2016/index.html
-[26] [**^**](#ref-engwalls-fm) Engwall, G. (1974). *Fréquence et distribution du vocabulaire dans un choix de romans français* [Unpublished doctoral dissertation]. Stockholm University.
+[27] [**^**](#ref-engwalls-fm) Engwall, G. (1974). *Fréquence et distribution du vocabulaire dans un choix de romans français* [Unpublished doctoral dissertation]. Stockholm University.
-[27] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
+[28] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
-[28] [**^**](#ref-re) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
+[29] [**^**](#ref-re) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
-[29] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
+[30] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
-[30] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
+[31] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
-[31] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
+[32] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
-[32] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
+[33] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
-[33] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
+[34] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
-[34] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
+[35] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
-[35] [**^**](#ref-formula-de-comprensibilidad-de-gutierrez-de-polini) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
+[36] [**^**](#ref-formula-de-comprensibilidad-de-gutierrez-de-polini) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
-[36] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/.
+[37] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/
-[37] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
+[38] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
-[38] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
+[39] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
-[39] [**^**](#ref-re) Kandel, L., & Moles A. (1958). Application de l’indice de flesch la langue francaise [applying flesch index to french language]. *The Journal of Educational Research*, *21*, 283–287.
+[40] [**^**](#ref-re) Kandel, L., & Moles, A. (1958). Application de l’indice de Flesch à la langue française [Applying Flesch index to French language]. *The Journal of Educational Research*, *21*, 283–287.
-[40] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
+[41] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
-[41] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
+[42] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
-[42] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
+[43] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
-[43] [**^**](#ref-flesch-kincaid-grade-level) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
+[44] [**^**](#ref-flesch-kincaid-grade-level) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
-[44] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
+[45] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
-[45] [**^**](#ref-mi-log-f) Lexical Computing. (2015, July 8). *Statistics used in Sketch Engine*. Sketch Engine. https://www.sketchengine.eu/documentation/statistics-used-in-sketch-engine/
+[46] [**^**](#ref-mi-log-f) Lexical Computing. (2015, July 8). *Statistics used in Sketch Engine*. Sketch Engine. https://www.sketchengine.eu/documentation/statistics-used-in-sketch-engine/
-[46] [**^**](#ref-griess-dp-norm) Lijffijt, J., & Gries, S. T. (2012). Correction to Stefan Th. Gries’ “dispersions and adjusted frequencies in corpora”. *International Journal of Corpus Linguistics*, *17*(1), 147–149. https://doi.org/10.1075/ijcl.17.1.08lij
+[47] [**^**](#ref-griess-dp-norm) Lijffijt, J., & Gries, S. T. (2012). Correction to Stefan Th. Gries’ “dispersions and adjusted frequencies in corpora”. *International Journal of Corpus Linguistics*, *17*(1), 147–149. https://doi.org/10.1075/ijcl.17.1.08lij
-[47] [**^**](#ref-gulpease-index) Lucisano, P., & Emanuela Piemontese, M. (1988). GULPEASE: A formula for the prediction of the difficulty of texts in Italian. *Scuola e Città*, *39*(3), pp. 110–124.
+[48] [**^**](#ref-gulpease-index) Lucisano, P., & Emanuela Piemontese, M. (1988). GULPEASE: A formula for the prediction of the difficulty of texts in Italian. *Scuola e Città*, *39*(3), 110–124.
-[48] [**^**](#ref-lynes-d3) Lyne, A. A. (1985). Dispersion. In *The vocabulary of French business correspondence: Word frequencies, collocations, and problems of lexicometric method* (pp. 101–124). Slatkine/Champion.
+[49] [**^**](#ref-lynes-d3) Lyne, A. A. (1985). Dispersion. In *The vocabulary of French business correspondence: Word frequencies, collocations, and problems of lexicometric method* (pp. 101–124). Slatkine/Champion.
-[49] [**^**](#ref-smog-grade) McLaughlin, G. H. (1969). SMOG grading: A new readability formula. *Journal of Reading*, *12*(8), pp. 639–646.
+[50] [**^**](#ref-smog-grade) McLaughlin, G. H. (1969). SMOG grading: A new readability formula. *Journal of Reading*, *12*(8), 639–646.
-[50] [**^**](#ref-legibilidad-mu) Muñoz Baquedano, M. (2006). Legibilidad y variabilidad de los textos. *Boletín de Investigación Educacional, Pontificia Universidad Católica de Chile*, *21*(2), 13–26.
+[51] [**^**](#ref-legibilidad-mu) Muñoz Baquedano, M. (2006). Legibilidad y variabilidad de los textos. *Boletín de Investigación Educacional, Pontificia Universidad Católica de Chile*, *21*(2), 13–26.
-[51] [**^**](#ref-eflaw) Nirmaldasan. (2009, April 30). *McAlpine EFLAW readability score*. Readability Monitor. Retrieved November 15, 2022, from https://strainindex.wordpress.com/2009/04/30/mcalpine-eflaw-readability-score/
+[52] [**^**](#ref-eflaw) Nirmaldasan. (2009, April 30). *McAlpine EFLAW readability score*. Readability Monitor. Retrieved November 15, 2022, from https://strainindex.wordpress.com/2009/04/30/mcalpine-eflaw-readability-score/
-[52] [**^**](#ref-pearsons-chi-squared-test) Oakes, M. P. (1998). *Statistics for Corpus Linguistics*. Edinburgh University Press.
+[53] [**^**](#ref-pearsons-chi-squared-test) Oakes, M. P. (1998). *Statistics for Corpus Linguistics*. Edinburgh University Press.
-[53] [**^**](#ref-re) Oborneva, I. V. (2006). *Автоматизированная оценка сложности учебных текстов на основе статистических параметров* [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
+[54] [**^**](#ref-re) Oborneva, I. V. (2006). *Автоматизированная оценка сложности учебных текстов на основе статистических параметров* [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
-[54] [**^**](#ref-lensear-write) O’Hayre, J. (1966). *Gobbledygook has gotta go*. U.S. Government Printing Office. https://www.governmentattic.org/15docs/Gobbledygook_Has_Gotta_Go_1966.pdf
+[55] [**^**](#ref-lensear-write) O’Hayre, J. (1966). *Gobbledygook has gotta go*. U.S. Government Printing Office. https://www.governmentattic.org/15docs/Gobbledygook_Has_Gotta_Go_1966.pdf
-[55] [**^**](#ref-students-t-test-2-sample) Paquot, M., & Bestgen, Y. (2009). Distinctive words in academic writing: A comparison of three statistical tests for keyword extraction. *Language and Computers*, *68*, 247–269.
+[56] [**^**](#ref-students-t-test-2-sample) Paquot, M., & Bestgen, Y. (2009). Distinctive words in academic writing: A comparison of three statistical tests for keyword extraction. *Language and Computers*, *68*, 247–269.
-[56] [**^**](#ref-fishers-exact-test) Pedersen, T. (1996). Fishing for exactness. In T. Winn (Ed.), *Proceedings of the Sixth Annual South-Central Regional SAS Users' Group Conference* (pp. 188–200). The South–Central Regional SAS Users' Group.
+[57] [**^**](#ref-fishers-exact-test) Pedersen, T. (1996). Fishing for exactness. In T. Winn (Ed.), *Proceedings of the Sixth Annual South-Central Regional SAS Users' Group Conference* (pp. 188–200). The South–Central Regional SAS Users' Group.
-[57] [**^**](#ref-min-sensitivity) Pedersen, T. (1998). Dependent bigram identification. In *Proceedings of the Fifteenth National Conference on Artificial Intelligence* (p. 1197). AAAI Press.
+[58] [**^**](#ref-min-sensitivity) Pedersen, T. (1998). Dependent bigram identification. In *Proceedings of the Fifteenth National Conference on Artificial Intelligence* (p. 1197). AAAI Press.
-[58] [**^**](#ref-fog-index) Pisarek, W. (1969). Jak mierzyć zrozumiałość tekstu?. *Zeszyty Prasoznawcze*, *4*(42), 35–48.
+[59] [**^**](#ref-fog-index) Pisarek, W. (1969). Jak mierzyć zrozumiałość tekstu?. *Zeszyty Prasoznawcze*, *4*(42), 35–48.
-[59] [**^**](#ref-odds-ratio) Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. *Corpus Linguistics and Linguistic Theory*, *15*(1), pp. 133–167. https://doi.org/10.1515/cllt-2015-0030
+[60] [**^**](#ref-odds-ratio) Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. *Corpus Linguistics and Linguistic Theory*, *15*(1), 133–167. https://doi.org/10.1515/cllt-2015-0030
-[60] [**^**](#ref-poisson-collocation-measure) Quasthoff, U., & Wolff, C. (2002). The poisson collocation measure and its applications. *Proceedings of 2nd International Workshop on Computational Approaches to Collocations*. IEEE.
+[61] [**^**](#ref-poisson-collocation-measure) Quasthoff, U., & Wolff, C. (2002). The Poisson collocation measure and its applications. *Proceedings of 2nd International Workshop on Computational Approaches to Collocations*. IEEE.
-[61] [**^**](#ref-rosengrens-s)[**^**](#ref-rosengrens-kf) Rosengren, I. (1971). The quantitative concept of language and its relation to the structure of frequency dictionaries. *Études de linguistique appliquée*, *1*, 103–127.
+[62] [**^**](#ref-rosengrens-s)[**^**](#ref-rosengrens-kf) Rosengren, I. (1971). The quantitative concept of language and its relation to the structure of frequency dictionaries. *Études de linguistique appliquée*, *1*, 103–127.
-[62] [**^**](#ref-log-dice) Rychlý, P. (2008). A lexicographyer-friendly association score. In P. Sojka & A. Horák (Eds.), *Proceedings of Second Workshop on Recent Advances in Slavonic Natural Languages Processing*. Masaryk University
+[63] [**^**](#ref-log-dice) Rychlý, P. (2008). A lexicographer-friendly association score. In P. Sojka & A. Horák (Eds.), *Proceedings of Second Workshop on Recent Advances in Slavonic Natural Languages Processing*. Masaryk University.
-[63] [**^**](#ref-ald) [**^**](#ref-fald) [**^**](#ref-arf) [**^**](#ref-farf) [**^**](#ref-awt) [**^**](#ref-fawt) Savický, P., & Hlaváčová, J. (2002). Measures of word commonness. *Journal of Quantitative Linguistics*, *9*(3), 215–231. https://doi.org/10.1076/jqul.9.3.215.14124
+[64] [**^**](#ref-ald) [**^**](#ref-fald) [**^**](#ref-arf) [**^**](#ref-farf) [**^**](#ref-awt) [**^**](#ref-fawt) Savický, P., & Hlaváčová, J. (2002). Measures of word commonness. *Journal of Quantitative Linguistics*, *9*(3), 215–231. https://doi.org/10.1076/jqul.9.3.215.14124
-[64] [**^**](#ref-dices-coeff) Smadja, F., McKeown, K. R., & Hatzivassiloglou, V. (1996). Translating collocations for bilingual lexicons: A statistical approach. *Computational Linguistics*, *22*(1), pp. 1–38.
+[65] [**^**](#ref-dices-coeff) Smadja, F., McKeown, K. R., & Hatzivassiloglou, V. (1996). Translating collocations for bilingual lexicons: A statistical approach. *Computational Linguistics*, *22*(1), 1–38.
-[65] [**^**](#ref-devereux-readability-index) Smith, E. A. (1961). Devereaux readability index. *Journal of Educational Research*, *54*(8), 298–303. https://doi.org/10.1080/00220671.1961.10882728
+[66] [**^**](#ref-devereux-readability-index) Smith, E. A. (1961). Devereaux readability index. *Journal of Educational Research*, *54*(8), 298–303. https://doi.org/10.1080/00220671.1961.10882728
-[66] [**^**](#ref-ari) Smith, E. A., & Senter, R. J. (1967). *Automated readability index*. Aerospace Medical Research Laboratories. https://apps.dtic.mil/sti/pdfs/AD0667273.pdf
+[67] [**^**](#ref-ari) Smith, E. A., & Senter, R. J. (1967). *Automated readability index*. Aerospace Medical Research Laboratories. https://apps.dtic.mil/sti/pdfs/AD0667273.pdf
-[67] [**^**](#ref-spache-grade-level) Spache, G. (1953). A new readability formula for primary-grade reading materials. *Elementary School Journal*, *53*(7), 410–413. https://doi.org/10.1086/458513
+[68] [**^**](#ref-spache-grade-level) Spache, G. (1953). A new readability formula for primary-grade reading materials. *Elementary School Journal*, *53*(7), 410–413. https://doi.org/10.1086/458513
-[68] [**^**](#ref-re) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
+[69] [**^**](#ref-re) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
-[69] [**^**](#ref-lfmd)[**^**](#ref-md) Thanopoulos, A., Fakotakis, N., & Kokkinakis, G. (2002). Comparative evaluation of collocation extraction metrics. In M. G. González & C. P. S. Araujo (Eds.), *Proceedings of the Third International Conference on Language Resources and Evaluation* (pp. 620–625). European Language Resources Association.
+[70] [**^**](#ref-lfmd)[**^**](#ref-md) Thanopoulos, A., Fakotakis, N., & Kokkinakis, G. (2002). Comparative evaluation of collocation extraction metrics. In M. G. González & C. P. S. Araujo (Eds.), *Proceedings of the Third International Conference on Language Resources and Evaluation* (pp. 620–625). European Language Resources Association.
-[70] [**^**](#ref-log-likehood-ratio-test-bayes-factor)[**^**](#ref-students-t-test-2-sample-bayes-factor) Wilson, A. (2013). Embracing Bayes Factors for key item analysis in corpus linguistics. In M. Bieswanger & A. Koll-Stobbe (Eds.), *New Approaches to the Study of Linguistic Variability* (pp. 3–11). Peter Lang.
+[71] [**^**](#ref-log-likehood-ratio-test-bayes-factor)[**^**](#ref-students-t-test-2-sample-bayes-factor) Wilson, A. (2013). Embracing Bayes Factors for key item analysis in corpus linguistics. In M. Bieswanger & A. Koll-Stobbe (Eds.), *New Approaches to the Study of Linguistic Variability* (pp. 3–11). Peter Lang.
-[71] [**^**](#ref-zhangs-distributional-consistency) Zhang, H., Huang, C., & Yu, S. (2004). Distributional consistency: As a general method for defining a core lexicon. In M. T. Lino, M. F. Xavier, F. Ferreira, R. Costa, & R. Silva (Eds.), *Proceedings of Fourth International Conference on Language Resources and Evaluation* (pp. 1119–1122). European Language Resources Association.
+[72] [**^**](#ref-zhangs-distributional-consistency) Zhang, H., Huang, C., & Yu, S. (2004). Distributional consistency: As a general method for defining a core lexicon. In M. T. Lino, M. F. Xavier, F. Ferreira, R. Costa, & R. Silva (Eds.), *Proceedings of Fourth International Conference on Language Resources and Evaluation* (pp. 1119–1122). European Language Resources Association.
diff --git a/doc/measures/d_f.svg b/doc/measures/d_f.svg deleted file mode 100644 index 42b4cb0fa..000000000 --- a/doc/measures/d_f.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - \ No newline at end of file diff --git a/doc/measures/f_bar.svg b/doc/measures/f_bar.svg deleted file mode 100644 index 3bca323bf..000000000 --- a/doc/measures/f_bar.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - \ No newline at end of file diff --git a/doc/measures/readability/bormuths_cloze_mean_gp.svg b/doc/measures/readability/bormuths_cloze_mean_gp.svg new file mode 100644 index 000000000..a9ae6b773 --- /dev/null +++ b/doc/measures/readability/bormuths_cloze_mean_gp.svg @@ -0,0 +1,331 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/spache_grade_level.svg b/doc/measures/readability/spache_grade_level.svg index f48778a7b..8a972dfe9 100644 --- a/doc/measures/readability/spache_grade_level.svg +++ b/doc/measures/readability/spache_grade_level.svg @@ -1,19 +1,10 @@ - + - - - - - - - - - - - - + + + @@ -25,8 +16,6 @@ - - @@ -36,6 +25,18 @@ + + + + + + + + + + + + @@ -48,16 +49,16 @@ - - + + - - - + + + - - - + + + @@ -71,45 +72,43 @@ - - + + - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff 
--git a/doc/measures/readability/szigriszts_perspicuity_index.svg b/doc/measures/readability/szigriszts_perspicuity_index.svg deleted file mode 100644 index 479ee2445..000000000 --- a/doc/measures/readability/szigriszts_perspicuity_index.svg +++ /dev/null @@ -1,82 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/doc/measures/readability/xc50.svg b/doc/measures/readability/x_c50.svg similarity index 84% rename from doc/measures/readability/xc50.svg rename to doc/measures/readability/x_c50.svg index 8aa13886f..7b578fd3c 100644 --- a/doc/measures/readability/xc50.svg +++ b/doc/measures/readability/x_c50.svg @@ -1,6 +1,6 @@ - + @@ -9,11 +9,10 @@ + - - @@ -23,6 +22,7 @@ + @@ -36,7 +36,7 @@ - + @@ -48,68 +48,67 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/wl_tests_measures/test_measures_readability.py b/tests/wl_tests_measures/test_measures_readability.py index 4a7e5f3f4..a1a0e34d8 100644 --- a/tests/wl_tests_measures/test_measures_readability.py +++ b/tests/wl_tests_measures/test_measures_readability.py @@ -93,6 +93,48 @@ def test_automated_readability_index(): assert ari_eng_0 == 'text_too_short' assert ari_eng_12 == ari_spa_12 == 0.5 * (12 / 3) + 4.71 * (47 / 12) - 21.43 +def test_bormuths_cloze_mean(): + m_eng_0 = wl_measures_readability.bormuths_cloze_mean(main, test_text_eng_0) + m_eng_12 = wl_measures_readability.bormuths_cloze_mean(main, test_text_eng_12) + m_other_12 = wl_measures_readability.bormuths_cloze_mean(main, test_text_other_12) + + print("Bormuth's Cloze Mean:") + print(f'\teng/0: {m_eng_0}') + print(f'\teng/12: {m_eng_12}') + 
print(f'\tother/12: {m_other_12}') + + assert m_eng_0 == 'text_too_short' + assert m_eng_12 == ( + 0.886593 - + 0.083640 * (45 / 12) + + 0.161911 * ((1 / 12)**3) - + 0.021401 * (12 / 3) + + 0.000577 * ((12 / 3)**2) - + 0.000005 * ((12 / 3)**3) + ) + assert m_other_12 == 'no_support' + +def test_bormuths_gp(): + gp_eng_0 = wl_measures_readability.bormuths_gp(main, test_text_eng_0) + gp_eng_12 = wl_measures_readability.bormuths_gp(main, test_text_eng_12) + gp_other_12 = wl_measures_readability.bormuths_gp(main, test_text_other_12) + + print("Bormuth's Grade Placement:") + print(f'\teng/0: {gp_eng_0}') + print(f'\teng/12: {gp_eng_12}') + print(f'\tother/12: {gp_other_12}') + + m = wl_measures_readability.bormuths_cloze_mean(main, test_text_eng_12) + c = 0.35 + + assert gp_eng_0 == 'text_too_short' + assert gp_eng_12 == ( + 4.275 + 12.881 * m - 34.934 * (m**2) + 20.388 * (m**3) + + 26.194 * c - 2.046 * (c**2) - 11.767 * (c**3) - + 44.285 * (m * c) + 97.620 * ((m * c)**2) - 59.538 * ((m * c)**3) + ) + assert gp_other_12 == 'no_support' + def test_coleman_liau_index(): grade_level_eng_0 = wl_measures_readability.coleman_liau_index(main, test_text_eng_0) grade_level_eng_12 = wl_measures_readability.coleman_liau_index(main, test_text_eng_12) @@ -440,6 +482,8 @@ def test_wiener_sachtextformel(): if __name__ == '__main__': test_automated_ara_readability_index() test_automated_readability_index() + test_bormuths_cloze_mean() + test_bormuths_gp() test_coleman_liau_index() test_dale_chall_readability_score() test_devereux_readability_index() diff --git a/tests/wl_tests_work_area/test_profiler.py b/tests/wl_tests_work_area/test_profiler.py index 86ffa7bf2..2a4cc0644 100644 --- a/tests/wl_tests_work_area/test_profiler.py +++ b/tests/wl_tests_work_area/test_profiler.py @@ -95,7 +95,7 @@ def update_gui(err_msg, texts_stats_files): count_tokens_lens_syls.append(collections.Counter(len_tokens_syls)) count_tokens_lens_chars.append(collections.Counter(len_tokens_chars)) - assert 
len(readability_statistics) == 22 + assert len(readability_statistics) == 24 # Counts assert count_paras diff --git a/wordless/wl_measures/wl_measures_readability.py b/wordless/wl_measures/wl_measures_readability.py index 000ab3be5..d61166722 100644 --- a/wordless/wl_measures/wl_measures_readability.py +++ b/wordless/wl_measures/wl_measures_readability.py @@ -107,7 +107,7 @@ def get_count_words_syls(syls_words, len_min = 1, len_max = None): ]) # Calculate the number of words outside Dale's lists of easy words -def get_count_difficult_words(words, num_easy_words): +def get_count_words_dale(words, num_easy_words): dale_list_easy_words = set() count_difficult_words = 0 @@ -161,6 +161,47 @@ def automated_readability_index(main, text): return ari +# Bormuth's Cloze Mean & Grade Placement +# Reference: Bormuth, J. R. (1969). Development of readability analyses. U.S. Department of Health, Education, and Welfare. http://files.eric.ed.gov/fulltext/ED029166.pdf +def bormuths_cloze_mean(main, text): + text = get_counts(main, text) + + if text.lang.startswith('eng_'): + if text.count_sentences and text.count_words: + ddl = get_count_words_dale(text.words_flat, 3000) + m = ( + 0.886593 - + 0.083640 * (text.count_chars_alphabetic / text.count_words) + + 0.161911 * ((ddl / text.count_words)**3) - + 0.021401 * (text.count_words / text.count_sentences) + + 0.000577 * ((text.count_words / text.count_sentences)**2) - + 0.000005 * ((text.count_words / text.count_sentences)**3) + ) + else: + m = 'text_too_short' + else: + m = 'no_support' + + return m + +def bormuths_gp(main, text): + if text.lang.startswith('eng_'): + m = bormuths_cloze_mean(main, text) + c = main.settings_custom['measures']['readability']['bormuths_gp']['cloze_criterion_score'] / 100 + + if m == 'text_too_short': + gp = m + else: + gp = ( + 4.275 + 12.881 * m - 34.934 * (m**2) + 20.388 * (m**3) + + 26.194 * c - 2.046 * (c**2) - 11.767 * (c**3) - + 44.285 * (m * c) + 97.620 * ((m * c)**2) - 59.538 * ((m * c)**3) + ) + 
else: + gp = 'no_support' + + return gp + # Coleman-Liau Index # Reference: Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. Journal of Applied Psychology, 60(2), 283–284. https://doi.org/10.1037/h0076540 def coleman_liau_index(main, text): @@ -187,7 +228,7 @@ def dale_chall_readability_score(main, text): text = get_counts(main, text) if text.count_words and text.count_sentences: - count_difficult_words = get_count_difficult_words(text.words_flat, 3000) + count_difficult_words = get_count_words_dale(text.words_flat, 3000) x_c50 = ( 0.1579 * (count_difficult_words / text.count_words) + 0.0496 * (text.count_words / text.count_sentences) @@ -711,7 +752,7 @@ def spache_grade_level(main, text): else: break - count_difficult_words = get_count_difficult_words(samples, 769) + count_difficult_words = get_count_words_dale(samples, 769) grade_levels.append( 0.141 * (100 / count_sentences_samples) + 0.086 * (count_difficult_words / 100 * 100) diff --git a/wordless/wl_profiler.py b/wordless/wl_profiler.py index cc0faa82e..2b23f8fe5 100644 --- a/wordless/wl_profiler.py +++ b/wordless/wl_profiler.py @@ -359,6 +359,8 @@ def __init__(self, parent): HEADERS_READABILITY = [ _tr('wl_profiler', 'Automated Arabic Readability Index'), _tr('wl_profiler', 'Automated Readability Index'), + _tr('wl_profiler', "Bormuth's Cloze Mean"), + _tr('wl_profiler', "Bormuth's Grade Placement"), _tr('wl_profiler', 'Coleman-Liau Index'), _tr('wl_profiler', 'Dale-Chall Readability Score'), _tr('wl_profiler', 'Devereaux Readability Index'), @@ -1173,6 +1175,8 @@ def run(self): readability_stats = [ wl_measures_readability.automated_ara_readability_index(self.main, text), wl_measures_readability.automated_readability_index(self.main, text), + wl_measures_readability.bormuths_cloze_mean(self.main, text), + wl_measures_readability.bormuths_gp(self.main, text), wl_measures_readability.coleman_liau_index(self.main, text), 
wl_measures_readability.dale_chall_readability_score(self.main, text), wl_measures_readability.devereux_readability_index(self.main, text), diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index d9d10f45b..aacbd32de 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -1741,6 +1741,10 @@ def init_settings_default(main): 'measures': { # Settings - Measures - Readability 'readability': { + 'bormuths_gp': { + 'cloze_criterion_score': 35 + }, + 're': { 'variant_nld': 'Douma', 'variant_spa': 'Fernández Huerta' diff --git a/wordless/wl_settings/wl_settings_measures.py b/wordless/wl_settings/wl_settings_measures.py index 372189198..1749c7638 100644 --- a/wordless/wl_settings/wl_settings_measures.py +++ b/wordless/wl_settings/wl_settings_measures.py @@ -31,6 +31,21 @@ def __init__(self, main): self.settings_default = self.main.settings_default['measures']['readability'] self.settings_custom = self.main.settings_custom['measures']['readability'] + # Bormuth's Grade Placement + self.group_box_bormuths_gp = QGroupBox(self.tr("Bormuth's Grade Placement"), self) + + self.label_cloze_criterion_score = QLabel(self.tr('Cloze criterion score:'), self) + self.spin_box_cloze_criterion_score = wl_boxes.Wl_Spin_Box(self) + + self.spin_box_cloze_criterion_score.setRange(0, 100) + self.spin_box_cloze_criterion_score.setSuffix('%') + + self.group_box_bormuths_gp.setLayout(wl_layouts.Wl_Layout()) + self.group_box_bormuths_gp.layout().addWidget(self.label_cloze_criterion_score, 0, 0) + self.group_box_bormuths_gp.layout().addWidget(self.spin_box_cloze_criterion_score, 0, 1) + + self.group_box_bormuths_gp.layout().setColumnStretch(2, 1) + # Flesch Reading Ease self.group_box_re = QGroupBox(self.tr('Flesch Reading Ease'), self) @@ -71,11 +86,12 @@ def __init__(self, main): self.group_box_wstf.layout().setColumnStretch(2, 1) self.setLayout(wl_layouts.Wl_Layout()) - 
self.layout().addWidget(self.group_box_re, 0, 0) - self.layout().addWidget(self.group_box_wstf, 1, 0) + self.layout().addWidget(self.group_box_bormuths_gp, 0, 0) + self.layout().addWidget(self.group_box_re, 1, 0) + self.layout().addWidget(self.group_box_wstf, 2, 0) self.layout().setContentsMargins(6, 4, 6, 4) - self.layout().setRowStretch(2, 1) + self.layout().setRowStretch(3, 1) def load_settings(self, defaults = False): if defaults: @@ -83,6 +99,9 @@ def load_settings(self, defaults = False): else: settings = copy.deepcopy(self.settings_custom) + # Bormuth's Grade Placement + self.spin_box_cloze_criterion_score.setValue(settings['bormuths_gp']['cloze_criterion_score']) + # Flesch Reading Ease self.combo_box_re_variant_nld.setCurrentText(settings['re']['variant_nld']) self.combo_box_re_variant_spa.setCurrentText(settings['re']['variant_spa']) @@ -91,6 +110,9 @@ def load_settings(self, defaults = False): self.combo_box_wstf_variant.setCurrentText(settings['wstf']['variant']) def apply_settings(self): + # Bormuth's Grade Placement + self.settings_custom['bormuths_gp']['cloze_criterion_score'] = self.spin_box_cloze_criterion_score.value() + # Flesch Reading Ease self.settings_custom['re']['variant_nld'] = self.combo_box_re_variant_nld.currentText() self.settings_custom['re']['variant_spa'] = self.combo_box_re_variant_spa.currentText()