From b2af8e8faf03da1eeb2b7661b419fd47ab20889c Mon Sep 17 00:00:00 2001 From: BLKSerene Date: Wed, 2 Aug 2023 04:04:14 +0800 Subject: [PATCH] Work Area: Add Profiler - Readability - Degrees of Reading Power --- CHANGELOG.md | 2 +- doc/doc_eng.md | 234 ++--- doc/measures/readability/aari.svg | 216 +++-- doc/measures/readability/ari.svg | 166 ++-- .../readability/bormuths_cloze_mean_gp.svg | 613 ++++++------- .../readability/coleman_liau_index.svg | 352 ++++---- .../colemans_readability_formula.svg | 782 ++++++++-------- doc/measures/readability/cp.svg | 173 ++-- .../danielson_bryans_readability_formula.svg | 388 ++++---- .../devereux_readability_index.svg | 188 ++-- doc/measures/readability/drp.svg | 45 + doc/measures/readability/eflaw.svg | 122 +-- .../flesch_kincaid_grade_level.svg | 93 -- doc/measures/readability/fog_index.svg | 166 ++-- .../readability/formula_de_crawford.svg | 198 ++-- doc/measures/readability/gl.svg | 93 ++ doc/measures/readability/gulpease_index.svg | 183 ++-- doc/measures/readability/lensear_write.svg | 104 +-- doc/measures/readability/lix.svg | 144 +-- doc/measures/readability/mu.svg | 128 +-- doc/measures/readability/osman.svg | 274 +++--- doc/measures/readability/re.svg | 845 +++++++++--------- doc/measures/readability/re_simplified.svg | 180 ++-- doc/measures/readability/rgl.svg | 86 +- doc/measures/readability/rix.svg | 96 +- doc/measures/readability/smog_grade.svg | 94 +- .../readability/spache_grade_level.svg | 184 ++-- doc/measures/readability/wstf.svg | 558 ++++++------ doc/measures/readability/x_c50.svg | 224 ++--- doc/measures/readability/x_c50_new.svg | 202 ++--- .../test_measures_readability.py | 160 ++-- tests/wl_tests_work_area/test_profiler.py | 2 +- .../wl_measures/wl_measures_readability.py | 59 +- wordless/wl_profiler.py | 24 +- 34 files changed, 3714 insertions(+), 3664 deletions(-) create mode 100644 doc/measures/readability/drp.svg delete mode 100644 doc/measures/readability/flesch_kincaid_grade_level.svg create mode 
100644 doc/measures/readability/gl.svg diff --git a/CHANGELOG.md b/CHANGELOG.md index d22883701..bbaa2f107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ - Utils: Add spaCy's Korean sentence recognizer, word tokenizer, part-of-speech tagger, lemmatizer, and dependency parser - Utils: Add spaCy's Malay word tokenizer - Utils: Add spaCy's Slovenian sentence recognizer, part-of-speech tagger, lemmatizer, and dependency parser -- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement / Coleman's Readability Formula / Dale-Chall Readability Formula (New) / Danielson-Bryan's Readability Formula +- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement / Coleman's Readability Formula / Dale-Chall Readability Formula (New) / Danielson-Bryan's Readability Formula / Degrees of Reading Power ### ✨ Improvements - Utils: Update Wordless's sentence and sentence segment splitters diff --git a/doc/doc_eng.md b/doc/doc_eng.md index 04a81db58..5ac3ccbcd 100644 --- a/doc/doc_eng.md +++ b/doc/doc_eng.md @@ -888,123 +888,126 @@ These variables are used in the following formulas:
**NumWordsDale₇₆₉**: Number of words outside the Dale list of 769 easy words ([Dale, 1931](#ref-dale-1931))
**NumWordsDale₃₀₀₀**: Number of words outside the Dale list of 3000 easy words ([Dale & Chall, 1948b](#ref-dale-chall-1948b))
**NumWords1Syl**: Number of monosyllabic words
-**NumWords3PlusSyls**: Number of words with 3 or more syllables
+**NumWords3+Syls**: Number of words with 3 or more syllables
**NumSyls**: Number of syllables in the text or sample
**NumCharsAll**: Number of characters (including letters, CJK characters, etc., numerals, and punctuation marks) in the text or sample
-**NumCharsAlphanumeric**: Number of alphanumeric characters (letters, CJK characters, etc., numerals) in the text or sample
-**NumCharsAlphabetic**: Number of alphabetic characters (letters, CJK characters, etc.) in the text or sample +**NumCharsAlpha**: Number of alphabetic characters (letters, CJK characters, etc.) in the text or sample
+**NumCharsAlnum**: Number of alphanumeric characters (letters, CJK characters, etc., numerals) in the text or sample @@ -1018,16 +1021,17 @@ Measure of Readability|Formula Dale-Chall Readability Formula
([Dale & Chall, 1948a](#ref-dale-chall-1948a); [Dale & Chall, 1948b](#ref-dale-chall-1948b))|![Formula](/doc/measures/readability/x_c50.svg)

* This measure applies only to **English texts**. Dale-Chall Readability Formula (New)
([Chall & Dale, 1995](#ref-chall-dale-1995))|![Formula](/doc/measures/readability/x_c50_new.svg)

* This measure applies only to **English texts**. Danielson-Bryan's Readability Formula
([Danielson & Bryan, 1963](#ref-danielson-bryan-1963))|![Formula](/doc/measures/readability/danielson_bryans_readability_formula.svg)

* This measure has 2 variants, which you could select via **Menu → Preferences → Settings → Measures → Readability → Danielson-Bryan's Readability Formula → Variant**. +Degrees of Reading Power
([College Entrance Examination Board, 1981](#ref-college-entrance-examination-board-1981))|![Formula](/doc/measures/readability/drp.svg)
where **M** is *Bormuth's cloze mean*.

* This measure applies only to **English texts**. Devereux Readability Index
([Smith, 1961](#ref-smith-1961))|![Formula](/doc/measures/readability/devereux_readability_index.svg) -Flesch-Kincaid Grade Level¹
([Kincaid et al., 1975](#ref-kincaid-et-al-1975))|![Formula](/doc/measures/readability/flesch_kincaid_grade_level.svg) +Flesch-Kincaid Grade Level¹
([Kincaid et al., 1975](#ref-kincaid-et-al-1975))|![Formula](/doc/measures/readability/gl.svg) Flesch Reading Ease¹
([Flesch, 1948](#ref-flesch-1948)
Dutch: [Douma, 1960](#ref-douma-1960); [Brouwer, 1963](#ref-brouwer-1963)
French: [Kandel & Moles, 1958](#ref-kandel-moles-1958)
German: [Amstad, 1978](#ref-amstad-1978)
Italian: [Franchina & Vacca, 1986](#ref-franchina-vacca-1986)
Russian: [Oborneva, 2006](#ref-oborneva-2006)
Spanish: [Fernández Huerta, 1959](#ref-fernandez-huerta-1959); [Szigriszt Pazos, 1993](#ref-szigrisze-pazos-1993))|![Formula](/doc/measures/readability/re.svg)

* This measure has multiple variants for some languages, which you could select via **Menu → Preferences → Settings → Measures → Readability → Flesch Reading Ease**. Flesch Reading Ease (Simplified)¹
([Farr et al., 1951](#ref-farr-et-al-1951))|![Formula](/doc/measures/readability/re_simplified.svg) FORCAST Grade Level¹
([Caylor et al., 1973](#ref-caylor-et-al-1973))|![Formula](/doc/measures/readability/rgl.svg)

* A sample of 150 words is taken randomly from the text, thus the text should be **at least 150 words long**. -Fórmula de comprensibilidad de Gutiérrez de Polini
([Gutiérrez de Polini, 1972](#ref-gutierrez-de-polini-1972))|![Formula](/doc/measures/readability/cp.svg)

* This measure applies only to **Spanish texts**. +Fórmula de comprensibilidad de Gutiérrez de Polini
([Gutiérrez de Polini, 1972](#ref-gutierrez-de-polini-1972))|![Formula](/doc/measures/readability/cp.svg)

* This measure applies only to **Spanish texts**. Fórmula de Crawford¹
([Crawford, 1985](#ref-crawford-1985))|![Formula](/doc/measures/readability/formula_de_crawford.svg)

* This measure applies only to **Spanish texts**. Gulpease Index
([Lucisano & Emanuela Piemontese, 1988](#ref-lucisano-emanuela-piemontese-1988))|![Formula](/doc/measures/readability/gulpease_index.svg)

* This measure applies only to **Italian texts**. Gunning Fog Index¹
(English: [Gunning, 1968](#ref-gunning-1968)
Polish: [Pisarek, 1969](#ref-pisarek-1969))|![Formula](/doc/measures/readability/fog_index.svg)
where **NumHardWords** is the number of words with 3 or more syllables excluding all proper nouns and words with 3 syllables ending with *-ed* or *-es* for **English texts**, and the number of words with 4 or more syllables for **Polish texts**.

* This measure applies only to **English texts** and **Polish texts**. -Legibilidad µ
([Muñoz Baquedano, 2006](#ref-munoz-baquedano-2006))|![Formula](/doc/measures/readability/mu.svg)
where **LenWordsAvg** is the average word length in letters, and **LenWordsVar** is the variance of word lengths in letters.

* This measure applies only to **Spanish texts**.
* The text should be **at least 2 words long**. +Legibilidad µ
([Muñoz Baquedano, 2006](#ref-munoz-baquedano-2006))|![Formula](/doc/measures/readability/mu.svg)
where **LenWordsAvg** is the average word length in letters, and **LenWordsVar** is the variance of word lengths in letters.

* This measure applies only to **Spanish texts**.
* The text should be **at least 2 words long**. Lensear Write¹
([O’Hayre, 1966](#ref-o-hayre-1966))|![Formula](/doc/measures/readability/lensear_write.svg)
where **NumWords1Syl** is the number of monosyllabic words excluding *the*, *is*, *are*, *was*, *were*, and **NumSentences** is the number of sentences to the nearest period.

* This measure applies only to **English texts**.
* A sample of 100 words is taken randomly from the text.
* If the text is **shorter than 100 words**, **NumWords1Syl** and **NumSentences** need to be multiplied by 100 and then divided by the number of words in the text. Lix
([Björnsson, 1968](#ref-bjornsson-1968))|![Formula](/doc/measures/readability/lix.svg)
where **NumLongWords** is the number of words with 7 or more letters. McAlpine EFLAW Readability Score
([Nirmaldasan, 2009](#ref-nirmaldasan-2009))|![Formula](/doc/measures/readability/eflaw.svg)

* This measure applies only to **English texts**. @@ -1394,123 +1398,125 @@ Cubic Association Ratio
([Daille, 1994](#ref-daille-1994), [1995](#ref-daille [14] [**^**](#ref-pmi) Church, K. W., & Hanks, P. (1990). Word association norms, mutual information, and lexicography. *Computational Linguistics*, *16*(1), 22–29.
[15] [**^**](#ref-coleman-liau-index) Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. *Journal of Applied Psychology*, *60*(2), 283–284. https://doi.org/10.1037/h0076540
+ +[16] [**^**](#ref-drp) College Entrance Examination Board. (1981). *Degrees of reading power brings the students and the text together*.
-[16] [**^**](#ref-formula-de-crawford) Crawford, A. N. (1985). Fórmula y gráfico para determinar la comprensibilidad de textos de nivel primario en castellano. *Lectura y Vida*, *6*(4). http://www.lecturayvida.fahce.unlp.edu.ar/numeros/a6n4/06_04_Crawford.pdf
+[17] [**^**](#ref-formula-de-crawford) Crawford, A. N. (1985). Fórmula y gráfico para determinar la comprensibilidad de textos de nivel primario en castellano. *Lectura y Vida*, *6*(4). http://www.lecturayvida.fahce.unlp.edu.ar/numeros/a6n4/06_04_Crawford.pdf
-[17] [**^**](#ref-im3) Daille, B. (1994). *Approche mixte pour l'extraction automatique de terminologie: statistiques lexicales et filtres linguistiques* [Doctoral thesis, Paris Diderot University]. Béatrice Daille. http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=
+[18] [**^**](#ref-im3) Daille, B. (1994). *Approche mixte pour l'extraction automatique de terminologie: statistiques lexicales et filtres linguistiques* [Doctoral thesis, Paris Diderot University]. Béatrice Daille. http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=
-[18] [**^**](#ref-im3) Daille, B. (1995). Combined approach for terminology extraction: Lexical statistics and linguistic filtering. *UCREL technical papers* (Vol. 5). Lancaster University.
+[19] [**^**](#ref-im3) Daille, B. (1995). Combined approach for terminology extraction: Lexical statistics and linguistic filtering. *UCREL technical papers* (Vol. 5). Lancaster University.
-[19] [**^**](#ref-num-words-769) [**^**](#ref-spache-grade-level) Dale, E. (1931). A comparison of two word lists. *Educational Research Bulletin*, *10*(18), 484–489.
+[20] [**^**](#ref-num-words-769) [**^**](#ref-spache-grade-level) Dale, E. (1931). A comparison of two word lists. *Educational Research Bulletin*, *10*(18), 484–489.
-[20] [**^**](#ref-dale-chall-readability-formula) Dale, E., & Chall, J. S. (1948a). A formula for predicting readability. *Educational Research Bulletin*, *27*(1), 11–20, 28.
+[21] [**^**](#ref-dale-chall-readability-formula) Dale, E., & Chall, J. S. (1948a). A formula for predicting readability. *Educational Research Bulletin*, *27*(1), 11–20, 28.
-[21] [**^**](#ref-num-words-3000) [**^**](#ref-dale-chall-readability-formula) Dale, E., & Chall, J. S. (1948b). A formula for predicting readability: Instructions. *Educational Research Bulletin*, *27*(2), 37–54.
+[22] [**^**](#ref-num-words-3000) [**^**](#ref-dale-chall-readability-formula) Dale, E., & Chall, J. S. (1948b). A formula for predicting readability: Instructions. *Educational Research Bulletin*, *27*(2), 37–54.
-[22] [**^**](#ref-danielson-bryans-readability-formula) Danielson, W. A., & Bryan, S. D. (1963). Computer automation of two readability formulas. *Journalism Quarterly*, *40*(2), 201–206. https://doi.org/10.1177/107769906304000207
+[23] [**^**](#ref-danielson-bryans-readability-formula) Danielson, W. A., & Bryan, S. D. (1963). Computer automation of two readability formulas. *Journalism Quarterly*, *40*(2), 201–206. https://doi.org/10.1177/107769906304000207
-[23] [**^**](#ref-z-score) Dennis, S. F. (1964). The construction of a thesaurus automatically from a sample of text. In M. E. Stevens, V. E. Giuliano, & L. B. Heilprin (Eds.), *Proceedings of the symposium on statistical association methods for mechanized documentation* (pp. 61–148). National Bureau of Standards.
+[24] [**^**](#ref-z-score) Dennis, S. F. (1964). The construction of a thesaurus automatically from a sample of text. In M. E. Stevens, V. E. Giuliano, & L. B. Heilprin (Eds.), *Proceedings of the symposium on statistical association methods for mechanized documentation* (pp. 61–148). National Bureau of Standards.
-[24] [**^**](#ref-me) Dias, G., Guilloré, S., & Pereira Lopes, J. G. (1999). Language independent automatic acquisition of rigid multiword units from unrestricted text corpora. In A. Condamines, C. Fabre, & M. Péry-Woodley (Eds.), *TALN'99: 6ème Conférence Annuelle Sur le Traitement Automatique des Langues Naturelles* (pp. 333–339). TALN.
+[25] [**^**](#ref-me) Dias, G., Guilloré, S., & Pereira Lopes, J. G. (1999). Language independent automatic acquisition of rigid multiword units from unrestricted text corpora. In A. Condamines, C. Fabre, & M. Péry-Woodley (Eds.), *TALN'99: 6ème Conférence Annuelle Sur le Traitement Automatique des Langues Naturelles* (pp. 333–339). TALN.
-[25] [**^**](#ref-re) Douma, W. H. (1960). *De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules* [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323
+[26] [**^**](#ref-re) Douma, W. H. (1960). *De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules* [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323
-[26] [**^**](#ref-log-likehood-ratio-test) Dunning, T. E. (1993). Accurate methods for the statistics of surprise and coincidence. *Computational Linguistics*, *19*(1), 61–74.
+[27] [**^**](#ref-log-likehood-ratio-test) Dunning, T. E. (1993). Accurate methods for the statistics of surprise and coincidence. *Computational Linguistics*, *19*(1), 61–74.
-[27] [**^**](#ref-jaccard-index)[**^**](#ref-mi) Dunning, T. E. (1998). *Finding structure in text, genome and other symbolic sequences* [Doctoral dissertation, University of Sheffield]. arXiv. arxiv.org/pdf/1207.1847.pdf
+[28] [**^**](#ref-jaccard-index)[**^**](#ref-mi) Dunning, T. E. (1998). *Finding structure in text, genome and other symbolic sequences* [Doctoral dissertation, University of Sheffield]. arXiv. https://arxiv.org/pdf/1207.1847.pdf
-[28] [**^**](#ref-osman) El-Haj, M., & Rayson, P. (2016). OSMAN: A novel Arabic readability metric. In N. Calzolari, K. Choukri, T. Declerck, S. Goggi, M. Grobelnik, B. Maegaard, J. Mariani, H. Mazo, A. Moreno, J. Odijk, & S. Piperidis (Eds.), *Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)* (pp. 250–255). European Language Resources Association. http://www.lrec-conf.org/proceedings/lrec2016/index.html
+[29] [**^**](#ref-osman) El-Haj, M., & Rayson, P. (2016). OSMAN: A novel Arabic readability metric. In N. Calzolari, K. Choukri, T. Declerck, S. Goggi, M. Grobelnik, B. Maegaard, J. Mariani, H. Mazo, A. Moreno, J. Odijk, & S. Piperidis (Eds.), *Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)* (pp. 250–255). European Language Resources Association. http://www.lrec-conf.org/proceedings/lrec2016/index.html
-[29] [**^**](#ref-engwalls-fm) Engwall, G. (1974). *Fréquence et distribution du vocabulaire dans un choix de romans français* [Unpublished doctoral dissertation]. Stockholm University.
+[30] [**^**](#ref-engwalls-fm) Engwall, G. (1974). *Fréquence et distribution du vocabulaire dans un choix de romans français* [Unpublished doctoral dissertation]. Stockholm University.
-[30] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
+[31] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
-[31] [**^**](#ref-re) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
+[32] [**^**](#ref-re) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
-[32] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
+[33] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
-[33] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
+[34] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
-[34] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
+[35] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
-[35] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
+[36] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
-[36] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
+[37] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
-[37] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
+[38] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
-[38] [**^**](#ref-formula-de-comprensibilidad-de-gutierrez-de-polini) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
+[39] [**^**](#ref-cp) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
-[39] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/.
+[40] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/
-[40] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
+[41] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
-[41] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
+[42] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
-[42] [**^**](#ref-re) Kandel, L., & Moles A. (1958). Application de l’indice de flesch la langue francaise [applying flesch index to french language]. *The Journal of Educational Research*, *21*, 283–287.
+[43] [**^**](#ref-re) Kandel, L., & Moles, A. (1958). Application de l’indice de Flesch à la langue française [Applying Flesch index to French language]. *The Journal of Educational Research*, *21*, 283–287.
-[43] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
+[44] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
-[44] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
+[45] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
-[45] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
+[46] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
-[46] [**^**](#ref-flesch-kincaid-grade-level) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
+[47] [**^**](#ref-gl) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
-[47] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
+[48] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
-[48] [**^**](#ref-mi-log-f) Lexical Computing. (2015, July 8). *Statistics used in Sketch Engine*. Sketch Engine. https://www.sketchengine.eu/documentation/statistics-used-in-sketch-engine/
+[49] [**^**](#ref-mi-log-f) Lexical Computing. (2015, July 8). *Statistics used in Sketch Engine*. Sketch Engine. https://www.sketchengine.eu/documentation/statistics-used-in-sketch-engine/
-[49] [**^**](#ref-colemans-readability-formula) Liau, T. L., Bassin, C. B., Martin, C. J., & Coleman, E. B. (1976). Modification of the Coleman readability formulas. *Journal of Reading Behavior*, *8*(4), 381–386. https://journals.sagepub.com/doi/pdf/10.1080/10862967609547193
+[50] [**^**](#ref-colemans-readability-formula) Liau, T. L., Bassin, C. B., Martin, C. J., & Coleman, E. B. (1976). Modification of the Coleman readability formulas. *Journal of Reading Behavior*, *8*(4), 381–386. https://journals.sagepub.com/doi/pdf/10.1080/10862967609547193
-[50] [**^**](#ref-griess-dp-norm) Lijffijt, J., & Gries, S. T. (2012). Correction to Stefan Th. Gries’ “dispersions and adjusted frequencies in corpora”. *International Journal of Corpus Linguistics*, *17*(1), 147–149. https://doi.org/10.1075/ijcl.17.1.08lij
+[51] [**^**](#ref-griess-dp-norm) Lijffijt, J., & Gries, S. T. (2012). Correction to Stefan Th. Gries’ “dispersions and adjusted frequencies in corpora”. *International Journal of Corpus Linguistics*, *17*(1), 147–149. https://doi.org/10.1075/ijcl.17.1.08lij
-[51] [**^**](#ref-gulpease-index) Lucisano, P., & Emanuela Piemontese, M. (1988). GULPEASE: A formula for the prediction of the difficulty of texts in Italian. *Scuola e Città*, *39*(3), pp. 110–124.
+[52] [**^**](#ref-gulpease-index) Lucisano, P., & Emanuela Piemontese, M. (1988). GULPEASE: A formula for the prediction of the difficulty of texts in Italian. *Scuola e Città*, *39*(3), pp. 110–124.
-[52] [**^**](#ref-lynes-d3) Lyne, A. A. (1985). Dispersion. In *The vocabulary of French business correspondence: Word frequencies, collocations, and problems of lexicometric method* (pp. 101–124). Slatkine/Champion.
+[53] [**^**](#ref-lynes-d3) Lyne, A. A. (1985). Dispersion. In *The vocabulary of French business correspondence: Word frequencies, collocations, and problems of lexicometric method* (pp. 101–124). Slatkine/Champion.
-[53] [**^**](#ref-smog-grade) McLaughlin, G. H. (1969). SMOG grading: A new readability formula. *Journal of Reading*, *12*(8), pp. 639–646.
+[54] [**^**](#ref-smog-grade) McLaughlin, G. H. (1969). SMOG grading: A new readability formula. *Journal of Reading*, *12*(8), pp. 639–646.
-[54] [**^**](#ref-legibilidad-mu) Muñoz Baquedano, M. (2006). Legibilidad y variabilidad de los textos. *Boletín de Investigación Educacional, Pontificia Universidad Católica de Chile*, *21*(2), 13–26.
+[55] [**^**](#ref-mu) Muñoz Baquedano, M. (2006). Legibilidad y variabilidad de los textos. *Boletín de Investigación Educacional, Pontificia Universidad Católica de Chile*, *21*(2), 13–26.
-[55] [**^**](#ref-eflaw) Nirmaldasan. (2009, April 30). *McAlpine EFLAW readability score*. Readability Monitor. Retrieved November 15, 2022, from https://strainindex.wordpress.com/2009/04/30/mcalpine-eflaw-readability-score/
+[56] [**^**](#ref-eflaw) Nirmaldasan. (2009, April 30). *McAlpine EFLAW readability score*. Readability Monitor. Retrieved November 15, 2022, from https://strainindex.wordpress.com/2009/04/30/mcalpine-eflaw-readability-score/
-[56] [**^**](#ref-pearsons-chi-squared-test) Oakes, M. P. (1998). *Statistics for Corpus Linguistics*. Edinburgh University Press.
+[57] [**^**](#ref-pearsons-chi-squared-test) Oakes, M. P. (1998). *Statistics for Corpus Linguistics*. Edinburgh University Press.
-[57] [**^**](#ref-re) Oborneva, I. V. (2006). *Автоматизированная оценка сложности учебных текстов на основе статистических параметров* [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
+[58] [**^**](#ref-re) Oborneva, I. V. (2006). *Автоматизированная оценка сложности учебных текстов на основе статистических параметров* [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
-[58] [**^**](#ref-lensear-write) O’Hayre, J. (1966). *Gobbledygook has gotta go*. U.S. Government Printing Office. https://www.governmentattic.org/15docs/Gobbledygook_Has_Gotta_Go_1966.pdf
+[59] [**^**](#ref-lensear-write) O’Hayre, J. (1966). *Gobbledygook has gotta go*. U.S. Government Printing Office. https://www.governmentattic.org/15docs/Gobbledygook_Has_Gotta_Go_1966.pdf
-[59] [**^**](#ref-students-t-test-2-sample) Paquot, M., & Bestgen, Y. (2009). Distinctive words in academic writing: A comparison of three statistical tests for keyword extraction. *Language and Computers*, *68*, 247–269.
+[60] [**^**](#ref-students-t-test-2-sample) Paquot, M., & Bestgen, Y. (2009). Distinctive words in academic writing: A comparison of three statistical tests for keyword extraction. *Language and Computers*, *68*, 247–269.
-[60] [**^**](#ref-fishers-exact-test) Pedersen, T. (1996). Fishing for exactness. In T. Winn (Ed.), *Proceedings of the Sixth Annual South-Central Regional SAS Users' Group Conference* (pp. 188–200). The South–Central Regional SAS Users' Group.
+[61] [**^**](#ref-fishers-exact-test) Pedersen, T. (1996). Fishing for exactness. In T. Winn (Ed.), *Proceedings of the Sixth Annual South-Central Regional SAS Users' Group Conference* (pp. 188–200). The South–Central Regional SAS Users' Group.
-[61] [**^**](#ref-min-sensitivity) Pedersen, T. (1998). Dependent bigram identification. In *Proceedings of the Fifteenth National Conference on Artificial Intelligence* (p. 1197). AAAI Press.
+[62] [**^**](#ref-min-sensitivity) Pedersen, T. (1998). Dependent bigram identification. In *Proceedings of the Fifteenth National Conference on Artificial Intelligence* (p. 1197). AAAI Press.
-[62] [**^**](#ref-fog-index) Pisarek, W. (1969). Jak mierzyć zrozumiałość tekstu?. *Zeszyty Prasoznawcze*, *4*(42), 35–48.
+[63] [**^**](#ref-fog-index) Pisarek, W. (1969). Jak mierzyć zrozumiałość tekstu?. *Zeszyty Prasoznawcze*, *4*(42), 35–48.
-[63] [**^**](#ref-odds-ratio) Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. *Corpus Linguistics and Linguistic Theory*, *15*(1), pp. 133–167. https://doi.org/10.1515/cllt-2015-0030
+[64] [**^**](#ref-odds-ratio) Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. *Corpus Linguistics and Linguistic Theory*, *15*(1), pp. 133–167. https://doi.org/10.1515/cllt-2015-0030
-[64] [**^**](#ref-poisson-collocation-measure) Quasthoff, U., & Wolff, C. (2002). The poisson collocation measure and its applications. *Proceedings of 2nd International Workshop on Computational Approaches to Collocations*. IEEE.
+[65] [**^**](#ref-poisson-collocation-measure) Quasthoff, U., & Wolff, C. (2002). The poisson collocation measure and its applications. *Proceedings of 2nd International Workshop on Computational Approaches to Collocations*. IEEE.
-[65] [**^**](#ref-rosengrens-s)[**^**](#ref-rosengrens-kf) Rosengren, I. (1971). The quantitative concept of language and its relation to the structure of frequency dictionaries. *Études de linguistique appliquée*, *1*, 103–127.
+[66] [**^**](#ref-rosengrens-s)[**^**](#ref-rosengrens-kf) Rosengren, I. (1971). The quantitative concept of language and its relation to the structure of frequency dictionaries. *Études de linguistique appliquée*, *1*, 103–127.
-[66] [**^**](#ref-log-dice) Rychlý, P. (2008). A lexicographyer-friendly association score. In P. Sojka & A. Horák (Eds.), *Proceedings of Second Workshop on Recent Advances in Slavonic Natural Languages Processing*. Masaryk University
+[67] [**^**](#ref-log-dice) Rychlý, P. (2008). A lexicographer-friendly association score. In P. Sojka & A. Horák (Eds.), *Proceedings of Second Workshop on Recent Advances in Slavonic Natural Languages Processing*. Masaryk University.
-[67] [**^**](#ref-ald) [**^**](#ref-fald) [**^**](#ref-arf) [**^**](#ref-farf) [**^**](#ref-awt) [**^**](#ref-fawt) Savický, P., & Hlaváčová, J. (2002). Measures of word commonness. *Journal of Quantitative Linguistics*, *9*(3), 215–231. https://doi.org/10.1076/jqul.9.3.215.14124
+[68] [**^**](#ref-ald) [**^**](#ref-fald) [**^**](#ref-arf) [**^**](#ref-farf) [**^**](#ref-awt) [**^**](#ref-fawt) Savický, P., & Hlaváčová, J. (2002). Measures of word commonness. *Journal of Quantitative Linguistics*, *9*(3), 215–231. https://doi.org/10.1076/jqul.9.3.215.14124
-[68] [**^**](#ref-dices-coeff) Smadja, F., McKeown, K. R., & Hatzivassiloglou, V. (1996). Translating collocations for bilingual lexicons: A statistical approach. *Computational Linguistics*, *22*(1), pp. 1–38.
+[69] [**^**](#ref-dices-coeff) Smadja, F., McKeown, K. R., & Hatzivassiloglou, V. (1996). Translating collocations for bilingual lexicons: A statistical approach. *Computational Linguistics*, *22*(1), 1–38.
-[69] [**^**](#ref-devereux-readability-index) Smith, E. A. (1961). Devereaux readability index. *Journal of Educational Research*, *54*(8), 298–303. https://doi.org/10.1080/00220671.1961.10882728
+[70] [**^**](#ref-devereux-readability-index) Smith, E. A. (1961). Devereaux readability index. *Journal of Educational Research*, *54*(8), 298–303. https://doi.org/10.1080/00220671.1961.10882728
-[70] [**^**](#ref-ari) Smith, E. A., & Senter, R. J. (1967). *Automated readability index*. Aerospace Medical Research Laboratories. https://apps.dtic.mil/sti/pdfs/AD0667273.pdf
+[71] [**^**](#ref-ari) Smith, E. A., & Senter, R. J. (1967). *Automated readability index*. Aerospace Medical Research Laboratories. https://apps.dtic.mil/sti/pdfs/AD0667273.pdf
-[71] [**^**](#ref-spache-grade-level) Spache, G. (1953). A new readability formula for primary-grade reading materials. *Elementary School Journal*, *53*(7), 410–413. https://doi.org/10.1086/458513
+[72] [**^**](#ref-spache-grade-level) Spache, G. (1953). A new readability formula for primary-grade reading materials. *Elementary School Journal*, *53*(7), 410–413. https://doi.org/10.1086/458513
-[72] [**^**](#ref-re) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
+[73] [**^**](#ref-re) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
-[73] [**^**](#ref-lfmd)[**^**](#ref-md) Thanopoulos, A., Fakotakis, N., & Kokkinakis, G. (2002). Comparative evaluation of collocation extraction metrics. In M. G. González & C. P. S. Araujo (Eds.), *Proceedings of the Third International Conference on Language Resources and Evaluation* (pp. 620–625). European Language Resources Association.
+[74] [**^**](#ref-lfmd)[**^**](#ref-md) Thanopoulos, A., Fakotakis, N., & Kokkinakis, G. (2002). Comparative evaluation of collocation extraction metrics. In M. G. González & C. P. S. Araujo (Eds.), *Proceedings of the Third International Conference on Language Resources and Evaluation* (pp. 620–625). European Language Resources Association.
-[74] [**^**](#ref-log-likehood-ratio-test-bayes-factor)[**^**](#ref-students-t-test-2-sample-bayes-factor) Wilson, A. (2013). Embracing Bayes Factors for key item analysis in corpus linguistics. In M. Bieswanger & A. Koll-Stobbe (Eds.), *New Approaches to the Study of Linguistic Variability* (pp. 3–11). Peter Lang.
+[75] [**^**](#ref-log-likehood-ratio-test-bayes-factor)[**^**](#ref-students-t-test-2-sample-bayes-factor) Wilson, A. (2013). Embracing Bayes Factors for key item analysis in corpus linguistics. In M. Bieswanger & A. Koll-Stobbe (Eds.), *New Approaches to the Study of Linguistic Variability* (pp. 3–11). Peter Lang.
-[75] [**^**](#ref-zhangs-distributional-consistency) Zhang, H., Huang, C., & Yu, S. (2004). Distributional consistency: As a general method for defining a core lexicon. In M. T. Lino, M. F. Xavier, F. Ferreira, R. Costa, & R. Silva (Eds.), *Proceedings of Fourth International Conference on Language Resources and Evaluation* (pp. 1119–1122). European Language Resources Association.
+[76] [**^**](#ref-zhangs-distributional-consistency) Zhang, H., Huang, C., & Yu, S. (2004). Distributional consistency: As a general method for defining a core lexicon. In M. T. Lino, M. F. Xavier, F. Ferreira, R. Costa, & R. Silva (Eds.), *Proceedings of Fourth International Conference on Language Resources and Evaluation* (pp. 1119–1122). European Language Resources Association.
diff --git a/doc/measures/readability/aari.svg b/doc/measures/readability/aari.svg index 7780648a6..72ebd0fb8 100644 --- a/doc/measures/readability/aari.svg +++ b/doc/measures/readability/aari.svg @@ -1,7 +1,8 @@ - + + @@ -10,122 +11,105 @@ - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/ari.svg b/doc/measures/readability/ari.svg index dd90e2f09..a627aa257 100644 --- a/doc/measures/readability/ari.svg +++ b/doc/measures/readability/ari.svg @@ -1,7 +1,8 @@ - + + @@ -13,89 +14,88 @@ - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/bormuths_cloze_mean_gp.svg b/doc/measures/readability/bormuths_cloze_mean_gp.svg index a9ae6b773..655a4a0c1 100644 --- a/doc/measures/readability/bormuths_cloze_mean_gp.svg +++ b/doc/measures/readability/bormuths_cloze_mean_gp.svg @@ -1,32 +1,10 @@ - + - - - - - - - - - - - - - - - - - - - - - - @@ -41,291 +19,320 @@ + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/coleman_liau_index.svg b/doc/measures/readability/coleman_liau_index.svg index 309e4fec9..04860b3e3 100644 --- a/doc/measures/readability/coleman_liau_index.svg +++ b/doc/measures/readability/coleman_liau_index.svg @@ -1,11 +1,12 @@ - + + @@ -19,185 +20,178 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/colemans_readability_formula.svg 
b/doc/measures/readability/colemans_readability_formula.svg index 6aadfaade..95fbd670c 100644 --- a/doc/measures/readability/colemans_readability_formula.svg +++ b/doc/measures/readability/colemans_readability_formula.svg @@ -1,29 +1,9 @@ - + - - - - - - - - - - - - - - - - - - - - @@ -37,383 +17,403 @@ + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/cp.svg b/doc/measures/readability/cp.svg index dcbafbe78..55217a083 100644 --- 
a/doc/measures/readability/cp.svg +++ b/doc/measures/readability/cp.svg @@ -1,7 +1,8 @@ - + + @@ -11,96 +12,88 @@ - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/danielson_bryans_readability_formula.svg b/doc/measures/readability/danielson_bryans_readability_formula.svg index 1919d6acc..b7f94ff47 100644 --- a/doc/measures/readability/danielson_bryans_readability_formula.svg +++ b/doc/measures/readability/danielson_bryans_readability_formula.svg @@ -1,7 +1,8 @@ - + + @@ -16,200 +17,199 @@ - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/devereux_readability_index.svg b/doc/measures/readability/devereux_readability_index.svg index 9aae083c3..c6c638d6e 100644 --- a/doc/measures/readability/devereux_readability_index.svg +++ b/doc/measures/readability/devereux_readability_index.svg @@ -1,7 +1,8 @@ - + + @@ -12,100 +13,99 @@ - - - - - - - - - - - 
- - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/drp.svg b/doc/measures/readability/drp.svg new file mode 100644 index 000000000..3a014d559 --- /dev/null +++ b/doc/measures/readability/drp.svg @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/eflaw.svg b/doc/measures/readability/eflaw.svg index 99777191d..fdf5c3927 100644 --- a/doc/measures/readability/eflaw.svg +++ b/doc/measures/readability/eflaw.svg @@ -1,69 +1,69 @@ - + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/flesch_kincaid_grade_level.svg b/doc/measures/readability/flesch_kincaid_grade_level.svg deleted file mode 100644 index cc58ddf34..000000000 --- a/doc/measures/readability/flesch_kincaid_grade_level.svg +++ /dev/null @@ -1,93 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/doc/measures/readability/fog_index.svg b/doc/measures/readability/fog_index.svg index 36c7b98aa..351014aa5 100644 --- a/doc/measures/readability/fog_index.svg +++ b/doc/measures/readability/fog_index.svg @@ -1,98 +1,98 @@ - + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/formula_de_crawford.svg b/doc/measures/readability/formula_de_crawford.svg index 3f3f36049..a954b7c65 100644 --- a/doc/measures/readability/formula_de_crawford.svg +++ b/doc/measures/readability/formula_de_crawford.svg @@ -1,7 +1,8 @@ - + + @@ -16,105 +17,104 @@ - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/gl.svg b/doc/measures/readability/gl.svg new file mode 100644 index 000000000..fcfd03799 --- /dev/null +++ b/doc/measures/readability/gl.svg @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/gulpease_index.svg b/doc/measures/readability/gulpease_index.svg index dca78cec0..277341cb3 100644 --- a/doc/measures/readability/gulpease_index.svg +++ b/doc/measures/readability/gulpease_index.svg @@ -1,105 +1,98 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/lensear_write.svg b/doc/measures/readability/lensear_write.svg index 94d9943f7..50218bddf 100644 --- a/doc/measures/readability/lensear_write.svg +++ b/doc/measures/readability/lensear_write.svg @@ -1,61 +1,61 @@ - + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/lix.svg b/doc/measures/readability/lix.svg index 48fad1bf0..f7561a4d6 100644 --- a/doc/measures/readability/lix.svg +++ b/doc/measures/readability/lix.svg @@ -1,81 +1,81 @@ - + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/mu.svg b/doc/measures/readability/mu.svg index 172112f54..2ecef7287 100644 --- a/doc/measures/readability/mu.svg +++ b/doc/measures/readability/mu.svg @@ -1,79 +1,79 @@ - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/osman.svg b/doc/measures/readability/osman.svg index 0fa325114..6f0530fcd 100644 --- a/doc/measures/readability/osman.svg +++ b/doc/measures/readability/osman.svg @@ -1,7 +1,8 @@ - + + @@ -14,143 +15,142 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/re.svg b/doc/measures/readability/re.svg index dce0c2fbf..75abfa9f1 100644 --- a/doc/measures/readability/re.svg +++ b/doc/measures/readability/re.svg @@ -1,429 +1,432 @@ - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/re_simplified.svg b/doc/measures/readability/re_simplified.svg index fd060f7b7..b1ec1e3b1 100644 --- a/doc/measures/readability/re_simplified.svg +++ b/doc/measures/readability/re_simplified.svg @@ -1,11 +1,12 @@ - + + @@ -13,96 +14,95 @@ - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/rgl.svg b/doc/measures/readability/rgl.svg index 4985d2a28..2a6506640 100644 --- a/doc/measures/readability/rgl.svg +++ b/doc/measures/readability/rgl.svg @@ -1,7 +1,8 @@ - + + @@ -10,49 +11,48 @@ - - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/rix.svg b/doc/measures/readability/rix.svg index d60683386..cd579fef9 100644 --- a/doc/measures/readability/rix.svg +++ b/doc/measures/readability/rix.svg @@ -1,56 +1,56 @@ - + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/smog_grade.svg b/doc/measures/readability/smog_grade.svg index 5bd6b7977..740cce8d8 100644 --- a/doc/measures/readability/smog_grade.svg +++ 
b/doc/measures/readability/smog_grade.svg @@ -1,9 +1,10 @@ - + + @@ -12,55 +13,50 @@ - - - - - - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/spache_grade_level.svg b/doc/measures/readability/spache_grade_level.svg index 8a972dfe9..2488bd53a 100644 --- a/doc/measures/readability/spache_grade_level.svg +++ b/doc/measures/readability/spache_grade_level.svg @@ -1,30 +1,10 @@ - + - - - - - - - - - - - - - - - - - - - - @@ -34,81 +14,101 @@ + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/wstf.svg b/doc/measures/readability/wstf.svg index 86e0c4ea3..dc07b21c3 100644 --- a/doc/measures/readability/wstf.svg +++ b/doc/measures/readability/wstf.svg @@ -1,9 +1,10 @@ - + + @@ -20,287 +21,282 @@ - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/x_c50.svg b/doc/measures/readability/x_c50.svg index 8d1658e93..14d50749e 100644 --- a/doc/measures/readability/x_c50.svg +++ b/doc/measures/readability/x_c50.svg @@ -1,122 +1,122 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/x_c50_new.svg b/doc/measures/readability/x_c50_new.svg index a51e186a9..ac760c97c 100644 --- a/doc/measures/readability/x_c50_new.svg +++ b/doc/measures/readability/x_c50_new.svg @@ -1,112 +1,112 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/wl_tests_measures/test_measures_readability.py b/tests/wl_tests_measures/test_measures_readability.py index 46c8d5b69..20c918584 100644 --- a/tests/wl_tests_measures/test_measures_readability.py +++ 
b/tests/wl_tests_measures/test_measures_readability.py @@ -16,6 +16,8 @@ # along with this program. If not, see . # ---------------------------------------------------------------------- +import math + import numpy from tests import wl_test_init @@ -67,10 +69,10 @@ def __init__(self, tokens_multilevel, lang = 'eng_us'): test_text_rus_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'rus') test_text_other_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'other') -def test_automated_ara_readability_index(): - aari_ara_0 = wl_measures_readability.automated_ara_readability_index(main, test_text_ara_0) - aari_ara_12 = wl_measures_readability.automated_ara_readability_index(main, test_text_ara_12) - aari_eng_12 = wl_measures_readability.automated_ara_readability_index(main, test_text_eng_12) +def test_aari(): + aari_ara_0 = wl_measures_readability.aari(main, test_text_ara_0) + aari_ara_12 = wl_measures_readability.aari(main, test_text_ara_12) + aari_eng_12 = wl_measures_readability.aari(main, test_text_eng_12) print('Automated Arabic Readability Index:') print(f'\tara/0: {aari_ara_0}') @@ -81,10 +83,10 @@ def test_automated_ara_readability_index(): assert aari_ara_12 == 3.28 * 46 + 1.43 * (46 / 12) + 1.24 * (12 / 3) assert aari_eng_12 == 'no_support' -def test_automated_readability_index(): - ari_eng_0 = wl_measures_readability.automated_readability_index(main, test_text_eng_0) - ari_eng_12 = wl_measures_readability.automated_readability_index(main, test_text_eng_12) - ari_spa_12 = wl_measures_readability.automated_readability_index(main, test_text_spa_12) +def test_ari(): + ari_eng_0 = wl_measures_readability.ari(main, test_text_eng_0) + ari_eng_12 = wl_measures_readability.ari(main, test_text_eng_12) + ari_spa_12 = wl_measures_readability.ari(main, test_text_spa_12) print('Automated Readability Index:') print(f'\teng/0: {ari_eng_0}') @@ -224,6 +226,21 @@ def test_danielson_bryans_readability_formula(): assert danielson_bryan_eng_12_1 == 1.0364 * (47 / (12 - 1)) + 0.0194 * 
(47 / 3) - 0.6059 assert danielson_bryan_eng_12_2 == danielson_bryan_other_12 == 131.059 - 10.364 * (47 / (12 - 1)) - 0.194 * (47 / 3) +def test_drp(): + drp_eng_0 = wl_measures_readability.drp(main, test_text_eng_0) + drp_eng_12 = wl_measures_readability.drp(main, test_text_eng_12) + drp_other_12 = wl_measures_readability.drp(main, test_text_other_12) + + print('Degrees of Reading Power:') + print(f'\teng/0: {drp_eng_0}') + print(f'\teng/12: {drp_eng_12}') + print(f'\tother/12: {drp_other_12}') + + assert drp_eng_0 == 'text_too_short' + m = wl_measures_readability.bormuths_cloze_mean(main, test_text_eng_12) + assert drp_eng_12 == 100 - math.floor(m * 100 + 0.5) + assert drp_other_12 == 'no_support' + def test_devereux_readability_index(): grade_placement_eng_0 = wl_measures_readability.devereux_readability_index(main, test_text_eng_0) grade_placement_eng_12 = wl_measures_readability.devereux_readability_index(main, test_text_eng_12) @@ -237,11 +254,11 @@ def test_devereux_readability_index(): assert grade_placement_eng_0 == 'text_too_short' assert grade_placement_eng_12 == grade_placement_spa_12 == 1.56 * (47 / 12) + 0.19 * (12 / 3) - 6.49 -def test_flesch_kincaid_grade_level(): - gl_eng_0 = wl_measures_readability.flesch_kincaid_grade_level(main, test_text_eng_0) - gl_eng_12 = wl_measures_readability.flesch_kincaid_grade_level(main, test_text_eng_12) - gl_spa_12 = wl_measures_readability.flesch_kincaid_grade_level(main, test_text_spa_12) - gl_other_12 = wl_measures_readability.flesch_kincaid_grade_level(main, test_text_other_12) +def test_gl(): + gl_eng_0 = wl_measures_readability.gl(main, test_text_eng_0) + gl_eng_12 = wl_measures_readability.gl(main, test_text_eng_12) + gl_spa_12 = wl_measures_readability.gl(main, test_text_spa_12) + gl_other_12 = wl_measures_readability.gl(main, test_text_other_12) print('Flesch-Kincaid Grade Level:') print(f'\teng/0: {gl_eng_0}') @@ -254,27 +271,27 @@ def test_flesch_kincaid_grade_level(): assert gl_spa_12 != 'no_support' 
assert gl_other_12 == 'no_support' -def test_flesch_reading_ease(): - flesch_re_eng_0 = wl_measures_readability.flesch_reading_ease(main, test_text_eng_0) - flesch_re_eng_12 = wl_measures_readability.flesch_reading_ease(main, test_text_eng_12) +def test_re_flesch(): + flesch_re_eng_0 = wl_measures_readability.re_flesch(main, test_text_eng_0) + flesch_re_eng_12 = wl_measures_readability.re_flesch(main, test_text_eng_12) settings['re']['variant_nld'] = 'Douma' - flesch_re_nld_12_douma = wl_measures_readability.flesch_reading_ease(main, test_text_nld_12) + flesch_re_nld_12_douma = wl_measures_readability.re_flesch(main, test_text_nld_12) settings['re']['variant_nld'] = "Brouwer's Leesindex A" - flesch_re_nld_12_brouwer = wl_measures_readability.flesch_reading_ease(main, test_text_nld_12) + flesch_re_nld_12_brouwer = wl_measures_readability.re_flesch(main, test_text_nld_12) - flesch_re_fra_12 = wl_measures_readability.flesch_reading_ease(main, test_text_fra_12) - flesch_re_deu_12 = wl_measures_readability.flesch_reading_ease(main, test_text_deu_12) - flesch_re_ita_12 = wl_measures_readability.flesch_reading_ease(main, test_text_ita_12) - flesch_re_rus_12 = wl_measures_readability.flesch_reading_ease(main, test_text_rus_12) + flesch_re_fra_12 = wl_measures_readability.re_flesch(main, test_text_fra_12) + flesch_re_deu_12 = wl_measures_readability.re_flesch(main, test_text_deu_12) + flesch_re_ita_12 = wl_measures_readability.re_flesch(main, test_text_ita_12) + flesch_re_rus_12 = wl_measures_readability.re_flesch(main, test_text_rus_12) settings['re']['variant_spa'] = 'Fernández Huerta' - flesch_re_spa_12_fh = wl_measures_readability.flesch_reading_ease(main, test_text_spa_12) + flesch_re_spa_12_fh = wl_measures_readability.re_flesch(main, test_text_spa_12) settings['re']['variant_spa'] = 'Szigriszt Pazos' - flesch_re_spa_12_sp = wl_measures_readability.flesch_reading_ease(main, test_text_spa_12) + flesch_re_spa_12_sp = wl_measures_readability.re_flesch(main, 
test_text_spa_12) - flesch_re_afr_12 = wl_measures_readability.flesch_reading_ease(main, test_text_afr_12) - flesch_re_other_12 = wl_measures_readability.flesch_reading_ease(main, test_text_other_12) + flesch_re_afr_12 = wl_measures_readability.re_flesch(main, test_text_afr_12) + flesch_re_other_12 = wl_measures_readability.re_flesch(main, test_text_other_12) print('Flesch Reading Ease:') print(f'\teng/0: {flesch_re_eng_0}') @@ -303,11 +320,11 @@ def test_flesch_reading_ease(): assert flesch_re_afr_12 == 206.835 - 0.846 * (18 / 12 * 100) - 1.015 * (12 / 3) assert flesch_re_other_12 == 'no_support' -def test_flesch_reading_ease_simplified(): - flesch_re_simplified_eng_0 = wl_measures_readability.flesch_reading_ease_simplified(main, test_text_eng_0) - flesch_re_simplified_eng_12 = wl_measures_readability.flesch_reading_ease_simplified(main, test_text_eng_12) - flesch_re_simplified_spa_12 = wl_measures_readability.flesch_reading_ease_simplified(main, test_text_spa_12) - flesch_re_simplified_other_12 = wl_measures_readability.flesch_reading_ease_simplified(main, test_text_other_12) +def test_re_simplified(): + flesch_re_simplified_eng_0 = wl_measures_readability.re_simplified(main, test_text_eng_0) + flesch_re_simplified_eng_12 = wl_measures_readability.re_simplified(main, test_text_eng_12) + flesch_re_simplified_spa_12 = wl_measures_readability.re_simplified(main, test_text_spa_12) + flesch_re_simplified_other_12 = wl_measures_readability.re_simplified(main, test_text_other_12) print('Flesch Reading Ease (Simplified):') print(f'\teng/0: {flesch_re_simplified_eng_0}') @@ -319,11 +336,11 @@ def test_flesch_reading_ease_simplified(): assert flesch_re_simplified_eng_12 == flesch_re_simplified_spa_12 == 1.599 * (9 / 12 * 100) - 1.015 * (12 / 3) - 31.517 assert flesch_re_simplified_other_12 == 'no_support' -def test_forcast_grade_level(): - rgl_eng_12 = wl_measures_readability.forcast_grade_level(main, test_text_eng_12) - rgl_eng_150 = 
wl_measures_readability.forcast_grade_level(main, test_text_eng_150) - rgl_spa_150 = wl_measures_readability.forcast_grade_level(main, test_text_spa_150) - rgl_other_12 = wl_measures_readability.forcast_grade_level(main, test_text_other_12) +def test_rgl(): + rgl_eng_12 = wl_measures_readability.rgl(main, test_text_eng_12) + rgl_eng_150 = wl_measures_readability.rgl(main, test_text_eng_150) + rgl_spa_150 = wl_measures_readability.rgl(main, test_text_spa_150) + rgl_other_12 = wl_measures_readability.rgl(main, test_text_other_12) print('FORCAST Grade Level:') print(f'\teng/12: {rgl_eng_12}') @@ -335,10 +352,10 @@ def test_forcast_grade_level(): assert rgl_eng_150 == rgl_spa_150 == 20.43 - 0.11 * (6 * 18 + 4) assert rgl_other_12 == 'no_support' -def test_formula_de_comprensibilidad_de_gutierrez_de_polini(): - cp_spa_0 = wl_measures_readability.formula_de_comprensibilidad_de_gutierrez_de_polini(main, test_text_spa_0) - cp_spa_12 = wl_measures_readability.formula_de_comprensibilidad_de_gutierrez_de_polini(main, test_text_spa_12) - cp_eng_12 = wl_measures_readability.formula_de_comprensibilidad_de_gutierrez_de_polini(main, test_text_eng_12) +def test_cp(): + cp_spa_0 = wl_measures_readability.cp(main, test_text_spa_0) + cp_spa_12 = wl_measures_readability.cp(main, test_text_spa_12) + cp_eng_12 = wl_measures_readability.cp(main, test_text_eng_12) print('Fórmula de Comprensibilidad de Gutiérrez de Polini:') print(f'\tspa/0: {cp_spa_0}') @@ -377,11 +394,11 @@ def test_gulpease_index(): assert gulpease_index_ita_12 == 89 + (300 * 3 - 10 * 45) / 12 assert gulpease_index_eng_12 == 'no_support' -def test_gunning_fog_index(): - fog_index_eng_0 = wl_measures_readability.gunning_fog_index(main, test_text_eng_0) - fog_index_eng_12_propn = wl_measures_readability.gunning_fog_index(main, test_text_eng_12_propn) - fog_index_pol_12 = wl_measures_readability.gunning_fog_index(main, test_text_pol_12) - fog_index_spa_12 = wl_measures_readability.gunning_fog_index(main, test_text_spa_12) 
+def test_fog_index(): + fog_index_eng_0 = wl_measures_readability.fog_index(main, test_text_eng_0) + fog_index_eng_12_propn = wl_measures_readability.fog_index(main, test_text_eng_12_propn) + fog_index_pol_12 = wl_measures_readability.fog_index(main, test_text_pol_12) + fog_index_spa_12 = wl_measures_readability.fog_index(main, test_text_spa_12) print('Gunning Fog Index:') print(f'\teng/0: {fog_index_eng_0}') @@ -394,10 +411,10 @@ def test_gunning_fog_index(): assert fog_index_pol_12 == 0.4 * (12 / 3 + 1 / 12 * 100) assert fog_index_spa_12 == 'no_support' -def test_legibility_mu(): - mu_spa_0 = wl_measures_readability.legibility_mu(main, test_text_spa_0) - mu_spa_12 = wl_measures_readability.legibility_mu(main, test_text_spa_12) - mu_eng_12 = wl_measures_readability.legibility_mu(main, test_text_eng_12) +def test_mu(): + mu_spa_0 = wl_measures_readability.mu(main, test_text_spa_0) + mu_spa_12 = wl_measures_readability.mu(main, test_text_spa_12) + mu_eng_12 = wl_measures_readability.mu(main, test_text_eng_12) print('Legibilidad µ:') print(f'\tspa/0: {mu_spa_0}') @@ -439,10 +456,10 @@ def test_lix(): assert lix_eng_12 == 12 / 3 + 100 * (3 / 12) assert lix_spa_12 != 'no_support' -def test_mcalpine_eflaw(): - eflaw_eng_0 = wl_measures_readability.mcalpine_eflaw(main, test_text_eng_0) - eflaw_eng_12 = wl_measures_readability.mcalpine_eflaw(main, test_text_eng_12) - eflaw_spa_12 = wl_measures_readability.mcalpine_eflaw(main, test_text_spa_12) +def test_eflaw(): + eflaw_eng_0 = wl_measures_readability.eflaw(main, test_text_eng_0) + eflaw_eng_12 = wl_measures_readability.eflaw(main, test_text_eng_12) + eflaw_spa_12 = wl_measures_readability.eflaw(main, test_text_spa_12) print('McAlpine EFLAW Readability Score:') print(f'\teng/0: {eflaw_eng_0}') @@ -511,17 +528,17 @@ def test_spache_grade_level(): assert grade_level_eng_100 == numpy.mean([0.141 * (100 / 25) + 0.086 * (25 / 100 * 100) + 0.839] * 3) assert grade_level_spa_12 == 'no_support' -def test_wiener_sachtextformel(): 
- wstf_deu_0 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_0) +def test_wstf(): + wstf_deu_0 = wl_measures_readability.wstf(main, test_text_deu_0) settings['wstf']['variant'] = '1' - wstf_deu_12_1 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12) + wstf_deu_12_1 = wl_measures_readability.wstf(main, test_text_deu_12) settings['wstf']['variant'] = '2' - wstf_deu_12_2 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12) + wstf_deu_12_2 = wl_measures_readability.wstf(main, test_text_deu_12) settings['wstf']['variant'] = '3' - wstf_deu_12_3 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12) + wstf_deu_12_3 = wl_measures_readability.wstf(main, test_text_deu_12) settings['wstf']['variant'] = '4' - wstf_deu_12_4 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12) - wstf_eng_12 = wl_measures_readability.wiener_sachtextformel(main, test_text_eng_12) + wstf_deu_12_4 = wl_measures_readability.wstf(main, test_text_deu_12) + wstf_eng_12 = wl_measures_readability.wstf(main, test_text_eng_12) print('Wiener Sachtextformel:') print(f'\tdeu/0: {wstf_deu_0}') @@ -544,8 +561,8 @@ def test_wiener_sachtextformel(): assert wstf_eng_12 == 'no_support' if __name__ == '__main__': - test_automated_ara_readability_index() - test_automated_readability_index() + test_aari() + test_ari() test_bormuths_cloze_mean() test_bormuths_gp() test_coleman_liau_index() @@ -553,21 +570,22 @@ def test_wiener_sachtextformel(): test_dale_chall_readability_formula() test_dale_chall_readability_formula_new() test_danielson_bryans_readability_formula() + test_drp() test_devereux_readability_index() - test_flesch_kincaid_grade_level() - test_flesch_reading_ease() - test_flesch_reading_ease_simplified() - test_forcast_grade_level() - test_formula_de_comprensibilidad_de_gutierrez_de_polini() + test_gl() + test_re_flesch() + test_re_simplified() + test_rgl() + test_cp() test_formula_de_crawford() 
test_gulpease_index() - test_gunning_fog_index() - test_legibility_mu() + test_fog_index() + test_mu() test_lensear_write() test_lix() - test_mcalpine_eflaw() + test_eflaw() test_osman() test_rix() test_smog_grade() test_spache_grade_level() - test_wiener_sachtextformel() + test_wstf() diff --git a/tests/wl_tests_work_area/test_profiler.py b/tests/wl_tests_work_area/test_profiler.py index 8d70ff5ed..bd882fb3d 100644 --- a/tests/wl_tests_work_area/test_profiler.py +++ b/tests/wl_tests_work_area/test_profiler.py @@ -95,7 +95,7 @@ def update_gui(err_msg, texts_stats_files): count_tokens_lens_syls.append(collections.Counter(len_tokens_syls)) count_tokens_lens_chars.append(collections.Counter(len_tokens_chars)) - assert len(readability_statistics) == 26 + assert len(readability_statistics) == 28 # Counts assert count_paras diff --git a/wordless/wl_measures/wl_measures_readability.py b/wordless/wl_measures/wl_measures_readability.py index dab05e257..7374139c5 100644 --- a/wordless/wl_measures/wl_measures_readability.py +++ b/wordless/wl_measures/wl_measures_readability.py @@ -17,10 +17,12 @@ # ---------------------------------------------------------------------- import bisect +import math import random import re import numpy + from wordless.wl_checks import wl_checks_tokens from wordless.wl_nlp import wl_pos_tagging, wl_syl_tokenization from wordless.wl_utils import wl_misc, wl_paths @@ -128,7 +130,7 @@ def get_count_words_dale(words, num_easy_words): # Automated Arabic Readability Index # Reference: Al-Tamimi, A., Jaradat M., Aljarrah, N., & Ghanim, S. (2013). AARI: Automatic Arabic readability index. The International Arab Journal of Information Technology, 11(4), pp. 370–378. -def automated_ara_readability_index(main, text): +def aari(main, text): if text.lang == 'ara': text = get_counts(main, text) @@ -147,7 +149,7 @@ def automated_ara_readability_index(main, text): # Automated Readability Index # Reference: Smith, E. A., & Senter, R. J. (1967). 
Automated readability index. Aerospace Medical Research Laboratories. https://apps.dtic.mil/sti/pdfs/AD0667273.pdf -def automated_readability_index(main, text): +def ari(main, text): text = get_counts(main, text) if text.count_sentences and text.count_words: @@ -185,20 +187,17 @@ def bormuths_cloze_mean(main, text): return m def bormuths_gp(main, text): - if text.lang.startswith('eng_'): - m = bormuths_cloze_mean(main, text) - c = main.settings_custom['measures']['readability']['bormuths_gp']['cloze_criterion_score'] / 100 + m = bormuths_cloze_mean(main, text) - if m == 'text_too_short': - gp = m - else: - gp = ( - 4.275 + 12.881 * m - 34.934 * (m**2) + 20.388 * (m**3) - + 26.194 * c - 2.046 * (c**2) - 11.767 * (c**3) - - 44.285 * (m * c) + 97.620 * ((m * c)**2) - 59.538 * ((m * c)**3) - ) + if m not in ['no_support', 'text_too_short']: + c = main.settings_custom['measures']['readability']['bormuths_gp']['cloze_criterion_score'] / 100 + gp = ( + 4.275 + 12.881 * m - 34.934 * (m**2) + 20.388 * (m**3) + + 26.194 * c - 2.046 * (c**2) - 11.767 * (c**3) + - 44.285 * (m * c) + 97.620 * ((m * c)**2) - 59.538 * ((m * c)**3) + ) else: - gp = 'no_support' + gp = m return gp @@ -336,6 +335,20 @@ def danielson_bryans_readability_formula(main, text): return danielson_bryan +# Degrees of Reading Power +# References: +# College Entrance Examination Board. (1981). Degrees of reading power brings the students and the text together. +# Carver, R. P. (1985). Measuring readability using DRP units. Journal of Reading Behavior, 17(4), 303–316. https://doi.org/10.1080/10862968509547547 +def drp(main, text): + m = bormuths_cloze_mean(main, text) + + if m not in ['no_support', 'text_too_short']: + drp = 100 - math.floor(m * 100 + 0.5) + else: + drp = m + + return drp + # Devereux Readability Index # Reference: Smith, E. A. (1961). Devereaux readability index. Journal of Educational Research, 54(8), 298–303. 
https://doi.org/10.1080/00220671.1961.10882728 def devereux_readability_index(main, text): @@ -354,7 +367,7 @@ def devereux_readability_index(main, text): # Flesch-Kincaid Grade Level # Reference: Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel. Naval Air Station Memphis. https://apps.dtic.mil/sti/pdfs/ADA006655.pdf -def flesch_kincaid_grade_level(main, text): +def gl(main, text): if text.lang in main.settings_global['syl_tokenizers']: text = get_counts(main, text) @@ -394,7 +407,7 @@ def flesch_kincaid_grade_level(main, text): # Garais, E. (2011). Web applications readability. Journal of Information Systems and Operations Management, 5(1), 117–121. http://www.rebe.rau.ro/RePEc/rau/jisomg/SP11/JISOM-SP11-A13.pdf # Spanish variant (Szigriszt Pazos): # Szigriszt Pazos, F. (1993). Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y -def flesch_reading_ease(main, text): +def re_flesch(main, text): if text.lang in main.settings_global['syl_tokenizers']: text = get_counts(main, text) @@ -464,7 +477,7 @@ def flesch_reading_ease(main, text): # Flesch Reading Ease (Simplified) # Reference: Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. Journal of Applied Psychology, 35(5), 333–337. https://doi.org/10.1037/h0062427 -def flesch_reading_ease_simplified(main, text): +def re_simplified(main, text): if text.lang in main.settings_global['syl_tokenizers']: text = get_counts(main, text) @@ -485,7 +498,7 @@ def flesch_reading_ease_simplified(main, text): # FORCAST Grade Level # Reference: Caylor, J. S., Sticht, T. G., Fox, L. C., & Ford, J. P. 
(1973). Methodologies for determining reading requirements of military occupational specialties. Human Resource Research Organization. https://files.eric.ed.gov/fulltext/ED074343.pdf -def forcast_grade_level(main, text): +def rgl(main, text): if text.lang in main.settings_global['syl_tokenizers']: text = get_counts(main, text) @@ -506,7 +519,7 @@ def forcast_grade_level(main, text): # References: # Gutiérrez de Polini, L. E. (1972). Investigación sobre lectura en Venezuela [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela. # Rodríguez Trujillo, N. (1980). Determinación de la comprensibilidad de materiales de lectura por medio de variables lingüísticas. Lectura y Vida, 1(1). http://www.lecturayvida.fahce.unlp.edu.ar/numeros/a1n1/01_01_Rodriguez.pdf -def formula_de_comprensibilidad_de_gutierrez_de_polini(main, text): +def cp(main, text): if text.lang == 'spa': text = get_counts(main, text) @@ -567,7 +580,7 @@ def gulpease_index(main, text): # Gunning, R. (1968). The technique of clear writing (revised ed.). McGraw-Hill Book Company. # Polish variant: # Pisarek, W. (1969). Jak mierzyć zrozumiałość tekstu?. Zeszyty Prasoznawcze, 4(42), 35–48. -def gunning_fog_index(main, text): +def fog_index(main, text): if text.lang.startswith('eng_') or text.lang == 'pol' and text.lang in main.settings_global['syl_tokenizers']: text = get_counts(main, text) @@ -604,7 +617,7 @@ def gunning_fog_index(main, text): # Legibilidad µ # Reference: Muñoz Baquedano, M. (2006). Legibilidad y variabilidad de los textos. Boletín de Investigación Educacional, Pontificia Universidad Católica de Chile, 21(2), 13–26. -def legibility_mu(main, text): +def mu(main, text): if text.lang == 'spa': text = get_counts(main, text) @@ -689,7 +702,7 @@ def lix(main, text): # McAlpine EFLAW Readability Score # Reference: Nirmaldasan. (2009, April 30). McAlpine EFLAW readability score. Readability Monitor. 
Retrieved November 15, 2022, from https://strainindex.wordpress.com/2009/04/30/mcalpine-eflaw-readability-score/ -def mcalpine_eflaw(main, text): +def eflaw(main, text): if text.lang.startswith('eng_'): text = get_counts(main, text) @@ -872,7 +885,7 @@ def spache_grade_level(main, text): # References: # Bamberger, R., & Vanecek, E. (1984). Lesen – Verstehen – Lernen – Schreiben. Jugend und Volk. # Lesbarkeitsindex. (2022, July 21). In Wikipedia. https://de.wikipedia.org/w/index.php?title=Lesbarkeitsindex&oldid=224664667 -def wiener_sachtextformel(main, text): +def wstf(main, text): if text.lang.startswith('deu_') and text.lang in main.settings_global['syl_tokenizers']: text = get_counts(main, text) diff --git a/wordless/wl_profiler.py b/wordless/wl_profiler.py index 0fb23a07d..f39a9402f 100644 --- a/wordless/wl_profiler.py +++ b/wordless/wl_profiler.py @@ -366,6 +366,7 @@ def __init__(self, parent): _tr('wl_profiler', 'Dale-Chall Readability Formula'), _tr('wl_profiler', 'Dale-Chall Readability Formula (New)'), _tr('wl_profiler', "Danielson-Bryan's Readability Formula"), + _tr('wl_profiler', 'Degrees of Reading Power'), _tr('wl_profiler', 'Devereaux Readability Index'), _tr('wl_profiler', 'Flesch-Kincaid Grade Level'), _tr('wl_profiler', 'Flesch Reading Ease'), @@ -1176,8 +1177,8 @@ def run(self): # Readability if self.profiler_tab in ['readability', 'all']: readability_stats = [ - wl_measures_readability.automated_ara_readability_index(self.main, text), - wl_measures_readability.automated_readability_index(self.main, text), + wl_measures_readability.aari(self.main, text), + wl_measures_readability.ari(self.main, text), wl_measures_readability.bormuths_cloze_mean(self.main, text), wl_measures_readability.bormuths_gp(self.main, text), wl_measures_readability.coleman_liau_index(self.main, text), @@ -1185,24 +1186,25 @@ def run(self): wl_measures_readability.dale_chall_readability_formula(self.main, text), 
wl_measures_readability.dale_chall_readability_formula_new(self.main, text), wl_measures_readability.danielson_bryans_readability_formula(self.main, text), + wl_measures_readability.drp(self.main, text), wl_measures_readability.devereux_readability_index(self.main, text), - wl_measures_readability.flesch_kincaid_grade_level(self.main, text), - wl_measures_readability.flesch_reading_ease(self.main, text), - wl_measures_readability.flesch_reading_ease_simplified(self.main, text), - wl_measures_readability.forcast_grade_level(self.main, text), - wl_measures_readability.formula_de_comprensibilidad_de_gutierrez_de_polini(self.main, text), + wl_measures_readability.gl(self.main, text), + wl_measures_readability.re_flesch(self.main, text), + wl_measures_readability.re_simplified(self.main, text), + wl_measures_readability.rgl(self.main, text), + wl_measures_readability.cp(self.main, text), wl_measures_readability.formula_de_crawford(self.main, text), wl_measures_readability.gulpease_index(self.main, text), - wl_measures_readability.gunning_fog_index(self.main, text), - wl_measures_readability.legibility_mu(self.main, text), + wl_measures_readability.fog_index(self.main, text), + wl_measures_readability.mu(self.main, text), wl_measures_readability.lensear_write(self.main, text), wl_measures_readability.lix(self.main, text), - wl_measures_readability.mcalpine_eflaw(self.main, text), + wl_measures_readability.eflaw(self.main, text), wl_measures_readability.osman(self.main, text), wl_measures_readability.rix(self.main, text), wl_measures_readability.smog_grade(self.main, text), wl_measures_readability.spache_grade_level(self.main, text), - wl_measures_readability.wiener_sachtextformel(self.main, text) + wl_measures_readability.wstf(self.main, text) ] else: readability_stats = None