From 6e944a505020111eb6cf1a02a983a70286381dc5 Mon Sep 17 00:00:00 2001 From: BLKSerene Date: Wed, 2 Aug 2023 15:56:11 +0800 Subject: [PATCH] Work Area: Add Profiler - Readability - Easy Listening Formula --- CHANGELOG.md | 2 +- doc/doc_eng.md | 154 +++++++++--------- .../dispersion_adjusted_frequency/ald.svg | 130 +++++++-------- .../dispersion_adjusted_frequency/arf.svg | 92 ++++++----- .../dispersion_adjusted_frequency/awt.svg | 116 ++++++------- .../carrolls_um.svg | 114 ++++++------- .../engwalls_fm.svg | 35 ++-- .../griess_dp.svg | 125 +++++++------- .../juillands_u.svg | 112 ++++++------- .../kromers_ur.svg | 62 +++---- .../lynes_d3.svg | 80 ++++----- .../rosengrens_s.svg | 83 +++++----- .../zhangs_distributional_consistency.svg | 72 ++++---- doc/measures/readability/elf.svg | 61 +++++++ .../test_measures_readability.py | 18 ++ tests/wl_tests_work_area/test_profiler.py | 2 +- .../wl_measures/wl_measures_readability.py | 19 ++- wordless/wl_profiler.py | 2 + 18 files changed, 690 insertions(+), 589 deletions(-) create mode 100644 doc/measures/readability/elf.svg diff --git a/CHANGELOG.md b/CHANGELOG.md index bbaa2f107..bcc3e469c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ - Utils: Add spaCy's Korean sentence recognizer, word tokenizer, part-of-speech tagger, lemmatizer, and dependency parser - Utils: Add spaCy's Malay word tokenizer - Utils: Add spaCy's Slovenian sentence recognizer, part-of-speech tagger, lemmatizer, and dependency parser -- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement / Coleman's Readability Formula / Dale-Chall Readability Formula (New) / Danielson-Bryan's Readability Formula / Degrees of Reading Power +- Work Area: Add Profiler - Readability - Bormuth's Cloze Mean / Bormuth's Grade Placement / Coleman's Readability Formula / Dale-Chall Readability Formula (New) / Danielson-Bryan's Readability Formula / Degrees of Reading Power / Easy Listening Formula ### ✨ Improvements - Utils: 
Update Wordless's sentence and sentence segment splitters diff --git a/doc/doc_eng.md b/doc/doc_eng.md index 5ac3ccbcd..076006786 100644 --- a/doc/doc_eng.md +++ b/doc/doc_eng.md @@ -941,6 +941,9 @@ Degrees of Reading Power: Devereux Readability Index: \text{Grade Placement} = 1.56 \times \frac{\text{NumCharsAll}}{\text{NumWords}} + 0.19 \times \frac{\text{NumWords}}{\text{NumSentences}} - 6.49 +Easy Listening Formula: + \text{ELF} = \frac{\text{NumSyls} - \text{NumWords}}{\text{NumSentences}} + Flesch-Kincaid Grade Level: \text{GL} = 0.39 \times \frac{\text{NumWords}}{\text{NumSentences}} + 11.8 \times \frac{\text{NumSyls}}{\text{NumWords}} - 15.59 @@ -1023,6 +1026,7 @@ Measure of Readability|Formula Danielson-Bryan's Readability Formula
([Danielson & Bryan, 1963](#ref-danielson-bryan-1963))|![Formula](/doc/measures/readability/danielson_bryans_readability_formula.svg)

* This measure has 2 variants, which you could select via **Menu → Preferences → Settings → Measures → Readability → Danielson-Bryan's Readability Formula → Variant**. Degrees of Reading Power
([College Entrance Examination Board, 1981](#ref-college-entrance-examination-board-1981))|![Formula](/doc/measures/readability/drp.svg)
where **M** is *Bormuth's cloze mean*.

* This measure applies only to **English texts**. Devereux Readability Index
([Smith, 1961](#ref-smith-1961))|![Formula](/doc/measures/readability/devereux_readability_index.svg) +Easy Listening Formula¹
([Fang, 1966](#ref-fang-1966))|![Formula](/doc/measures/readability/elf.svg) Flesch-Kincaid Grade Level¹
([Kincaid et al., 1975](#ref-kincaid-et-al-1975))|![Formula](/doc/measures/readability/gl.svg) Flesch Reading Ease¹
([Flesch, 1948](#ref-flesch-1948)
Dutch: [Douma, 1960](#ref-douma-1960); [Brouwer, 1963](#ref-brouwer-1963)
French: [Kandel & Moles, 1958](#ref-kandel-moles-1958)
German: [Amstad, 1978](#ref-amstad-1978)
Italian: [Franchina & Vacca, 1986](#ref-franchina-vacca-1986)
Russian: [Oborneva, 2006](#ref-oborneva-2006)
Spanish: [Fernández Huerta, 1959](#ref-fernandez-huerta-1959); [Szigriszt Pazos, 1993](#ref-szigrisze-pazos-1993))|![Formula](/doc/measures/readability/re.svg)

* This measure has multiple variants for some languages, which you could select via **Menu → Preferences → Settings → Measures → Readability → Flesch Reading Ease**. Flesch Reading Ease (Simplified)¹
([Farr et al., 1951](#ref-farr-et-al-1951))|![Formula](/doc/measures/readability/re_simplified.svg) @@ -1056,68 +1060,60 @@ Then, the dispersion and adjusted frequency of the word are calculated as follow Measure of Dispersion (Parts-based)|Measure of Adjusted Frequency (Parts-based)|Formula @@ -1428,95 +1424,97 @@ Cubic Association Ratio
([Daille, 1994](#ref-daille-1994), [1995](#ref-daille [29] [**^**](#ref-osman) El-Haj, M., & Rayson, P. (2016). OSMAN: A novel Arabic readability metric. In N. Calzolari, K. Choukri, T. Declerck, S. Goggi, M. Grobelnik, B. Maegaard, J. Mariani, H. Mazo, A. Moreno, J. Odijk, & S. Piperidis (Eds.), *Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)* (pp. 250–255). European Language Resources Association. http://www.lrec-conf.org/proceedings/lrec2016/index.html
[30] [**^**](#ref-engwalls-fm) Engwall, G. (1974). *Fréquence et distribution du vocabulaire dans un choix de romans français* [Unpublished doctoral dissertation]. Stockholm University.
+ +[31] [**^**](#ref-elf) Fang, I. E. (1966). The easy listening formula. *Journal of Broadcasting*, *11*(1), 63–68. https://doi.org/10.1080/08838156609363529
-[31] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
+[32] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
-[32] [**^**](#ref-re) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
+[33] [**^**](#ref-re) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
-[33] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
+[34] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
-[34] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
+[35] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
-[35] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
+[36] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
-[36] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
+[37] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
-[37] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
+[38] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
-[38] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
+[39] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
-[39] [**^**](#ref-cp) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
+[40] [**^**](#ref-cp) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
-[40] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/.
+[41] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/
-[41] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
+[42] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
-[42] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
+[43] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
-[43] [**^**](#ref-re) Kandel, L., & Moles A. (1958). Application de l’indice de flesch la langue francaise [applying flesch index to french language]. *The Journal of Educational Research*, *21*, 283–287.
+[44] [**^**](#ref-re) Kandel, L., & Moles, A. (1958). Application de l’indice de Flesch à la langue française [Applying the Flesch index to the French language]. *The Journal of Educational Research*, *21*, 283–287.
-[44] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
+[45] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
-[45] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
+[46] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
-[46] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
+[47] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
-[47] [**^**](#ref-gl) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
+[48] [**^**](#ref-gl) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
-[48] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
+[49] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
-[49] [**^**](#ref-mi-log-f) Lexical Computing. (2015, July 8). *Statistics used in Sketch Engine*. Sketch Engine. https://www.sketchengine.eu/documentation/statistics-used-in-sketch-engine/
+[50] [**^**](#ref-mi-log-f) Lexical Computing. (2015, July 8). *Statistics used in Sketch Engine*. Sketch Engine. https://www.sketchengine.eu/documentation/statistics-used-in-sketch-engine/
-[50] [**^**](#ref-colemans-readability-formula) Liau, T. L., Bassin, C. B., Martin, C. J., & Coleman, E. B. (1976). Modification of the Coleman readability formulas. *Journal of Reading Behavior*, *8*(4), 381–386. https://journals.sagepub.com/doi/pdf/10.1080/10862967609547193
+[51] [**^**](#ref-colemans-readability-formula) Liau, T. L., Bassin, C. B., Martin, C. J., & Coleman, E. B. (1976). Modification of the Coleman readability formulas. *Journal of Reading Behavior*, *8*(4), 381–386. https://journals.sagepub.com/doi/pdf/10.1080/10862967609547193
-[51] [**^**](#ref-griess-dp-norm) Lijffijt, J., & Gries, S. T. (2012). Correction to Stefan Th. Gries’ “dispersions and adjusted frequencies in corpora”. *International Journal of Corpus Linguistics*, *17*(1), 147–149. https://doi.org/10.1075/ijcl.17.1.08lij
+[52] [**^**](#ref-griess-dp-norm) Lijffijt, J., & Gries, S. T. (2012). Correction to Stefan Th. Gries’ “dispersions and adjusted frequencies in corpora”. *International Journal of Corpus Linguistics*, *17*(1), 147–149. https://doi.org/10.1075/ijcl.17.1.08lij
-[52] [**^**](#ref-gulpease-index) Lucisano, P., & Emanuela Piemontese, M. (1988). GULPEASE: A formula for the prediction of the difficulty of texts in Italian. *Scuola e Città*, *39*(3), pp. 110–124.
+[53] [**^**](#ref-gulpease-index) Lucisano, P., & Piemontese, M. E. (1988). GULPEASE: A formula for the prediction of the difficulty of texts in Italian. *Scuola e Città*, *39*(3), 110–124.
-[53] [**^**](#ref-lynes-d3) Lyne, A. A. (1985). Dispersion. In *The vocabulary of French business correspondence: Word frequencies, collocations, and problems of lexicometric method* (pp. 101–124). Slatkine/Champion.
+[54] [**^**](#ref-lynes-d3) Lyne, A. A. (1985). Dispersion. In *The vocabulary of French business correspondence: Word frequencies, collocations, and problems of lexicometric method* (pp. 101–124). Slatkine/Champion.
-[54] [**^**](#ref-smog-grade) McLaughlin, G. H. (1969). SMOG grading: A new readability formula. *Journal of Reading*, *12*(8), pp. 639–646.
+[55] [**^**](#ref-smog-grade) McLaughlin, G. H. (1969). SMOG grading: A new readability formula. *Journal of Reading*, *12*(8), 639–646.
-[55] [**^**](#ref-mu) Muñoz Baquedano, M. (2006). Legibilidad y variabilidad de los textos. *Boletín de Investigación Educacional, Pontificia Universidad Católica de Chile*, *21*(2), 13–26.
+[56] [**^**](#ref-mu) Muñoz Baquedano, M. (2006). Legibilidad y variabilidad de los textos. *Boletín de Investigación Educacional, Pontificia Universidad Católica de Chile*, *21*(2), 13–26.
-[56] [**^**](#ref-eflaw) Nirmaldasan. (2009, April 30). *McAlpine EFLAW readability score*. Readability Monitor. Retrieved November 15, 2022, from https://strainindex.wordpress.com/2009/04/30/mcalpine-eflaw-readability-score/
+[57] [**^**](#ref-eflaw) Nirmaldasan. (2009, April 30). *McAlpine EFLAW readability score*. Readability Monitor. Retrieved November 15, 2022, from https://strainindex.wordpress.com/2009/04/30/mcalpine-eflaw-readability-score/
-[57] [**^**](#ref-pearsons-chi-squared-test) Oakes, M. P. (1998). *Statistics for Corpus Linguistics*. Edinburgh University Press.
+[58] [**^**](#ref-pearsons-chi-squared-test) Oakes, M. P. (1998). *Statistics for Corpus Linguistics*. Edinburgh University Press.
-[58] [**^**](#ref-re) Oborneva, I. V. (2006). *Автоматизированная оценка сложности учебных текстов на основе статистических параметров* [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
+[59] [**^**](#ref-re) Oborneva, I. V. (2006). *Автоматизированная оценка сложности учебных текстов на основе статистических параметров* [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3
-[59] [**^**](#ref-lensear-write) O’Hayre, J. (1966). *Gobbledygook has gotta go*. U.S. Government Printing Office. https://www.governmentattic.org/15docs/Gobbledygook_Has_Gotta_Go_1966.pdf
+[60] [**^**](#ref-lensear-write) O’Hayre, J. (1966). *Gobbledygook has gotta go*. U.S. Government Printing Office. https://www.governmentattic.org/15docs/Gobbledygook_Has_Gotta_Go_1966.pdf
-[60] [**^**](#ref-students-t-test-2-sample) Paquot, M., & Bestgen, Y. (2009). Distinctive words in academic writing: A comparison of three statistical tests for keyword extraction. *Language and Computers*, *68*, 247–269.
+[61] [**^**](#ref-students-t-test-2-sample) Paquot, M., & Bestgen, Y. (2009). Distinctive words in academic writing: A comparison of three statistical tests for keyword extraction. *Language and Computers*, *68*, 247–269.
-[61] [**^**](#ref-fishers-exact-test) Pedersen, T. (1996). Fishing for exactness. In T. Winn (Ed.), *Proceedings of the Sixth Annual South-Central Regional SAS Users' Group Conference* (pp. 188–200). The South–Central Regional SAS Users' Group.
+[62] [**^**](#ref-fishers-exact-test) Pedersen, T. (1996). Fishing for exactness. In T. Winn (Ed.), *Proceedings of the Sixth Annual South-Central Regional SAS Users' Group Conference* (pp. 188–200). The South-Central Regional SAS Users' Group.
-[62] [**^**](#ref-min-sensitivity) Pedersen, T. (1998). Dependent bigram identification. In *Proceedings of the Fifteenth National Conference on Artificial Intelligence* (p. 1197). AAAI Press.
+[63] [**^**](#ref-min-sensitivity) Pedersen, T. (1998). Dependent bigram identification. In *Proceedings of the Fifteenth National Conference on Artificial Intelligence* (p. 1197). AAAI Press.
-[63] [**^**](#ref-fog-index) Pisarek, W. (1969). Jak mierzyć zrozumiałość tekstu?. *Zeszyty Prasoznawcze*, *4*(42), 35–48.
+[64] [**^**](#ref-fog-index) Pisarek, W. (1969). Jak mierzyć zrozumiałość tekstu? *Zeszyty Prasoznawcze*, *4*(42), 35–48.
-[64] [**^**](#ref-odds-ratio) Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. *Corpus Linguistics and Linguistic Theory*, *15*(1), pp. 133–167. https://doi.org/10.1515/cllt-2015-0030
+[65] [**^**](#ref-odds-ratio) Pojanapunya, P., & Todd, R. W. (2016). Log-likelihood and odds ratio keyness statistics for different purposes of keyword analysis. *Corpus Linguistics and Linguistic Theory*, *15*(1), 133–167. https://doi.org/10.1515/cllt-2015-0030
-[65] [**^**](#ref-poisson-collocation-measure) Quasthoff, U., & Wolff, C. (2002). The poisson collocation measure and its applications. *Proceedings of 2nd International Workshop on Computational Approaches to Collocations*. IEEE.
+[66] [**^**](#ref-poisson-collocation-measure) Quasthoff, U., & Wolff, C. (2002). The Poisson collocation measure and its applications. *Proceedings of 2nd International Workshop on Computational Approaches to Collocations*. IEEE.
-[66] [**^**](#ref-rosengrens-s)[**^**](#ref-rosengrens-kf) Rosengren, I. (1971). The quantitative concept of language and its relation to the structure of frequency dictionaries. *Études de linguistique appliquée*, *1*, 103–127.
+[67] [**^**](#ref-rosengrens-s)[**^**](#ref-rosengrens-kf) Rosengren, I. (1971). The quantitative concept of language and its relation to the structure of frequency dictionaries. *Études de linguistique appliquée*, *1*, 103–127.
-[67] [**^**](#ref-log-dice) Rychlý, P. (2008). A lexicographyer-friendly association score. In P. Sojka & A. Horák (Eds.), *Proceedings of Second Workshop on Recent Advances in Slavonic Natural Languages Processing*. Masaryk University
+[68] [**^**](#ref-log-dice) Rychlý, P. (2008). A lexicographer-friendly association score. In P. Sojka & A. Horák (Eds.), *Proceedings of Second Workshop on Recent Advances in Slavonic Natural Language Processing*. Masaryk University.
-[68] [**^**](#ref-ald) [**^**](#ref-fald) [**^**](#ref-arf) [**^**](#ref-farf) [**^**](#ref-awt) [**^**](#ref-fawt) Savický, P., & Hlaváčová, J. (2002). Measures of word commonness. *Journal of Quantitative Linguistics*, *9*(3), 215–231. https://doi.org/10.1076/jqul.9.3.215.14124
+[69] [**^**](#ref-ald) [**^**](#ref-fald) [**^**](#ref-arf) [**^**](#ref-farf) [**^**](#ref-awt) [**^**](#ref-fawt) Savický, P., & Hlaváčová, J. (2002). Measures of word commonness. *Journal of Quantitative Linguistics*, *9*(3), 215–231. https://doi.org/10.1076/jqul.9.3.215.14124
-[69] [**^**](#ref-dices-coeff) Smadja, F., McKeown, K. R., & Hatzivassiloglou, V. (1996). Translating collocations for bilingual lexicons: A statistical approach. *Computational Linguistics*, *22*(1), pp. 1–38.
+[70] [**^**](#ref-dices-coeff) Smadja, F., McKeown, K. R., & Hatzivassiloglou, V. (1996). Translating collocations for bilingual lexicons: A statistical approach. *Computational Linguistics*, *22*(1), 1–38.
-[70] [**^**](#ref-devereux-readability-index) Smith, E. A. (1961). Devereaux readability index. *Journal of Educational Research*, *54*(8), 298–303. https://doi.org/10.1080/00220671.1961.10882728
+[71] [**^**](#ref-devereux-readability-index) Smith, E. A. (1961). Devereaux readability index. *Journal of Educational Research*, *54*(8), 298–303. https://doi.org/10.1080/00220671.1961.10882728
-[71] [**^**](#ref-ari) Smith, E. A., & Senter, R. J. (1967). *Automated readability index*. Aerospace Medical Research Laboratories. https://apps.dtic.mil/sti/pdfs/AD0667273.pdf
+[72] [**^**](#ref-ari) Smith, E. A., & Senter, R. J. (1967). *Automated readability index*. Aerospace Medical Research Laboratories. https://apps.dtic.mil/sti/pdfs/AD0667273.pdf
-[72] [**^**](#ref-spache-grade-level) Spache, G. (1953). A new readability formula for primary-grade reading materials. *Elementary School Journal*, *53*(7), 410–413. https://doi.org/10.1086/458513
+[73] [**^**](#ref-spache-grade-level) Spache, G. (1953). A new readability formula for primary-grade reading materials. *Elementary School Journal*, *53*(7), 410–413. https://doi.org/10.1086/458513
-[73] [**^**](#ref-re) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
+[74] [**^**](#ref-re) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
-[74] [**^**](#ref-lfmd)[**^**](#ref-md) Thanopoulos, A., Fakotakis, N., & Kokkinakis, G. (2002). Comparative evaluation of collocation extraction metrics. In M. G. González & C. P. S. Araujo (Eds.), *Proceedings of the Third International Conference on Language Resources and Evaluation* (pp. 620–625). European Language Resources Association.
+[75] [**^**](#ref-lfmd)[**^**](#ref-md) Thanopoulos, A., Fakotakis, N., & Kokkinakis, G. (2002). Comparative evaluation of collocation extraction metrics. In M. G. González & C. P. S. Araujo (Eds.), *Proceedings of the Third International Conference on Language Resources and Evaluation* (pp. 620–625). European Language Resources Association.
-[75] [**^**](#ref-log-likehood-ratio-test-bayes-factor)[**^**](#ref-students-t-test-2-sample-bayes-factor) Wilson, A. (2013). Embracing Bayes Factors for key item analysis in corpus linguistics. In M. Bieswanger & A. Koll-Stobbe (Eds.), *New Approaches to the Study of Linguistic Variability* (pp. 3–11). Peter Lang.
+[76] [**^**](#ref-log-likehood-ratio-test-bayes-factor)[**^**](#ref-students-t-test-2-sample-bayes-factor) Wilson, A. (2013). Embracing Bayes Factors for key item analysis in corpus linguistics. In M. Bieswanger & A. Koll-Stobbe (Eds.), *New Approaches to the Study of Linguistic Variability* (pp. 3–11). Peter Lang.
-[76] [**^**](#ref-zhangs-distributional-consistency) Zhang, H., Huang, C., & Yu, S. (2004). Distributional consistency: As a general method for defining a core lexicon. In M. T. Lino, M. F. Xavier, F. Ferreira, R. Costa, & R. Silva (Eds.), *Proceedings of Fourth International Conference on Language Resources and Evaluation* (pp. 1119–1122). European Language Resources Association.
+[77] [**^**](#ref-zhangs-distributional-consistency) Zhang, H., Huang, C., & Yu, S. (2004). Distributional consistency: As a general method for defining a core lexicon. In M. T. Lino, M. F. Xavier, F. Ferreira, R. Costa, & R. Silva (Eds.), *Proceedings of Fourth International Conference on Language Resources and Evaluation* (pp. 1119–1122). European Language Resources Association.
diff --git a/doc/measures/dispersion_adjusted_frequency/ald.svg b/doc/measures/dispersion_adjusted_frequency/ald.svg index 89ffed76c..d3d00dd55 100644 --- a/doc/measures/dispersion_adjusted_frequency/ald.svg +++ b/doc/measures/dispersion_adjusted_frequency/ald.svg @@ -1,90 +1,90 @@ - - + + + + + - - - - - - + + + + + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/arf.svg b/doc/measures/dispersion_adjusted_frequency/arf.svg index 4a20e5024..e5e0f25d8 100644 --- a/doc/measures/dispersion_adjusted_frequency/arf.svg +++ b/doc/measures/dispersion_adjusted_frequency/arf.svg @@ -1,58 +1,60 @@ - - + + - - - - - - - - - + + + + + + + + + + + - - - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/awt.svg b/doc/measures/dispersion_adjusted_frequency/awt.svg index e09785195..6bb8faa39 100644 --- a/doc/measures/dispersion_adjusted_frequency/awt.svg +++ b/doc/measures/dispersion_adjusted_frequency/awt.svg @@ -1,71 +1,71 @@ - - + + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/carrolls_um.svg b/doc/measures/dispersion_adjusted_frequency/carrolls_um.svg index c4fab5579..1d76e86c2 100644 --- a/doc/measures/dispersion_adjusted_frequency/carrolls_um.svg +++ b/doc/measures/dispersion_adjusted_frequency/carrolls_um.svg @@ -1,77 +1,77 @@ - - + + + - - - - + + + - - - + + + - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/engwalls_fm.svg b/doc/measures/dispersion_adjusted_frequency/engwalls_fm.svg index 4ff76ef2e..725fd2b01 100644 --- a/doc/measures/dispersion_adjusted_frequency/engwalls_fm.svg +++ b/doc/measures/dispersion_adjusted_frequency/engwalls_fm.svg @@ -1,22 +1,23 @@ - - + + - - - - - - + + + + + + + - - - - - - - - - + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/griess_dp.svg b/doc/measures/dispersion_adjusted_frequency/griess_dp.svg index d79a847b5..1725ab909 100644 --- a/doc/measures/dispersion_adjusted_frequency/griess_dp.svg +++ b/doc/measures/dispersion_adjusted_frequency/griess_dp.svg @@ -1,69 +1,70 @@ - - + + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/juillands_u.svg b/doc/measures/dispersion_adjusted_frequency/juillands_u.svg index aa13f02a8..db23e66d1 100644 --- a/doc/measures/dispersion_adjusted_frequency/juillands_u.svg +++ b/doc/measures/dispersion_adjusted_frequency/juillands_u.svg @@ -1,72 +1,72 @@ - - + + - - - - - - - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/kromers_ur.svg b/doc/measures/dispersion_adjusted_frequency/kromers_ur.svg index cb38f2e6a..d60dce178 100644 --- a/doc/measures/dispersion_adjusted_frequency/kromers_ur.svg +++ 
b/doc/measures/dispersion_adjusted_frequency/kromers_ur.svg @@ -1,40 +1,40 @@ - - + + + + + + + + + + + - - - - - - - + - - - - + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/lynes_d3.svg b/doc/measures/dispersion_adjusted_frequency/lynes_d3.svg index 9d417af43..5167a603c 100644 --- a/doc/measures/dispersion_adjusted_frequency/lynes_d3.svg +++ b/doc/measures/dispersion_adjusted_frequency/lynes_d3.svg @@ -1,55 +1,55 @@ - - + + + + - - - - - - - - + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/rosengrens_s.svg b/doc/measures/dispersion_adjusted_frequency/rosengrens_s.svg index 7dad464f9..dc0dc91c0 100644 --- a/doc/measures/dispersion_adjusted_frequency/rosengrens_s.svg +++ b/doc/measures/dispersion_adjusted_frequency/rosengrens_s.svg @@ -1,49 +1,50 @@ - - + + - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/dispersion_adjusted_frequency/zhangs_distributional_consistency.svg b/doc/measures/dispersion_adjusted_frequency/zhangs_distributional_consistency.svg index b51f6e53c..00d252ba3 100644 --- a/doc/measures/dispersion_adjusted_frequency/zhangs_distributional_consistency.svg +++ b/doc/measures/dispersion_adjusted_frequency/zhangs_distributional_consistency.svg @@ -1,47 +1,47 @@ - - + + - + + + + + + - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/elf.svg b/doc/measures/readability/elf.svg new file mode 100644 index 000000000..9f29134e1 --- /dev/null +++ b/doc/measures/readability/elf.svg @@ -0,0 
+1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/wl_tests_measures/test_measures_readability.py b/tests/wl_tests_measures/test_measures_readability.py index 20c918584..020298d7b 100644 --- a/tests/wl_tests_measures/test_measures_readability.py +++ b/tests/wl_tests_measures/test_measures_readability.py @@ -254,6 +254,23 @@ def test_devereux_readability_index(): assert grade_placement_eng_0 == 'text_too_short' assert grade_placement_eng_12 == grade_placement_spa_12 == 1.56 * (47 / 12) + 0.19 * (12 / 3) - 6.49 +def test_elf(): + elf_eng_0 = wl_measures_readability.elf(main, test_text_eng_0) + elf_eng_12 = wl_measures_readability.elf(main, test_text_eng_12) + elf_spa_12 = wl_measures_readability.elf(main, test_text_spa_12) + elf_other_12 = wl_measures_readability.elf(main, test_text_other_12) + + print('Easy Listening Formula:') + print(f'\teng/0: {elf_eng_0}') + print(f'\teng/12: {elf_eng_12}') + print(f'\tspa/12: {elf_spa_12}') + print(f'\tother/12: {elf_other_12}') + + assert elf_eng_0 == 'text_too_short' + assert elf_eng_12 == (15 - 12) / 3 + assert elf_spa_12 != 'no_support' + assert elf_other_12 == 'no_support' + def test_gl(): gl_eng_0 = wl_measures_readability.gl(main, test_text_eng_0) gl_eng_12 = wl_measures_readability.gl(main, test_text_eng_12) @@ -572,6 +589,7 @@ def test_wstf(): test_danielson_bryans_readability_formula() test_drp() test_devereux_readability_index() + test_elf() test_gl() test_re_flesch() test_re_simplified() diff --git a/tests/wl_tests_work_area/test_profiler.py b/tests/wl_tests_work_area/test_profiler.py index bd882fb3d..30c3c3a91 100644 --- a/tests/wl_tests_work_area/test_profiler.py +++ b/tests/wl_tests_work_area/test_profiler.py @@ -95,7 +95,7 @@ def update_gui(err_msg, texts_stats_files): count_tokens_lens_syls.append(collections.Counter(len_tokens_syls)) 
count_tokens_lens_chars.append(collections.Counter(len_tokens_chars)) - assert len(readability_statistics) == 28 + assert len(readability_statistics) == 29 # Counts assert count_paras diff --git a/wordless/wl_measures/wl_measures_readability.py b/wordless/wl_measures/wl_measures_readability.py index 7374139c5..377f59d1f 100644 --- a/wordless/wl_measures/wl_measures_readability.py +++ b/wordless/wl_measures/wl_measures_readability.py @@ -291,7 +291,9 @@ def dale_chall_readability_formula(main, text): return x_c50 # Dale-Chall Readability Formula (New) -# Reference: Chall, J. S., & Dale, E. (1995). Readability revisited: The new Dale-Chall readability formula. Brookline Books. +# References: +# Chall, J. S., & Dale, E. (1995). Readability revisited: The new Dale-Chall readability formula. Brookline Books. +# 清川英男. (1996). CHALL, J. S. and DALE, E.(1995) Readability Revisited: The New Dale-Chall Readability Formula. Brookline Books. 教育メディア研究, 3(1), 59. https://www.jstage.jst.go.jp/article/jaems/3/1/3_KJ00009004543/_pdf def dale_chall_readability_formula_new(main, text): if text.lang.startswith('eng_'): text = get_counts(main, text) @@ -365,6 +367,21 @@ def devereux_readability_index(main, text): return grade_placement +# Easy Listening Formula +# Reference: Fang, I. E. (1966). The easy listening formula. Journal of Broadcasting, 11(1), 63–68. https://doi.org/10.1080/08838156609363529 +def elf(main, text): + if text.lang in main.settings_global['syl_tokenizers']: + text = get_counts(main, text) + + if text.count_sentences: + elf = (text.count_syls - text.count_words) / text.count_sentences + else: + elf = 'text_too_short' + else: + elf = 'no_support' + + return elf + # Flesch-Kincaid Grade Level # Reference: Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel. Naval Air Station Memphis. 
https://apps.dtic.mil/sti/pdfs/ADA006655.pdf def gl(main, text): diff --git a/wordless/wl_profiler.py b/wordless/wl_profiler.py index f39a9402f..a6281b864 100644 --- a/wordless/wl_profiler.py +++ b/wordless/wl_profiler.py @@ -368,6 +368,7 @@ def __init__(self, parent): _tr('wl_profiler', "Danielson-Bryan's Readability Formula"), _tr('wl_profiler', 'Degrees of Reading Power'), _tr('wl_profiler', 'Devereaux Readability Index'), + _tr('wl_profiler', 'Easy Listening Formula'), _tr('wl_profiler', 'Flesch-Kincaid Grade Level'), _tr('wl_profiler', 'Flesch Reading Ease'), _tr('wl_profiler', 'Flesch Reading Ease (Simplified)'), @@ -1188,6 +1189,7 @@ def run(self): wl_measures_readability.danielson_bryans_readability_formula(self.main, text), wl_measures_readability.drp(self.main, text), wl_measures_readability.devereux_readability_index(self.main, text), + wl_measures_readability.elf(self.main, text), wl_measures_readability.gl(self.main, text), wl_measures_readability.re_flesch(self.main, text), wl_measures_readability.re_simplified(self.main, text),