diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b1da0323..f9824162f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ ## [3.3.0](https://github.com/BLKSerene/Wordless/releases/tag/3.3.0) - ??/??/2023 ### 🎉 New Features +- Settings: Add Settings - Measures - Readability - Flesch Reading Ease - Utils: Add khmer-nltk's Khmer sentence tokenizer, word tokenizer, and part-of-speech tagger - Utils: Add PyThaiNLP's perceptron part-of-speech tagger (Blackboard) - Utils: Add spaCy's Korean sentence recognizer, word tokenizer, part-of-speech tagger, lemmatizer, and dependency parser @@ -35,6 +36,7 @@ ### ❌ Removals - Utils: Remove PyThaiNLP's perceptron part-of-speech tagger (LST20) +- Work Area: Remove Profiler - Fernández Huerta's Readability Score / Szigriszt's Perspicuity Index ### ⏫ Dependency Changes - Dependencies: Add khmer-nltk diff --git a/doc/doc_eng.md b/doc/doc_eng.md index ae6ce9881..e9201160c 100644 --- a/doc/doc_eng.md +++ b/doc/doc_eng.md @@ -930,20 +930,21 @@ Dale-Chall Readability Score: Devereux Readability Index: Grade \; Placement = 1.56 \times \frac{NumCharsAll}{NumWords} + 0.19 \times \frac{NumWords}{NumSentences} - 6.49 -Fernández Huerta's Readability Score: - Score = 206.84 - 60 \times \frac{NumSyls}{NumWords} - 102 \times \frac{NumSentences}{NumWords} - Flesch-Kincaid Grade Level: GL = 0.39 \times \frac{NumWords}{NumSentences} + 11.8 \times \frac{NumSyls}{NumWords} - 15.59 Flesch Reading Ease: \begin{align*} - RE &= 206.835 - 0.846 \times \left(\frac{NumSyls}{NumWords} \times 100\right) - 1.015 \times \frac{NumWords}{NumSentences} \\ - RE_{Dutch} &= 206.84 - 77 \times \frac{NumSyls}{NumWords} - 0.93 \times \frac{NumWords}{NumSentences} \\ - RE_{French} &= 207 - 73.6 \times \frac{NumSyls}{NumWords} - 1.015 \times \frac{NumWords}{NumSentences} \\ - RE_{German} &= 180 - 58.5 \times \frac{NumSyls}{NumWords} - \frac{NumWords}{NumSentences} \\ - RE_{Italian} &= 217 - 60 \times \frac{NumSyls}{NumWords} - 1.3 \times 
\frac{NumWords}{NumSentences} \\ - RE_{Russian} &= 206.835 - 60.1 \times \frac{NumSyls}{NumWords} - 1.3 \times \frac{NumWords}{NumSentences} + ASW &= \frac{NumSyls}{NumWords} \qquad ASL = \frac{NumWords}{NumSentences} \\ + RE &= 206.835 - 0.846 \times \left(ASW \times 100\right) - 1.015 \times ASL \\ + RE_{Dutch-Douma} &= 206.84 - 77 \times ASW - 0.93 \times ASL \\ + RE_{Dutch-Brouwer} &= 195 - \frac{200}{3} \times ASW - 2 \times ASL \\ + RE_{French} &= 207 - 73.6 \times ASW - 1.015 \times ASL \\ + RE_{German} &= 180 - 58.5 \times ASW - ASL \\ + RE_{Italian} &= 217 - 60 \times ASW - 1.3 \times ASL \\ + RE_{Russian} &= 206.835 - 60.1 \times ASW - 1.3 \times ASL \\ + RE_{Spanish-Fernández \; Huerta} &= 206.84 - 60 \times ASW - 1.02 \times ASL \\ + RE_{Spanish-Szigriszt \; Pazos} &= 207 - 62.3 \times ASW - ASL \end{align*} Flesch Reading Ease (Simplified): @@ -988,15 +989,10 @@ SMOG Grade: Spache Grade Level: {Grade \; Level = 0.141 \times \frac{100}{NumSentences} + 0.086 \times \left(\frac{NumDifficultWords}{100} \times 100\right) + 0.839} -Szigriszt's Perspicuity Index: - p = 207 - 62.3 \times \frac{NumSyls}{NumWords} - \frac{NumWords}{NumSentences} - Wiener Sachtextformel: \begin{align*} - MS &= \frac{NumWordsPolysyllabic}{NumWords} \\ - SL &= \frac{NumWords}{NumSentences} \\ - IW &= \frac{NumLongWords}{NumWords} \\ - ES &= \frac{NumWordsMonosyllabic}{NumWords} \\ + MS &= \frac{NumWordsPolysyllabic}{NumWords} \qquad SL = \frac{NumWords}{NumSentences} \\ + IW &= \frac{NumLongWords}{NumWords} \qquad \qquad \quad ES = \frac{NumWordsMonosyllabic}{NumWords} \\ WSTF_1 &= 0.1925 \times MS + 0.1672 \times SL + 0.1297 \times IW - 0.0327 \times ES - 0.875 \\ WSTF_2 &= 0.2007 \times MS + 0.1682 \times SL + 0.1373 \times IW - 2.779 \\ WSTF_3 &= 0.2963 \times MS + 0.1905 \times SL - 1.1144 \\ @@ -1011,15 +1007,14 @@ Measure of Readability|Formula Coleman-Liau Index
([Coleman & Liau, 1975](#ref-coleman-liau-1975))|![Formula](/doc/measures/readability/coleman_liau_index.svg) Dale-Chall Readability Score
([Dale & Chall, 1948a](#ref-dale-chall-1948a))|![Formula](/doc/measures/readability/xc50.svg)
where **NumDifficultWords** is the number of words outside the Dale list of 3000 easy words ([Dale & Chall, 1948b](#ref-dale-chall-1948b)).

* This test applies only to **English texts**. Devereux Readability Index
([Smith, 1961](#ref-smith-1961))|![Formula](/doc/measures/readability/devereux_readability_index.svg) -Fernández Huerta's Readability Score1
([Fernández Huerta, 1959](#ref-fernandez-huerta-1959); [Law, 2011](#ref-law-2011))|![Formula](/doc/measures/readability/fernandez_huertas_readability_score.svg)

* This test applies only to **Spanish texts**. Flesch-Kincaid Grade Level1
([Kincaid et al., 1975](#ref-kincaid-et-al-1975))|![Formula](/doc/measures/readability/flesch_kincaid_grade_level.svg) -Flesch Reading Ease1
(English: [Flesch, 1948](#ref-flesch-1948); Dutch: [Douma, 1960](#ref-douma-1960); French: [Kandel & Moles, 1958](#ref-kandel-moles-1958); German: [Amstad, 1978](#ref-amstad-1978); Italian: [Franchina & Vacca, 1986](#ref-franchina-vacca-1986); Russian: [Oborneva, 2006](#ref-oborneva-2006))|![Formula](/doc/measures/readability/re.svg) +Flesch Reading Ease1
([Flesch, 1948](#ref-flesch-1948)
Dutch: [Douma, 1960](#ref-douma-1960); [Brouwer, 1963](#ref-brouwer-1963)
French: [Kandel & Moles, 1958](#ref-kandel-moles-1958)
German: [Amstad, 1978](#ref-amstad-1978)
Italian: [Franchina & Vacca, 1986](#ref-franchina-vacca-1986)
Russian: [Oborneva, 2006](#ref-oborneva-2006)
Spanish: [Fernández Huerta, 1959](#ref-fernandez-huerta-1959); [Szigriszt Pazos, 1993](#ref-szigrisze-pazos-1993))|![Formula](/doc/measures/readability/re.svg)

* This test has multiple variants for some languages, which you can select via **Menu → Preferences → Settings → Measures → Readability → Flesch Reading Ease**. Flesch Reading Ease (Simplified)1
([Farr et al., 1951](#ref-farr-et-al-1951))|![Formula](/doc/measures/readability/re_simplified.svg) -FORCAST Grade Level1
([Caylor et al., 1973](#ref-caylor-et-al-1973))|![Formula](/doc/measures/readability/rgl.svg)
* A sample of 150 words is taken randomly from the text, thus the text should be **at least 150 words long**. +FORCAST Grade Level1
([Caylor et al., 1973](#ref-caylor-et-al-1973))|![Formula](/doc/measures/readability/rgl.svg)

* A sample of 150 words is taken randomly from the text, thus the text should be **at least 150 words long**. Fórmula de comprensibilidad de Gutiérrez de Polini
([Gutiérrez de Polini, 1972](#ref-gutierrez-de-polini-1972))|![Formula](/doc/measures/readability/cp.svg)

* This test applies only to **Spanish texts**. Fórmula de Crawford1
([Crawford, 1985](#ref-crawford-1985))|![Formula](/doc/measures/readability/formula_de_crawford.svg)

* This test applies only to **Spanish texts**. Gulpease Index
([Lucisano & Emanuela Piemontese, 1988](#ref-lucisano-emanuela-piemontese-1988))|![Formula](/doc/measures/readability/gulpease_index.svg)

* This test applies only to **Italian texts**. -Gunning Fog Index1
(English: [Gunning, 1968](#ref-gunning-1968); Polish: [Pisarek, 1969](#ref-pisarek-1969))|![Formula](/doc/measures/readability/fog_index.svg)
where **NumHardWords** is the number of words with 3 or more syllables excluding all proper nouns and words with 3 syllables ending with *-ed* or *-es* for **English texts**, and the number of words with 4 or more syllables for **Polish texts**.

* This test applies only to **English texts** and **Polish texts**. +Gunning Fog Index1
(English: [Gunning, 1968](#ref-gunning-1968)
Polish: [Pisarek, 1969](#ref-pisarek-1969))|![Formula](/doc/measures/readability/fog_index.svg)
where **NumHardWords** is the number of words with 3 or more syllables excluding all proper nouns and words with 3 syllables ending with *-ed* or *-es* for **English texts**, and the number of words with 4 or more syllables for **Polish texts**.

* This test applies only to **English texts** and **Polish texts**. Legibilidad µ
([Muñoz Baquedano, 2006](#ref-munoz-baquedano-2006))|![Formula](/doc/measures/readability/mu.svg)
where **LenWordsAvg** is the average word length in letters, and **LenWordsVar** is the variance of word lengths in letters.

* This test applies only to **Spanish texts**.
* The text should be **at least 2 words long**. Lensear Write1
([O’Hayre, 1966](#ref-o-hayre-1966))|![Formula](/doc/measures/readability/lensear_write.svg)
where **NumWordsMonosyllabic** is the number of monosyllabic words excluding *the*, *is*, *are*, *was*, *were*, and **NumSentences** is the number of sentences to the nearest period.

* This test applies only to **English texts**.
* A sample of 100 words is taken randomly from the text.
If the text is **shorter than 100 words**, **NumWordsMonosyllabic** and **NumSentences** need to be multiplied by 100 and then divided by the number of words in the text. Lix
([Björnsson, 1968](#ref-bjornsson-1968))|![Formula](/doc/measures/readability/lix.svg)
where **NumLongWords** is the number of words with 7 or more letters. @@ -1028,7 +1023,6 @@ Measure of Readability|Formula Rix
([Anderson, 1983](#ref-anderson-1983))|![Formula](/doc/measures/readability/rix.svg)
where **NumLongWords** is the number of words with 7 or more letters. SMOG Grade1
([McLaughlin, 1969](#ref-mclaughlin-1969))|![Formula](/doc/measures/readability/smog_grade.svg)
where **NumWordsPolysyllabic** is the number of words with 3 or more syllables.

* A sample consisting of the first 10 sentences of the text, the last 10 sentences of the text, and 10 sentences in the middle of the text is taken from the text, thus the text should be **at least 30 sentences long**. Spache Grade Level
([Spache, 1953](#ref-spache-1953))|![Formula](/doc/measures/readability/spache_grade_level.svg)
where **NumDifficultWords** is the number of words outside the Dale list of 769 easy words ([Dale, 1931](#ref-dale-1931)).

* Three samples each of 100 words are taken randomly from the text and the mean of the three scores is calculated, thus the text should be **at least 100 words long**. -Szigriszt's Perspicuity Index1
([Szigriszt Pazos, 1993](#ref-szigrisze-pazos-1993))|![Formula](/doc/measures/readability/szigriszts_perspicuity_index.svg)

* This test applies only to **Spanish texts**. Wiener Sachtextformel1
([Bamberger & Vanecek, 1984](#ref-bamberger-vanecek-1984))|![Formula](/doc/measures/readability/wstf.svg)
where **NumWordsPolysyllabic** is the number of words with 3 or more syllables and **NumLongWords** is the number of words with 7 or more letters.

* This test applies only to **German texts**.
* This test has 4 variants, which you can select via **Menu → Preferences → Settings → Measures → Readability → Wiener Sachtextformel → Variant**. **Notes:** @@ -1358,7 +1352,6 @@ Cubic Association Ratio
([Daille, 1994](#ref-daille-1994), [1995](#ref-daille ## [5 References](#doc) - [1] [**^**](#ref-aari) Al-Tamimi, A., Jaradat M., Aljarrah, N., & Ghanim, S. (2013). AARI: Automatic Arabic readability index. *The International Arab Journal of Information Technology*, *11*(4), pp. 370–378.
@@ -1371,83 +1364,82 @@ Cubic Association Ratio
([Daille, 1994](#ref-daille-1994), [1995](#ref-daille [5] [**^**](#ref-z-score-berry-rogghes) Berry-Rogghe, G. L. M. (1973). The computation of collocations and their relevance in lexical studies. In A. J. Aiken, R. W. Bailey, & N. Hamilton-Smith (Eds.), *The computer and literary studies* (pp. 103–112). Edinburgh University Press.
[6] [**^**](#ref-lix) Björnsson, C.-H. (1968). *Läsbarhet*. Liber.
+ +[7] [**^**](#ref-re) Brouwer, R. H. M. (1963). Onderzoek naar de leesmoeilijkheid van Nederlands proza. *Paedagogische studiën*, *40*, 454–464. https://objects.library.uu.nl/reader/index.php?obj=1874-205260&lan=en -[7] [**^**](#ref-carrolls-d2)[**^**](#ref-carrolls-um) Carroll, J. B. (1970). An alternative to Juilland’s usage coefficient for lexical frequencies and a proposal for a standard frequency index. *Computer Studies in the Humanities and Verbal Behaviour*, *3*(2), 61–65. https://doi.org/10.1002/j.2333-8504.1970.tb00778.x
+[8] [**^**](#ref-carrolls-d2)[**^**](#ref-carrolls-um) Carroll, J. B. (1970). An alternative to Juilland’s usage coefficient for lexical frequencies and a proposal for a standard frequency index. *Computer Studies in the Humanities and Verbal Behaviour*, *3*(2), 61–65. https://doi.org/10.1002/j.2333-8504.1970.tb00778.x
-[8] [**^**](#ref-rgl) Caylor, J. S., Sticht, T. G., Fox, L. C., & Ford, J. P. (1973). *Methodologies for determining reading requirements of military occupational specialties*. Human Resource Research Organization. https://files.eric.ed.gov/fulltext/ED074343.pdf
+[9] [**^**](#ref-rgl) Caylor, J. S., Sticht, T. G., Fox, L. C., & Ford, J. P. (1973). *Methodologies for determining reading requirements of military occupational specialties*. Human Resource Research Organization. https://files.eric.ed.gov/fulltext/ED074343.pdf
-[9] [**^**](#ref-squared-phi-coeff) Church, K. W., & Gale, W. A. (1991, September 29–October 1). Concordances for parallel text [Paper presentation]. Using Corpora: Seventh Annual Conference of the UW Centre for the New OED and Text Research, St. Catherine's College, Oxford, United Kingdom.
+[10] [**^**](#ref-squared-phi-coeff) Church, K. W., & Gale, W. A. (1991, September 29–October 1). Concordances for parallel text [Paper presentation]. Using Corpora: Seventh Annual Conference of the UW Centre for the New OED and Text Research, St. Catherine's College, Oxford, United Kingdom.
-[10] [**^**](#ref-students-t-test-1-sample) Church, K., Gale, W., Hanks, P., & Hindle, D. (1991). Using statistics in lexical analysis. In U. Zernik (Ed.), *Lexical acquisition: Exploiting on-line resources to build a lexicon* (pp. 115–164). Psychology Press.
+[11] [**^**](#ref-students-t-test-1-sample) Church, K., Gale, W., Hanks, P., & Hindle, D. (1991). Using statistics in lexical analysis. In U. Zernik (Ed.), *Lexical acquisition: Exploiting on-line resources to build a lexicon* (pp. 115–164). Psychology Press.
-[11] [**^**](#ref-pmi) Church, K. W., & Hanks, P. (1990). Word association norms, mutual information, and lexicography. *Computational Linguistics*, *16*(1), 22–29.
+[12] [**^**](#ref-pmi) Church, K. W., & Hanks, P. (1990). Word association norms, mutual information, and lexicography. *Computational Linguistics*, *16*(1), 22–29.
-[12] [**^**](#ref-coleman-liau-index) Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. *Journal of Applied Psychology*, *60*(2), 283–284. https://doi.org/10.1037/h0076540
+[13] [**^**](#ref-coleman-liau-index) Coleman, M., & Liau, T. L. (1975). A computer readability formula designed for machine scoring. *Journal of Applied Psychology*, *60*(2), 283–284. https://doi.org/10.1037/h0076540
-[13] [**^**](#ref-formula-de-crawford) Crawford, A. N. (1985). Fórmula y gráfico para determinar la comprensibilidad de textos de nivel primario en castellano. *Lectura y Vida*, *6*(4). http://www.lecturayvida.fahce.unlp.edu.ar/numeros/a6n4/06_04_Crawford.pdf
+[14] [**^**](#ref-formula-de-crawford) Crawford, A. N. (1985). Fórmula y gráfico para determinar la comprensibilidad de textos de nivel primario en castellano. *Lectura y Vida*, *6*(4). http://www.lecturayvida.fahce.unlp.edu.ar/numeros/a6n4/06_04_Crawford.pdf
-[14] [**^**](#ref-im3) Daille, B. (1994). *Approche mixte pour l'extraction automatique de terminologie: statistiques lexicales et filtres linguistiques* [Doctoral thesis, Paris Diderot University]. Béatrice Daille. http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=
+[15] [**^**](#ref-im3) Daille, B. (1994). *Approche mixte pour l'extraction automatique de terminologie: statistiques lexicales et filtres linguistiques* [Doctoral thesis, Paris Diderot University]. Béatrice Daille. http://www.bdaille.com/index.php?option=com_docman&task=doc_download&gid=8&Itemid=
-[15] [**^**](#ref-im3) Daille, B. (1995). Combined approach for terminology extraction: Lexical statistics and linguistic filtering. *UCREL technical papers* (Vol. 5). Lancaster University.
+[16] [**^**](#ref-im3) Daille, B. (1995). Combined approach for terminology extraction: Lexical statistics and linguistic filtering. *UCREL technical papers* (Vol. 5). Lancaster University.
-[16] [**^**](#ref-spache-grade-level) Dale, E. (1931). A comparison of two word lists. *Educational Research Bulletin*, *10*(18), 484–489.
+[17] [**^**](#ref-spache-grade-level) Dale, E. (1931). A comparison of two word lists. *Educational Research Bulletin*, *10*(18), 484–489.
-[17] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948a). A formula for predicting readability. *Educational Research Bulletin*, *27*(1), 11–20, 28.
+[18] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948a). A formula for predicting readability. *Educational Research Bulletin*, *27*(1), 11–20, 28.
-[18] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948b). A formula for predicting readability: Instructions. *Educational Research Bulletin*, *27*(2), 37–54.
+[19] [**^**](#ref-dale-chall-readability-score) Dale, E., & Chall, J. S. (1948b). A formula for predicting readability: Instructions. *Educational Research Bulletin*, *27*(2), 37–54.
-[19] [**^**](#ref-z-score) Dennis, S. F. (1964). The construction of a thesaurus automatically from a sample of text. In M. E. Stevens, V. E. Giuliano, & L. B. Heilprin (Eds.), *Proceedings of the symposium on statistical association methods for mechanized documentation* (pp. 61–148). National Bureau of Standards.
+[20] [**^**](#ref-z-score) Dennis, S. F. (1964). The construction of a thesaurus automatically from a sample of text. In M. E. Stevens, V. E. Giuliano, & L. B. Heilprin (Eds.), *Proceedings of the symposium on statistical association methods for mechanized documentation* (pp. 61–148). National Bureau of Standards.
-[20] [**^**](#ref-me) Dias, G., Guilloré, S., & Pereira Lopes, J. G. (1999). Language independent automatic acquisition of rigid multiword units from unrestricted text corpora. In A. Condamines, C. Fabre, & M. Péry-Woodley (Eds.), *TALN'99: 6ème Conférence Annuelle Sur le Traitement Automatique des Langues Naturelles* (pp. 333–339). TALN.
+[21] [**^**](#ref-me) Dias, G., Guilloré, S., & Pereira Lopes, J. G. (1999). Language independent automatic acquisition of rigid multiword units from unrestricted text corpora. In A. Condamines, C. Fabre, & M. Péry-Woodley (Eds.), *TALN'99: 6ème Conférence Annuelle Sur le Traitement Automatique des Langues Naturelles* (pp. 333–339). TALN.
-[21] [**^**](#ref-re) Douma, W. H. (1960). *De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules* [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323 +[22] [**^**](#ref-re) Douma, W. H. (1960). *De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules* [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323 -[22] [**^**](#ref-log-likehood-ratio-test) Dunning, T. E. (1993). Accurate methods for the statistics of surprise and coincidence. *Computational Linguistics*, *19*(1), 61–74.
+[23] [**^**](#ref-log-likehood-ratio-test) Dunning, T. E. (1993). Accurate methods for the statistics of surprise and coincidence. *Computational Linguistics*, *19*(1), 61–74.
-[23] [**^**](#ref-jaccard-index)[**^**](#ref-mi) Dunning, T. E. (1998). *Finding structure in text, genome and other symbolic sequences* [Doctoral dissertation, University of Sheffield]. arXiv. arxiv.org/pdf/1207.1847.pdf
+[24] [**^**](#ref-jaccard-index)[**^**](#ref-mi) Dunning, T. E. (1998). *Finding structure in text, genome and other symbolic sequences* [Doctoral dissertation, University of Sheffield]. arXiv. arxiv.org/pdf/1207.1847.pdf
-[24] [**^**](#ref-osman) El-Haj, M., & Rayson, P. (2016). OSMAN: A novel Arabic readability metric. In N. Calzolari, K. Choukri, T. Declerck, S. Goggi, M. Grobelnik, B. Maegaard, J. Mariani, H. Mazo, A. Moreno, J. Odijk, & S. Piperidis (Eds.), *Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)* (pp. 250–255). European Language Resources Association. http://www.lrec-conf.org/proceedings/lrec2016/index.html
+[25] [**^**](#ref-osman) El-Haj, M., & Rayson, P. (2016). OSMAN: A novel Arabic readability metric. In N. Calzolari, K. Choukri, T. Declerck, S. Goggi, M. Grobelnik, B. Maegaard, J. Mariani, H. Mazo, A. Moreno, J. Odijk, & S. Piperidis (Eds.), *Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)* (pp. 250–255). European Language Resources Association. http://www.lrec-conf.org/proceedings/lrec2016/index.html
-[25] [**^**](#ref-engwalls-fm) Engwall, G. (1974). *Fréquence et distribution du vocabulaire dans un choix de romans français* [Unpublished doctoral dissertation]. Stockholm University.
+[26] [**^**](#ref-engwalls-fm) Engwall, G. (1974). *Fréquence et distribution du vocabulaire dans un choix de romans français* [Unpublished doctoral dissertation]. Stockholm University.
-[26] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
+[27] [**^**](#ref-re-simplified) Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. *Journal of Applied Psychology*, *35*(5), 333–337. https://doi.org/10.1037/h0062427
-[27] [**^**](#ref-fernandez-huertas-readability-score) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
+[28] [**^**](#ref-re) Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. *Consigna*, *214*, 29–32.
-[28] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
+[29] [**^**](#ref-re) Flesch, R. (1948). A new readability yardstick. *Journal of Applied Psychology*, *32*(3), 221–233. https://doi.org/10.1037/h0057532
-[29] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
+[30] [**^**](#ref-re) Franchina, V., & Vacca, R. (1986). Adaptation of Flesh readability index on a bilingual text written by the same author both in Italian and English languages. *Linguaggi*, *3*, 47–49.
-[30] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
+[31] [**^**](#ref-diff-coeff) Gabrielatos, C. (2018). Keyness analysis: Nature, metrics and techniques. In C. Taylor & A. Marchi (Eds.), *Corpus approaches to discourse: A critical review* (pp. 225–258). Routledge.
-[31] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
+[32] [**^**](#ref-pct-diff) Gabrielatos, C., & Marchi, A. (2012, September 13–14). *Keyness: Appropriate metrics and practical issues* [Conference session]. CADS International Conference 2012, University of Bologna, Italy.
-[32] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
+[33] [**^**](#ref-griess-dp) Gries, S. T. (2008). Dispersions and adjusted frequencies in corpora. *International Journal of Corpus Linguistics*, *13*(4), 403–437. https://doi.org/10.1075/ijcl.13.4.02gri
-[33] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
+[34] [**^**](#ref-fog-index) Gunning, R. (1968). *The technique of clear writing* (revised ed.). McGraw-Hill Book Company.
-[34] [**^**](#ref-formula-de-comprensibilidad-de-gutierrez-de-polini) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
+[35] [**^**](#ref-formula-de-comprensibilidad-de-gutierrez-de-polini) Gutiérrez de Polini, L. E. (1972). *Investigación sobre lectura en Venezuela* [Paper presentation]. Primeras Jornadas de Educación Primaria, Ministerio de Educación, Caracas, Venezuela.
-[35] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/.
+[36] [**^**](#ref-log-ratio) Hardie, A. (2014, April 28). *Log ratio: An informal introduction*. ESRC Centre for Corpus Approaches to Social Science (CASS). http://cass.lancs.ac.uk/log-ratio-an-informal-introduction/.
-[36] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
+[37] [**^**](#ref-pearsons-chi-squared-test)[**^**](#ref-diff-coeff) Hofland, K., & Johanson, S. (1982). *Word frequencies in British and American English*. Norwegian Computing Centre for the Humanities.
-[37] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
+[38] [**^**](#ref-juillands-d)[**^**](#ref-juillands-u) Juilland, A., & Chang-Rodriguez, E. (1964). *Frequency dictionary of Spanish words*. Mouton.
-[38] [**^**](#ref-re) Kandel, L., & Moles A. (1958). Application de l’indice de flesch la langue francaise [applying flesch index to french language]. *The Journal of Educational Research*, *21*, 283–287.
- +[39] [**^**](#ref-re) Kandel, L., & Moles, A. (1958). Application de l’indice de flesch la langue francaise [applying flesch index to french language]. *The Journal of Educational Research*, *21*, 283–287.
-[39] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
+[40] [**^**](#ref-mann-whiteney-u-test) Kilgarriff, A. (2001). Comparing corpora. *International Journal of Corpus Linguistics*, *6*(1), 232–263. https://doi.org/10.1075/ijcl.6.1.05kil
-[40] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
+[41] [**^**](#ref-kilgarriffs-ratio) Kilgarriff, A. (2009). Simple maths for keywords. In M. Mahlberg, V. González-Díaz, & C. Smith (Eds.), *Proceedings of the Corpus Linguistics Conference 2009* (p. 171). University of Liverpool.
-[41] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
+[42] [**^**](#ref-mi-log-f) Kilgarriff, A., & Tugwell, D. (2002). WASP-bench: An MT lexicographers' workstation supporting state-of-the-art lexical disambiguation. In *Proceedings of the 8th Machine Translation Summit* (pp. 187–190). European Association for Machine Translation.
-[42] [**^**](#ref-flesch-kincaid-grade-level) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
+[43] [**^**](#ref-flesch-kincaid-grade-level) Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). *Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel*. Naval Air Station Memphis.
-[43] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
- -[44] [**^**](#ref-fernandez-huertas-readability-score) Law, Gwillim. (2011, May 27). *Error in the Fernandez Huerta readability formula*. LINGUIST List. https://linguistlist.org/issues/22/22-2332/
+[44] [**^**](#ref-kromers-ur) Kromer, V. (2003). A usage measure based on psychophysical relations. *Journal of Quantitative Linguistics*, *10*(2), 177–186. https://doi.org/10.1076/jqul.10.2.177.16718
[45] [**^**](#ref-mi-log-f) Lexical Computing. (2015, July 8). *Statistics used in Sketch Engine*. Sketch Engine. https://www.sketchengine.eu/documentation/statistics-used-in-sketch-engine/
@@ -1495,7 +1487,7 @@ Cubic Association Ratio
([Daille, 1994](#ref-daille-1994), [1995](#ref-daille [67] [**^**](#ref-spache-grade-level) Spache, G. (1953). A new readability formula for primary-grade reading materials. *Elementary School Journal*, *53*(7), 410–413. https://doi.org/10.1086/458513
-[68] [**^**](#ref-szigriszts-perspicuity_index) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
+[68] [**^**](#ref-re) Szigriszt Pazos, F. (1993). *Sistemas predictivos de legibilidad del mensaje escrito: Fórmula de perspicuidad* [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y
[69] [**^**](#ref-lfmd)[**^**](#ref-md) Thanopoulos, A., Fakotakis, N., & Kokkinakis, G. (2002). Comparative evaluation of collocation extraction metrics. In M. G. González & C. P. S. Araujo (Eds.), *Proceedings of the Third International Conference on Language Resources and Evaluation* (pp. 620–625). European Language Resources Association.
diff --git a/doc/measures/readability/fernandez_huertas_readability_score.svg b/doc/measures/readability/fernandez_huertas_readability_score.svg deleted file mode 100644 index 8c177dbf2..000000000 --- a/doc/measures/readability/fernandez_huertas_readability_score.svg +++ /dev/null @@ -1,91 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/doc/measures/readability/re.svg b/doc/measures/readability/re.svg index 037e3b7e8..dce0c2fbf 100644 --- a/doc/measures/readability/re.svg +++ b/doc/measures/readability/re.svg @@ -1,26 +1,12 @@ - + - - - - - - - - - - - - - - - - - + + + @@ -37,8 +23,8 @@ - - + + @@ -50,379 +36,394 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/doc/measures/readability/wstf.svg b/doc/measures/readability/wstf.svg index 491c0e675..43a0b56f1 100644 --- a/doc/measures/readability/wstf.svg +++ b/doc/measures/readability/wstf.svg @@ -1,6 +1,6 @@ - + @@ -81,240 +81,240 @@ - - + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/wl_tests_measures/test_measures_readability.py b/tests/wl_tests_measures/test_measures_readability.py index d192332a7..4a7e5f3f4 100644 --- a/tests/wl_tests_measures/test_measures_readability.py +++ b/tests/wl_tests_measures/test_measures_readability.py @@ -45,22 +45,36 @@ def __init__(self, tokens_multilevel, lang = 'eng_us'): test_text_eng_120 = Wl_Test_Text(TOKENS_MULTILEVEL_120) test_text_eng_150 = 
Wl_Test_Text(TOKENS_MULTILEVEL_150) +test_text_ara_0 = Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'ara') +test_text_ara_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'ara') + +test_text_deu_0 = Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'deu_de') +test_text_deu_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'deu_de') + +test_text_ita_0 = Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'ita') +test_text_ita_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'ita') + test_text_spa_0 = Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'spa') test_text_spa_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'spa') test_text_spa_120 = Wl_Test_Text(TOKENS_MULTILEVEL_120, lang = 'spa') test_text_spa_150 = Wl_Test_Text(TOKENS_MULTILEVEL_150, lang = 'spa') +test_text_afr_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'afr') +test_text_nld_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'nld') +test_text_fra_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'fra') +test_text_pol_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'pol') +test_text_rus_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'rus') test_text_other_12 = Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'other') def test_automated_ara_readability_index(): - aari_ara_0 = wl_measures_readability.automated_ara_readability_index(main, Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'ara')) - aari_ara_12 = wl_measures_readability.automated_ara_readability_index(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'ara')) + aari_ara_0 = wl_measures_readability.automated_ara_readability_index(main, test_text_ara_0) + aari_ara_12 = wl_measures_readability.automated_ara_readability_index(main, test_text_ara_12) aari_eng_12 = wl_measures_readability.automated_ara_readability_index(main, test_text_eng_12) print('Automated Arabic Readability Index:') - print(f'\t{aari_ara_0} (ara/0)') - print(f'\t{aari_ara_12} (ara/12)') - print(f'\t{aari_eng_12} (eng/12)') + print(f'\tara/0: {aari_ara_0}') + print(f'\tara/12: {aari_ara_12}') + print(f'\teng/12: {aari_eng_12}') assert 
aari_ara_0 == 'text_too_short' assert aari_ara_12 == 3.28 * 46 + 1.43 * (46 / 12) + 1.24 * (12 / 3) @@ -72,9 +86,9 @@ def test_automated_readability_index(): ari_spa_12 = wl_measures_readability.automated_readability_index(main, test_text_spa_12) print('Automated Readability Index:') - print(f'\t{ari_eng_0} (0)') - print(f'\t{ari_eng_12} (eng/12)') - print(f'\t{ari_spa_12} (spa/12)') + print(f'\teng/0: {ari_eng_0}') + print(f'\teng/12: {ari_eng_12}') + print(f'\tspa/12: {ari_spa_12}') assert ari_eng_0 == 'text_too_short' assert ari_eng_12 == ari_spa_12 == 0.5 * (12 / 3) + 4.71 * (47 / 12) - 21.43 @@ -85,9 +99,9 @@ def test_coleman_liau_index(): grade_level_spa_12 = wl_measures_readability.coleman_liau_index(main, test_text_spa_12) print('Coleman-Liau Index:') - print(f'\t{grade_level_eng_0} (0)') - print(f'\t{grade_level_eng_12} (eng/12)') - print(f'\t{grade_level_spa_12} (spa/12)') + print(f'\teng/0: {grade_level_eng_0}') + print(f'\teng/12: {grade_level_eng_12}') + print(f'\tspa/12: {grade_level_spa_12}') est_cloze_pct = 141.8401 - 0.21459 * (45 / 12 * 100) + 1.079812 * (3 / 12 * 100) @@ -100,9 +114,9 @@ def test_dale_chall_readability_score(): x_c50_spa_12 = wl_measures_readability.dale_chall_readability_score(main, test_text_spa_12) print('Dale-Chall Readibility Score:') - print(f'\t{x_c50_eng_0} (0)') - print(f'\t{x_c50_eng_12} (eng/12)') - print(f'\t{x_c50_spa_12} (spa/12)') + print(f'\teng/0: {x_c50_eng_0}') + print(f'\teng/12: {x_c50_eng_12}') + print(f'\tspa/12: {x_c50_spa_12}') assert x_c50_eng_0 == 'text_too_short' assert x_c50_eng_12 == 0.1579 * (1 / 12) + 0.0496 * (12 / 3) + 3.6365 @@ -114,27 +128,13 @@ def test_devereux_readability_index(): grade_placement_spa_12 = wl_measures_readability.devereux_readability_index(main, test_text_spa_12) print('Devereux Readability Index:') - print(f'\t{grade_placement_eng_0} (0)') - print(f'\t{grade_placement_eng_12} (eng/12)') - print(f'\t{grade_placement_spa_12} (spa/12)') + print(f'\teng/0: 
{grade_placement_eng_0}') + print(f'\teng/12: {grade_placement_eng_12}') + print(f'\tspa/12: {grade_placement_spa_12}') assert grade_placement_eng_0 == 'text_too_short' assert grade_placement_eng_12 == grade_placement_spa_12 == 1.56 * (47 / 12) + 0.19 * (12 / 3) - 6.49 -def test_fernandez_huertas_readability_score(): - score_spa_0 = wl_measures_readability.fernandez_huertas_readability_score(main, test_text_spa_0) - score_spa_12 = wl_measures_readability.fernandez_huertas_readability_score(main, test_text_spa_12) - score_eng_12 = wl_measures_readability.fernandez_huertas_readability_score(main, test_text_eng_12) - - print("Fernández Huerta's Readability Score:") - print(f'\t{score_spa_0} (spa/0)') - print(f'\t{score_spa_12} (spa/12)') - print(f'\t{score_eng_12} (eng/12)') - - assert score_spa_0 == 'text_too_short' - assert score_spa_12 == 206.84 - 60 * (18 / 12) - 102 * (3 / 12) - assert score_eng_12 == 'no_support' - def test_flesch_kincaid_grade_level(): gl_eng_0 = wl_measures_readability.flesch_kincaid_grade_level(main, test_text_eng_0) gl_eng_12 = wl_measures_readability.flesch_kincaid_grade_level(main, test_text_eng_12) @@ -142,10 +142,10 @@ def test_flesch_kincaid_grade_level(): gl_other_12 = wl_measures_readability.flesch_kincaid_grade_level(main, test_text_other_12) print('Flesch-Kincaid Grade Level:') - print(f'\t{gl_eng_0} (0)') - print(f'\t{gl_eng_12} (eng/12)') - print(f'\t{gl_spa_12} (spa/12)') - print(f'\t{gl_other_12} (other/12)') + print(f'\teng/0: {gl_eng_0}') + print(f'\teng/12: {gl_eng_12}') + print(f'\tspa/12: {gl_spa_12}') + print(f'\tother/12: {gl_other_12}') assert gl_eng_0 == 'text_too_short' assert gl_eng_12 == 0.39 * (12 / 3) + 11.8 * (15 / 12) - 15.59 @@ -155,18 +155,50 @@ def test_flesch_kincaid_grade_level(): def test_flesch_reading_ease(): flesch_re_eng_0 = wl_measures_readability.flesch_reading_ease(main, test_text_eng_0) flesch_re_eng_12 = wl_measures_readability.flesch_reading_ease(main, test_text_eng_12) - flesch_re_spa_12 = 
wl_measures_readability.flesch_reading_ease(main, test_text_spa_12) + + main.settings_custom['measures']['readability']['re']['variant_nld'] = 'Douma' + flesch_re_nld_12_douma = wl_measures_readability.flesch_reading_ease(main, test_text_nld_12) + main.settings_custom['measures']['readability']['re']['variant_nld'] = "Brouwer's Leesindex A" + flesch_re_nld_12_brouwer = wl_measures_readability.flesch_reading_ease(main, test_text_nld_12) + + flesch_re_fra_12 = wl_measures_readability.flesch_reading_ease(main, test_text_fra_12) + flesch_re_deu_12 = wl_measures_readability.flesch_reading_ease(main, test_text_deu_12) + flesch_re_ita_12 = wl_measures_readability.flesch_reading_ease(main, test_text_ita_12) + flesch_re_rus_12 = wl_measures_readability.flesch_reading_ease(main, test_text_rus_12) + + main.settings_custom['measures']['readability']['re']['variant_spa'] = 'Fernández Huerta' + flesch_re_spa_12_fh = wl_measures_readability.flesch_reading_ease(main, test_text_spa_12) + main.settings_custom['measures']['readability']['re']['variant_spa'] = 'Szigriszt Pazos' + flesch_re_spa_12_sp = wl_measures_readability.flesch_reading_ease(main, test_text_spa_12) + + flesch_re_afr_12 = wl_measures_readability.flesch_reading_ease(main, test_text_afr_12) flesch_re_other_12 = wl_measures_readability.flesch_reading_ease(main, test_text_other_12) print('Flesch Reading Ease:') - print(f'\t{flesch_re_eng_0} (0)') - print(f'\t{flesch_re_eng_12} (eng/12)') - print(f'\t{flesch_re_spa_12} (spa/12)') - print(f'\t{flesch_re_other_12} (other/12)') + print(f'\teng/0: {flesch_re_eng_0}') + print(f'\teng/12: {flesch_re_eng_12}') + print(f'\tnld-douma/12: {flesch_re_nld_12_douma}') + print(f'\tnld-brouwer/12: {flesch_re_nld_12_brouwer}') + print(f'\tfra/12: {flesch_re_fra_12}') + print(f'\tdeu/12: {flesch_re_deu_12}') + print(f'\tita/12: {flesch_re_ita_12}') + print(f'\trus/12: {flesch_re_rus_12}') + print(f'\tspa-fh/12: {flesch_re_spa_12_fh}') + print(f'\tspa-sp/12: {flesch_re_spa_12_sp}') + 
print(f'\tafr/12: {flesch_re_afr_12}') + print(f'\tother/12: {flesch_re_other_12}') assert flesch_re_eng_0 == 'text_too_short' assert flesch_re_eng_12 == 206.835 - 0.846 * (15 / 12 * 100) - 1.015 * (12 / 3) - assert flesch_re_spa_12 != 'no_support' + assert flesch_re_nld_12_douma == 206.84 - 77 * (18 / 12) - 0.93 * (12 / 3) + assert flesch_re_nld_12_brouwer == 195 - (200 / 3) * (18 / 12) - 2 * (12 / 3) + assert flesch_re_fra_12 == 207 - 73.6 * (16 / 12) - 1.015 * (12 / 3) + assert flesch_re_deu_12 == 180 - 58.5 * (15 / 12) - (12 / 3) + assert flesch_re_ita_12 == 217 - 60 * (19 / 12) - 1.3 * (12 / 3) + assert flesch_re_rus_12 == 206.835 - 60.1 * (13 / 12) - 1.3 * (12 / 3) + assert flesch_re_spa_12_fh == 206.84 - 60 * (18 / 12) - 1.02 * (12 / 3) + assert flesch_re_spa_12_sp == 206.84 - 62.3 * (18 / 12) - (12 / 3) + assert flesch_re_afr_12 == 206.835 - 0.846 * (18 / 12 * 100) - 1.015 * (12 / 3) assert flesch_re_other_12 == 'no_support' def test_flesch_reading_ease_simplified(): @@ -176,10 +208,10 @@ def test_flesch_reading_ease_simplified(): flesch_re_simplified_other_12 = wl_measures_readability.flesch_reading_ease_simplified(main, test_text_other_12) print('Flesch Reading Ease (Simplified):') - print(f'\t{flesch_re_simplified_eng_0} (0)') - print(f'\t{flesch_re_simplified_eng_12} (eng/12)') - print(f'\t{flesch_re_simplified_spa_12} (spa/12)') - print(f'\t{flesch_re_simplified_other_12} (other/12)') + print(f'\teng/0: {flesch_re_simplified_eng_0}') + print(f'\teng/12: {flesch_re_simplified_eng_12}') + print(f'\tspa/12: {flesch_re_simplified_spa_12}') + print(f'\tother/12: {flesch_re_simplified_other_12}') assert flesch_re_simplified_eng_0 == 'text_too_short' assert flesch_re_simplified_eng_12 == flesch_re_simplified_spa_12 == 1.599 * (9 / 12 * 100) - 1.015 * (12 / 3) - 31.517 @@ -192,10 +224,10 @@ def test_forcast_grade_level(): rgl_other_12 = wl_measures_readability.forcast_grade_level(main, test_text_other_12) print('FORCAST Grade Level:') - print(f'\t{rgl_eng_12} 
(eng/12)') - print(f'\t{rgl_eng_150} (eng/150)') - print(f'\t{rgl_spa_150} (spa/150)') - print(f'\t{rgl_other_12} (other/12)') + print(f'\teng/12: {rgl_eng_12}') + print(f'\teng/150: {rgl_eng_150}') + print(f'\tspa/150: {rgl_spa_150}') + print(f'\tother/12: {rgl_other_12}') assert rgl_eng_12 == 'text_too_short' assert rgl_eng_150 == rgl_spa_150 == 20.43 - 0.11 * (6 * 18 + 4) @@ -207,9 +239,9 @@ def test_formula_de_comprensibilidad_de_gutierrez_de_polini(): cp_eng_12 = wl_measures_readability.formula_de_comprensibilidad_de_gutierrez_de_polini(main, test_text_eng_12) print('Fórmula de comprensibilidad de Gutiérrez de Polini:') - print(f'\t{cp_spa_0} (spa/0)') - print(f'\t{cp_spa_12} (spa/12)') - print(f'\t{cp_eng_12} (eng/12)') + print(f'\tspa/0: {cp_spa_0}') + print(f'\tspa/12: {cp_spa_12}') + print(f'\teng/12: {cp_eng_12}') assert cp_spa_0 == 'text_too_short' assert cp_spa_12 == 95.2 - 9.7 * (45 / 12) - 0.35 * (12 / 3) @@ -221,23 +253,23 @@ def test_formula_de_crawford(): grade_level_eng_12 = wl_measures_readability.formula_de_crawford(main, test_text_eng_12) print('Fórmula de Crawford:') - print(f'\t{grade_level_spa_0} (spa/0)') - print(f'\t{grade_level_spa_12} (spa/12)') - print(f'\t{grade_level_eng_12} (eng/12)') + print(f'\tspa/0: {grade_level_spa_0}') + print(f'\tspa/12: {grade_level_spa_12}') + print(f'\teng/12: {grade_level_eng_12}') assert grade_level_spa_0 == 'text_too_short' assert grade_level_spa_12 == 3 / 12 * 100 * (-0.205) + 18 / 12 * 100 * 0.049 - 3.407 assert grade_level_eng_12 == 'no_support' def test_gulpease_index(): - gulpease_index_ita_0 = wl_measures_readability.gulpease_index(main, Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'ita')) - gulpease_index_ita_12 = wl_measures_readability.gulpease_index(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'ita')) + gulpease_index_ita_0 = wl_measures_readability.gulpease_index(main, test_text_ita_0) + gulpease_index_ita_12 = wl_measures_readability.gulpease_index(main, test_text_ita_12) gulpease_index_eng_12 
= wl_measures_readability.gulpease_index(main, test_text_eng_12) print('Gulpease Index:') - print(f'\t{gulpease_index_ita_0} (ita/0)') - print(f'\t{gulpease_index_ita_12} (ita/12)') - print(f'\t{gulpease_index_eng_12} (eng/12)') + print(f'\tita/0: {gulpease_index_ita_0}') + print(f'\tita/12: {gulpease_index_ita_12}') + print(f'\teng/12: {gulpease_index_eng_12}') assert gulpease_index_ita_0 == 'text_too_short' assert gulpease_index_ita_12 == 89 + (300 * 3 - 10 * 45) / 12 @@ -246,14 +278,14 @@ def test_gulpease_index(): def test_gunning_fog_index(): fog_index_eng_0 = wl_measures_readability.gunning_fog_index(main, test_text_eng_0) fog_index_eng_12_propn = wl_measures_readability.gunning_fog_index(main, test_text_eng_12_propn) - fog_index_pol_12 = wl_measures_readability.gunning_fog_index(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'pol')) + fog_index_pol_12 = wl_measures_readability.gunning_fog_index(main, test_text_pol_12) fog_index_spa_12 = wl_measures_readability.gunning_fog_index(main, test_text_spa_12) print('Gunning Fog Index:') - print(f'\t{fog_index_eng_0} (0)') - print(f'\t{fog_index_eng_12_propn} (eng/12)') - print(f'\t{fog_index_pol_12} (pol/12)') - print(f'\t{fog_index_spa_12} (spa/12)') + print(f'\teng/0: {fog_index_eng_0}') + print(f'\teng/12: {fog_index_eng_12_propn}') + print(f'\tpol/12: {fog_index_pol_12}') + print(f'\tspa/12: {fog_index_spa_12}') assert fog_index_eng_0 == 'text_too_short' assert fog_index_eng_12_propn == 0.4 * (12 / 3 + 1 / 12 * 100) @@ -266,9 +298,9 @@ def test_legibility_mu(): mu_eng_12 = wl_measures_readability.legibility_mu(main, test_text_eng_12) print('Legibilidad µ:') - print(f'\t{mu_spa_0} (spa/0)') - print(f'\t{mu_spa_12} (spa/12)') - print(f'\t{mu_eng_12} (eng/12)') + print(f'\tspa/0: {mu_spa_0}') + print(f'\tspa/12: {mu_spa_12}') + print(f'\teng/12: {mu_eng_12}') assert mu_spa_0 == 'text_too_short' assert mu_spa_12 == (12 / 11) * (3.75 / 7.1875) * 100 @@ -281,10 +313,10 @@ def test_lensear_write(): score_other_12 = 
wl_measures_readability.lensear_write(main, test_text_other_12) print('Lensear Write:') - print(f'\t{score_eng_0} (eng/0)') - print(f'\t{score_eng_12} (eng/12)') - print(f'\t{score_eng_100} (eng/100)') - print(f'\t{score_other_12} (other/12)') + print(f'\teng/0: {score_eng_0}') + print(f'\teng/12: {score_eng_12}') + print(f'\teng/100: {score_eng_100}') + print(f'\tother/12: {score_other_12}') assert score_eng_0 == 'text_too_short' assert score_eng_12 == 6 * (100 / 12) + 3 * 3 * (100 / 12) @@ -297,9 +329,9 @@ def test_lix(): lix_spa_12 = wl_measures_readability.lix(main, test_text_spa_12) print('Lix:') - print(f'\t{lix_eng_0} (eng/0)') - print(f'\t{lix_eng_12} (eng/12)') - print(f'\t{lix_spa_12} (spa/12)') + print(f'\teng/0: {lix_eng_0}') + print(f'\teng/12: {lix_eng_12}') + print(f'\tspa/12: {lix_spa_12}') assert lix_eng_0 == 'text_too_short' assert lix_eng_12 == 12 / 3 + 100 * (3 / 12) @@ -311,23 +343,23 @@ def test_mcalpine_eflaw(): eflaw_spa_12 = wl_measures_readability.mcalpine_eflaw(main, test_text_spa_12) print('McAlpine EFLAW Readability Score:') - print(f'\t{eflaw_eng_0} (eng/0)') - print(f'\t{eflaw_eng_12} (eng/12)') - print(f'\t{eflaw_spa_12} (spa/12)') + print(f'\teng/0: {eflaw_eng_0}') + print(f'\teng/12: {eflaw_eng_12}') + print(f'\tspa/12: {eflaw_spa_12}') assert eflaw_eng_0 == 'text_too_short' assert eflaw_eng_12 == (12 + 6) / 3 assert eflaw_spa_12 == 'no_support' def test_osman(): - osman_ara_0 = wl_measures_readability.osman(main, Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'ara')) - osman_ara_12 = wl_measures_readability.osman(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'ara')) + osman_ara_0 = wl_measures_readability.osman(main, test_text_ara_0) + osman_ara_12 = wl_measures_readability.osman(main, test_text_ara_12) osman_eng_12 = wl_measures_readability.osman(main, test_text_eng_12) print('OSMAN:') - print(f'\t{osman_ara_0} (ara/0)') - print(f'\t{osman_ara_12} (ara/12)') - print(f'\t{osman_eng_12} (eng/12)') + print(f'\tara/0: {osman_ara_0}') + 
print(f'\tara/12: {osman_ara_12}') + print(f'\teng/12: {osman_eng_12}') assert osman_ara_0 == 'text_too_short' assert osman_ara_12 == 200.791 - 1.015 * (12 / 3) - 24.181 * ((3 + 23 + 3 + 0) / 12) @@ -339,9 +371,9 @@ def test_rix(): rix_spa_12 = wl_measures_readability.rix(main, test_text_spa_12) print('Rix:') - print(f'\t{rix_eng_0} (eng/0)') - print(f'\t{rix_eng_12} (eng/12)') - print(f'\t{rix_spa_12} (spa/12)') + print(f'\teng/0: {rix_eng_0}') + print(f'\teng/12: {rix_eng_12}') + print(f'\tspa/12: {rix_spa_12}') assert rix_eng_0 == 'text_too_short' assert rix_eng_12 == rix_spa_12 == 3 / 3 @@ -353,10 +385,10 @@ def test_smog_grade(): g_other_12 = wl_measures_readability.smog_grade(main, test_text_other_12) print('SMOG Grade:') - print(f'\t{g_eng_12} (eng/12)') - print(f'\t{g_eng_120} (eng/120)') - print(f'\t{g_spa_120} (spa/120)') - print(f'\t{g_other_12} (other/12)') + print(f'\teng/12: {g_eng_12}') + print(f'\teng/120: {g_eng_120}') + print(f'\tspa/120: {g_spa_120}') + print(f'\tother/12: {g_other_12}') assert g_eng_12 == 'text_too_short' assert g_eng_120 == 3.1291 + 1.043 * (15 ** 0.5) @@ -369,43 +401,29 @@ def test_spache_grade_level(): grade_level_spa_12 = wl_measures_readability.spache_grade_level(main, test_text_spa_12) print('Spache Grade Level:') - print(f'\t{grade_level_eng_12} (eng/12)') - print(f'\t{grade_level_eng_100} (eng/100)') - print(f'\t{grade_level_spa_12} (spa/12)') + print(f'\teng/12: {grade_level_eng_12}') + print(f'\teng/100: {grade_level_eng_100}') + print(f'\tspa/12: {grade_level_spa_12}') assert grade_level_eng_12 == 'text_too_short' assert grade_level_eng_100 == numpy.mean([0.141 * (100 / 25) + 0.086 * (25 / 100 * 100) + 0.839] * 3) assert grade_level_spa_12 == 'no_support' -def test_szigriszts_perspicuity_index(): - p_spa_0 = wl_measures_readability.szigriszts_perspicuity_index(main, test_text_spa_0) - p_spa_12 = wl_measures_readability.szigriszts_perspicuity_index(main, test_text_spa_12) - p_eng_12 = 
wl_measures_readability.szigriszts_perspicuity_index(main, test_text_eng_12) - - print("Szigriszt's Perspicuity Index:") - print(f'\t{p_spa_0} (spa/0)') - print(f'\t{p_spa_12} (spa/12)') - print(f'\t{p_eng_12} (eng/12)') - - assert p_spa_0 == 'text_too_short' - assert p_spa_12 == 207 - 62.3 * (18 / 12) - (12 / 3) - assert p_eng_12 == 'no_support' - def test_wiener_sachtextformel(): - wstf_deu_0 = wl_measures_readability.wiener_sachtextformel(main, Wl_Test_Text(TOKENS_MULTILEVEL_0, lang = 'deu_de')) - wstf_deu_12_1 = wl_measures_readability.wiener_sachtextformel(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'deu_de'), variant = '1') - wstf_deu_12_2 = wl_measures_readability.wiener_sachtextformel(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'deu_de'), variant = '2') - wstf_deu_12_3 = wl_measures_readability.wiener_sachtextformel(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'deu_de'), variant = '3') - wstf_deu_12_4 = wl_measures_readability.wiener_sachtextformel(main, Wl_Test_Text(TOKENS_MULTILEVEL_12, lang = 'deu_de'), variant = '4') + wstf_deu_0 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_0) + wstf_deu_12_1 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12, variant = '1') + wstf_deu_12_2 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12, variant = '2') + wstf_deu_12_3 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12, variant = '3') + wstf_deu_12_4 = wl_measures_readability.wiener_sachtextformel(main, test_text_deu_12, variant = '4') wstf_eng_12 = wl_measures_readability.wiener_sachtextformel(main, test_text_eng_12) print('Wiener Sachtextformel:') - print(f'\t{wstf_deu_0} (deu/0)') - print(f'\t{wstf_deu_12_1} (deu-1/12)') - print(f'\t{wstf_deu_12_2} (deu-2/12)') - print(f'\t{wstf_deu_12_3} (deu-3/12)') - print(f'\t{wstf_deu_12_4} (deu-4/12)') - print(f'\t{wstf_eng_12} (eng/12)') + print(f'\tdeu/0: {wstf_deu_0}') + print(f'\tdeu-1/12: {wstf_deu_12_1}') + print(f'\tdeu-2/12: 
{wstf_deu_12_2}') + print(f'\tdeu-3/12: {wstf_deu_12_3}') + print(f'\tdeu-4/12: {wstf_deu_12_4}') + print(f'\teng/12: {wstf_eng_12}') ms = 0 / 12 sl = 12 / 3 @@ -425,7 +443,6 @@ def test_wiener_sachtextformel(): test_coleman_liau_index() test_dale_chall_readability_score() test_devereux_readability_index() - test_fernandez_huertas_readability_score() test_flesch_kincaid_grade_level() test_flesch_reading_ease() test_flesch_reading_ease_simplified() @@ -442,5 +459,4 @@ def test_wiener_sachtextformel(): test_rix() test_smog_grade() test_spache_grade_level() - test_szigriszts_perspicuity_index() test_wiener_sachtextformel() diff --git a/tests/wl_tests_work_area/test_profiler.py b/tests/wl_tests_work_area/test_profiler.py index 2a4cc0644..86ffa7bf2 100644 --- a/tests/wl_tests_work_area/test_profiler.py +++ b/tests/wl_tests_work_area/test_profiler.py @@ -95,7 +95,7 @@ def update_gui(err_msg, texts_stats_files): count_tokens_lens_syls.append(collections.Counter(len_tokens_syls)) count_tokens_lens_chars.append(collections.Counter(len_tokens_chars)) - assert len(readability_statistics) == 24 + assert len(readability_statistics) == 22 # Counts assert count_paras diff --git a/wordless/wl_measures/wl_measures_readability.py b/wordless/wl_measures/wl_measures_readability.py index 6d9a14ad7..000ab3be5 100644 --- a/wordless/wl_measures/wl_measures_readability.py +++ b/wordless/wl_measures/wl_measures_readability.py @@ -216,27 +216,6 @@ def devereux_readability_index(main, text): return grade_placement -# Fernández Huerta's Readability Score -# References: -# Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. Consigna, 214, 29–32. -# Law, Gwillim. (2011, May 27). Error in the Fernandez Huerta readability formula. LINGUIST List. 
https://linguistlist.org/issues/22/22-2332/ -def fernandez_huertas_readability_score(main, text): - if text.lang == 'spa' and text.lang in main.settings_global['syl_tokenizers']: - text = get_counts(main, text) - - if text.count_words and text.count_sentences: - p = ( - 206.84 - - 60 * (text.count_syls / text.count_words) - - 102 * (text.count_sentences / text.count_words) - ) - else: - p = 'text_too_short' - else: - p = 'no_support' - - return p - # Flesch-Kincaid Grade Level # Reference: Kincaid, J. P., Fishburne, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count, and Flesch reading ease formula) for navy enlisted personnel. Naval Air Station Memphis. https://apps.dtic.mil/sti/pdfs/ADA006655.pdf def flesch_kincaid_grade_level(main, text): @@ -259,8 +238,10 @@ def flesch_kincaid_grade_level(main, text): # Flesch Reading Ease # Reference: # Flesch, R. (1948). A new readability yardstick. Journal of Applied Psychology, 32(3), 221–233. https://doi.org/10.1037/h0057532 -# Dutch variant: +# Dutch variant (Douma): # Douma, W. H. (1960). De leesbaarheid van landbouwbladen: Een onderzoek naar en een toepassing van leesbaarheidsformules [Readability of Dutch farm papers: A discussion and application of readability-formulas]. Afdeling sociologie en sociografie van de Landbouwhogeschool Wageningen. https://edepot.wur.nl/276323 +# Dutch variant (Brouwer's Leesindex A): +# Brouwer, R. H. M. (1963). Onderzoek naar de leesmoeilijkheid van Nederlands proza. Paedagogische studiën, 40, 454–464. https://objects.library.uu.nl/reader/index.php?obj=1874-205260&lan=en # French variant: # Kandel, L., & Moles A. (1958). Application de l’indice de flesch la langue francaise [applying flesch index to french language]. The Journal of Educational Research, 21, 283–287. # Kopient, A., & Grabar, N. (2020). Rated lexicon for the simplification of medical texts. In B. 
Gersbeck-Schierholz (ed.), HEALTHINFO 2020: The fifth international conference on informatics and assistive technologies for health-care, medical support and wellbeing (pp. 11–17). IARIA. https://hal.science/hal-03095275/document @@ -272,22 +253,78 @@ def flesch_kincaid_grade_level(main, text): # Garais, E. (2011). Web applications readability. Journal of Information Systems and Operations Management, 5(1), 117–121. http://www.rebe.rau.ro/RePEc/rau/jisomg/SP11/JISOM-SP11-A13.pdf # Russian variant: # Oborneva, I. V. (2006). Автоматизированная оценка сложности учебных текстов на основе статистических параметров [Doctoral dissertation, Institute for Strategy of Education Development of the Russian Academy of Education]. Freereferats.ru. https://static.freereferats.ru/_avtoreferats/01002881899.pdf?ver=3 +# Spanish variant (Fernández Huerta): +# Fernández Huerta, J. (1959). Medidas sencillas de lecturabilidad. Consigna, 214, 29–32. +# Garais, E. (2011). Web applications readability. Journal of Information Systems and Operations Management, 5(1), 117–121. http://www.rebe.rau.ro/RePEc/rau/jisomg/SP11/JISOM-SP11-A13.pdf +# Spanish variant (Szigriszt Pazos): +# Szigriszt Pazos, F. (1993). Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. 
https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y def flesch_reading_ease(main, text): if text.lang in main.settings_global['syl_tokenizers']: text = get_counts(main, text) if text.count_words and text.count_sentences: - flesch_re = ( - 206.835 - - 0.846 * (text.count_syls / text.count_words * 100) - - 1.015 * (text.count_words / text.count_sentences) - ) + if text.lang == 'nld': + if main.settings_custom['measures']['readability']['re']['variant_nld'] == 'Douma': + re = ( + 206.84 + - 77 * (text.count_syls / text.count_words) + - 0.93 * (text.count_words / text.count_sentences) + ) + elif main.settings_custom['measures']['readability']['re']['variant_nld'] == "Brouwer's Leesindex A": + re = ( + 195 + - (200 / 3) * (text.count_syls / text.count_words) + - 2 * (text.count_words / text.count_sentences) + ) + elif text.lang == 'fra': + re = ( + 207 + - 73.6 * (text.count_syls / text.count_words) + - 1.015 * (text.count_words / text.count_sentences) + ) + elif text.lang.startswith('deu_'): + re = ( + 180 + - 58.5 * (text.count_syls / text.count_words) + - (text.count_words / text.count_sentences) + ) + elif text.lang == 'ita': + re = ( + 217 + - 60 * (text.count_syls / text.count_words) + - 1.3 * (text.count_words / text.count_sentences) + ) + elif text.lang == 'rus': + re = ( + 206.835 + - 60.1 * (text.count_syls / text.count_words) + - 1.3 * (text.count_words / text.count_sentences) + ) + elif text.lang == 'spa': + if main.settings_custom['measures']['readability']['re']['variant_spa'] == 'Fernández Huerta': + re = ( + 206.84 + - 60 * (text.count_syls / text.count_words) + - 1.02 * (text.count_words / text.count_sentences) + ) + elif main.settings_custom['measures']['readability']['re']['variant_spa'] == 'Szigriszt Pazos': + re = ( + 206.84 + - 62.3 * (text.count_syls / text.count_words) + - (text.count_words / text.count_sentences) + ) + else: + re = ( + 206.835 + - 0.846 * (text.count_syls / 
text.count_words * 100) + - 1.015 * (text.count_words / text.count_sentences) + ) else: - flesch_re = 'text_too_short' + re = 'text_too_short' else: - flesch_re = 'no_support' + re = 'no_support' - return flesch_re + return re # Flesch Reading Ease (Simplified) # Reference: Farr, J. N., Jenkins, J. J., & Paterson, D. G. (1951). Simplification of Flesch reading ease formula. Journal of Applied Psychology, 35(5), 333–337. https://doi.org/10.1037/h0062427 @@ -689,25 +726,6 @@ def spache_grade_level(main, text): return grade_level -# Szigriszt's Perspicuity Index -# Reference: Szigriszt Pazos, F. (1993). Sistemas predictivos de legibilidad del mensaje escrito: Formula de perspicuidad [Doctoral dissertation, Complutense University of Madrid]. Biblos-e Archivo. https://repositorio.uam.es/bitstream/handle/10486/2488/3907_barrio_cantalejo_ines_maria.pdf?sequence=1&isAllowed=y -def szigriszts_perspicuity_index(main, text): - if text.lang == 'spa' and text.lang in main.settings_global['syl_tokenizers']: - text = get_counts(main, text) - - if text.count_words and text.count_sentences: - p = ( - 207 - - 62.3 * (text.count_syls / text.count_words) - - (text.count_words / text.count_sentences) - ) - else: - p = 'text_too_short' - else: - p = 'no_support' - - return p - # Wiener Sachtextformel # References: # Bamberger, R., & Vanecek, E. (1984). Lesen – Verstehen – Lernen – Schreiben. Jugend und Volk. 
diff --git a/wordless/wl_profiler.py b/wordless/wl_profiler.py index f77de1efd..cc0faa82e 100644 --- a/wordless/wl_profiler.py +++ b/wordless/wl_profiler.py @@ -362,7 +362,6 @@ def __init__(self, parent): _tr('wl_profiler', 'Coleman-Liau Index'), _tr('wl_profiler', 'Dale-Chall Readability Score'), _tr('wl_profiler', 'Devereaux Readability Index'), - _tr('wl_profiler', "Fernández Huerta's Readability Score"), _tr('wl_profiler', 'Flesch-Kincaid Grade Level'), _tr('wl_profiler', 'Flesch Reading Ease'), _tr('wl_profiler', 'Flesch Reading Ease (Simplified)'), @@ -379,7 +378,6 @@ def __init__(self, parent): _tr('wl_profiler', 'Rix'), _tr('wl_profiler', 'SMOG Grade'), _tr('wl_profiler', 'Spache Grade Level'), - _tr('wl_profiler', "Szigriszt's Perspicuity Index"), _tr('wl_profiler', 'Wiener Sachtextformel') ] @@ -1178,7 +1176,6 @@ def run(self): wl_measures_readability.coleman_liau_index(self.main, text), wl_measures_readability.dale_chall_readability_score(self.main, text), wl_measures_readability.devereux_readability_index(self.main, text), - wl_measures_readability.fernandez_huertas_readability_score(self.main, text), wl_measures_readability.flesch_kincaid_grade_level(self.main, text), wl_measures_readability.flesch_reading_ease(self.main, text), wl_measures_readability.flesch_reading_ease_simplified(self.main, text), @@ -1195,7 +1192,6 @@ def run(self): wl_measures_readability.rix(self.main, text), wl_measures_readability.smog_grade(self.main, text), wl_measures_readability.spache_grade_level(self.main, text), - wl_measures_readability.szigriszts_perspicuity_index(self.main, text), wl_measures_readability.wiener_sachtextformel(self.main, text) ] else: diff --git a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index c86e25590..d9d10f45b 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -1741,6 +1741,11 @@ def init_settings_default(main): 'measures': { # Settings - 
Measures - Readability 'readability': { + 're': { + 'variant_nld': 'Douma', + 'variant_spa': 'Fernández Huerta' + }, + 'wstf': { 'variant': '1' } diff --git a/wordless/wl_settings/wl_settings_measures.py b/wordless/wl_settings/wl_settings_measures.py index c3f3ea692..372189198 100644 --- a/wordless/wl_settings/wl_settings_measures.py +++ b/wordless/wl_settings/wl_settings_measures.py @@ -31,25 +31,51 @@ def __init__(self, main): self.settings_default = self.main.settings_default['measures']['readability'] self.settings_custom = self.main.settings_custom['measures']['readability'] + # Flesch Reading Ease + self.group_box_re = QGroupBox(self.tr('Flesch Reading Ease'), self) + + self.label_re_variant_nld = QLabel(self.tr('Dutch variant:'), self) + self.combo_box_re_variant_nld = wl_boxes.Wl_Combo_Box(self) + self.label_re_variant_spa = QLabel(self.tr('Spanish variant:'), self) + self.combo_box_re_variant_spa = wl_boxes.Wl_Combo_Box(self) + + self.combo_box_re_variant_nld.addItems([ + "Brouwer's Leesindex A", + 'Douma', + ]) + self.combo_box_re_variant_spa.addItems([ + 'Fernández Huerta', + 'Szigriszt Pazos' + ]) + + self.group_box_re.setLayout(wl_layouts.Wl_Layout()) + self.group_box_re.layout().addWidget(self.label_re_variant_nld, 0, 0) + self.group_box_re.layout().addWidget(self.combo_box_re_variant_nld, 0, 1) + self.group_box_re.layout().addWidget(self.label_re_variant_spa, 1, 0) + self.group_box_re.layout().addWidget(self.combo_box_re_variant_spa, 1, 1) + + self.group_box_re.layout().setColumnStretch(2, 1) + # Wiener Sachtextformel self.group_box_wstf = QGroupBox(self.tr('Wiener Sachtextformel'), self) - self.label_variant = QLabel(self.tr('Variant:'), self) - self.combo_box_variant = wl_boxes.Wl_Combo_Box(self) + self.label_wstf_variant = QLabel(self.tr('Variant:'), self) + self.combo_box_wstf_variant = wl_boxes.Wl_Combo_Box(self) - self.combo_box_variant.addItems(['1', '2', '3', '4']) + self.combo_box_wstf_variant.addItems(['1', '2', '3', '4']) 
self.group_box_wstf.setLayout(wl_layouts.Wl_Layout()) - self.group_box_wstf.layout().addWidget(self.label_variant, 0, 0) - self.group_box_wstf.layout().addWidget(self.combo_box_variant, 0, 1) + self.group_box_wstf.layout().addWidget(self.label_wstf_variant, 0, 0) + self.group_box_wstf.layout().addWidget(self.combo_box_wstf_variant, 0, 1) self.group_box_wstf.layout().setColumnStretch(2, 1) self.setLayout(wl_layouts.Wl_Layout()) - self.layout().addWidget(self.group_box_wstf, 0, 0) + self.layout().addWidget(self.group_box_re, 0, 0) + self.layout().addWidget(self.group_box_wstf, 1, 0) self.layout().setContentsMargins(6, 4, 6, 4) - self.layout().setRowStretch(1, 1) + self.layout().setRowStretch(2, 1) def load_settings(self, defaults = False): if defaults: @@ -57,12 +83,20 @@ def load_settings(self, defaults = False): else: settings = copy.deepcopy(self.settings_custom) + # Flesch Reading Ease + self.combo_box_re_variant_nld.setCurrentText(settings['re']['variant_nld']) + self.combo_box_re_variant_spa.setCurrentText(settings['re']['variant_spa']) + # Wiener Sachtextformel - self.combo_box_variant.setCurrentText(settings['wstf']['variant']) + self.combo_box_wstf_variant.setCurrentText(settings['wstf']['variant']) def apply_settings(self): + # Flesch Reading Ease + self.settings_custom['re']['variant_nld'] = self.combo_box_re_variant_nld.currentText() + self.settings_custom['re']['variant_spa'] = self.combo_box_re_variant_spa.currentText() + # Wiener Sachtextformel - self.settings_custom['wstf']['variant'] = self.combo_box_variant.currentText() + self.settings_custom['wstf']['variant'] = self.combo_box_wstf_variant.currentText() return True