From 875d196976de471d6b65bfa2960b01ccee675498 Mon Sep 17 00:00:00 2001 From: BLKSerene Date: Sat, 18 May 2024 00:55:44 +0800 Subject: [PATCH] Settings: Add Settings - Tables - Dependency Parser; Work Area: Dependency Parser - Sentence - Highlight heads and dependents --- CHANGELOG.md | 2 + doc/doc.md | 54 +++--- tests/tests_checks/test_checks_work_area.py | 40 ++-- tests/tests_settings/test_settings_tables.py | 7 + wordless/wl_checks/wl_checks_work_area.py | 71 ++++--- wordless/wl_concordancer.py | 22 +-- wordless/wl_dependency_parser.py | 40 +++- wordless/wl_nlp/wl_nlp_utils.py | 1 + wordless/wl_results/wl_results_search.py | 2 +- wordless/wl_settings/wl_settings.py | 3 + wordless/wl_settings/wl_settings_default.py | 10 +- wordless/wl_settings/wl_settings_tables.py | 63 +++++- wordless/wl_widgets/wl_tables.py | 193 ++++++++----------- 13 files changed, 299 insertions(+), 209 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85fed6c14..7e29d5fb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ ## [3.5.0](https://github.com/BLKSerene/Wordless/releases/tag/3.5.0) - ??/??/2024 ### 🎉 New Features - Settings: Add Settings - Stop Word Lists - Stop Word List Settings - Case-sensitive +- Settings: Add Settings - Tables - Dependency Parser - Utils: Add Stanza's Sindhi part-of-speech tagger - Utils: Add VADER's sentiment analyzers - Work Area: Add Profiler - Lexical Diversity - Brunét's Index / Honoré's statistic @@ -29,6 +30,7 @@ - Settings: Settings - Part-of-speeach Tagging - Tagsets - Mapping Settings - Allow editing of tagset mapping of spaCy's Catalan, Danish, French, Greek (Modern), Macedonian, Norwegian (Bokmål), Portuguese, Russian, Spanish, and Ukrainian part-of-speech taggers - Settings: Settings - Part-of-speeach Tagging - Tagsets - Mapping Settings - Allow editing of tagset mapping of Stanza's Armenian (Eastern), Armenian (Western), Basque, Buryat (Russia), Danish, French, Greek (Modern), Hebrew (Modern), Hungarian, Ligurian, Manx, 
Marathi, Nigerian Pidgin, Pomak, Portuguese, Russian, Sanskrit, Sindhi, Sorbian (Upper), and Telugu part-of-speech taggers - Utils: Update custom stop word lists +- Work Area: Dependency Parser - Sentence - Highlight heads and dependents ### 📌 Bugfixes - Utils: Fix downloading of Stanza models diff --git a/doc/doc.md b/doc/doc.md index 15029c3cc..3d3752e73 100644 --- a/doc/doc.md +++ b/doc/doc.md @@ -61,17 +61,17 @@ The main window of *Wordless* is divided into several sections: - **1.4 Status Bar**
The *Status Bar* resides at the bottom of the main window. - You can show/hide the *Status Bar* by checking/unchecking **Menu → Preferences → Show Status Bar** + You can show/hide the *Status Bar* by checking/unchecking **Menu Bar → Preferences → Show Status Bar** -You can modify the global scaling factor and font settings of the user interface via **Menu → Preferences → General → User Interface Settings**. +You can modify the global scaling factor and font settings of the user interface via **Menu Bar → Preferences → General → User Interface Settings**. ## [2 File Area](#doc) -In most cases, the first thing to do in *Wordless* is open and select your files to be processed via **Menu → File → Open Files/Folder**. +In most cases, the first thing to do in *Wordless* is open and select your files to be processed via **Menu Bar → File → Open Files/Folder**. Files are loaded, cached and selected automatically after being added to the *File Table*. **Only selected files will be processed by *Wordless***. You can drag and drop files around the *File Table* to change their orders, which would be reflected in the results. -By default, *Wordless* tries to detect the encoding and language settings of all files for you, you should double check and make sure that the settings of each and every file are correct. If you prefer changing file settings manually, you could uncheck **Auto-detect encodings** and/or **Auto-detect languages** in the *Open Files* dialog. The default file settings could be modified via **Menu → Preferences → Settings → Files → Default Settings**. +By default, *Wordless* tries to detect the encoding and language settings of all files for you, you should double check and make sure that the settings of each and every file are correct. If you prefer changing file settings manually, you could uncheck **Auto-detect encodings** and/or **Auto-detect languages** in the *Open Files* dialog. 
The default file settings could be modified via **Menu Bar → Preferences → Settings → Files → Default Settings**. - **2.1 Open Files**
Add one single file or multiple files to the *File Table*. @@ -126,7 +126,7 @@ All statistics are grouped into 5 tables for better readability: Readability, Co The percentage of the number of paragraphs in each file out of the total number of paragraphs in all files. - **3.1.2.3 Count of Sentences**
- The number of sentences in each file. *Wordless* automatically applies the built-in sentence tokenizer according to the language of each file to calculate the number of sentences in each file. You can modify sentence tokenizer settings via **Menu → Preferences → Settings → Sentence Tokenization → Sentence Tokenizer Settings**. + The number of sentences in each file. *Wordless* automatically applies the built-in sentence tokenizer according to the language of each file to calculate the number of sentences in each file. You can modify sentence tokenizer settings via **Menu Bar → Preferences → Settings → Sentence Tokenization → Sentence Tokenizer Settings**. - **3.1.2.4 Count of Sentences %**
The percentage of the number of sentences in each file out of the total number of sentences in all files. @@ -138,7 +138,7 @@ All statistics are grouped into 5 tables for better readability: Readability, Co The percentage of the number of sentence segments in each file out of the total number of sentence segments in all files. - **3.1.2.7 Count of Tokens**
- The number of tokens in each file. *Wordless* automatically applies the built-in word tokenizer according to the language of each file to calculate the number of tokens in each file. You can modify word tokenizer settings via **Menu → Preferences → Settings → Word Tokenization → Word Tokenizer Settings**. + The number of tokens in each file. *Wordless* automatically applies the built-in word tokenizer according to the language of each file to calculate the number of tokens in each file. You can modify word tokenizer settings via **Menu Bar → Preferences → Settings → Word Tokenization → Word Tokenizer Settings**. You can specify what should be counted as a "token" via **Token Settings** in the *Settings Area* @@ -152,7 +152,7 @@ All statistics are grouped into 5 tables for better readability: Readability, Co The percentage of the number of token types in each file out of the total number of token types in all files. - **3.1.2.11 Count of Syllables**
- The number of syllables in each files. *Wordless* automatically applies the built-in syllable tokenizer according to the language of each file to calculate the number of syllable in each file. You can modify syllable tokenizer settings via **Menu → Preferences → Settings → Syllable Tokenization → Syllable Tokenizer Settings**. + The number of syllables in each file. *Wordless* automatically applies the built-in syllable tokenizer according to the language of each file to calculate the number of syllables in each file. You can modify syllable tokenizer settings via **Menu Bar → Preferences → Settings → Syllable Tokenization → Syllable Tokenizer Settings**. - **3.1.2.12 Count of Syllables %**
The percentage of the number of syllables in each file out of the total number of syllable in all files. @@ -322,7 +322,7 @@ All statistics are grouped into 5 tables for better readability: Readability, Co ### [3.2 Concordancer](#doc) In *Concordancer*, you can search for tokens in different files and generate concordance lines. You can adjust settings for data generation via **Generation Settings**. -After the concordance lines are generated and displayed in the table, you can sort the results by clicking **Sort Results** or search in results by clicking **Search in Results**, both buttons residing at the right corner of the *Results Area*. +After the concordance lines are generated and displayed in the table, you can sort the results by clicking **Sort Results** or search in results by clicking **Search in Results**, both buttons residing at the right corner of the *Results Area*. Highlight colors for sorting can be modified via **Menu Bar → Preferences → Settings → Tables → Concordancer → Sorting**. You can generate concordance plots for all search terms. You can modify the settings for the generated figure via **Figure Settings**. @@ -384,7 +384,9 @@ After the parallel concordance lines are generated and displayed in the table, y - **3.3.3 Parallel Units**
The parallel unit (paragraph) where the search term is found in each file. - + Highlight colors for search terms can be modified via **Menu Bar → Preferences → Settings → Tables → Parallel Concordancer → Highlight Color Settings**. + + ### [3.4 Dependency Parser](#doc) > [!NOTE] > Added in *Wordless* 3.0.0 @@ -410,6 +412,8 @@ You can select lines in the *Results Area* and then click *Generate Figure* to s - **3.4.5 Sentence**
The sentence where the dependency structure is found. + Highlight colors for the head and the dependent can be modified via **Menu Bar → Preferences → Settings → Tables → Dependency Parser → Highlight Color Settings**. + - **3.4.6 Sentence No.**
The position of the sentence where the dependency structure is found. @@ -431,7 +435,7 @@ You can further filter the results as you see fit by clicking **Filter Results** You can generate line charts or word clouds for wordlists using any statistics. You can modify the settings for the generated figure via **Figure Settings**. - **3.5.1 Rank**
- The rank of the token sorted by its frequency in the first file in descending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. + The rank of the token sorted by its frequency in the first file in descending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu Bar → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. - **3.5.2 Token**
You can specify what should be counted as a "token" via **Token Settings**. @@ -470,7 +474,7 @@ You can generate line charts or word clouds for n-grams using any statistics. Yo You can further filter the results as you see fit by clicking **Filter Results** or search in the results for the part that might be of interest to you by clicking **Search in Results**, both buttons residing at the right corner of the *Results Area*. - **3.6.1 Rank**
- The rank of the n-gram sorted by its frequency in the first file in descending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. + The rank of the n-gram sorted by its frequency in the first file in descending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu Bar → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. - **3.6.2 N-gram**
You can specify what should be counted as a "n-gram" via **Token Settings**. @@ -502,7 +506,7 @@ You can generate line charts, word clouds, and network graphs for patterns of co You can further filter the results as you see fit by clicking **Filter Results** or search in the results for the part that might be of interest to you by clicking **Search in Results**, both buttons residing at the right corner of the *Results Area*. - **3.7.1 Rank**
- The rank of the collocating token sorted by the p-value of the significance test conducted on the node and the collocating token in the first file in ascending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. + The rank of the collocating token sorted by the p-value of the significance test conducted on the node and the collocating token in the first file in ascending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu Bar → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. - **3.7.2 Node**
The search term. You can specify what should be counted as a "token" via **Token Settings**. @@ -550,7 +554,7 @@ You can generate line charts or word clouds for patterns of colligation using an You can further filter the results as you see fit by clicking **Filter Results** or search in the results for the part that might be of interest to you by clicking **Search in Results**, both buttons residing at the right corner of the *Results Area*. - **3.8.1 Rank**
- The rank of the collocating part of speech sorted by the p-value of the significance test conducted on the node and the collocating part of speech in the first file in ascending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. + The rank of the collocating part of speech sorted by the p-value of the significance test conducted on the node and the collocating part of speech in the first file in ascending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu Bar → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. - **3.8.2 Node**
The search term. You can specify what should be counted as a "token" via **Token Settings**. @@ -596,7 +600,7 @@ You can generate line charts or word clouds for keywords using any statistics. Y You can further filter the results as you see fit by clicking **Filter Results** or search in the results for the part that might be of interest to you by clicking **Search in Results**, both buttons residing at the right corner of the *Results Area*. - **3.9.1 Rank**
- The rank of the keyword sorted by the p-value of the significance test conducted on the keyword in the first file in ascending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. + The rank of the keyword sorted by the p-value of the significance test conducted on the keyword in the first file in ascending order (by default). You can sort the results again by clicking the column headers. You can use continuous numbering after tied ranks (eg. 1/1/1/2/2/3 instead of 1/1/1/4/4/6) by checking **Menu Bar → Preferences → Settings → Tables → Rank Settings → Continue numbering after ties**. - **3.9.2 Keyword**
The candidates of potential keywords. You can specify what should be counted as a "token" via **Token Settings**. @@ -778,7 +782,7 @@ Word documents¹²³ |\*.docx XML files¹ |\*.xml > [!IMPORTANT] -> 1. Non-TXT files will be automatically converted to TXT files when being imported into *Wordless*. You can check the converted files under folder **imports** at the installation location of *Wordless* on your computer (as for macOS users, right click **Wordless.app**, select **Show Package Contents** and navigate to **Contents/MacOS/imports/**). You can change this location via **Menu → Preferences → Settings → General → Import → Temporary Files → Default path**. +> 1. Non-TXT files will be automatically converted to TXT files when being imported into *Wordless*. You can check the converted files under folder **imports** at the installation location of *Wordless* on your computer (as for macOS users, right click **Wordless.app**, select **Show Package Contents** and navigate to **Contents/MacOS/imports/**). You can change this location via **Menu Bar → Preferences → Settings → General → Import → Temporary Files → Default path**. > 1. It is **not recommended to directly import non-text files into *Wordless***, as accuracy of text extraction could not be guaranteed and unintended data loss might occur, for which reason users are encouraged to **convert their files using specialized tools and make discreet decisions** on which part of your data should be kept, transformed, and discarded. > 1. The legacy Microsoft 97-03 Word documents (\*.doc) and Microsoft 97-03 Excel Workbooks (\*.xls) are not supported. > 1. *Wordless* could only extract text from **text-searchable PDF files** and is not capable of automatically converting scanned PDF files into text-searchable ones. @@ -1119,7 +1123,7 @@ Measure of Readability|Formula|Supported Languages Al-Heeti's Readability Prediction Formula¹
([Al-Heeti, 1984, pp. 102, 104, 106](#ref-al-heeti-1984))|![Formula](/doc/measures/readability/rd.svg)|**Arabic** Automated Arabic Readability Index
([Al-Tamimi et al., 2013](#ref-al-tamimi-et-al-2013))|![Formula](/doc/measures/readability/aari.svg)|**Arabic** Automated Readability Index¹
([Smith & Senter, 1967, p. 8](#ref-smith-senter-1967)
Navy: [Kincaid et al., 1975, p. 14](#ref-kincaid-et-al-1975))|![Formula](/doc/measures/readability/ari.svg)|All languages -Bormuth's Cloze Mean & Grade Placement
([Bormuth, 1969, pp. 152, 160](#ref-bormuth-1969))|![Formula](/doc/measures/readability/bormuths_cloze_mean_gp.svg)
where **C** is the cloze criterion score, whose value could be changed via **Menu → Preferences → Settings → Measures → Readability → Bormuth's Grade Placement - Cloze criterion score**|**English** +Bormuth's Cloze Mean & Grade Placement
([Bormuth, 1969, pp. 152, 160](#ref-bormuth-1969))|![Formula](/doc/measures/readability/bormuths_cloze_mean_gp.svg)
where **C** is the cloze criterion score, whose value could be changed via **Menu Bar → Preferences → Settings → Measures → Readability → Bormuth's Grade Placement → Cloze criterion score**|**English** Coleman-Liau Index
([Coleman & Liau, 1975](#ref-coleman-liau-1975))|![Formula](/doc/measures/readability/coleman_liau_index.svg)|All languages Coleman's Readability Formula¹
([Liau et al., 1976](#ref-liau-et-al-1976))|![Formula](/doc/measures/readability/colemans_readability_formula.svg)|All languages²³ Dale-Chall Readability Formula¹
([Dale & Chall, 1948a](#ref-dale-chall-1948a); [Dale & Chall, 1948b](#ref-dale-chall-1948b)
Powers-Sumner-Kearl: [Powers et al., 1958](#ref-powers-et-al-1958)
New: [Chall & Dale, 1995](#ref-chall-dale-1995))|![Formula](/doc/measures/readability/x_c50.svg)|**English** @@ -1156,7 +1160,7 @@ Measure of Readability|Formula|Supported Languages Wheeler & Smith's Readability Formula
([Wheeler & Smith, 1954](#ref-wheeler-smith-1954))|![Formula](/doc/measures/readability/wheeler_smiths_readability_formula.svg)
where **NumUnits** is the number of sentence segments ending in periods, question marks, exclamation marks, colons, semicolons, and dashes.|All languages² > [!NOTE] -> 1. Variants available and can be selected via **Menu - Preferences - Settings - Measures - Readability** +> 1. Variants available and can be selected via **Menu Bar → Preferences → Settings → Measures → Readability** > 1. Requires **built-in syllable tokenization support** > 1. Requires **built-in part-of-speech tagging support** @@ -1248,12 +1252,12 @@ Measure of Lexical Diversity|Formula Corrected TTR
([Carroll, 1964](#ref-carroll-1964))|![Formula](/doc/measures/lexical_diversity/cttr.svg) Fisher's Index of Diversity
([Fisher et al., 1943](#ref-fisher-et-al-1943))|![Formula](/doc/measures/lexical_diversity/fishers_index_of_diversity.svg)
where W₋₁ is the -1 branch of the [Lambert W function](https://en.wikipedia.org/wiki/Lambert_W_function) Herdan's Vₘ
([Herdan, 1955](#ref-herdan-1955))|![Formula](/doc/measures/lexical_diversity/herdans_vm.svg) -HD-D
([McCarthy & Jarvis, 2010](#ref-mccarthy-jarvis-2010))|For detailed calculation procedures, see reference.
The sample size could be modified via **Menu → Preferences → Settings → Measures → Type-token Ratio → HD-D → Sample size**. +HD-D
([McCarthy & Jarvis, 2010](#ref-mccarthy-jarvis-2010))|For detailed calculation procedures, see reference.
The sample size could be modified via **Menu Bar → Preferences → Settings → Measures → Type-token Ratio → HD-D → Sample size**. Honoré's statistic
([Honoré, 1979](#ref-honore-1979))|![Formula](/doc/measures/lexical_diversity/honores_stat.svg) LogTTR¹
(Herdan: [Herdan, 1960, p. 28](#ref-herdan-1960)
Somers: [Somers, 1966](#ref-somers-1966)
Rubet: [Dugast, 1979](#ref-dugast-1979)
Maas: [Maas, 1972](#ref-maas-1972)
Dugast: [Dugast, 1978](#ref-dugast-1978); [Dugast, 1979](#ref-dugast-1979))|![Formula](/doc/measures/lexical_diversity/logttr.svg) -Mean Segmental TTR
([Johnson, 1944](#ref-johnson-1944))|![Formula](/doc/measures/lexical_diversity/msttr.svg)
where **n** is the number of equal-sized segment, the length of which could be modified via **Menu → Preferences → Settings → Measures → Type-token Ratio → Mean Segmental TTR → Number of tokens in each segment**, **NumTypesSegᵢ** is the number of token types in the **i**-th segment, and **NumTokensSegᵢ** is the number of tokens in the **i**-th segment. -Measure of Textual Lexical Diversity
([McCarthy, 2005, pp. 95–96, 99–100](#ref-mccarthy-2005); [McCarthy & Jarvis, 2010](#ref-mccarthy-jarvis-2010))|For detailed calculation procedures, see references.
The factor size could be modified via **Menu → Preferences → Settings → Measures → Type-token Ratio → Measure of Textual Lexical Diversity → Factor size**. -Moving-average TTR
([Covington & McFall, 2010](#ref-covington-mcfall-2010))|![Formula](/doc/measures/lexical_diversity/mattr.svg)
where **w** is the window size which could be modified via **Menu → Preferences → Settings → Measures → Type-token Ratio → Moving-average TTR → Window size**, **NumTypesWindowₚ** is the number of token types within the moving window starting at position **p**, and **NumTokensWindowₚ** is the number of tokens within the moving window starting at position **p**. +Mean Segmental TTR
([Johnson, 1944](#ref-johnson-1944))|![Formula](/doc/measures/lexical_diversity/msttr.svg)
where **n** is the number of equal-sized segment, the length of which could be modified via **Menu Bar → Preferences → Settings → Measures → Type-token Ratio → Mean Segmental TTR → Number of tokens in each segment**, **NumTypesSegᵢ** is the number of token types in the **i**-th segment, and **NumTokensSegᵢ** is the number of tokens in the **i**-th segment. +Measure of Textual Lexical Diversity
([McCarthy, 2005, pp. 95–96, 99–100](#ref-mccarthy-2005); [McCarthy & Jarvis, 2010](#ref-mccarthy-jarvis-2010))|For detailed calculation procedures, see references.
The factor size could be modified via **Menu Bar → Preferences → Settings → Measures → Type-token Ratio → Measure of Textual Lexical Diversity → Factor size**. +Moving-average TTR
([Covington & McFall, 2010](#ref-covington-mcfall-2010))|![Formula](/doc/measures/lexical_diversity/mattr.svg)
where **w** is the window size which could be modified via **Menu Bar → Preferences → Settings → Measures → Type-token Ratio → Moving-average TTR → Window size**, **NumTypesWindowₚ** is the number of token types within the moving window starting at position **p**, and **NumTokensWindowₚ** is the number of tokens within the moving window starting at position **p**. Popescu-Mačutek-Altmann's B₁/B₂/B₃/B₄/B₅
([Popescu et al., 2008](#ref-popescu-et-al-2008))|![Formula](/doc/measures/lexical_diversity/popescu_macutek_altmanns_b1_b2_b3_b4_b5.svg) Popescu's R₁
([Popescu, 2009, pp. 18, 30, 33](#ref-popescu-2009))|For detailed calculation procedures, see reference. Popescu's R₂
([Popescu, 2009, pp. 35–36, 38](#ref-popescu-2009))|For detailed calculation procedures, see reference. @@ -1269,12 +1273,12 @@ Measure of Lexical Diversity|Formula Yule's Index of Diversity
([Williams, 1970, p. 100](#ref-williams-1970))|![Formula](/doc/measures/lexical_diversity/yules_index_of_diversity.svg) > [!NOTE] -> 1. Variants available and can be selected via **Menu - Preferences - Settings - Measures - Lexical Diversity** +> 1. Variants available and can be selected via **Menu Bar → Preferences → Settings → Measures → Lexical Diversity** #### [4.4.3 Measures of Dispersion and Adjusted Frequency](#doc) -For parts-based measures, each file is divided into **n** (whose value you could modify via **Menu → Preferences → Settings → Measures → Dispersion / Adjusted Frequency → General Settings → Divide each file into subsections**) sub-sections and the frequency of the word in each part is counted and denoted by **F₁**, **F₂**, **F₃**, ..., **Fₙ** respectively. The total frequency of the word in each file is denoted by **F** and the mean value of the frequencies over all sub-sections is denoted by **F̅**. +For parts-based measures, each file is divided into **n** (whose value you could modify via **Menu Bar → Preferences → Settings → Measures → Dispersion / Adjusted Frequency → General Settings → Divide each file into subsections**) sub-sections and the frequency of the word in each part is counted and denoted by **F₁**, **F₂**, **F₃**, ..., **Fₙ** respectively. The total frequency of the word in each file is denoted by **F** and the mean value of the frequencies over all sub-sections is denoted by **F̅**. For distance-based measures, the distance between each pair of subsequent occurrences of the word is calculated and denoted by **d₁**, **d₂**, **d₃**, ..., **dF** respectively. The total number of tokens in each file is denoted by **N**. @@ -1343,7 +1347,7 @@ Measure of Dispersion (Parts-based)|Measure of Adjusted Frequency (Parts-based)| -----------------------------------|-------------------------------------------|------- Carroll's D₂
([Carroll, 1970](#ref-carroll-1970))|Carroll's Uₘ
([Carroll, 1970](#ref-carroll-1970))|![Formula](/doc/measures/dispersion_adjusted_frequency/carrolls_um.svg)  |Engwall's FM
([Engwall, 1974](#ref-engwall-1974))|![Formula](/doc/measures/dispersion_adjusted_frequency/engwalls_fm.svg)
where **R** is the number of sub-sections in which the word appears at least once. -Gries's DP
([Gries, 2008](#ref-gries-2008); [Lijffijt & Gries, 2012](#ref-lijffijt-gries-2012))||![Formula](/doc/measures/dispersion_adjusted_frequency/griess_dp.svg)

* Normalization is applied by default, which behavior you could change via **Menu → Preferences → Settings → Measures → Dispersion → Gries's DP → Apply normalization**. +Gries's DP
([Gries, 2008](#ref-gries-2008); [Lijffijt & Gries, 2012](#ref-lijffijt-gries-2012))||![Formula](/doc/measures/dispersion_adjusted_frequency/griess_dp.svg)

* Normalization is applied by default, which behavior you could change via **Menu Bar → Preferences → Settings → Measures → Dispersion → Gries's DP → Apply normalization**. Juilland's D
([Juilland & Chang-Rodrigues, 1964](#ref-juilland-chang-rodrigues-1964))|Juilland's U
([Juilland & Chang-Rodrigues, 1964](#ref-juilland-chang-rodrigues-1964))|![Formula](/doc/measures/dispersion_adjusted_frequency/juillands_u.svg)  |Kromer's UR
([Kromer, 2003](#ref-kromer-2003))|![Formula](/doc/measures/dispersion_adjusted_frequency/kromers_ur.svg)
where **ψ** is the [digamma function](https://en.wikipedia.org/wiki/Digamma_function), and **C** is the [Euler–Mascheroni constant](https://en.wikipedia.org/wiki/Euler%E2%80%93Mascheroni_constant). Lyne's D₃
([Lyne, 1985](#ref-lyne-1985))||![Formula](/doc/measures/dispersion_adjusted_frequency/lynes_d3.svg) @@ -1509,7 +1513,7 @@ Measure of Effect Size|Formula Dice's Coefficient
([Smadja et al., 1996](#ref-smadja-et-al-1996))|![Formula](/doc/measures/effect_size/dices_coeff.svg) Difference Coefficient
([Hofland & Johanson, 1982](#ref-hofland-johanson-1982); [Gabrielatos, 2018](#ref-gabrielatos-2018))|![Formula](/doc/measures/effect_size/diff_coeff.svg) Jaccard Index
([Dunning, 1998](#ref-dunning-1998))|![Formula](/doc/measures/effect_size/jaccard_index.svg) -Kilgarriff's Ratio
([Kilgarriff, 2009](#ref-kilgarriff-2009))|![Formula](/doc/measures/effect_size/kilgarriffs_ratio.svg)
where **α** is the smoothing parameter, whose value could be changed via **Menu → Preferences → Settings → Measures → Effect Size → Kilgarriff's Ratio → Smoothing Parameter**. +Kilgarriff's Ratio
([Kilgarriff, 2009](#ref-kilgarriff-2009))|![Formula](/doc/measures/effect_size/kilgarriffs_ratio.svg)
where **α** is the smoothing parameter, whose value could be changed via **Menu Bar → Preferences → Settings → Measures → Effect Size → Kilgarriff's Ratio → Smoothing Parameter**. Log Ratio
([Hardie, 2014](#ref-hardie-2014))|![Formula](/doc/measures/effect_size/log_ratio.svg) Log-Frequency Biased MD
([Thanopoulos et al., 2002](#ref-thanopoulos-et-al-2002))|![Formula](/doc/measures/effect_size/lfmd.svg) logDice
([Rychlý, 2008](#ref-rychly-2008))|![Formula](/doc/measures/effect_size/log_dice.svg) diff --git a/tests/tests_checks/test_checks_work_area.py b/tests/tests_checks/test_checks_work_area.py index 9673cf315..7bcf41d08 100644 --- a/tests/tests_checks/test_checks_work_area.py +++ b/tests/tests_checks/test_checks_work_area.py @@ -42,6 +42,9 @@ def test_wl_status_bar_msg_success_generate_table(): def test_wl_status_bar_msg_success_generate_fig(): wl_checks_work_area.wl_status_bar_msg_success_generate_fig(main) +def test_wl_status_bar_msg_success_exp_table(): + wl_checks_work_area.wl_status_bar_msg_success_exp_table(main) + def test_wl_status_bar_msg_success_no_results(): wl_checks_work_area.wl_status_bar_msg_success_no_results(main) @@ -51,6 +54,9 @@ def test_wl_status_bar_msg_err_download_model(): def test_wl_status_bar_msg_err_fatal(): wl_checks_work_area.wl_status_bar_msg_err_fatal(main) +def test_wl_status_bar_msg_file_access_denied(): + wl_checks_work_area.wl_status_bar_msg_file_access_denied(main) + def test_check_search_terms(): assert wl_checks_work_area.check_search_terms(main, { 'multi_search_mode': False, @@ -81,37 +87,40 @@ def test_check_nlp_support(): assert wl_checks_work_area.check_nlp_support( main, nlp_utils = ['pos_taggers'], - files = [file_eng_us], - test = True + files = [file_eng_us] ) assert not wl_checks_work_area.check_nlp_support( main, nlp_utils = ['pos_taggers'], - files = [file_xxx], - test = True + files = [file_xxx] ) main.settings_custom['file_area']['files_open'] = [file_eng_us] main.settings_custom['file_area']['files_open_ref'] = [file_xxx] - assert wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'], test = True) - assert not wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'], ref = True, test = True) + assert wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers']) + assert not wl_checks_work_area.check_nlp_support(main, nlp_utils = ['pos_taggers'], ref = True) def 
test_check_results(): - assert wl_checks_work_area.check_results(main, '', 'test', test = True) - assert not wl_checks_work_area.check_results(main, 'test', '', test = True) + assert wl_checks_work_area.check_results(main, '', 'test') + assert not wl_checks_work_area.check_results(main, 'test', '') def test_check_results_download_model(): - wl_checks_work_area.check_results_download_model(main, '', 'test', test = True) - wl_checks_work_area.check_results_download_model(main, 'test', '', test = True) + wl_checks_work_area.check_results_download_model(main, '', 'test') + wl_checks_work_area.check_results_download_model(main, 'test', '') def test_check_err_table(): - wl_checks_work_area.check_err_table(main, '', test = True) - wl_checks_work_area.check_err_table(main, 'test', test = True) + wl_checks_work_area.check_err_table(main, '') + wl_checks_work_area.check_err_table(main, 'test') def test_check_err_fig(): - wl_checks_work_area.check_err_fig(main, '', test = True) - wl_checks_work_area.check_err_fig(main, 'test', test = True) + wl_checks_work_area.check_err_fig(main, '') + wl_checks_work_area.check_err_fig(main, 'test') + +def test_check_err_exp_table(): + wl_checks_work_area.check_err_exp_table(main, '', 'test') + wl_checks_work_area.check_err_exp_table(main, 'permission_err', 'test') + wl_checks_work_area.check_err_exp_table(main, 'err', 'test') if __name__ == '__main__': test_wl_msg_box_missing_search_terms() @@ -122,9 +131,11 @@ def test_check_err_fig(): test_wl_status_bar_msg_missing_search_terms() test_wl_status_bar_msg_success_generate_table() test_wl_status_bar_msg_success_generate_fig() + test_wl_status_bar_msg_success_exp_table() test_wl_status_bar_msg_success_no_results() test_wl_status_bar_msg_err_download_model() test_wl_status_bar_msg_err_fatal() + test_wl_status_bar_msg_file_access_denied() test_check_search_terms() test_check_nlp_support() @@ -132,3 +143,4 @@ def test_check_err_fig(): test_check_results_download_model() test_check_err_table() 
test_check_err_fig() + test_check_err_exp_table() diff --git a/tests/tests_settings/test_settings_tables.py b/tests/tests_settings/test_settings_tables.py index 629274d89..213fa6973 100644 --- a/tests/tests_settings/test_settings_tables.py +++ b/tests/tests_settings/test_settings_tables.py @@ -39,7 +39,14 @@ def test_wl_settings_tables_parallel_concordancer(): settings_tables_parallel_concordancer.load_settings(defaults = True) settings_tables_parallel_concordancer.apply_settings() +def test_wl_settings_tables_dependency_parser(): + settings_tables_dependency_parser = wl_settings_tables.Wl_Settings_Tables_Dependency_Parser(main) + settings_tables_dependency_parser.load_settings() + settings_tables_dependency_parser.load_settings(defaults = True) + settings_tables_dependency_parser.apply_settings() + if __name__ == '__main__': test_wl_settings_tables() test_wl_settings_tables_concordancer() test_wl_settings_tables_parallel_concordancer() + test_wl_settings_tables_dependency_parser() diff --git a/wordless/wl_checks/wl_checks_work_area.py b/wordless/wl_checks/wl_checks_work_area.py index f632d0ec4..af196d226 100644 --- a/wordless/wl_checks/wl_checks_work_area.py +++ b/wordless/wl_checks/wl_checks_work_area.py @@ -63,6 +63,9 @@ def wl_status_bar_msg_success_generate_table(main): def wl_status_bar_msg_success_generate_fig(main): main.statusBar().showMessage(_tr('wl_checks_work_area', 'Figure generated successfully.')) +def wl_status_bar_msg_success_exp_table(main): + main.statusBar().showMessage(_tr('wl_checks_work_area', 'Table exported successfully.')) + def wl_status_bar_msg_success_no_results(main): main.statusBar().showMessage(_tr('wl_checks_work_area', 'No results to display.')) @@ -72,6 +75,9 @@ def wl_status_bar_msg_err_download_model(main): def wl_status_bar_msg_err_fatal(main): main.statusBar().showMessage(_tr('wl_checks_work_area', 'A fatal error has just occurred!')) +def wl_status_bar_msg_file_access_denied(main): + 
main.statusBar().showMessage(_tr('wl_checks_work_area', 'File access denied!')) + def check_search_terms(main, search_settings, show_warning = True): if ( (not search_settings['multi_search_mode'] and search_settings['search_term']) @@ -94,7 +100,7 @@ def check_search_terms(main, search_settings, show_warning = True): 'dependency_parsers': _tr('wl_checks_work_area', 'Dependency parsing') } -def check_nlp_support(main, nlp_utils, files = None, ref = False, test = False): +def check_nlp_support(main, nlp_utils, files = None, ref = False): support_ok = True nlp_utils_no_support = [] @@ -147,11 +153,7 @@ def check_nlp_support(main, nlp_utils, files = None, ref = False, test = False): ) dialog_err_files.table_err_files.enable_updates() - - if test: - dialog_err_files.open() - else: - dialog_err_files.exec_() + dialog_err_files.open() wl_status_bar_msg_lang_support_unavailable(main) @@ -159,17 +161,13 @@ def check_nlp_support(main, nlp_utils, files = None, ref = False, test = False): return support_ok -def check_results(main, err_msg, results, test = False): +def check_results(main, err_msg, results): results_ok = True if err_msg: results_ok = False - if test: - wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open() - else: - wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).exec_() - + wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open() wl_status_bar_msg_err_fatal(main) elif not any(results): results_ok = False @@ -179,7 +177,7 @@ def check_results(main, err_msg, results, test = False): return results_ok -def check_results_download_model(main, err_msg, model_name = '', test = False): +def check_results_download_model(main, err_msg, model_name = ''): results_ok = True try: @@ -195,35 +193,50 @@ def check_results_download_model(main, err_msg, model_name = '', test = False): err_msg = traceback.format_exc() if err_msg: - if test: - wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg).open() - else: - wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, 
err_msg).exec_() - + wl_dialogs_errs.Wl_Dialog_Err_Download_Model(main, err_msg).open() wl_status_bar_msg_err_download_model(main) results_ok = False return results_ok -def check_err_table(main, err_msg, test = False): +def check_err_table(main, err_msg): if err_msg: - if test: - wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open() - else: - wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).exec_() - + wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open() wl_status_bar_msg_err_fatal(main) else: wl_status_bar_msg_success_generate_table(main) -def check_err_fig(main, err_msg, test = False): +def check_err_fig(main, err_msg): if err_msg: - if test: - wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open() - else: - wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).exec_() + wl_status_bar_msg_err_fatal(main) + wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open() wl_status_bar_msg_err_fatal(main) else: wl_status_bar_msg_success_generate_fig(main) + +def check_err_exp_table(main, err_msg, file_path): + if err_msg: + if err_msg == 'permission_err': + wl_msg_boxes.Wl_Msg_Box_Info( + main, + title = _tr('wl_checks_work_area', 'File Access Denied'), + text = _tr('wl_checks_work_area', ''' +
Access to "{}" is denied, please specify another location or close the file and try again.
+ ''').format(file_path) + ).open() + else: + wl_dialogs_errs.Wl_Dialog_Err_Fatal(main, err_msg).open() + + wl_status_bar_msg_file_access_denied(main) + else: + wl_msg_boxes.Wl_Msg_Box_Info( + main, + title = _tr('wl_checks_work_area', 'Export Completed'), + text = _tr('wl_checks_work_area', ''' +
The table has been successfully exported to "{}".
+ ''').format(file_path) + ).open() + + wl_status_bar_msg_success_exp_table(main) diff --git a/wordless/wl_concordancer.py b/wordless/wl_concordancer.py index 653c84ece..446e8fc09 100644 --- a/wordless/wl_concordancer.py +++ b/wordless/wl_concordancer.py @@ -518,6 +518,16 @@ def update_gui_table(self, err_msg, concordance_lines): no_para, len_paras = concordance_line[7] file_name = concordance_line[8] + # Left + self.setIndexWidget( + self.model().index(i, 0), + wl_labels.Wl_Label_Html(' '.join(left_tokens_raw), self.main) + ) + self.indexWidget(self.model().index(i, 0)).setAlignment(Qt.AlignRight | Qt.AlignVCenter) + + self.indexWidget(self.model().index(i, 0)).tokens_raw = left_tokens_raw + self.indexWidget(self.model().index(i, 0)).tokens_search = left_tokens_search + # Node label_node = wl_labels.Wl_Label_Html( f''' @@ -529,23 +539,11 @@ def update_gui_table(self, err_msg, concordance_lines): ) self.setIndexWidget(self.model().index(i, 1), label_node) - self.indexWidget(self.model().index(i, 1)).setAlignment(Qt.AlignHCenter | Qt.AlignVCenter) self.indexWidget(self.model().index(i, 1)).tokens_raw = node_tokens_raw self.indexWidget(self.model().index(i, 1)).tokens_search = node_tokens_search - # Left - self.setIndexWidget( - self.model().index(i, 0), - wl_labels.Wl_Label_Html(' '.join(left_tokens_raw), self.main) - ) - - self.indexWidget(self.model().index(i, 0)).setAlignment(Qt.AlignRight | Qt.AlignVCenter) - - self.indexWidget(self.model().index(i, 0)).tokens_raw = left_tokens_raw - self.indexWidget(self.model().index(i, 0)).tokens_search = left_tokens_search - # Right self.setIndexWidget( self.model().index(i, 2), diff --git a/wordless/wl_dependency_parser.py b/wordless/wl_dependency_parser.py index 6dc0d83fe..fde297ae4 100644 --- a/wordless/wl_dependency_parser.py +++ b/wordless/wl_dependency_parser.py @@ -29,9 +29,9 @@ from wordless.wl_checks import wl_checks_work_area from wordless.wl_dialogs import wl_dialogs_misc -from wordless.wl_nlp import 
wl_dependency_parsing, wl_matching, wl_texts, wl_token_processing +from wordless.wl_nlp import wl_dependency_parsing, wl_matching, wl_token_processing from wordless.wl_utils import wl_misc, wl_threading -from wordless.wl_widgets import wl_layouts, wl_tables, wl_widgets +from wordless.wl_widgets import wl_labels, wl_layouts, wl_tables, wl_widgets _tr = QCoreApplication.translate @@ -390,18 +390,26 @@ def update_gui_table(self, err_msg, results): self.model().setItem(i, 0, wl_tables.Wl_Table_Item(head)) # Dependent self.model().setItem(i, 1, wl_tables.Wl_Table_Item(dependent)) + # Dependency Relation self.model().setItem(i, 2, wl_tables.Wl_Table_Item(dependency_relation)) + # Dependency Distance self.set_item_num(i, 3, dependency_len) self.set_item_num(i, 4, numpy.abs(dependency_len)) + # Sentence - self.model().setItem(i, 5, wl_tables.Wl_Table_Item(' '.join(sentence_tokens_raw))) - self.model().item(i, 5).tokens_raw = sentence_tokens_raw - self.model().item(i, 5).tokens_search = sentence_tokens_search + self.setIndexWidget( + self.model().index(i, 5), + wl_labels.Wl_Label_Html(' '.join(sentence_tokens_raw), self.main) + ) + self.indexWidget(self.model().index(i, 5)).tokens_raw = sentence_tokens_raw + self.indexWidget(self.model().index(i, 5)).tokens_search = sentence_tokens_search + # Sentence No. 
self.set_item_num(i, 6, no_sentence) self.set_item_num(i, 7, no_sentence, len_sentences) + # File self.model().setItem(i, 8, wl_tables.Wl_Table_Item(file)) @@ -495,6 +503,9 @@ def run(self): len_sentences = len(offsets_sentences) i_token = 0 + head_color = self.main.settings_custom['tables']['dependency_parser']['highlight_color_settings']['head_color'] + dependent_color = self.main.settings_custom['tables']['dependency_parser']['highlight_color_settings']['dependent_color'] + for para in text.tokens_multilevel: for sentence in para: sentence = list(wl_misc.flatten_list(sentence)) @@ -523,7 +534,24 @@ def run(self): no_sentence = bisect.bisect(offsets_sentences, j) # Sentence - sentence_tokens_raw = wl_texts.to_display_texts(sentence) + sentence_tokens_raw = [] + + for sentence_token in sentence: + if sentence_token == head: + sentence_tokens_raw.append(f''' + + {sentence_token.display_text()} + + ''') + elif sentence_token == token: + sentence_tokens_raw.append(f''' + + {sentence_token.display_text()} + + ''') + else: + sentence_tokens_raw.append(sentence_token.display_text()) + # Remove empty tokens for searching in results sentence_tokens_search = [token for token in sentence if token] diff --git a/wordless/wl_nlp/wl_nlp_utils.py b/wordless/wl_nlp/wl_nlp_utils.py index f52db2463..bff7ccad0 100644 --- a/wordless/wl_nlp/wl_nlp_utils.py +++ b/wordless/wl_nlp/wl_nlp_utils.py @@ -111,6 +111,7 @@ def get_langs_stanza(main, util_type): return langs_stanza +@wl_misc.log_timing def check_models(main, langs, lang_utils = None): def update_gui_stanza(main, err_msg): nonlocal models_ok diff --git a/wordless/wl_results/wl_results_search.py b/wordless/wl_results/wl_results_search.py index 2b574d53b..3c09c0ba1 100644 --- a/wordless/wl_results/wl_results_search.py +++ b/wordless/wl_results/wl_results_search.py @@ -372,7 +372,7 @@ def run(self): ] for col in cols_to_search: - # Concordancer - Left, Node, Right & Parallel Concordancer - Parallel Unit + # Concordancer - Left, 
Node, Right / Parallel Concordancer - Parallel Unit / Dependency Parser - Sentence if table.indexWidget(table.model().index(0, col)): for row in rows_to_search: results[(row, col)] = table.indexWidget(table.model().index(row, col)).tokens_search diff --git a/wordless/wl_settings/wl_settings.py b/wordless/wl_settings/wl_settings.py index 022bc1129..f8b762f47 100644 --- a/wordless/wl_settings/wl_settings.py +++ b/wordless/wl_settings/wl_settings.py @@ -99,6 +99,7 @@ def __init__(self, main): self.tree_settings.model().appendRow(QStandardItem(self.tr('Tables'))) self.tree_settings.model().item(11).appendRow(QStandardItem(self.tr('Concordancer'))) self.tree_settings.model().item(11).appendRow(QStandardItem(self.tr('Parallel Concordancer'))) + self.tree_settings.model().item(11).appendRow(QStandardItem(self.tr('Dependency Parser'))) self.tree_settings.model().appendRow(QStandardItem(self.tr('Figures'))) self.tree_settings.model().item(12).appendRow(QStandardItem(self.tr('Line Charts'))) @@ -153,6 +154,7 @@ def __init__(self, main): self.settings_tables = wl_settings_tables.Wl_Settings_Tables(self.main) self.settings_tables_concordancer = wl_settings_tables.Wl_Settings_Tables_Concordancer(self.main) self.settings_tables_parallel_concordancer = wl_settings_tables.Wl_Settings_Tables_Parallel_Concordancer(self.main) + self.settings_tables_dependency_parser = wl_settings_tables.Wl_Settings_Tables_Dependency_Parser(self.main) # Figures self.settings_figs_line_charts = wl_settings_figs.Wl_Settings_Figs_Line_Charts(self.main) @@ -190,6 +192,7 @@ def __init__(self, main): self.tr('Tables'): self.settings_tables, self.tr('Concordancer'): self.settings_tables_concordancer, self.tr('Parallel Concordancer'): self.settings_tables_parallel_concordancer, + self.tr('Dependency Parser'): self.settings_tables_dependency_parser, self.tr('Line Charts'): self.settings_figs_line_charts, self.tr('Word Clouds'): self.settings_figs_word_clouds, diff --git 
a/wordless/wl_settings/wl_settings_default.py b/wordless/wl_settings/wl_settings_default.py index ceb13e278..e1b5b7b72 100644 --- a/wordless/wl_settings/wl_settings_default.py +++ b/wordless/wl_settings/wl_settings_default.py @@ -2143,9 +2143,17 @@ def init_settings_default(main): # Settings - Tables - Parallel Concordancer 'parallel_concordancer': { - 'color_settings': { + 'highlight_color_settings': { 'search_term_color': '#FF0000' # Red } + }, + + # Settings - Tables - Dependency Parser + 'dependency_parser': { + 'highlight_color_settings': { + 'head_color': '#FF0000', # Red + 'dependent_color': '#3F864C', # Green + } } }, diff --git a/wordless/wl_settings/wl_settings_tables.py b/wordless/wl_settings/wl_settings_tables.py index b269b2644..889eac877 100644 --- a/wordless/wl_settings/wl_settings_tables.py +++ b/wordless/wl_settings/wl_settings_tables.py @@ -177,19 +177,19 @@ def __init__(self, main): self.settings_custom = self.main.settings_custom['tables']['parallel_concordancer'] # Color Settings - self.group_box_color_settings = QGroupBox(self.tr('Color Settings'), self) + self.group_box_highlight_color_settings = QGroupBox(self.tr('Highlight Color Settings'), self) self.label_search_term_color = QLabel(self.tr('Search term color:'), self) self.button_search_term_color = wl_buttons.wl_button_color(self) - self.group_box_color_settings.setLayout(wl_layouts.Wl_Layout()) - self.group_box_color_settings.layout().addWidget(self.label_search_term_color, 0, 0) - self.group_box_color_settings.layout().addWidget(self.button_search_term_color, 0, 1) + self.group_box_highlight_color_settings.setLayout(wl_layouts.Wl_Layout()) + self.group_box_highlight_color_settings.layout().addWidget(self.label_search_term_color, 0, 0) + self.group_box_highlight_color_settings.layout().addWidget(self.button_search_term_color, 0, 1) - self.group_box_color_settings.layout().setColumnStretch(2, 1) + self.group_box_highlight_color_settings.layout().setColumnStretch(2, 1) 
self.setLayout(wl_layouts.Wl_Layout()) - self.layout().addWidget(self.group_box_color_settings, 0, 0) + self.layout().addWidget(self.group_box_highlight_color_settings, 0, 0) self.layout().setContentsMargins(6, 4, 6, 4) self.layout().setRowStretch(1, 1) @@ -201,10 +201,57 @@ def load_settings(self, defaults = False): settings = copy.deepcopy(self.settings_custom) # Color Settings - self.button_search_term_color.set_color(settings['color_settings']['search_term_color']) + self.button_search_term_color.set_color(settings['highlight_color_settings']['search_term_color']) def apply_settings(self): # Color Settings - self.settings_custom['color_settings']['search_term_color'] = self.button_search_term_color.get_color() + self.settings_custom['highlight_color_settings']['search_term_color'] = self.button_search_term_color.get_color() + + return True + +# Settings - Dependency Parser +class Wl_Settings_Tables_Dependency_Parser(wl_settings.Wl_Settings_Node): + def __init__(self, main): + super().__init__(main) + + self.settings_default = self.main.settings_default['tables']['dependency_parser'] + self.settings_custom = self.main.settings_custom['tables']['dependency_parser'] + + # Color Settings + self.group_box_highlight_color_settings = QGroupBox(self.tr('Highlight Color Settings'), self) + + self.label_head_color = QLabel(self.tr('Head color:'), self) + self.button_head_color = wl_buttons.wl_button_color(self) + self.label_dependent_color = QLabel(self.tr('Dependent color:'), self) + self.button_dependent_color = wl_buttons.wl_button_color(self) + + self.group_box_highlight_color_settings.setLayout(wl_layouts.Wl_Layout()) + self.group_box_highlight_color_settings.layout().addWidget(self.label_head_color, 0, 0) + self.group_box_highlight_color_settings.layout().addWidget(self.button_head_color, 0, 1) + self.group_box_highlight_color_settings.layout().addWidget(self.label_dependent_color, 1, 0) + 
self.group_box_highlight_color_settings.layout().addWidget(self.button_dependent_color, 1, 1) + + self.group_box_highlight_color_settings.layout().setColumnStretch(2, 1) + + self.setLayout(wl_layouts.Wl_Layout()) + self.layout().addWidget(self.group_box_highlight_color_settings, 0, 0) + + self.layout().setContentsMargins(6, 4, 6, 4) + self.layout().setRowStretch(1, 1) + + def load_settings(self, defaults = False): + if defaults: + settings = copy.deepcopy(self.settings_default) + else: + settings = copy.deepcopy(self.settings_custom) + + # Color Settings + self.button_head_color.set_color(settings['highlight_color_settings']['head_color']) + self.button_dependent_color.set_color(settings['highlight_color_settings']['dependent_color']) + + def apply_settings(self): + # Color Settings + self.settings_custom['highlight_color_settings']['head_color'] = self.button_head_color.get_color() + self.settings_custom['highlight_color_settings']['dependent_color'] = self.button_dependent_color.get_color() return True diff --git a/wordless/wl_widgets/wl_tables.py b/wordless/wl_widgets/wl_tables.py index ff9e8577e..1377a5998 100644 --- a/wordless/wl_widgets/wl_tables.py +++ b/wordless/wl_widgets/wl_tables.py @@ -20,6 +20,7 @@ import os import random import re +import traceback import bs4 import docx @@ -31,8 +32,8 @@ QLabel, QPushButton, QTableView ) -from wordless.wl_checks import wl_checks_misc -from wordless.wl_dialogs import wl_dialogs_misc, wl_msg_boxes +from wordless.wl_checks import wl_checks_misc, wl_checks_work_area +from wordless.wl_dialogs import wl_dialogs_misc from wordless.wl_nlp import wl_nlp_utils from wordless.wl_utils import wl_misc, wl_paths, wl_threading from wordless.wl_widgets import wl_buttons @@ -417,9 +418,11 @@ def clr_table(self, num_headers = 1): self.model().itemChanged.emit(QStandardItem()) # Export visible rows only + @wl_misc.log_timing def exp_selected_cells(self): self.exp_all_cells(rows_to_exp = self.get_selected_rows(visible_only = True)) + 
@wl_misc.log_timing def exp_all_cells(self, rows_to_exp = None): caption = _tr('wl_tables', 'Export Table') default_dir = self.main.settings_custom['general']['exp']['tables']['default_path'] @@ -482,41 +485,26 @@ def exp_all_cells(self, rows_to_exp = None): thread_exp_table = wl_threading.Wl_Thread(worker_exp_table) thread_exp_table.start_worker() - def update_gui_exp(self, exp_success, file_path): - self.results_saved = True + def update_gui_exp(self, err_msg, file_path): + if not err_msg: + self.results_saved = True - if exp_success: - wl_msg_boxes.Wl_Msg_Box_Info( - self.main, - title = _tr('wl_tables', 'Export Completed'), - text = _tr('wl_tables', ''' -
The table has been successfully exported to "{}".
- ''').format(file_path) - ).open() - else: - wl_msg_boxes.Wl_Msg_Box_Info( - self.main, - title = _tr('wl_tables', 'Export Error'), - text = _tr('wl_tables', ''' -
Access to "{}" is denied, please specify another location or close the file and try again.
- ''').format(file_path) - ).open() + wl_checks_work_area.check_err_exp_table(self.main, err_msg, file_path) class Wl_Worker_Exp_Table(wl_threading.Wl_Worker): - worker_done = pyqtSignal(bool, str) + worker_done = pyqtSignal(str, str) def run(self): - if 'headers_int' not in self.table.__dict__: - self.table.headers_int = [] - if 'headers_float' not in self.table.__dict__: - self.table.headers_float = [] - if 'headers_pct' not in self.table.__dict__: - self.table.headers_pct = [] + try: + if 'headers_int' not in self.table.__dict__: + self.table.headers_int = [] + if 'headers_float' not in self.table.__dict__: + self.table.headers_float = [] + if 'headers_pct' not in self.table.__dict__: + self.table.headers_pct = [] - settings_concordancer = self.main.settings_custom['concordancer']['zapping_settings'] + settings_concordancer = self.main.settings_custom['concordancer']['zapping_settings'] - # Check file permissions - try: len_rows = len(self.rows_to_exp) # Export visible columns only cols = [col for col in range(self.table.model().columnCount()) if not self.table.isColumnHidden(col)] @@ -628,10 +616,34 @@ def run(self): dpi_horizontal = QApplication.primaryScreen().logicalDotsPerInchX() dpi_vertical = QApplication.primaryScreen().logicalDotsPerInchY() - # Concordancer - if self.table.tab == 'concordancer': - worksheet.freeze_panes = 'A2' + match self.table.tab: + case 'concordancer': + freeze_panes = 'A2' + + # Left, Node, Right + cols_labels = [0, 1, 2] + cols_table_items = [] + case 'concordancer_parallel': + freeze_panes = 'A2' + cols_labels = [] + # Parallel Unit No. 
(%) + cols_table_items = [0, 1] + case 'dependency_parser': + freeze_panes = 'A2' + + # Sentence + cols_labels = [5] + cols_table_items = [] + case _: + freeze_panes = 'B2' + + cols_labels = [] + cols_table_items = [] + + worksheet.freeze_panes = freeze_panes + + if self.table.header_orientation == 'hor': # Horizontal headers for col_cell, col_item in enumerate(cols): cell = worksheet.cell(1, 1 + col_cell) @@ -646,8 +658,17 @@ def run(self): for col_cell, col_item in enumerate(cols): cell = worksheet.cell(2 + row_cell, 1 + col_cell) - # Left, Node, and Right - if col_item in [0, 1, 2]: + if ( + ( + cols_labels + and not cols_table_items + and col_item in cols_labels + ) or ( + not cols_labels + and cols_table_items + and col_item not in cols_table_items + ) + ): cell_val = self.table.indexWidget(self.table.model().index(row_item, col_item)).text() cell_val = self.remove_invalid_xml_chars(cell_val) cell.value = cell_val @@ -660,95 +681,39 @@ def run(self): self.style_cell(cell, self.table.model().item(row_item, col_item)) - self.progress_updated.emit(self.tr('Exporting table... ({} / {})').format(row_cell + 1, len_rows)) - # Parallel Concordancer - elif self.table.tab == 'concordancer_parallel': - worksheet.freeze_panes = 'A2' - + self.progress_updated.emit(self.tr('Exporting table... 
({} / {})').format(row_cell + 1, len_rows)) + # Profiler + else: # Horizontal headers for col_cell, col_item in enumerate(cols): - cell = worksheet.cell(1, 1 + col_cell) + cell = worksheet.cell(1, 2 + col_cell) cell.value = self.table.model().horizontalHeaderItem(col_item).text() self.style_header_hor(cell) - worksheet.column_dimensions[openpyxl.utils.get_column_letter(1 + col_cell)].width = self.table.horizontalHeader().sectionSize(col_item) / dpi_horizontal * 13 + 3 - - # Cells - for row_cell, row_item in enumerate(self.rows_to_exp): - for col_cell, col_item in enumerate(cols): - cell = worksheet.cell(2 + row_cell, 1 + col_cell) - - if col_item in [0, 1]: - cell_val = self.table.model().item(row_item, col_item).text() - cell_val = self.remove_invalid_xml_chars(cell_val) - cell.value = cell_val + worksheet.column_dimensions[openpyxl.utils.get_column_letter(2 + col_cell)].width = self.table.horizontalHeader().sectionSize(col_item) / dpi_horizontal * 13 + 3 - self.style_cell(cell, self.table.model().item(row_item, col_item)) - else: - cell_val = self.table.indexWidget(self.table.model().index(row_item, col_item)).text() - cell_val = self.remove_invalid_xml_chars(cell_val) - cell.value = cell_val + worksheet.column_dimensions[openpyxl.utils.get_column_letter(1)].width = self.table.verticalHeader().width() / dpi_horizontal * 13 + 3 - self.style_cell_rich_text(cell, self.table.indexWidget(self.table.model().index(row_item, col_item))) - - self.progress_updated.emit(self.tr('Exporting table... 
({} / {})').format(row_cell + 1, len_rows)) - else: - worksheet.freeze_panes = 'B2' - - if self.table.header_orientation == 'hor': - # Horizontal Headers - for col_cell, col_item in enumerate(cols): - cell = worksheet.cell(1, 1 + col_cell) - cell.value = self.table.model().horizontalHeaderItem(col_item).text() - - self.style_header_hor(cell) - - worksheet.column_dimensions[openpyxl.utils.get_column_letter(1 + col_cell)].width = self.table.horizontalHeader().sectionSize(col_item) / dpi_horizontal * 13 + 3 - - # Cells - for row_cell, row_item in enumerate(self.rows_to_exp): - for col_cell, col_item in enumerate(cols): - cell = worksheet.cell(2 + row_cell, 1 + col_cell) - - cell_val = self.table.model().item(row_item, col_item).text() - cell_val = self.remove_invalid_xml_chars(cell_val) - cell.value = cell_val + # Vertical headers + for row_cell, row_item in enumerate(self.rows_to_exp): + cell = worksheet.cell(2 + row_cell, 1) + cell.value = self.table.model().verticalHeaderItem(row_item).text() - self.style_cell(cell, self.table.model().item(row_item, col_item)) + self.style_header_vert(cell) - self.progress_updated.emit(self.tr('Exporting table... 
({} / {})').format(row_cell + 1, len_rows)) - else: - # Horizontal headers + # Cells + for row_cell, row_item in enumerate(self.rows_to_exp): for col_cell, col_item in enumerate(cols): - cell = worksheet.cell(1, 2 + col_cell) - cell.value = self.table.model().horizontalHeaderItem(col_item).text() - - self.style_header_hor(cell) - - worksheet.column_dimensions[openpyxl.utils.get_column_letter(2 + col_cell)].width = self.table.horizontalHeader().sectionSize(col_item) / dpi_horizontal * 13 + 3 - - worksheet.column_dimensions[openpyxl.utils.get_column_letter(1)].width = self.table.verticalHeader().width() / dpi_horizontal * 13 + 3 - - # Vertical headers - for row_cell, row_item in enumerate(self.rows_to_exp): - cell = worksheet.cell(2 + row_cell, 1) - cell.value = self.table.model().verticalHeaderItem(row_item).text() + cell = worksheet.cell(2 + row_cell, 2 + col_cell) - self.style_header_vert(cell) + cell_val = self.table.model().item(row_item, col_item).text() + cell_val = self.remove_invalid_xml_chars(cell_val) + cell.value = cell_val - # Cells - for row_cell, row_item in enumerate(self.rows_to_exp): - for col_cell, col_item in enumerate(cols): - cell = worksheet.cell(2 + row_cell, 2 + col_cell) - - cell_val = self.table.model().item(row_item, col_item).text() - cell_val = self.remove_invalid_xml_chars(cell_val) - cell.value = cell_val - - self.style_cell(cell, self.table.model().item(row_item, col_item)) + self.style_cell(cell, self.table.model().item(row_item, col_item)) - self.progress_updated.emit(self.tr('Exporting table... ({} / {})').format(row_cell + 1, len_rows)) + self.progress_updated.emit(self.tr('Exporting table... 
({} / {})').format(row_cell + 1, len_rows)) # Row height worksheet.row_dimensions[1].height = self.table.horizontalHeader().height() / dpi_vertical * 72 @@ -816,11 +781,13 @@ def run(self): self.main.settings_custom['general']['exp']['tables']['default_path'] = wl_paths.get_normalized_dir(self.file_path) self.main.settings_custom['general']['exp']['tables']['default_type'] = self.file_type - exp_success = True + err_msg = '' except PermissionError: - exp_success = False + err_msg = 'permission_err' + except Exception: # pylint: disable=broad-exception-caught + err_msg = traceback.format_exc() - self.worker_done.emit(exp_success, self.file_path) + self.worker_done.emit(err_msg, self.file_path) # Remove invalid XML characters def remove_invalid_xml_chars(self, text):