Skip to content

Commit

Permalink
Merge pull request #3798 from Hannah-Sten/grazie
Browse files Browse the repository at this point in the history
Improve Grazie implementation
  • Loading branch information
PHPirates authored Dec 10, 2024
2 parents 052a869 + bfdba9e commit a6c464f
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 63 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Added

### Fixed
* Fix various issues with the Grazie implementation, in particular default rules for Grazie Pro

## [0.9.10-alpha.2] - 2024-12-05

Expand Down
2 changes: 2 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ dependencies {
bundledPlugin("tanvd.grazi")
plugin("com.firsttimeinforever.intellij.pdf.viewer.intellij-pdf-viewer:0.17.0")
plugin("com.jetbrains.hackathon.indices.viewer:1.28")
// Does not work in tests: https://youtrack.jetbrains.com/issue/GRZ-5023
// plugin("com.intellij.grazie.pro:0.3.347")
}

// Local dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@ package nl.hannahsten.texifyidea.inspections.grazie
import com.intellij.grazie.grammar.strategy.StrategyUtils
import com.intellij.grazie.text.TextContent
import com.intellij.grazie.text.TextExtractor
import com.intellij.lang.tree.util.children
import com.intellij.psi.PsiElement
import com.intellij.psi.PsiWhiteSpace
import com.intellij.psi.util.startOffset
import nl.hannahsten.texifyidea.lang.commands.Argument
import nl.hannahsten.texifyidea.lang.commands.LatexCommand
import nl.hannahsten.texifyidea.psi.*
import nl.hannahsten.texifyidea.util.merge
import nl.hannahsten.texifyidea.util.overlaps
import nl.hannahsten.texifyidea.util.parser.childrenOfType
import nl.hannahsten.texifyidea.util.parser.endOffset
import nl.hannahsten.texifyidea.util.parser.firstParentOfType
import nl.hannahsten.texifyidea.util.parser.parents
import nl.hannahsten.texifyidea.util.parser.*
import nl.hannahsten.texifyidea.util.toTextRange

/**
Expand All @@ -27,6 +26,10 @@ class LatexTextExtractor : TextExtractor() {
return null
}

return buildTextContent(root)
}

fun buildTextContent(root: LatexContent): TextContent? {
// Since Grazie works by first checking leaf elements, and if it gets null tries one level higher, we cannot return anything (e.g. literal for a command, comment for comments) other than LatexContent because then LatexContent itself will not be used as a root.
// However, we do need it as a root because we need to filter out certain things like inline math ourselves, so that we can make sure all the whitespace around ignored items is correct.
val domain = TextContent.TextDomain.PLAIN_TEXT
Expand All @@ -37,29 +40,38 @@ class LatexTextExtractor : TextExtractor() {
.map { TextContent.Exclusion.exclude(it.toTextRange()) }
.filter { it.start >= 0 && it.end <= textContent.length }

return textContent.excludeRanges(stealthyRanges)
val textToSubmit = textContent.excludeRanges(stealthyRanges)
return textToSubmit
}

/**
* Get ranges to ignore.
* Note: IntRange has an inclusive end.
*/
private fun getStealthyRanges(root: PsiElement): List<IntRange> {
fun getStealthyRanges(root: PsiElement): List<IntRange> {
// Getting text takes time, so we only do it once
val rootText = root.text

// Only keep normaltext, assuming other things (like inline math) need to be ignored.
val ranges = (root.childrenOfType(LatexNormalText::class) + root.childrenOfType<LatexParameterText>())
val ranges = (root.childrenOfType(LatexNormalText::class) + root.childrenOfType<LatexParameterText>() + root.childrenOfType<PsiWhiteSpace>())
.asSequence()
.filter { it.isNotInMathEnvironment() && it.isNotInSquareBrackets() }
.filter { !it.inMathContext() && it.isNotInSquareBrackets() }
// Ordering is relevant for whitespace
.sortedBy { it.startOffset }
// Always keep newlines, as they may be the only whitespace splitting consecutive commands
.filter { text -> text !is PsiWhiteSpace || text.text.contains("\n") }
// Skip arguments of non-text commands, but keep arguments of unknown commands, in particular if they are in the middle of a sentence
// Even commends which have no text as argument, for example certain reference commands like auteref, may need to be kept in to get correct punctuation
.filterNot { text -> text is LatexParameterText && LatexCommand.lookup(text.firstParentOfType(LatexCommands::class)?.name)?.firstOrNull()?.arguments?.any { it.type != Argument.Type.TEXT && it.type != Argument.Type.LABEL } == true }
// Environment names are never part of a sentence
.filterNot { text -> text.firstParentOfType<LatexBeginCommand>() != null || text.firstParentOfType<LatexEndCommand>() != null }
// If we encounter an unescaped &, we are in some language construct like a tabular, so we ignore this because ofter a tabular does not contain full sentences
.filter { text -> text.node.children().none { it.elementType == LatexTypes.AMPERSAND } }
// NOTE: it is not allowed to start the text we send to Grazie with a newline! If we do, then Grazie will just not do anything. So we exclude whitespace at the start
.dropWhile { it is PsiWhiteSpace }
// Ranges that we need to keep
// Note that textRangeInParent will not be correct because that's the text range in the direct parent, not in the root
.flatMap { text ->
// Skip arguments of non-text commands
if (text is LatexParameterText && LatexCommand.lookup(text.firstParentOfType(LatexCommands::class)?.name)?.firstOrNull()?.arguments?.any { it.type == Argument.Type.TEXT } != true) {
return@flatMap emptyList()
}

var start = text.textRange.startOffset - root.startOffset
// If LatexNormalText starts after a newline following a command, the newline is not part of the LatexNormalText so we include it manually to make sure that it is seen as a space between sentences
// NOTE: it is not allowed to start the text we send to Grazie with a newline! If we do, then Grazie will just not do anything. So we exclude the newline for the first normal text in the file.
Expand All @@ -71,7 +83,7 @@ class LatexTextExtractor : TextExtractor() {

// -1 Because endOffset is exclusive, but we are working with inclusive end here
var end = text.textRange.endOffset - 1 - root.startOffset
// If LatexNormalText ends, for example because it is followed by a command, we do want to include the space in front of the command, since it is still typeset as a space, which is not true for the space after the command
// If LatexNormalText ends, for example because it is followed by a command, we do want to include the space in front of the command, since it is still typeset as a space, which is not true for the space after the command if the command has no arguments,
// except when the space is followed by inline math, since we ignore inline math altogether (which is probably not correct) we should also ignore the space
if (setOf(' ', '\n').contains(rootText.getOrNull(end + 1)) && rootText.getOrNull(end + 2) != '$') {
end += 1
Expand Down Expand Up @@ -100,14 +112,10 @@ class LatexTextExtractor : TextExtractor() {
ranges.removeAll(overlapped.toSet())
ranges.add(indent.merge(overlapped))
}
// This is approximately (except at the start) the text we send to Grazie
// val text = ranges.sortedBy { it.first }.flatMap { listOf(it.first, it.last) }.toMutableList().also { it.add(0, -1) }
// .chunked(2) { if (it.size > 1) rootText.substring(it[0] + 1, it[1]) else null }

return ranges.sortedBy { it.first }
}

private fun PsiElement.isNotInMathEnvironment() = parents().none { it is LatexMathEnvironment }

private fun PsiElement.isNotInSquareBrackets() = parents().find { it is LatexGroup || it is LatexOptionalParam }
?.let { it is LatexGroup } ?: true
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ enum class LatexGenericRegularCommand(
INDEXSPACE("indexspace"),
INDEX("intex", "entry".asRequired()),
IT("it"),
ITEM("item", "label".asOptional()),
ITEM("item", "label".asOptional(Argument.Type.TEXT)),
ITSHAPE("itshape"),
LABEL("label", "key".asRequired()),
LARGE("large"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import com.intellij.psi.PsiElement
import nl.hannahsten.texifyidea.lang.LatexPackage
import nl.hannahsten.texifyidea.psi.LatexCommands
import nl.hannahsten.texifyidea.psi.LatexParameterText
import nl.hannahsten.texifyidea.util.parser.firstChildOfType
import nl.hannahsten.texifyidea.util.magic.CommandMagic
import nl.hannahsten.texifyidea.util.parser.firstChildOfType
import nl.hannahsten.texifyidea.util.parser.requiredParameters

enum class LatexGlossariesCommand(
Expand Down Expand Up @@ -46,10 +46,10 @@ enum class LatexGlossariesCommand(
"long".asRequired(),
dependency = LatexPackage.GLOSSARIES
),
GLS("gls", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLSUPPER("Gls", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLSPLURAL("glspl", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLSPLURALUPPER("Glspl", "label".asRequired(), dependency = LatexPackage.GLOSSARIES),
GLS("gls", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
GLSUPPER("Gls", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
GLSPLURAL("glspl", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),
GLSPLURALUPPER("Glspl", "label".asRequired(Argument.Type.TEXT), dependency = LatexPackage.GLOSSARIES),

LOADGLSENTRIES(
"loadglsentries",
Expand Down
133 changes: 104 additions & 29 deletions test/nl/hannahsten/texifyidea/inspections/grazie/GrazieInspectionTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ import com.intellij.grazie.ide.msg.GrazieStateLifecycle
import com.intellij.grazie.jlanguage.Lang
import com.intellij.grazie.remote.GrazieRemote
import com.intellij.openapi.application.ApplicationManager
import com.intellij.psi.PsiFile
import com.intellij.spellchecker.inspections.SpellCheckingInspection
import com.intellij.testFramework.fixtures.BasePlatformTestCase
import com.intellij.testFramework.fixtures.impl.CodeInsightTestFixtureImpl
import com.intellij.util.messages.Topic
import nl.hannahsten.texifyidea.file.LatexFileType
import nl.hannahsten.texifyidea.psi.LatexContent
import nl.hannahsten.texifyidea.util.parser.firstChildOfType

class GrazieInspectionTest : BasePlatformTestCase() {

Expand All @@ -28,24 +31,40 @@ class GrazieInspectionTest : BasePlatformTestCase() {
}
}

fun testCheckGrammarInConstructs() {
fun testSingleSentence() {
myFixture.configureByText(LatexFileType, """Is these an error with a sentence ${'$'}\xi${'$'} end or not.""")
myFixture.checkHighlighting()
val testName = getTestName(false)
myFixture.configureByFile("$testName.tex")
myFixture.checkHighlighting(true, false, false, true)
}

fun testMultilineCheckGrammar() {
val testName = getTestName(false)
myFixture.configureByFile("$testName.tex")
fun testCommentInText() {
myFixture.configureByText(
LatexFileType,
"""
\begin{document}
All <GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or those are?">those is</GRAMMAR_ERROR> problems in the middle of a sentence.
% <GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or Those are?">Those is</GRAMMAR_ERROR> a problem in a comment
<GRAMMAR_ERROR descr="The verb 'is' is singular. Did you mean: this is or Those are?">Those is</GRAMMAR_ERROR> a problem at the beginning of a sentence.
\end{document}
""".trimIndent()
)
myFixture.checkHighlighting(true, false, false, true)
}

fun testInlineMath() {
fun testSentenceAtEnvironmentStart() {
myFixture.configureByText(
LatexFileType, """Does Grazie detect ${'$'}m$ as a sentence?"""
LatexFileType,
"""
\begin{document}
<GRAMMAR_ERROR descr="Use An instead of 'A' if the following word starts with a vowel sound, e.g. 'an article', 'an hour'.">A</GRAMMAR_ERROR> apple a day keeps the doctor away.
Some other sentence.
\end{document}
""".trimIndent()
)
myFixture.checkHighlighting(true, false, false, true)
}

fun testInlineMath() {
myFixture.configureByText(LatexFileType, """Does Grazie detect ${'$'}m$ as a sentence?""")
myFixture.checkHighlighting()
}

Expand All @@ -61,9 +80,7 @@ class GrazieInspectionTest : BasePlatformTestCase() {
}

fun testMatchingParens() {
myFixture.configureByText(
LatexFileType, """a (in this case) . aa"""
)
myFixture.configureByText(LatexFileType, """A sentence (in this case). More sentence.""")
myFixture.checkHighlighting()
}

Expand Down Expand Up @@ -122,21 +139,79 @@ class GrazieInspectionTest : BasePlatformTestCase() {
myFixture.checkHighlighting()
}

// Broken in 2023.2 (TEX-177)
// fun testTabular() {
// GrazieRemote.download(Lang.GERMANY_GERMAN)
// GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
// myFixture.configureByText(
// LatexFileType,
// """
// \begin{tabular}{llll}
// ${'$'}a${'$'}: & ${'$'}\mathbb{N}${'$'} & \rightarrow & ${'$'}M${'$'} \\
// \multicolumn{1}{l}{} & ${'$'}n${'$'} & \mapsto & ${'$'}a(n)${'$'}.
// \end{tabular}
//
// Ich bin über die Entwicklung sehr froh.
// """.trimIndent()
// )
// myFixture.checkHighlighting()
// }
fun testGermanGlossaries() {
GrazieRemote.download(Lang.GERMANY_GERMAN)
GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
myFixture.configureByText(
LatexFileType,
"""
Der Hintergrund des Themas der Thesis ist der Umbruch beim Prozess des \gls{api}-Managements.
""".trimIndent()
)
myFixture.checkHighlighting()
}

fun testTabular() {
GrazieRemote.download(Lang.GERMANY_GERMAN)
GrazieConfig.update { it.copy(enabledLanguages = it.enabledLanguages + Lang.GERMANY_GERMAN) }
myFixture.configureByText(
LatexFileType,
"""
\begin{tabular}{llll}
${'$'}a${'$'}: & ${'$'}\mathbb{N}${'$'} & \rightarrow & ${'$'}M${'$'} \\
\multicolumn{1}{l}{} & ${'$'}n${'$'} & \mapsto & ${'$'}a(n)${'$'}.
\end{tabular}
Ich bin über die Entwicklung sehr froh.
""".trimIndent()
)
myFixture.checkHighlighting()
}

/*
* These rules are not enabled by default in Grazie Lite, but do show up by default in Grazie Pro.
* To find a rule id, search for the name in https://community.languagetool.org/rule/list and use the id together with the prefex from LangTool.globalIdPrefix
*/

fun testCommaInSentence() {
GrazieConfig.update { it.copy(userEnabledRules = setOf("LanguageTool.EN.COMMA_PARENTHESIS_WHITESPACE")) }
myFixture.configureByText(LatexFileType, """\label{fig} Similar to the structure presented in \autoref{fig}, it is.""")
myFixture.checkHighlighting()
}

fun testCommandsInSentence() {
GrazieConfig.update { it.copy(userEnabledRules = setOf("LanguageTool.EN.CONSECUTIVE_SPACES")) }
myFixture.configureByText(LatexFileType, """The principles of a generic \ac{PID} controller.""")
myFixture.checkHighlighting()
}

/*
* Grazie Pro
*
* These tests only test false positives in Grazie Pro (com.intellij.grazie.pro.style.StyleInspection), but that is not possible to test at the moment: https://youtrack.jetbrains.com/issue/GRZ-5023
* So we test the excluded ranges directly.
*/

/**
* Text as sent to Grazie.
*/
private fun getSubmittedText(file: PsiFile): String {
return LatexTextExtractor().buildTextContent(file.firstChildOfType(LatexContent::class)!!).toString()
}

fun testNewlinesShouldBeKept() {
val text = """
\section{First}
\section{Second}
""".trimIndent()
myFixture.configureByText(LatexFileType, text)
val submittedText = getSubmittedText(myFixture.file)
assertEquals(
"""
First
Second
""".trimIndent(),
submittedText
)
}
}

This file was deleted.

4 changes: 0 additions & 4 deletions test/resources/inspections/grazie/MultilineCheckGrammar.tex

This file was deleted.

0 comments on commit a6c464f

Please sign in to comment.