From 3fac1660f71af7c2a5b8c4f8054b323dc931eb66 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Wed, 19 Jul 2023 09:47:39 +0200 Subject: [PATCH] Fixed trim abstract semantics for Tarsis and added some tests --- .../string/tarsis/RegexAutomaton.java | 14 +++++--- .../lisa/analysis/string/tarsis/Tarsis.java | 3 +- .../analysis/string/tarsis/RepeatTest.java | 29 +++++++-------- .../lisa/analysis/string/tarsis/TrimTest.java | 31 ++++++++++------ .../datastructures/automaton/Automaton.java | 4 +-- .../lisa/util/datastructures/regex/Atom.java | 18 +++++++--- .../lisa/util/datastructures/regex/Comp.java | 36 ++++++++++++++----- .../util/datastructures/regex/EmptySet.java | 15 ++++++-- .../lisa/util/datastructures/regex/Or.java | 16 +++++++-- .../regex/RegularExpression.java | 12 ++++--- .../lisa/util/datastructures/regex/Star.java | 27 +++++++++----- .../util/datastructures/regex/TopAtom.java | 5 ++- 12 files changed, 142 insertions(+), 68 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index cf14b3ddf..3412e1908 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -457,17 +457,21 @@ private RegexAutomaton union(RegexAutomaton... automata) { } /** - * Yields an automaton that corresponds to the {@code n}-time concatenation of {@code this}. + * Yields an automaton that corresponds to the {@code n}-time concatenation + * of {@code this}. + * * @param n the number of repetitions - * @return an automaton that corresponds to the {@code n}-time concatenation of {@code this} + * + * @return an automaton that corresponds to the {@code n}-time concatenation + * of {@code this} */ public RegexAutomaton repeat(long n) { if (n == 0) - return emptyString(); + return emptyString(); return toRegex().simplify().repeat(n).toAutomaton(this).minimize(); } - + public RegexAutomaton trim() { - return toRegex().trim().toAutomaton(this); + return toRegex().simplify().trim().toAutomaton(this); } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index 482634193..ec17bf0b7 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -27,7 +27,6 @@ import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; import it.unive.lisa.util.numeric.MathNumberConversionException; - import java.util.Objects; import java.util.SortedSet; import java.util.TreeSet; @@ -424,7 +423,7 @@ else if (intv.interval.isFinite()) { } else return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star())); } - + public Tarsis trim() { return new Tarsis(this.a.trim()); } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java index 979f978d2..2e5c4ed01 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java @@ -2,13 +2,12 @@ import static org.junit.Assert.assertTrue; -import org.junit.Test; - import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.util.datastructures.regex.Atom; import it.unive.lisa.util.datastructures.regex.Or; import it.unive.lisa.util.numeric.MathNumber; import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; public class RepeatTest { @@ -22,50 +21,52 @@ public void repeatSingleString() throws MathNumberConversionException { // "abc".repeat(1) = "abc" assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(abc)); - + // "abc".repeat(2) = "abcabc" assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abcabc)); // "abc".repeat(0) = "" assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); - // "abc".repeat([1,2]) = {"abc", "abcabc"} assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(abc.union(abcabc))); // "abc".repeat([0,+infty]) = (abc)* - assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star)); + assertTrue( + a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star)); // "abc".repeat([1,+infty]) = abc(abc)* - assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc.concat(abc_star))); + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(abc.concat(abc_star))); } - + @Test public void repeatTwoStrings() throws MathNumberConversionException { - + RegexAutomaton ab_or_cd = new Or(new Atom("ab"), new Atom("cd")).toAutomaton(RegexAutomaton.emptyLang()); - RegexAutomaton abab_or_cdcd = new Or(new Atom("abab"), new Atom("cdcd")).toAutomaton(RegexAutomaton.emptyLang()); + RegexAutomaton abab_or_cdcd = new Or(new Atom("abab"), new Atom("cdcd")) + .toAutomaton(RegexAutomaton.emptyLang()); Tarsis a = new Tarsis(ab_or_cd); // {"ab", "cd"}.repeat(1) = {"ab", "cd"} assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(ab_or_cd)); - - + // {"ab", "cd"}.repeat(2) = {"abab", "cdcd"} assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abab_or_cdcd)); // {"ab", "cd"}.repeat(0) = {""} assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); - // {"ab", "cd"}.repeat([1,2]) = {"ab", "cd", "abab", "cdcd"} assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(ab_or_cd.union(abab_or_cdcd))); // {"ab", "cd"}.repeat([0,+infty]) = (ab|cd)* - assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(ab_or_cd.star())); + assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(ab_or_cd.star())); // {"ab", "cd"}.repeat([1,+infty]) = (ab|cd)(ab|cd)* - assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(ab_or_cd.concat(ab_or_cd.star()))); + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(ab_or_cd.concat(ab_or_cd.star()))); } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java index 5faaa66fc..6619d51bd 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java @@ -2,14 +2,13 @@ import static org.junit.Assert.assertTrue; -import org.junit.Test; - import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; public class TrimTest { @Test - public void repeatSingleString() throws MathNumberConversionException { + public void trimSingleString() throws MathNumberConversionException { RegexAutomaton abc = RegexAutomaton.string("abc"); RegexAutomaton ws = RegexAutomaton.string(" "); RegexAutomaton epsilon = RegexAutomaton.string(""); @@ -17,29 +16,39 @@ public void repeatSingleString() throws MathNumberConversionException { RegexAutomaton empty_star = RegexAutomaton.string(" ").star(); RegexAutomaton comp1 = RegexAutomaton.string(" ").concat(RegexAutomaton.string(" abc")); RegexAutomaton comp2 = RegexAutomaton.string(" a").concat(RegexAutomaton.string(" b ")); + RegexAutomaton comp3 = RegexAutomaton.string(" abc ").concat(RegexAutomaton.string(" ")); // "abc".trim() = "abc" Tarsis a = new Tarsis(abc); assertTrue(a.trim().getAutomaton().isEqualTo(abc)); - // " ".trim() = "" + // " ".trim() = "" Tarsis b = new Tarsis(ws); assertTrue(b.trim().getAutomaton().isEqualTo(epsilon)); - // " a b c ".trim() = "a b c" + // " a b c ".trim() = "a b c" Tarsis c = new Tarsis(abc_with_ws); assertTrue(c.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c"))); - // (" ")*.trim() = "" + // (" ")*.trim() = "" Tarsis d = new Tarsis(empty_star); assertTrue(d.trim().getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); - - // " " + " abc".trim() = "abc" + + // " " + " abc".trim() = "abc" Tarsis e = new Tarsis(comp1); assertTrue(e.trim().getAutomaton().isEqualTo(abc)); - - // " a" + " b ".trim() = "a b " + + // " a" + " b ".trim() = "a b" Tarsis f = new Tarsis(comp2); - assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b "))); + assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b"))); + + // " a b c "*.trim() = "a b c " + (" a b c ")* + " a b c" + Tarsis g = new Tarsis(abc_with_ws.star()); + assertTrue(g.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c ").concat(abc_with_ws.star()) + .concat(RegexAutomaton.string(" a b c")).star())); + + // " abc " + " ".trim() = "abc" + Tarsis h = new Tarsis(comp3); + assertTrue(h.trim().getAutomaton().isEqualTo(RegexAutomaton.string("abc"))); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java index 1cd96ca76..b10a67490 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java @@ -37,7 +37,7 @@ * this class have on their transitions */ public abstract class Automaton, T extends TransitionSymbol> -implements AutomataFactory { + implements AutomataFactory { /** * The states of this automaton. @@ -1053,7 +1053,7 @@ public String prettyPrint() { for (Transition t : transitions) result.append("\t").append(st).append(" [").append(t.getSymbol()).append("] -> ") - .append(t.getDestination()).append("\n"); + .append(t.getDestination()).append("\n"); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index 7be4e987a..4a9e7626e 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -1,12 +1,12 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; +import org.apache.commons.lang3.StringUtils; /** * A {@link RegularExpression} representing a single string. @@ -79,7 +79,7 @@ public RegularExpression simplify() { @Override public , - T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { + T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { return isEmpty() ? factory.emptyString() : factory.singleString(string); } @@ -199,4 +199,14 @@ public RegularExpression repeat(long n) { public RegularExpression trim() { return new Atom(this.string.trim()); } + + @Override + public RegularExpression trimLeft() { + return new Atom(StringUtils.stripStart(this.string, null)); + } + + @Override + public RegularExpression trimRight() { + return new Atom(StringUtils.stripEnd(this.string, null)); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 41febad48..2959d0aef 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing the sequential composition of two @@ -289,7 +288,7 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asComp().second); } - + @Override public RegularExpression repeat(long n) { if (n == 0) @@ -303,11 +302,32 @@ public RegularExpression repeat(long n) { @Override public RegularExpression trim() { - - RegularExpression trimFirst = first.trim().simplify(); - if (trimFirst.isEmpty()) + RegularExpression trimLeftFirst = first.trimLeft().simplify(); + if (trimLeftFirst.isEmpty()) return second.trim(); + + RegularExpression trimRightSecond = second.trimRight().simplify(); + if (trimRightSecond.isEmpty()) + return first.trim(); + + return new Comp(trimLeftFirst, trimRightSecond).simplify(); + } + + @Override + public RegularExpression trimLeft() { + RegularExpression trimLeftFirst = this.first.trimLeft().simplify(); + if (trimLeftFirst.isEmpty()) + return this.second.trimLeft(); + else + return new Comp(trimLeftFirst, second); + } + + @Override + public RegularExpression trimRight() { + RegularExpression trimRightSecond = this.second.trimRight().simplify(); + if (trimRightSecond.isEmpty()) + return this.first.trimRight(); else - return new Comp(trimFirst, second).simplify(); + return new Comp(this.first, trimRightSecond); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index 99cfc1e76..9bff11fe8 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -1,11 +1,10 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.Collections; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; +import java.util.Collections; +import java.util.Set; /** * A {@link RegularExpression} representing the empty set of strings. @@ -142,4 +141,14 @@ public RegularExpression repeat(long n) { public RegularExpression trim() { return this; } + + @Override + public RegularExpression trimLeft() { + return this; + } + + @Override + public RegularExpression trimRight() { + return this; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index 13f93308e..92a1db1fe 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -1,11 +1,10 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing an or between two other regular @@ -288,4 +287,15 @@ public RegularExpression repeat(long n) { public RegularExpression trim() { return new Or(first.trim(), second.trim()); } + + @Override + public RegularExpression trimLeft() { + return new Or(first.trimLeft(), second.trimLeft()); + + } + + @Override + public RegularExpression trimRight() { + return new Or(first.trimRight(), second.trimRight()); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index ef22b4f72..de8db3966 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.Set; -import java.util.stream.Collectors; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.Set; +import java.util.stream.Collectors; /** * A regular expression that can be recognized by an {@link Automaton}, or that @@ -312,8 +311,13 @@ public String toString() { protected abstract Set substringAux(int charsToSkip, int missingChars); public abstract RegularExpression repeat(long n); - + public abstract RegularExpression trim(); + + public abstract RegularExpression trimLeft(); + + public abstract RegularExpression trimRight(); + /** * Yields {@code true} if and only if this regular expression corresponds to * the empty string or to no strings at all. diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index 489910e08..c5a1acf8a 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing a loop, repeated an arbitrary number @@ -92,7 +91,7 @@ else if (op.isStar()) @Override public , - T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { + T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { return op.toAutomaton(factory).star(); } @@ -243,10 +242,20 @@ public RegularExpression repeat(long n) { @Override public RegularExpression trim() { - RegularExpression trimOp = op.trim().simplify(); - if (trimOp.isEmpty()) - return trimOp; - else - return new Star(trimOp); + RegularExpression left = this.op.trimLeft().simplify(); + if (left.isEmpty()) + return Atom.EPSILON; + RegularExpression right = this.op.trimRight().simplify(); + return new Star(new Comp(new Comp(left, this), right)).simplify(); + } + + @Override + public RegularExpression trimLeft() { + return new Star(new Comp(this.op.trimLeft(), op)); + } + + @Override + public RegularExpression trimRight() { + return new Comp(this, op.trimRight()); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java index 1c476303a..446c1a867 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing a sequence of unknown characters of