From dbb0c48969ba9cc067a637053f0b99c7ea46260c Mon Sep 17 00:00:00 2001 From: "v.arceri" Date: Thu, 6 Jul 2023 16:43:33 +0200 Subject: [PATCH 01/18] Abstract semantics for repeat in Tarsis --- .../string/tarsis/RegexAutomaton.java | 11 +++++++++++ .../lisa/util/datastructures/regex/Atom.java | 19 ++++++++++++++++--- .../lisa/util/datastructures/regex/Comp.java | 17 +++++++++++++++-- .../util/datastructures/regex/EmptySet.java | 10 ++++++++-- .../lisa/util/datastructures/regex/Or.java | 10 ++++++++-- .../regex/RegularExpression.java | 7 +++++-- .../lisa/util/datastructures/regex/Star.java | 10 ++++++++-- .../util/datastructures/regex/TopAtom.java | 5 +++-- 8 files changed, 74 insertions(+), 15 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index 33a522f0d..70a635483 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -455,4 +455,15 @@ private RegexAutomaton union(RegexAutomaton... automata) { return result; } + + /** + * Yields an automaton that corresponds to the {@code n}-time concatenation of {@code this}. + * @param n the number of repetitions + * @return an automaton that corresponds to the {@code n}-time concatenation of {@code this} + */ + public RegexAutomaton repeat(int n) { + if (n == 0) + return emptyString(); + return toRegex().repeat(n).toAutomaton(this); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index 05109736e..8c865188d 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing a single string. @@ -78,7 +79,7 @@ public RegularExpression simplify() { @Override public , - T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { + T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { return isEmpty() ? factory.emptyString() : factory.singleString(string); } @@ -181,4 +182,16 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return string.compareTo(other.asAtom().string); } + + @Override + public RegularExpression repeat(int n) { + if (n == 0) + return Atom.EPSILON; + + RegularExpression r = Atom.EPSILON; + for (int i = 0; i < n; i++) + r = new Comp(r, this); + r.simplify(); + return r; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 976822e88..e90e5dd5f 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing the sequential composition of two @@ -288,4 +289,16 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asComp().second); } + + @Override + public RegularExpression repeat(int n) { + if (n == 0) + return Atom.EPSILON; + + RegularExpression r = Atom.EPSILON; + for (int i = 0; i < n; i++) + r = new Comp(r, this); + r.simplify(); + return r; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index f9aff003b..5cc07edd7 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -1,10 +1,11 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.Collections; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; -import java.util.Collections; -import java.util.Set; /** * A {@link RegularExpression} representing the empty set of strings. @@ -131,4 +132,9 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return 0; } + + @Override + public RegularExpression repeat(int n) { + return this; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index f8ef614ef..d1d601557 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -1,10 +1,11 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing an or between two other regular @@ -277,4 +278,9 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asOr().second); } + + @Override + public RegularExpression repeat(int n) { + return new Or(first.repeat(n), second.repeat(n)); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index 111dbe9c1..abafb5a68 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.Set; +import java.util.stream.Collectors; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.Set; -import java.util.stream.Collectors; /** * A regular expression that can be recognized by an {@link Automaton}, or that @@ -310,6 +311,8 @@ public String toString() { */ protected abstract Set substringAux(int charsToSkip, int missingChars); + public abstract RegularExpression repeat(int n); + /** * Yields {@code true} if and only if this regular expression corresponds to * the empty string or to no strings at all. diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index d0ff6e2fd..8fd977fac 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing a loop, repeated an arbitrary number @@ -234,4 +235,9 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return op.compareTo(other.asStar().op); } + + @Override + public RegularExpression repeat(int n) { + return this; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java index 446c1a867..1c476303a 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing a sequence of unknown characters of From 1e186287d4d144099cde26747d29551297a735b9 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Fri, 7 Jul 2023 12:20:20 +0200 Subject: [PATCH 02/18] Fix stack overflow error in RegexAutomaton's emptyLanguage, abstract semantics of repeat for Tarsis, testing single string repetition --- .../string/tarsis/RegexAutomaton.java | 4 +- .../lisa/analysis/string/tarsis/Tarsis.java | 20 ++++++++++ .../analysis/string/tarsis/RepeatTest.java | 40 +++++++++++++++++++ .../datastructures/automaton/Automaton.java | 11 +++++ .../lisa/util/datastructures/regex/Atom.java | 4 +- .../lisa/util/datastructures/regex/Comp.java | 4 +- .../util/datastructures/regex/EmptySet.java | 2 +- .../lisa/util/datastructures/regex/Or.java | 2 +- .../regex/RegularExpression.java | 2 +- .../lisa/util/datastructures/regex/Star.java | 2 +- 10 files changed, 81 insertions(+), 10 deletions(-) create mode 100644 lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index 70a635483..8197d030a 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -183,7 +183,7 @@ public RegexAutomaton unknownString() { @Override public RegexAutomaton emptyLanguage() { - return emptyLanguage(); + return emptyLang(); } @Override @@ -461,7 +461,7 @@ private RegexAutomaton union(RegexAutomaton... automata) { * @param n the number of repetitions * @return an automaton that corresponds to the {@code n}-time concatenation of {@code this} */ - public RegexAutomaton repeat(int n) { + public RegexAutomaton repeat(long n) { if (n == 0) return emptyString(); return toRegex().repeat(n).toAutomaton(this); diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index 7700eaee8..bbec08bbf 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -5,6 +5,7 @@ import it.unive.lisa.analysis.SemanticDomain.Satisfiability; import it.unive.lisa.analysis.SemanticException; import it.unive.lisa.analysis.nonrelational.value.BaseNonRelationalValueDomain; +import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.analysis.representation.DomainRepresentation; import it.unive.lisa.analysis.representation.StringRepresentation; import it.unive.lisa.analysis.string.ContainsCharProvider; @@ -25,6 +26,8 @@ import it.unive.lisa.util.datastructures.regex.TopAtom; import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; + import java.util.Objects; import java.util.SortedSet; import java.util.TreeSet; @@ -404,4 +407,21 @@ public Satisfiability containsChar(char c) throws SemanticException { return satisfiesBinaryExpression(StringContains.INSTANCE, this, new Tarsis(RegexAutomaton.string(String.valueOf(c))), null); } + + public Tarsis repeat(Interval intv) throws MathNumberConversionException { + if (intv.isTop() || a.hasCycle()) + return new Tarsis(a.kleene()); + else if (intv.interval.isFinite()) { + if (intv.interval.isSingleton()) + return new Tarsis(a.repeat(intv.interval.getHigh().toLong())); + else { + RegexAutomaton result = a.emptyLanguage(); + + for (Long i : intv.interval) + result = result.union(a.repeat(i)); + return new Tarsis(result); + } + } else + return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.kleene())); + } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java new file mode 100644 index 000000000..55668028e --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java @@ -0,0 +1,40 @@ +package it.unive.lisa.analysis.string.tarsis; + +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +import it.unive.lisa.analysis.numeric.Interval; +import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; + +public class RepeatTest { + + @Test + public void repeatSingleString() throws MathNumberConversionException { + RegexAutomaton abc = RegexAutomaton.string("abc"); + RegexAutomaton abcabc = RegexAutomaton.string("abcabc"); + RegexAutomaton abc_star = RegexAutomaton.string("abc").kleene(); + + Tarsis a = new Tarsis(abc); + + // "abc".repeat(1) = "abc" + assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(abc)); + + // "abc".repeat(2) = "abcabc" + assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abcabc)); + + // "abc".repeat(0) = "" + assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); + + + // "abc".repeat([1,2]) = {"abc", "abcabc"} + assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(abc.union(abcabc))); + + // "abc".repeat([0,+infty]) = (abc)* + assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star)); + + // "abc".repeat([1,+infty]) = abc(abc)* + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc.concat(abc_star))); + } +} diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java index b10a67490..3bce54456 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java @@ -1866,6 +1866,17 @@ public A factorsChangingInitialState(State s) { t.getSymbol())); return from(newStates, newDelta).minimize(); } + + + public A kleene() { + SortedSet newStates = new TreeSet<>(this.states); + SortedSet> newDelta = new TreeSet<>(this.transitions); + + for (State f : getFinalStates()) + newDelta.add(new Transition<>(f, getInitialState(), epsilon())); + + return from(newStates, newDelta).minimize(); + } @Override public int hashCode() { diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index 8c865188d..ee5a179cf 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -184,12 +184,12 @@ protected int compareToAux(RegularExpression other) { } @Override - public RegularExpression repeat(int n) { + public RegularExpression repeat(long n) { if (n == 0) return Atom.EPSILON; RegularExpression r = Atom.EPSILON; - for (int i = 0; i < n; i++) + for (long i = 0; i < n; i++) r = new Comp(r, this); r.simplify(); return r; diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index e90e5dd5f..8464e3628 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -291,12 +291,12 @@ protected int compareToAux(RegularExpression other) { } @Override - public RegularExpression repeat(int n) { + public RegularExpression repeat(long n) { if (n == 0) return Atom.EPSILON; RegularExpression r = Atom.EPSILON; - for (int i = 0; i < n; i++) + for (long i = 0; i < n; i++) r = new Comp(r, this); r.simplify(); return r; diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index 5cc07edd7..68bda350d 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -134,7 +134,7 @@ protected int compareToAux(RegularExpression other) { } @Override - public RegularExpression repeat(int n) { + public RegularExpression repeat(long n) { return this; } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index d1d601557..b215423a7 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -280,7 +280,7 @@ protected int compareToAux(RegularExpression other) { } @Override - public RegularExpression repeat(int n) { + public RegularExpression repeat(long n) { return new Or(first.repeat(n), second.repeat(n)); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index abafb5a68..7bb255ffb 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -311,7 +311,7 @@ public String toString() { */ protected abstract Set substringAux(int charsToSkip, int missingChars); - public abstract RegularExpression repeat(int n); + public abstract RegularExpression repeat(long n); /** * Yields {@code true} if and only if this regular expression corresponds to diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index 8fd977fac..8be07f871 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -237,7 +237,7 @@ protected int compareToAux(RegularExpression other) { } @Override - public RegularExpression repeat(int n) { + public RegularExpression repeat(long n) { return this; } } From b61aee0e26e459f16fcb7ce97ad4105973d1afd1 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Sat, 8 Jul 2023 13:25:29 +0200 Subject: [PATCH 03/18] Tests on repeat, removed kleene method (duplicated) --- .../string/tarsis/RegexAutomaton.java | 2 +- .../lisa/analysis/string/tarsis/Tarsis.java | 4 +-- .../analysis/string/tarsis/RepeatTest.java | 33 ++++++++++++++++++- .../datastructures/automaton/Automaton.java | 15 ++------- .../lisa/util/datastructures/regex/Comp.java | 5 ++- .../lisa/util/datastructures/regex/Or.java | 4 +-- .../lisa/util/datastructures/regex/Star.java | 2 +- 7 files changed, 42 insertions(+), 23 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index 8197d030a..f658523ed 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -464,6 +464,6 @@ private RegexAutomaton union(RegexAutomaton... automata) { public RegexAutomaton repeat(long n) { if (n == 0) return emptyString(); - return toRegex().repeat(n).toAutomaton(this); + return toRegex().simplify().repeat(n).toAutomaton(this).minimize(); } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index bbec08bbf..2b9ecff46 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -410,7 +410,7 @@ public Satisfiability containsChar(char c) throws SemanticException { public Tarsis repeat(Interval intv) throws MathNumberConversionException { if (intv.isTop() || a.hasCycle()) - return new Tarsis(a.kleene()); + return new Tarsis(a.star()); else if (intv.interval.isFinite()) { if (intv.interval.isSingleton()) return new Tarsis(a.repeat(intv.interval.getHigh().toLong())); @@ -422,6 +422,6 @@ else if (intv.interval.isFinite()) { return new Tarsis(result); } } else - return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.kleene())); + return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star())); } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java index 55668028e..979f978d2 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java @@ -5,6 +5,8 @@ import org.junit.Test; import it.unive.lisa.analysis.numeric.Interval; +import it.unive.lisa.util.datastructures.regex.Atom; +import it.unive.lisa.util.datastructures.regex.Or; import it.unive.lisa.util.numeric.MathNumber; import it.unive.lisa.util.numeric.MathNumberConversionException; @@ -14,7 +16,7 @@ public class RepeatTest { public void repeatSingleString() throws MathNumberConversionException { RegexAutomaton abc = RegexAutomaton.string("abc"); RegexAutomaton abcabc = RegexAutomaton.string("abcabc"); - RegexAutomaton abc_star = RegexAutomaton.string("abc").kleene(); + RegexAutomaton abc_star = RegexAutomaton.string("abc").star(); Tarsis a = new Tarsis(abc); @@ -37,4 +39,33 @@ public void repeatSingleString() throws MathNumberConversionException { // "abc".repeat([1,+infty]) = abc(abc)* assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc.concat(abc_star))); } + + @Test + public void repeatTwoStrings() throws MathNumberConversionException { + + RegexAutomaton ab_or_cd = new Or(new Atom("ab"), new Atom("cd")).toAutomaton(RegexAutomaton.emptyLang()); + RegexAutomaton abab_or_cdcd = new Or(new Atom("abab"), new Atom("cdcd")).toAutomaton(RegexAutomaton.emptyLang()); + + Tarsis a = new Tarsis(ab_or_cd); + + // {"ab", "cd"}.repeat(1) = {"ab", "cd"} + assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(ab_or_cd)); + + + // {"ab", "cd"}.repeat(2) = {"abab", "cdcd"} + assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abab_or_cdcd)); + + // {"ab", "cd"}.repeat(0) = {""} + assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); + + + // {"ab", "cd"}.repeat([1,2]) = {"ab", "cd", "abab", "cdcd"} + assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(ab_or_cd.union(abab_or_cdcd))); + + // {"ab", "cd"}.repeat([0,+infty]) = (ab|cd)* + assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(ab_or_cd.star())); + + // {"ab", "cd"}.repeat([1,+infty]) = (ab|cd)(ab|cd)* + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(ab_or_cd.concat(ab_or_cd.star()))); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java index 3bce54456..1cd96ca76 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java @@ -37,7 +37,7 @@ * this class have on their transitions */ public abstract class Automaton, T extends TransitionSymbol> - implements AutomataFactory { +implements AutomataFactory { /** * The states of this automaton. @@ -1053,7 +1053,7 @@ public String prettyPrint() { for (Transition t : transitions) result.append("\t").append(st).append(" [").append(t.getSymbol()).append("] -> ") - .append(t.getDestination()).append("\n"); + .append(t.getDestination()).append("\n"); } } @@ -1866,17 +1866,6 @@ public A factorsChangingInitialState(State s) { t.getSymbol())); return from(newStates, newDelta).minimize(); } - - - public A kleene() { - SortedSet newStates = new TreeSet<>(this.states); - SortedSet> newDelta = new TreeSet<>(this.transitions); - - for (State f : getFinalStates()) - newDelta.add(new Transition<>(f, getInitialState(), epsilon())); - - return from(newStates, newDelta).minimize(); - } @Override public int hashCode() { diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 8464e3628..2815e6ca2 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -32,7 +32,7 @@ public final class Comp extends RegularExpression { * @param first the first regular expression * @param second the second regular expression */ - Comp(RegularExpression first, RegularExpression second) { + public Comp(RegularExpression first, RegularExpression second) { this.first = first; this.second = second; } @@ -298,7 +298,6 @@ public RegularExpression repeat(long n) { RegularExpression r = Atom.EPSILON; for (long i = 0; i < n; i++) r = new Comp(r, this); - r.simplify(); - return r; + return r.simplify(); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index b215423a7..4f590aedc 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -31,7 +31,7 @@ public final class Or extends RegularExpression { * @param first the first regular expression * @param second the second regular expression */ - Or(RegularExpression first, RegularExpression second) { + public Or(RegularExpression first, RegularExpression second) { // make things deterministic: order the branches if (first.compareTo(second) <= 0) { this.first = first; @@ -281,6 +281,6 @@ protected int compareToAux(RegularExpression other) { @Override public RegularExpression repeat(long n) { - return new Or(first.repeat(n), second.repeat(n)); + return new Or(first.repeat(n), second.repeat(n)).simplify(); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index 8be07f871..ddce789b1 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -23,7 +23,7 @@ public final class Star extends RegularExpression { * * @param op the inner regular expression */ - Star(RegularExpression op) { + public Star(RegularExpression op) { this.op = op; } From 5976f94f1f4af559e2bcb0cee5c99f8d7db5dad4 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Sun, 9 Jul 2023 11:22:46 +0200 Subject: [PATCH 04/18] trim abstract semantics --- .../string/tarsis/RegexAutomaton.java | 4 +++ .../lisa/analysis/string/tarsis/Tarsis.java | 4 +++ .../lisa/analysis/string/tarsis/TrimTest.java | 31 +++++++++++++++++++ .../lisa/util/datastructures/regex/Atom.java | 5 +++ .../lisa/util/datastructures/regex/Comp.java | 10 ++++++ .../util/datastructures/regex/EmptySet.java | 5 +++ .../lisa/util/datastructures/regex/Or.java | 5 +++ .../regex/RegularExpression.java | 1 + .../lisa/util/datastructures/regex/Star.java | 5 +++ 9 files changed, 70 insertions(+) create mode 100644 lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index f658523ed..cf14b3ddf 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -466,4 +466,8 @@ public RegexAutomaton repeat(long n) { return emptyString(); return toRegex().simplify().repeat(n).toAutomaton(this).minimize(); } + + public RegexAutomaton trim() { + return toRegex().trim().toAutomaton(this); + } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index 2b9ecff46..482634193 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -424,4 +424,8 @@ else if (intv.interval.isFinite()) { } else return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star())); } + + public Tarsis trim() { + return new Tarsis(this.a.trim()); + } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java new file mode 100644 index 000000000..a318a3401 --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java @@ -0,0 +1,31 @@ +package it.unive.lisa.analysis.string.tarsis; + +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +import it.unive.lisa.util.numeric.MathNumberConversionException; + +public class TrimTest { + + @Test + public void repeatSingleString() throws MathNumberConversionException { + RegexAutomaton abc = RegexAutomaton.string("abc"); + RegexAutomaton ws = RegexAutomaton.string(" "); + RegexAutomaton epsilon = RegexAutomaton.string(""); + RegexAutomaton abc_with_ws = RegexAutomaton.string(" a b c "); + + Tarsis a = new Tarsis(abc); + Tarsis b = new Tarsis(ws); + Tarsis c = new Tarsis(abc_with_ws); + + // "abc".trim() = "abc" + assertTrue(a.trim().getAutomaton().isEqualTo(abc)); + + // " ".trim() = "" + assertTrue(b.trim().getAutomaton().isEqualTo(epsilon)); + + // " a b c ".trim() = "a b c" + assertTrue(c.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c"))); + } +} diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index ee5a179cf..7be4e987a 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -194,4 +194,9 @@ public RegularExpression repeat(long n) { r.simplify(); return r; } + + @Override + public RegularExpression trim() { + return new Atom(this.string.trim()); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 2815e6ca2..41febad48 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -300,4 +300,14 @@ public RegularExpression repeat(long n) { r = new Comp(r, this); return r.simplify(); } + + @Override + public RegularExpression trim() { + + RegularExpression trimFirst = first.trim().simplify(); + if (trimFirst.isEmpty()) + return second.trim(); + else + return new Comp(trimFirst, second).simplify(); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index 68bda350d..99cfc1e76 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -137,4 +137,9 @@ protected int compareToAux(RegularExpression other) { public RegularExpression repeat(long n) { return this; } + + @Override + public RegularExpression trim() { + return this; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index 4f590aedc..13f93308e 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -283,4 +283,9 @@ protected int compareToAux(RegularExpression other) { public RegularExpression repeat(long n) { return new Or(first.repeat(n), second.repeat(n)).simplify(); } + + @Override + public RegularExpression trim() { + return new Or(first.trim(), second.trim()); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index 7bb255ffb..ef22b4f72 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -313,6 +313,7 @@ public String toString() { public abstract RegularExpression repeat(long n); + public abstract RegularExpression trim(); /** * Yields {@code true} if and only if this regular expression corresponds to * the empty string or to no strings at all. diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index ddce789b1..b86f16689 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -240,4 +240,9 @@ protected int compareToAux(RegularExpression other) { public RegularExpression repeat(long n) { return this; } + + @Override + public RegularExpression trim() { + return new Star(op.trim()); + } } From ec852b263d4a988ce0d906bdc357e46e516e1489 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Tue, 11 Jul 2023 10:44:49 +0200 Subject: [PATCH 05/18] Improved trim semantics of Kleene star regex --- .../it/unive/lisa/analysis/string/tarsis/TrimTest.java | 5 +++++ .../it/unive/lisa/util/datastructures/regex/Star.java | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java index a318a3401..34f7b690b 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java @@ -14,10 +14,12 @@ public void repeatSingleString() throws MathNumberConversionException { RegexAutomaton ws = RegexAutomaton.string(" "); RegexAutomaton epsilon = RegexAutomaton.string(""); RegexAutomaton abc_with_ws = RegexAutomaton.string(" a b c "); + RegexAutomaton empty_star = RegexAutomaton.string(" ").star(); Tarsis a = new Tarsis(abc); Tarsis b = new Tarsis(ws); Tarsis c = new Tarsis(abc_with_ws); + Tarsis d = new Tarsis(empty_star); // "abc".trim() = "abc" assertTrue(a.trim().getAutomaton().isEqualTo(abc)); @@ -27,5 +29,8 @@ public void repeatSingleString() throws MathNumberConversionException { // " a b c ".trim() = "a b c" assertTrue(c.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c"))); + + // (" ")*.trim() = "" + assertTrue(d.trim().getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index b86f16689..489910e08 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -92,7 +92,7 @@ else if (op.isStar()) @Override public , - T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { + T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { return op.toAutomaton(factory).star(); } @@ -243,6 +243,10 @@ public RegularExpression repeat(long n) { @Override public RegularExpression trim() { - return new Star(op.trim()); + RegularExpression trimOp = op.trim().simplify(); + if (trimOp.isEmpty()) + return trimOp; + else + return new Star(trimOp); } } From e8b5c34cc0b5099a1d57b6000b1151f86a346df1 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Tue, 11 Jul 2023 10:52:00 +0200 Subject: [PATCH 06/18] Some tests on trim --- .../lisa/analysis/string/tarsis/TrimTest.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java index 34f7b690b..5faaa66fc 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java @@ -15,22 +15,31 @@ public void repeatSingleString() throws MathNumberConversionException { RegexAutomaton epsilon = RegexAutomaton.string(""); RegexAutomaton abc_with_ws = RegexAutomaton.string(" a b c "); RegexAutomaton empty_star = RegexAutomaton.string(" ").star(); - - Tarsis a = new Tarsis(abc); - Tarsis b = new Tarsis(ws); - Tarsis c = new Tarsis(abc_with_ws); - Tarsis d = new Tarsis(empty_star); + RegexAutomaton comp1 = RegexAutomaton.string(" ").concat(RegexAutomaton.string(" abc")); + RegexAutomaton comp2 = RegexAutomaton.string(" a").concat(RegexAutomaton.string(" b ")); // "abc".trim() = "abc" + Tarsis a = new Tarsis(abc); assertTrue(a.trim().getAutomaton().isEqualTo(abc)); // " ".trim() = "" + Tarsis b = new Tarsis(ws); assertTrue(b.trim().getAutomaton().isEqualTo(epsilon)); // " a b c ".trim() = "a b c" + Tarsis c = new Tarsis(abc_with_ws); assertTrue(c.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c"))); // (" ")*.trim() = "" + Tarsis d = new Tarsis(empty_star); assertTrue(d.trim().getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); + + // " " + " abc".trim() = "abc" + Tarsis e = new Tarsis(comp1); + assertTrue(e.trim().getAutomaton().isEqualTo(abc)); + + // " a" + " b ".trim() = "a b " + Tarsis f = new Tarsis(comp2); + assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b "))); } } From 3fac1660f71af7c2a5b8c4f8054b323dc931eb66 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Wed, 19 Jul 2023 09:47:39 +0200 Subject: [PATCH 07/18] Fixed trim abstract semantics for Tarsis and added some tests --- .../string/tarsis/RegexAutomaton.java | 14 +++++--- .../lisa/analysis/string/tarsis/Tarsis.java | 3 +- .../analysis/string/tarsis/RepeatTest.java | 29 +++++++-------- .../lisa/analysis/string/tarsis/TrimTest.java | 31 ++++++++++------ .../datastructures/automaton/Automaton.java | 4 +-- .../lisa/util/datastructures/regex/Atom.java | 18 +++++++--- .../lisa/util/datastructures/regex/Comp.java | 36 ++++++++++++++----- .../util/datastructures/regex/EmptySet.java | 15 ++++++-- .../lisa/util/datastructures/regex/Or.java | 16 +++++++-- .../regex/RegularExpression.java | 12 ++++--- .../lisa/util/datastructures/regex/Star.java | 27 +++++++++----- .../util/datastructures/regex/TopAtom.java | 5 ++- 12 files changed, 142 insertions(+), 68 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index cf14b3ddf..3412e1908 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -457,17 +457,21 @@ private RegexAutomaton union(RegexAutomaton... automata) { } /** - * Yields an automaton that corresponds to the {@code n}-time concatenation of {@code this}. + * Yields an automaton that corresponds to the {@code n}-time concatenation + * of {@code this}. + * * @param n the number of repetitions - * @return an automaton that corresponds to the {@code n}-time concatenation of {@code this} + * + * @return an automaton that corresponds to the {@code n}-time concatenation + * of {@code this} */ public RegexAutomaton repeat(long n) { if (n == 0) - return emptyString(); + return emptyString(); return toRegex().simplify().repeat(n).toAutomaton(this).minimize(); } - + public RegexAutomaton trim() { - return toRegex().trim().toAutomaton(this); + return toRegex().simplify().trim().toAutomaton(this); } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index 482634193..ec17bf0b7 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -27,7 +27,6 @@ import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; import it.unive.lisa.util.numeric.MathNumberConversionException; - import java.util.Objects; import java.util.SortedSet; import java.util.TreeSet; @@ -424,7 +423,7 @@ else if (intv.interval.isFinite()) { } else return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star())); } - + public Tarsis trim() { return new Tarsis(this.a.trim()); } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java index 979f978d2..2e5c4ed01 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java @@ -2,13 +2,12 @@ import static org.junit.Assert.assertTrue; -import org.junit.Test; - import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.util.datastructures.regex.Atom; import it.unive.lisa.util.datastructures.regex.Or; import it.unive.lisa.util.numeric.MathNumber; import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; public class RepeatTest { @@ -22,50 +21,52 @@ public void repeatSingleString() throws MathNumberConversionException { // "abc".repeat(1) = "abc" assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(abc)); - + // "abc".repeat(2) = "abcabc" assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abcabc)); // "abc".repeat(0) = "" assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); - // "abc".repeat([1,2]) = {"abc", "abcabc"} assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(abc.union(abcabc))); // "abc".repeat([0,+infty]) = (abc)* - assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star)); + assertTrue( + a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star)); // "abc".repeat([1,+infty]) = abc(abc)* - assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc.concat(abc_star))); + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(abc.concat(abc_star))); } - + @Test public void repeatTwoStrings() throws MathNumberConversionException { - + RegexAutomaton ab_or_cd = new Or(new Atom("ab"), new Atom("cd")).toAutomaton(RegexAutomaton.emptyLang()); - RegexAutomaton abab_or_cdcd = new Or(new Atom("abab"), new Atom("cdcd")).toAutomaton(RegexAutomaton.emptyLang()); + RegexAutomaton abab_or_cdcd = new Or(new Atom("abab"), new Atom("cdcd")) + .toAutomaton(RegexAutomaton.emptyLang()); Tarsis a = new Tarsis(ab_or_cd); // {"ab", "cd"}.repeat(1) = {"ab", "cd"} assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(ab_or_cd)); - - + // {"ab", "cd"}.repeat(2) = {"abab", "cdcd"} assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abab_or_cdcd)); // {"ab", "cd"}.repeat(0) = {""} assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); - // {"ab", "cd"}.repeat([1,2]) = {"ab", "cd", "abab", "cdcd"} assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(ab_or_cd.union(abab_or_cdcd))); // {"ab", "cd"}.repeat([0,+infty]) = (ab|cd)* - assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(ab_or_cd.star())); + assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(ab_or_cd.star())); // {"ab", "cd"}.repeat([1,+infty]) = (ab|cd)(ab|cd)* - assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(ab_or_cd.concat(ab_or_cd.star()))); + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(ab_or_cd.concat(ab_or_cd.star()))); } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java index 5faaa66fc..6619d51bd 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java @@ -2,14 +2,13 @@ import static org.junit.Assert.assertTrue; -import org.junit.Test; - import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; public class TrimTest { @Test - public void repeatSingleString() throws MathNumberConversionException { + public void trimSingleString() throws MathNumberConversionException { RegexAutomaton abc = RegexAutomaton.string("abc"); RegexAutomaton ws = RegexAutomaton.string(" "); RegexAutomaton epsilon = RegexAutomaton.string(""); @@ -17,29 +16,39 @@ public void repeatSingleString() throws MathNumberConversionException { RegexAutomaton empty_star = RegexAutomaton.string(" ").star(); RegexAutomaton comp1 = RegexAutomaton.string(" ").concat(RegexAutomaton.string(" abc")); RegexAutomaton comp2 = RegexAutomaton.string(" a").concat(RegexAutomaton.string(" b ")); + RegexAutomaton comp3 = RegexAutomaton.string(" abc ").concat(RegexAutomaton.string(" ")); // "abc".trim() = "abc" Tarsis a = new Tarsis(abc); assertTrue(a.trim().getAutomaton().isEqualTo(abc)); - // " ".trim() = "" + // " ".trim() = "" Tarsis b = new Tarsis(ws); assertTrue(b.trim().getAutomaton().isEqualTo(epsilon)); - // " a b c ".trim() = "a b c" + // " a b c ".trim() = "a b c" Tarsis c = new Tarsis(abc_with_ws); assertTrue(c.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c"))); - // (" ")*.trim() = "" + // (" ")*.trim() = "" Tarsis d = new Tarsis(empty_star); assertTrue(d.trim().getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); - - // " " + " abc".trim() = "abc" + + // " " + " abc".trim() = "abc" Tarsis e = new Tarsis(comp1); assertTrue(e.trim().getAutomaton().isEqualTo(abc)); - - // " a" + " b ".trim() = "a b " + + // " a" + " b ".trim() = "a b" Tarsis f = new Tarsis(comp2); - assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b "))); + assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b"))); + + // " a b c "*.trim() = "a b c " + (" a b c ")* + " a b c" + Tarsis g = new Tarsis(abc_with_ws.star()); + assertTrue(g.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c ").concat(abc_with_ws.star()) + .concat(RegexAutomaton.string(" a b c")).star())); + + // " abc " + " ".trim() = "abc" + Tarsis h = new Tarsis(comp3); + assertTrue(h.trim().getAutomaton().isEqualTo(RegexAutomaton.string("abc"))); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java index 1cd96ca76..b10a67490 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Automaton.java @@ -37,7 +37,7 @@ * this class have on their transitions */ public abstract class Automaton, T extends TransitionSymbol> -implements AutomataFactory { + implements AutomataFactory { /** * The states of this automaton. @@ -1053,7 +1053,7 @@ public String prettyPrint() { for (Transition t : transitions) result.append("\t").append(st).append(" [").append(t.getSymbol()).append("] -> ") - .append(t.getDestination()).append("\n"); + .append(t.getDestination()).append("\n"); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index 7be4e987a..4a9e7626e 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -1,12 +1,12 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; +import org.apache.commons.lang3.StringUtils; /** * A {@link RegularExpression} representing a single string. @@ -79,7 +79,7 @@ public RegularExpression simplify() { @Override public , - T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { + T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { return isEmpty() ? factory.emptyString() : factory.singleString(string); } @@ -199,4 +199,14 @@ public RegularExpression repeat(long n) { public RegularExpression trim() { return new Atom(this.string.trim()); } + + @Override + public RegularExpression trimLeft() { + return new Atom(StringUtils.stripStart(this.string, null)); + } + + @Override + public RegularExpression trimRight() { + return new Atom(StringUtils.stripEnd(this.string, null)); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 41febad48..2959d0aef 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing the sequential composition of two @@ -289,7 +288,7 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asComp().second); } - + @Override public RegularExpression repeat(long n) { if (n == 0) @@ -303,11 +302,32 @@ public RegularExpression repeat(long n) { @Override public RegularExpression trim() { - - RegularExpression trimFirst = first.trim().simplify(); - if (trimFirst.isEmpty()) + RegularExpression trimLeftFirst = first.trimLeft().simplify(); + if (trimLeftFirst.isEmpty()) return second.trim(); + + RegularExpression trimRightSecond = second.trimRight().simplify(); + if (trimRightSecond.isEmpty()) + return first.trim(); + + return new Comp(trimLeftFirst, trimRightSecond).simplify(); + } + + @Override + public RegularExpression trimLeft() { + RegularExpression trimLeftFirst = this.first.trimLeft().simplify(); + if (trimLeftFirst.isEmpty()) + return this.second.trimLeft(); + else + return new Comp(trimLeftFirst, second); + } + + @Override + public RegularExpression trimRight() { + RegularExpression trimRightSecond = this.second.trimRight().simplify(); + if (trimRightSecond.isEmpty()) + return this.first.trimRight(); else - return new Comp(trimFirst, second).simplify(); + return new Comp(this.first, trimRightSecond); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index 99cfc1e76..9bff11fe8 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -1,11 +1,10 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.Collections; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; +import java.util.Collections; +import java.util.Set; /** * A {@link RegularExpression} representing the empty set of strings. @@ -142,4 +141,14 @@ public RegularExpression repeat(long n) { public RegularExpression trim() { return this; } + + @Override + public RegularExpression trimLeft() { + return this; + } + + @Override + public RegularExpression trimRight() { + return this; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index 13f93308e..92a1db1fe 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -1,11 +1,10 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing an or between two other regular @@ -288,4 +287,15 @@ public RegularExpression repeat(long n) { public RegularExpression trim() { return new Or(first.trim(), second.trim()); } + + @Override + public RegularExpression trimLeft() { + return new Or(first.trimLeft(), second.trimLeft()); + + } + + @Override + public RegularExpression trimRight() { + return new Or(first.trimRight(), second.trimRight()); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index ef22b4f72..de8db3966 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.Set; -import java.util.stream.Collectors; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.Set; +import java.util.stream.Collectors; /** * A regular expression that can be recognized by an {@link Automaton}, or that @@ -312,8 +311,13 @@ public String toString() { protected abstract Set substringAux(int charsToSkip, int missingChars); public abstract RegularExpression repeat(long n); - + public abstract RegularExpression trim(); + + public abstract RegularExpression trimLeft(); + + public abstract RegularExpression trimRight(); + /** * Yields {@code true} if and only if this regular expression corresponds to * the empty string or to no strings at all. diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index 489910e08..c5a1acf8a 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing a loop, repeated an arbitrary number @@ -92,7 +91,7 @@ else if (op.isStar()) @Override public , - T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { + T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { return op.toAutomaton(factory).star(); } @@ -243,10 +242,20 @@ public RegularExpression repeat(long n) { @Override public RegularExpression trim() { - RegularExpression trimOp = op.trim().simplify(); - if (trimOp.isEmpty()) - return trimOp; - else - return new Star(trimOp); + RegularExpression left = this.op.trimLeft().simplify(); + if (left.isEmpty()) + return Atom.EPSILON; + RegularExpression right = this.op.trimRight().simplify(); + return new Star(new Comp(new Comp(left, this), right)).simplify(); + } + + @Override + public RegularExpression trimLeft() { + return new Star(new Comp(this.op.trimLeft(), op)); + } + + @Override + public RegularExpression trimRight() { + return new Comp(this, op.trimRight()); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java index 1c476303a..446c1a867 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java @@ -1,12 +1,11 @@ package it.unive.lisa.util.datastructures.regex; -import java.util.HashSet; -import java.util.Set; - import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; +import java.util.HashSet; +import java.util.Set; /** * A {@link RegularExpression} representing a sequence of unknown characters of From cbd3a37919ed988356e6268644dbcaa97e7c55c5 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Thu, 20 Jul 2023 16:17:33 +0200 Subject: [PATCH 08/18] trim abstract semantics defined on regexes --- .../string/tarsis/RegexAutomaton.java | 10 +++++- .../lisa/analysis/string/tarsis/TrimTest.java | 9 ++--- .../lisa/util/datastructures/regex/Atom.java | 18 ++++++---- .../lisa/util/datastructures/regex/Comp.java | 34 +++++++++++-------- .../util/datastructures/regex/EmptySet.java | 8 ++--- .../lisa/util/datastructures/regex/Or.java | 10 +++--- .../regex/RegularExpression.java | 4 +-- .../lisa/util/datastructures/regex/Star.java | 20 ++++++----- 8 files changed, 64 insertions(+), 49 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index 3412e1908..b07061f0d 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -471,7 +471,15 @@ public RegexAutomaton repeat(long n) { return toRegex().simplify().repeat(n).toAutomaton(this).minimize(); } + public RegexAutomaton trimLeft() { + return this.toRegex().trimLeft().simplify().toAutomaton(this); + } + + public RegexAutomaton trimRight() { + return this.toRegex().trimRight().simplify().toAutomaton(this); + } + public RegexAutomaton trim() { - return toRegex().simplify().trim().toAutomaton(this); + return this.toRegex().trimRight().simplify().trimLeft().simplify().toAutomaton(this); } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java index 6619d51bd..992d15fe8 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java @@ -42,13 +42,8 @@ public void trimSingleString() throws MathNumberConversionException { Tarsis f = new Tarsis(comp2); assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b"))); - // " a b c "*.trim() = "a b c " + (" a b c ")* + " a b c" - Tarsis g = new Tarsis(abc_with_ws.star()); - assertTrue(g.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c ").concat(abc_with_ws.star()) - .concat(RegexAutomaton.string(" a b c")).star())); - // " abc " + " ".trim() = "abc" - Tarsis h = new Tarsis(comp3); - assertTrue(h.trim().getAutomaton().isEqualTo(RegexAutomaton.string("abc"))); + Tarsis g = new Tarsis(comp3); + assertTrue(g.trim().getAutomaton().isEqualTo(RegexAutomaton.string("abc"))); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index 4a9e7626e..fd8f92ae7 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -196,17 +196,23 @@ public RegularExpression repeat(long n) { } @Override - public RegularExpression trim() { - return new Atom(this.string.trim()); + public RegularExpression trimLeft() { + String trimLeft = StringUtils.stripStart(this.string, null); + if (trimLeft.isEmpty()) + return Atom.EPSILON; + return new Atom(trimLeft); } @Override - public RegularExpression trimLeft() { - return new Atom(StringUtils.stripStart(this.string, null)); + public RegularExpression trimRight() { + String trimRight = StringUtils.stripEnd(this.string, null); + if (trimRight.isEmpty()) + return Atom.EPSILON; + return new Atom(trimRight); } @Override - public RegularExpression trimRight() { - return new Atom(StringUtils.stripEnd(this.string, null)); + protected boolean readsWhiteSpaceString() { + return string.trim().isEmpty(); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 2959d0aef..7555a8308 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -300,34 +300,38 @@ public RegularExpression repeat(long n) { return r.simplify(); } - @Override - public RegularExpression trim() { - RegularExpression trimLeftFirst = first.trimLeft().simplify(); - if (trimLeftFirst.isEmpty()) - return second.trim(); - - RegularExpression trimRightSecond = second.trimRight().simplify(); - if (trimRightSecond.isEmpty()) - return first.trim(); - - return new Comp(trimLeftFirst, trimRightSecond).simplify(); - } - @Override public RegularExpression trimLeft() { - RegularExpression trimLeftFirst = this.first.trimLeft().simplify(); + RegularExpression trimLeftFirst = first.trimLeft().simplify(); + // it means that first reads just whitespaces strings (L(first) + // \subseteq { }*) if (trimLeftFirst.isEmpty()) return this.second.trimLeft(); + // first reads at least a whitespace string + else if (first.readsWhiteSpaceString()) + return new Or(new Comp(trimLeftFirst, second), second.trimLeft()); +// return new Comp(trimLeftFirst, new Or(second, second.trimLeft())); else return new Comp(trimLeftFirst, second); } @Override public RegularExpression trimRight() { - RegularExpression trimRightSecond = this.second.trimRight().simplify(); + RegularExpression trimRightSecond = second.trimRight().simplify(); + // it means that second reads just whitespaces strings (L(second) + // \subseteq { }*) if (trimRightSecond.isEmpty()) return this.first.trimRight(); + // second reads at least a whitespace string + else if (second.readsWhiteSpaceString()) + return new Or(new Comp(first, trimRightSecond), first.trimRight()); +// return new Comp(new Or(first, first.trimRight()), trimRightSecond); else return new Comp(this.first, trimRightSecond); } + + @Override + protected boolean readsWhiteSpaceString() { + return first.readsWhiteSpaceString() && second.readsWhiteSpaceString(); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index 9bff11fe8..b5b07d41f 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -138,17 +138,17 @@ public RegularExpression repeat(long n) { } @Override - public RegularExpression trim() { + public RegularExpression trimLeft() { return this; } @Override - public RegularExpression trimLeft() { + public RegularExpression trimRight() { return this; } @Override - public RegularExpression trimRight() { - return this; + protected boolean readsWhiteSpaceString() { + return false; } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index 92a1db1fe..734c3740f 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -283,11 +283,6 @@ public RegularExpression repeat(long n) { return new Or(first.repeat(n), second.repeat(n)).simplify(); } - @Override - public RegularExpression trim() { - return new Or(first.trim(), second.trim()); - } - @Override public RegularExpression trimLeft() { return new Or(first.trimLeft(), second.trimLeft()); @@ -298,4 +293,9 @@ public RegularExpression trimLeft() { public RegularExpression trimRight() { return new Or(first.trimRight(), second.trimRight()); } + + @Override + protected boolean readsWhiteSpaceString() { + return first.readsWhiteSpaceString() || second.readsWhiteSpaceString(); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index de8db3966..cd4344a9f 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -312,8 +312,6 @@ public String toString() { public abstract RegularExpression repeat(long n); - public abstract RegularExpression trim(); - public abstract RegularExpression trimLeft(); public abstract RegularExpression trimRight(); @@ -431,6 +429,8 @@ public String toString() { */ public abstract RegularExpression[] explode(); + protected abstract boolean readsWhiteSpaceString(); + /** * Builds the Kleene closure of this regular expression ({@link Star}), * recognizing {@code this} zero or more times. diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index c5a1acf8a..71a198f5d 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -241,21 +241,23 @@ public RegularExpression repeat(long n) { } @Override - public RegularExpression trim() { - RegularExpression left = this.op.trimLeft().simplify(); - if (left.isEmpty()) + public RegularExpression trimLeft() { + RegularExpression trimLeft = op.trimLeft().simplify(); + if (trimLeft.isEmpty()) return Atom.EPSILON; - RegularExpression right = this.op.trimRight().simplify(); - return new Star(new Comp(new Comp(left, this), right)).simplify(); + return new Or(Atom.EPSILON, new Comp(trimLeft, this)); } @Override - public RegularExpression trimLeft() { - return new Star(new Comp(this.op.trimLeft(), op)); + public RegularExpression trimRight() { + RegularExpression trimRight = op.trimRight(); + if (trimRight.isEmpty()) + return Atom.EPSILON; + return new Or(Atom.EPSILON, new Comp(this, trimRight)); } @Override - public RegularExpression trimRight() { - return new Comp(this, op.trimRight()); + protected boolean readsWhiteSpaceString() { + return op.readsWhiteSpaceString(); } } From b9e66c9baad04b3b7fe402f88af4981d80d4cbe4 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Mon, 24 Jul 2023 17:02:23 +0200 Subject: [PATCH 09/18] Apply spotless and Javadoc --- .../string/tarsis/RegexAutomaton.java | 21 +++++++++++++++++ .../lisa/analysis/string/tarsis/Tarsis.java | 19 +++++++++++++++ .../regex/RegularExpression.java | 23 +++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index b07061f0d..fafe115a8 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -471,14 +471,35 @@ public RegexAutomaton repeat(long n) { return toRegex().simplify().repeat(n).toAutomaton(this).minimize(); } + /** + * Yields a new automaton where leading whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where leading whitespaces have been removed from + * {@code this} + */ public RegexAutomaton trimLeft() { return this.toRegex().trimLeft().simplify().toAutomaton(this); } + /** + * Yields a new automaton where trailing whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where trailing whitespaces have been removed from + * {@code this} + */ public RegexAutomaton trimRight() { return this.toRegex().trimRight().simplify().toAutomaton(this); } + /** + * Yields a new automaton where trailing and leading whitespaces have been + * removed from {@code this}. + * + * @return a new automaton where trailing and leading whitespaces have been + * removed from {@code this} + */ public RegexAutomaton trim() { return this.toRegex().trimRight().simplify().trimLeft().simplify().toAutomaton(this); } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index ec17bf0b7..710edb96d 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -407,6 +407,18 @@ public Satisfiability containsChar(char c) throws SemanticException { new Tarsis(RegexAutomaton.string(String.valueOf(c))), null); } + /** + * Yields a new Tarsis's instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv}. + * + * @param intv the interval + * + * @return a new Tarsis's instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv} + * + * @throws MathNumberConversionException if {@code intv} is iterated but is + * not finite + */ public Tarsis repeat(Interval intv) throws MathNumberConversionException { if (intv.isTop() || a.hasCycle()) return new Tarsis(a.star()); @@ -424,6 +436,13 @@ else if (intv.interval.isFinite()) { return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star())); } + /** + * Yields a new Tarsis's instance where trailing and leading whitespaces + * have been removed from {@code this}. + * + * @return a new Tarsis's instance where trailing and leading whitespaces + * have been removed from {@code this} + */ public Tarsis trim() { return new Tarsis(this.a.trim()); } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index cd4344a9f..a876aef12 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -310,10 +310,33 @@ public String toString() { */ protected abstract Set substringAux(int charsToSkip, int missingChars); + /** + * Yields a new regular expression corresponding to the {@code n}-repetition + * of {@code this}. + * + * @param n number of repetitions + * + * @return a new regular expression corresponding to the + * {@code n}-repetition of {@code this} + */ public abstract RegularExpression repeat(long n); + /** + * Yields a new regular expression where leading whitespaces have been + * removed from {@code this}. + * + * @return a new regular expression where leading whitespaces have been + * removed from {@code this} + */ public abstract RegularExpression trimLeft(); + /** + * Yields a new regular expression where trailing whitespaces have been + * removed from {@code this}. + * + * @return a new regular expression where trailing whitespaces have been + * removed from {@code this} + */ public abstract RegularExpression trimRight(); /** From 18fdb0364ad24ab71b15899d20d1700c7713791c Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Mon, 21 Aug 2023 11:21:45 +0200 Subject: [PATCH 10/18] Abstract semantics of trims for FSA --- .../unive/lisa/analysis/string/fsa/FSA.java | 17 ++-- .../analysis/string/fsa/SimpleAutomaton.java | 88 +++++++++++++++++-- .../lisa/analysis/string/fsa/TrimTest.java | 32 +++++++ 3 files changed, 125 insertions(+), 12 deletions(-) create mode 100644 lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java index 7433e4cce..feae68c58 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java @@ -1,5 +1,11 @@ package it.unive.lisa.analysis.string.fsa; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; + import it.unive.lisa.analysis.Lattice; import it.unive.lisa.analysis.SemanticDomain.Satisfiability; import it.unive.lisa.analysis.SemanticException; @@ -21,11 +27,6 @@ import it.unive.lisa.util.datastructures.automaton.Transition; import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; /** * A class that represent the Finite State Automaton domain for strings, @@ -373,7 +374,7 @@ public FSA replace(FSA search, FSA repl) { return TOP; } } - + @Override public Satisfiability containsChar(char c) throws SemanticException { if (isTop()) @@ -417,4 +418,8 @@ public Satisfiability containsChar(char c) throws SemanticException { return Satisfiability.NOT_SATISFIED; } + + public FSA trim() { + return new FSA(this.a.trim()); + } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java index 76cda7128..84bb4e457 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java @@ -1,18 +1,20 @@ package it.unive.lisa.analysis.string.fsa; -import it.unive.lisa.util.datastructures.automaton.Automaton; -import it.unive.lisa.util.datastructures.automaton.CyclicAutomatonException; -import it.unive.lisa.util.datastructures.automaton.State; -import it.unive.lisa.util.datastructures.automaton.Transition; -import it.unive.lisa.util.datastructures.regex.Atom; -import it.unive.lisa.util.datastructures.regex.RegularExpression; import java.util.Collections; +import java.util.HashSet; import java.util.Optional; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.stream.Collectors; +import it.unive.lisa.util.datastructures.automaton.Automaton; +import it.unive.lisa.util.datastructures.automaton.CyclicAutomatonException; +import it.unive.lisa.util.datastructures.automaton.State; +import it.unive.lisa.util.datastructures.automaton.Transition; +import it.unive.lisa.util.datastructures.regex.Atom; +import it.unive.lisa.util.datastructures.regex.RegularExpression; + /** * A class that describes an generic automaton(dfa, nfa, epsilon nfa) using a * standard alphabet of single characters. @@ -180,4 +182,78 @@ public SimpleAutomaton replace(SimpleAutomaton toReplace, SimpleAutomaton str) t result = result.union(new SimpleAutomaton(a.replace(b, c))); return result; } + + public SimpleAutomaton trimLeft() { + SimpleAutomaton result = copy(); + + Set> toAdd = new HashSet<>(); + Set> toRemove = new HashSet<>(); + + while (true) { + for (Transition t : result.getOutgoingTransitionsFrom(result.getInitialState())) + if (t.getSymbol().getSymbol().equals(" ")) { + toRemove.add(t); + toAdd.add(new Transition(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); + } + + result.removeTransitions(toRemove); + result.getTransitions().addAll(toAdd); + + result.minimize(); + + boolean b = false; + for (Transition t : result.getOutgoingTransitionsFrom(result.getInitialState())){ + if (t.getSymbol().getSymbol().equals(" ")) + b = true; + } + + if (!b) + break; + } + + return result; + } + + public SimpleAutomaton trimRight() { + + SimpleAutomaton result = copy(); + + + Set> toAdd = new HashSet<>(); + Set> toRemove = new HashSet<>(); + + while (true) { + + for (State qf : result.getFinalStates()) { + for (Transition t : result.getIngoingTransitionsFrom(qf)) + if (t.getSymbol().getSymbol().equals(" ")) { + toRemove.add(t); + toAdd.add(new Transition(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); + } + } + + result.removeTransitions(toRemove); + result.getTransitions().addAll(toAdd); + + result.minimize(); + + boolean b = false; + for (State qf : result.getFinalStates()) { + + for (Transition t : result.getIngoingTransitionsFrom(qf)){ + if (t.getSymbol().equals(new StringSymbol(" "))) + b = true; + } + } + + if (!b) + break; + } + + return result; + } + + public SimpleAutomaton trim() { + return trimLeft().trimRight(); + } } \ No newline at end of file diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java new file mode 100644 index 000000000..e779fcf3a --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java @@ -0,0 +1,32 @@ +package it.unive.lisa.analysis.string.fsa; + +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +public class TrimTest { + + @Test + public void test01() { + SimpleAutomaton a = new SimpleAutomaton(" a "); + assertTrue(a.trim().isEqualTo(new SimpleAutomaton("a"))); + } + + @Test + public void test02() { + SimpleAutomaton b = new SimpleAutomaton(" a ").union(new SimpleAutomaton(" b ")); + assertTrue(b.trim().isEqualTo(new SimpleAutomaton("a").union(new SimpleAutomaton("b")))); + } + + @Test + public void test03() { + SimpleAutomaton c = new SimpleAutomaton("a").union(new SimpleAutomaton(" b ")); + assertTrue(c.trim().isEqualTo(new SimpleAutomaton("a").union(new SimpleAutomaton("b")))); + } + + @Test + public void test04() { + SimpleAutomaton c = new SimpleAutomaton(" a ").star(); + assertTrue(c.trim().isEqualTo(new SimpleAutomaton("a").star())); + } +} From c1891887b65bf525e26b8280744f8750f65821b9 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Mon, 21 Aug 2023 13:13:36 +0200 Subject: [PATCH 11/18] Abstract semantics of repeat for FSA --- .../analysis/string/fsa/SimpleAutomaton.java | 187 +++++++++++++++++- .../lisa/analysis/string/fsa/RepeatTest.java | 35 ++++ .../datastructures/automaton/Transition.java | 2 + 3 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java index 84bb4e457..99089d467 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java @@ -1,6 +1,7 @@ package it.unive.lisa.analysis.string.fsa; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Optional; import java.util.Set; @@ -8,12 +9,16 @@ import java.util.TreeSet; import java.util.stream.Collectors; +import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.CyclicAutomatonException; import it.unive.lisa.util.datastructures.automaton.State; import it.unive.lisa.util.datastructures.automaton.Transition; import it.unive.lisa.util.datastructures.regex.Atom; import it.unive.lisa.util.datastructures.regex.RegularExpression; +import it.unive.lisa.util.numeric.IntInterval; +import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; /** * A class that describes an generic automaton(dfa, nfa, epsilon nfa) using a @@ -234,7 +239,7 @@ public SimpleAutomaton trimRight() { result.removeTransitions(toRemove); result.getTransitions().addAll(toAdd); - + result.minimize(); boolean b = false; @@ -252,8 +257,186 @@ public SimpleAutomaton trimRight() { return result; } - + public SimpleAutomaton trim() { return trimLeft().trimRight(); } + + public SimpleAutomaton repeat(Interval i) throws MathNumberConversionException{ + if (equals(emptyString())) + return this; + else if (hasCycle()) + return star(); + + + MathNumber high = i.interval.getHigh(); + MathNumber low = i.interval.getLow(); + SimpleAutomaton epsilon = emptyString(); + + if(low.isMinusInfinity()) { + if(high.isPlusInfinity()) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + + if(high.isZero()) + return emptyString(); + else + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + } + + + long lowInt = low.toLong(); + + if(high.isPlusInfinity()){ + //need exception + if(lowInt < 0) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + if(low.isZero()) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + if(lowInt > 0) + return epsilon.union(auxRepeat(i.interval, getInitialState(), new TreeSet>(), emptyLanguage())); + } + + long highInt = high.toLong(); + + if(lowInt < 0){ + if(highInt < 0) + return emptyLanguage(); + + if(high.isZero()) + return emptyString(); + else + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + } + + if(low.isZero()) { + if(high.isZero()) + return emptyString(); + + if(highInt > 0) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + } + + if(lowInt > 0 && highInt > 0) + return auxRepeat(i.interval, getInitialState(), new TreeSet>(), emptyLanguage()); + + return emptyLanguage(); + } + + public SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet> delta, SimpleAutomaton result) throws MathNumberConversionException{ + + if(currentState.isFinal()){ + + SortedSet states = new TreeSet<>(); + + for(State s: getStates()) { + if (!s.equals(currentState)) + states.add(new State(s.getId(), s.isInitial(), false)); + else + states.add(new State(s.getId(), s.isInitial(), s.isFinal())); + } + + SimpleAutomaton temp = new SimpleAutomaton(states, delta); + SimpleAutomaton tempResult = temp.copy(); + + for(long k = 1; k < i.getLow().toLong(); k++) + tempResult = tempResult.connectAutomaton(temp, tempResult.getFinalStates(), false); + + if(i.getHigh().isPlusInfinity()){ + tempResult = tempResult.connectAutomaton(temp.copy().star(), tempResult.getFinalStates(), true); + } else { + for (long k = i.getLow().toLong(); k < i.getHigh().toLong(); k++) + tempResult = tempResult.connectAutomaton(temp, tempResult.getFinalStates(), true); + } + + tempResult = tempResult.minimize(); + result = result.union(tempResult); + + } + + for(Transition t: getOutgoingTransitionsFrom(currentState)){ + SortedSet> clone = new TreeSet>(delta); + clone.add(t); + result = auxRepeat(i, t.getDestination(), clone, result).union(result); + } + + return result; + } + + public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet connectOn, boolean b){ + SortedSet> delta = new TreeSet<>(); + SortedSet states = new TreeSet<>(); + HashMap firstMapping = new HashMap<>(); + HashMap secondMapping = new HashMap<>(); + int c = 0; + + if(equals(emptyString())){ + return second; + } + + if(second.equals(emptyString())){ + return this; + } + + for(State s : getStates()) { + State newState = null; + if (b) { + newState = new State(c++, s.isInitial(), s.isFinal()); + }else{ + if (connectOn.contains(s)) { + newState = new State(c++, s.isInitial(), false); + } else { + newState = new State(c++, s.isInitial(), s.isFinal()); + } + } + states.add(newState); + firstMapping.put(s, newState); + } + + for(Transition t: getTransitions()){ + delta.add(new Transition<>(firstMapping.get(t.getSource()), firstMapping.get(t.getDestination()), t.getSymbol())); + } + + + if(second.getStates().size() == 1 && second.getInitialState().isFinal()) { + for (Transition t : second.getOutgoingTransitionsFrom(second.getInitialState())) { + for (State s : connectOn) { + State newState = new State(firstMapping.get(s).getId(), s.isInitial(), true); + states.remove(firstMapping.get(s)); + states.add(newState); + delta.add(new Transition<>(newState, newState, t.getSymbol())); + } + second.minimize(); + } + }else{ + for (State s : second.getStates()) { + State newState = new State(c++, s.isInitial(), s.isFinal()); + states.add(newState); + secondMapping.put(s, newState); + } + + states.remove(secondMapping.get(second.getInitialState())); + + for (Transition t : second.getTransitions()) { + if (!t.getSource().isInitial() && !t.getDestination().isInitial()) { + delta.add(new Transition<>(secondMapping.get(t.getSource()), secondMapping.get(t.getDestination()), t.getSymbol())); + } + if (t.getSource().isInitial()) { + //TODO better recheck this function + for (State s : connectOn) { + if (states.contains(secondMapping.get(t.getSource()))) + delta.add(new Transition<>(secondMapping.get(t.getSource()), firstMapping.get(s), t.getSymbol())); + } + } + } + + for (Transition t : second.getOutgoingTransitionsFrom(second.getInitialState())) { + for (State s : connectOn) { + delta.add(new Transition<>(firstMapping.get(s), secondMapping.get(t.getDestination()), t.getSymbol())); + } + } + + } + + return new SimpleAutomaton(states, delta); + } } \ No newline at end of file diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java new file mode 100644 index 000000000..6b664221c --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java @@ -0,0 +1,35 @@ +package it.unive.lisa.analysis.string.fsa; + +import static org.junit.Assert.assertEquals; + +import org.junit.Test; + +import it.unive.lisa.analysis.numeric.Interval; +import it.unive.lisa.util.numeric.MathNumberConversionException; + +public class RepeatTest { + + @Test + public void repeatTest001() throws MathNumberConversionException{ + SimpleAutomaton a = new SimpleAutomaton("a"); + assertEquals(a.repeat(new Interval(2, 2)), new SimpleAutomaton("aa")); + } + + @Test + public void repeatTest002() throws MathNumberConversionException{ + SimpleAutomaton a = new SimpleAutomaton("alpha"); + assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("alphaalphaalpha")); + } + + @Test + public void repeatTest003() throws MathNumberConversionException{ + SimpleAutomaton a = new SimpleAutomaton("ab").union(new SimpleAutomaton("cd")); + assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("ababab").union(new SimpleAutomaton("cdcdcd"))); + } + + @Test + public void repeatTest004() throws MathNumberConversionException{ + SimpleAutomaton a = new SimpleAutomaton("ab").union(new SimpleAutomaton("cd")); + assertEquals(a.repeat(new Interval(0, 0)), a.emptyString()); + } +} diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java index 83484a8a5..2627c66be 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java @@ -28,6 +28,8 @@ public final class Transition> implements Comparab * transition. */ public Transition(State source, State destination, T symbol) { + Objects.requireNonNull(source); + Objects.requireNonNull(destination); this.source = source; this.destination = destination; this.symbol = symbol; From 9d233323bf2bf914b84a5c3c7bf71c9c61a3cc07 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Mon, 21 Aug 2023 13:14:23 +0200 Subject: [PATCH 12/18] Apply spotless --- .../unive/lisa/analysis/string/fsa/FSA.java | 15 +- .../analysis/string/fsa/SimpleAutomaton.java | 128 ++++++++++-------- .../lisa/analysis/string/fsa/RepeatTest.java | 35 +++-- .../lisa/analysis/string/fsa/TrimTest.java | 2 +- 4 files changed, 93 insertions(+), 87 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java index feae68c58..9255acea1 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java @@ -1,11 +1,5 @@ package it.unive.lisa.analysis.string.fsa; -import java.util.HashSet; -import java.util.Objects; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; - import it.unive.lisa.analysis.Lattice; import it.unive.lisa.analysis.SemanticDomain.Satisfiability; import it.unive.lisa.analysis.SemanticException; @@ -27,6 +21,11 @@ import it.unive.lisa.util.datastructures.automaton.Transition; import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; /** * A class that represent the Finite State Automaton domain for strings, @@ -374,7 +373,7 @@ public FSA replace(FSA search, FSA repl) { return TOP; } } - + @Override public Satisfiability containsChar(char c) throws SemanticException { if (isTop()) @@ -418,7 +417,7 @@ public Satisfiability containsChar(char c) throws SemanticException { return Satisfiability.NOT_SATISFIED; } - + public FSA trim() { return new FSA(this.a.trim()); } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java index 99089d467..6daccc5b3 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java @@ -1,14 +1,5 @@ package it.unive.lisa.analysis.string.fsa; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Optional; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.stream.Collectors; - import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.CyclicAutomatonException; @@ -19,6 +10,14 @@ import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; import it.unive.lisa.util.numeric.MathNumberConversionException; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.stream.Collectors; /** * A class that describes an generic automaton(dfa, nfa, epsilon nfa) using a @@ -207,7 +206,7 @@ public SimpleAutomaton trimLeft() { result.minimize(); boolean b = false; - for (Transition t : result.getOutgoingTransitionsFrom(result.getInitialState())){ + for (Transition t : result.getOutgoingTransitionsFrom(result.getInitialState())) { if (t.getSymbol().getSymbol().equals(" ")) b = true; } @@ -223,7 +222,6 @@ public SimpleAutomaton trimRight() { SimpleAutomaton result = copy(); - Set> toAdd = new HashSet<>(); Set> toRemove = new HashSet<>(); @@ -233,7 +231,8 @@ public SimpleAutomaton trimRight() { for (Transition t : result.getIngoingTransitionsFrom(qf)) if (t.getSymbol().getSymbol().equals(" ")) { toRemove.add(t); - toAdd.add(new Transition(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); + toAdd.add( + new Transition(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); } } @@ -245,7 +244,7 @@ public SimpleAutomaton trimRight() { boolean b = false; for (State qf : result.getFinalStates()) { - for (Transition t : result.getIngoingTransitionsFrom(qf)){ + for (Transition t : result.getIngoingTransitionsFrom(qf)) { if (t.getSymbol().equals(new StringSymbol(" "))) b = true; } @@ -262,73 +261,79 @@ public SimpleAutomaton trim() { return trimLeft().trimRight(); } - public SimpleAutomaton repeat(Interval i) throws MathNumberConversionException{ + public SimpleAutomaton repeat(Interval i) throws MathNumberConversionException { if (equals(emptyString())) return this; else if (hasCycle()) return star(); - MathNumber high = i.interval.getHigh(); MathNumber low = i.interval.getLow(); SimpleAutomaton epsilon = emptyString(); - if(low.isMinusInfinity()) { - if(high.isPlusInfinity()) - return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + if (low.isMinusInfinity()) { + if (high.isPlusInfinity()) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); - if(high.isZero()) + if (high.isZero()) return emptyString(); else - return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); } - long lowInt = low.toLong(); - if(high.isPlusInfinity()){ - //need exception - if(lowInt < 0) - return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); - if(low.isZero()) - return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); - if(lowInt > 0) - return epsilon.union(auxRepeat(i.interval, getInitialState(), new TreeSet>(), emptyLanguage())); + if (high.isPlusInfinity()) { + // need exception + if (lowInt < 0) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + if (low.isZero()) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + if (lowInt > 0) + return epsilon.union(auxRepeat(i.interval, getInitialState(), new TreeSet>(), + emptyLanguage())); } long highInt = high.toLong(); - if(lowInt < 0){ - if(highInt < 0) + if (lowInt < 0) { + if (highInt < 0) return emptyLanguage(); - if(high.isZero()) + if (high.isZero()) return emptyString(); else - return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); } - if(low.isZero()) { - if(high.isZero()) + if (low.isZero()) { + if (high.isZero()) return emptyString(); - if(highInt > 0) - return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), new TreeSet>(), emptyLanguage())); + if (highInt > 0) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); } - if(lowInt > 0 && highInt > 0) - return auxRepeat(i.interval, getInitialState(), new TreeSet>(), emptyLanguage()); + if (lowInt > 0 && highInt > 0) + return auxRepeat(i.interval, getInitialState(), new TreeSet>(), emptyLanguage()); return emptyLanguage(); } - public SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet> delta, SimpleAutomaton result) throws MathNumberConversionException{ + public SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet> delta, + SimpleAutomaton result) throws MathNumberConversionException { - if(currentState.isFinal()){ + if (currentState.isFinal()) { SortedSet states = new TreeSet<>(); - for(State s: getStates()) { + for (State s : getStates()) { if (!s.equals(currentState)) states.add(new State(s.getId(), s.isInitial(), false)); else @@ -338,13 +343,13 @@ public SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet t: getOutgoingTransitionsFrom(currentState)){ + for (Transition t : getOutgoingTransitionsFrom(currentState)) { SortedSet> clone = new TreeSet>(delta); clone.add(t); result = auxRepeat(i, t.getDestination(), clone, result).union(result); @@ -362,26 +367,26 @@ public SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet connectOn, boolean b){ + public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet connectOn, boolean b) { SortedSet> delta = new TreeSet<>(); SortedSet states = new TreeSet<>(); HashMap firstMapping = new HashMap<>(); HashMap secondMapping = new HashMap<>(); int c = 0; - if(equals(emptyString())){ + if (equals(emptyString())) { return second; } - if(second.equals(emptyString())){ + if (second.equals(emptyString())) { return this; } - for(State s : getStates()) { + for (State s : getStates()) { State newState = null; if (b) { newState = new State(c++, s.isInitial(), s.isFinal()); - }else{ + } else { if (connectOn.contains(s)) { newState = new State(c++, s.isInitial(), false); } else { @@ -392,12 +397,12 @@ public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet firstMapping.put(s, newState); } - for(Transition t: getTransitions()){ - delta.add(new Transition<>(firstMapping.get(t.getSource()), firstMapping.get(t.getDestination()), t.getSymbol())); + for (Transition t : getTransitions()) { + delta.add(new Transition<>(firstMapping.get(t.getSource()), firstMapping.get(t.getDestination()), + t.getSymbol())); } - - if(second.getStates().size() == 1 && second.getInitialState().isFinal()) { + if (second.getStates().size() == 1 && second.getInitialState().isFinal()) { for (Transition t : second.getOutgoingTransitionsFrom(second.getInitialState())) { for (State s : connectOn) { State newState = new State(firstMapping.get(s).getId(), s.isInitial(), true); @@ -407,7 +412,7 @@ public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet } second.minimize(); } - }else{ + } else { for (State s : second.getStates()) { State newState = new State(c++, s.isInitial(), s.isFinal()); states.add(newState); @@ -418,20 +423,23 @@ public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet for (Transition t : second.getTransitions()) { if (!t.getSource().isInitial() && !t.getDestination().isInitial()) { - delta.add(new Transition<>(secondMapping.get(t.getSource()), secondMapping.get(t.getDestination()), t.getSymbol())); + delta.add(new Transition<>(secondMapping.get(t.getSource()), secondMapping.get(t.getDestination()), + t.getSymbol())); } if (t.getSource().isInitial()) { - //TODO better recheck this function + // TODO better recheck this function for (State s : connectOn) { if (states.contains(secondMapping.get(t.getSource()))) - delta.add(new Transition<>(secondMapping.get(t.getSource()), firstMapping.get(s), t.getSymbol())); + delta.add(new Transition<>(secondMapping.get(t.getSource()), firstMapping.get(s), + t.getSymbol())); } } } for (Transition t : second.getOutgoingTransitionsFrom(second.getInitialState())) { for (State s : connectOn) { - delta.add(new Transition<>(firstMapping.get(s), secondMapping.get(t.getDestination()), t.getSymbol())); + delta.add(new Transition<>(firstMapping.get(s), secondMapping.get(t.getDestination()), + t.getSymbol())); } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java index 6b664221c..ff4abbc62 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java @@ -2,34 +2,33 @@ import static org.junit.Assert.assertEquals; -import org.junit.Test; - import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; public class RepeatTest { @Test - public void repeatTest001() throws MathNumberConversionException{ - SimpleAutomaton a = new SimpleAutomaton("a"); - assertEquals(a.repeat(new Interval(2, 2)), new SimpleAutomaton("aa")); - } - + public void repeatTest001() throws MathNumberConversionException { + SimpleAutomaton a = new SimpleAutomaton("a"); + assertEquals(a.repeat(new Interval(2, 2)), new SimpleAutomaton("aa")); + } + @Test - public void repeatTest002() throws MathNumberConversionException{ + public void repeatTest002() throws MathNumberConversionException { SimpleAutomaton a = new SimpleAutomaton("alpha"); - assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("alphaalphaalpha")); - } - + assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("alphaalphaalpha")); + } + @Test - public void repeatTest003() throws MathNumberConversionException{ + public void repeatTest003() throws MathNumberConversionException { SimpleAutomaton a = new SimpleAutomaton("ab").union(new SimpleAutomaton("cd")); - assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("ababab").union(new SimpleAutomaton("cdcdcd"))); - } - + assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("ababab").union(new SimpleAutomaton("cdcdcd"))); + } + @Test - public void repeatTest004() throws MathNumberConversionException{ + public void repeatTest004() throws MathNumberConversionException { SimpleAutomaton a = new SimpleAutomaton("ab").union(new SimpleAutomaton("cd")); - assertEquals(a.repeat(new Interval(0, 0)), a.emptyString()); - } + assertEquals(a.repeat(new Interval(0, 0)), a.emptyString()); + } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java index e779fcf3a..072993ae2 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java @@ -23,7 +23,7 @@ public void test03() { SimpleAutomaton c = new SimpleAutomaton("a").union(new SimpleAutomaton(" b ")); assertTrue(c.trim().isEqualTo(new SimpleAutomaton("a").union(new SimpleAutomaton("b")))); } - + @Test public void test04() { SimpleAutomaton c = new SimpleAutomaton(" a ").star(); From 2028460493ca3ff54b8f39d15976d401a42b54dd Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Mon, 21 Aug 2023 16:29:55 +0200 Subject: [PATCH 13/18] toFSA fixed, repeat method for FSA --- .../main/java/it/unive/lisa/analysis/string/fsa/FSA.java | 7 +++++++ .../unive/lisa/analysis/string/fsa/SimpleAutomaton.java | 9 ++++++--- .../it/unive/lisa/analysis/string/tarsis/Tarsis.java | 4 ++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java index 9255acea1..4ff443c94 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java @@ -4,6 +4,7 @@ import it.unive.lisa.analysis.SemanticDomain.Satisfiability; import it.unive.lisa.analysis.SemanticException; import it.unive.lisa.analysis.nonrelational.value.BaseNonRelationalValueDomain; +import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.analysis.representation.DomainRepresentation; import it.unive.lisa.analysis.representation.StringRepresentation; import it.unive.lisa.analysis.string.ContainsCharProvider; @@ -21,6 +22,8 @@ import it.unive.lisa.util.datastructures.automaton.Transition; import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; + import java.util.HashSet; import java.util.Objects; import java.util.Set; @@ -421,4 +424,8 @@ public Satisfiability containsChar(char c) throws SemanticException { public FSA trim() { return new FSA(this.a.trim()); } + + public FSA repeat(Interval i) throws MathNumberConversionException { + return new FSA(this.a.repeat(i)); + } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java index 6daccc5b3..507fbb3e8 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java @@ -420,8 +420,12 @@ public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet } states.remove(secondMapping.get(second.getInitialState())); + secondMapping.remove(second.getInitialState()); for (Transition t : second.getTransitions()) { + if (t.getSource().equals(second.getInitialState()) || t.getDestination().equals(second.getInitialState()) || !secondMapping.containsKey(t.getSource()) || !secondMapping.containsKey(t.getDestination())) + continue; + if (!t.getSource().isInitial() && !t.getDestination().isInitial()) { delta.add(new Transition<>(secondMapping.get(t.getSource()), secondMapping.get(t.getDestination()), t.getSymbol())); @@ -441,10 +445,9 @@ public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet delta.add(new Transition<>(firstMapping.get(s), secondMapping.get(t.getDestination()), t.getSymbol())); } - } - + } } - + return new SimpleAutomaton(states, delta); } } \ No newline at end of file diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index 710edb96d..e23610b42 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -387,8 +387,8 @@ public FSA toFSA() { new StringSymbol(t.getSymbol().toString()))); else { for (char c = 32; c <= 123; c++) - fsaDelta.add(new Transition<>(t.getSource(), t.getSource(), new StringSymbol(c))); - fsaDelta.add(new Transition<>(t.getSource(), t.getSource(), StringSymbol.EPSILON)); + fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(), new StringSymbol(c))); + fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); } } From 87a748e02b82ea15695de17c7532122a62628dfc Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Tue, 22 Aug 2023 15:09:31 +0200 Subject: [PATCH 14/18] Fixed empty string replacement --- .../it/unive/lisa/analysis/string/tarsis/StringReplacer.java | 4 +++- .../java/it/unive/lisa/analysis/string/tarsis/Tarsis.java | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java index 07c7ef706..69f9ac7a0 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java @@ -122,7 +122,8 @@ public RegexAutomaton replace(String toReplace, RegexAutomaton str, boolean must } private RegexAutomaton emptyStringReplace(RegexAutomaton str) { - AtomicInteger counter = new AtomicInteger(); + int maxId = origin.getStates().stream().mapToInt(s -> s.getId()).max().getAsInt(); + AtomicInteger counter = new AtomicInteger(maxId + 1); SortedSet states = new TreeSet<>(); SortedSet> delta = new TreeSet<>(); @@ -132,6 +133,7 @@ private RegexAutomaton emptyStringReplace(RegexAutomaton str) { Map mapper = new HashMap<>(); origin.getStates().forEach(s -> mapper.put(s, new State(s.getId(), s.isInitial(), false))); states.addAll(mapper.values()); + Function maker = s -> new State(counter.getAndIncrement(), false, false); diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index e23610b42..ea2591e7a 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -354,6 +354,9 @@ public Tarsis concat(Tarsis other) { * @return the domain instance containing the replaced automaton */ public Tarsis replace(Tarsis search, Tarsis repl) { + if (isBottom() || search.isBottom() || repl.isBottom()) + return bottom(); + try { return new Tarsis(this.a.replace(search.a, repl.a)); } catch (CyclicAutomatonException e) { From ad48a0aa3748d9179fbe5571531ca3c6b734107a Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Wed, 23 Aug 2023 11:59:21 +0200 Subject: [PATCH 15/18] Apply spotless and Javadoc --- .../unive/lisa/analysis/string/fsa/FSA.java | 26 +++++++++- .../analysis/string/fsa/SimpleAutomaton.java | 49 ++++++++++++++++--- .../string/tarsis/StringReplacer.java | 1 - .../lisa/analysis/string/tarsis/Tarsis.java | 9 +++- .../regex/RegularExpression.java | 7 +++ 5 files changed, 80 insertions(+), 12 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java index 4ff443c94..77f7aab7f 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java @@ -23,7 +23,6 @@ import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; import it.unive.lisa.util.numeric.MathNumberConversionException; - import java.util.HashSet; import java.util.Objects; import java.util.Set; @@ -421,11 +420,34 @@ public Satisfiability containsChar(char c) throws SemanticException { return Satisfiability.NOT_SATISFIED; } + /** + * Yields a new FSA where trailing and leading whitespaces have been removed + * from {@code this}. + * + * @return a new FSA where trailing and leading whitespaces have been + * removed from {@code this} + */ public FSA trim() { + if (isBottom() || isTop()) + return this; return new FSA(this.a.trim()); } - + + /** + * Yields a new FSA instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv}. + * + * @param i the interval + * + * @return a new FSA instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv} + * + * @throws MathNumberConversionException if {@code intv} is iterated but is + * not finite + */ public FSA repeat(Interval i) throws MathNumberConversionException { + if (isBottom() || isTop()) + return this; return new FSA(this.a.repeat(i)); } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java index 507fbb3e8..29ca79951 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java @@ -187,6 +187,13 @@ public SimpleAutomaton replace(SimpleAutomaton toReplace, SimpleAutomaton str) t return result; } + /** + * Yields a new automaton where leading whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where leading whitespaces have been removed from + * {@code this} + */ public SimpleAutomaton trimLeft() { SimpleAutomaton result = copy(); @@ -218,6 +225,13 @@ public SimpleAutomaton trimLeft() { return result; } + /** + * Yields a new automaton where trailing whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where trailing whitespaces have been removed from + * {@code this} + */ public SimpleAutomaton trimRight() { SimpleAutomaton result = copy(); @@ -257,10 +271,29 @@ public SimpleAutomaton trimRight() { return result; } + /** + * Yields a new automaton where leading and trailing whitespaces have been + * removed from {@code this}. + * + * @return a new automaton where leading trailing whitespaces have been + * removed from {@code this} + */ public SimpleAutomaton trim() { - return trimLeft().trimRight(); + return this.toRegex().trimRight().simplify().trimLeft().simplify().toAutomaton(this); } + /** + * Yields a new automaton instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv}. + * + * @param i the interval + * + * @return a new automaton instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv} + * + * @throws MathNumberConversionException if {@code intv} is iterated but is + * not finite + */ public SimpleAutomaton repeat(Interval i) throws MathNumberConversionException { if (equals(emptyString())) return this; @@ -326,7 +359,7 @@ else if (hasCycle()) return emptyLanguage(); } - public SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet> delta, + private SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet> delta, SimpleAutomaton result) throws MathNumberConversionException { if (currentState.isFinal()) { @@ -367,7 +400,7 @@ public SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet connectOn, boolean b) { + private SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet connectOn, boolean b) { SortedSet> delta = new TreeSet<>(); SortedSet states = new TreeSet<>(); HashMap firstMapping = new HashMap<>(); @@ -423,9 +456,11 @@ public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet secondMapping.remove(second.getInitialState()); for (Transition t : second.getTransitions()) { - if (t.getSource().equals(second.getInitialState()) || t.getDestination().equals(second.getInitialState()) || !secondMapping.containsKey(t.getSource()) || !secondMapping.containsKey(t.getDestination())) + if (t.getSource().equals(second.getInitialState()) + || t.getDestination().equals(second.getInitialState()) + || !secondMapping.containsKey(t.getSource()) || !secondMapping.containsKey(t.getDestination())) continue; - + if (!t.getSource().isInitial() && !t.getDestination().isInitial()) { delta.add(new Transition<>(secondMapping.get(t.getSource()), secondMapping.get(t.getDestination()), t.getSymbol())); @@ -445,9 +480,9 @@ public SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet delta.add(new Transition<>(firstMapping.get(s), secondMapping.get(t.getDestination()), t.getSymbol())); } - } + } } - + return new SimpleAutomaton(states, delta); } } \ No newline at end of file diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java index 69f9ac7a0..2229c9873 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java @@ -133,7 +133,6 @@ private RegexAutomaton emptyStringReplace(RegexAutomaton str) { Map mapper = new HashMap<>(); origin.getStates().forEach(s -> mapper.put(s, new State(s.getId(), s.isInitial(), false))); states.addAll(mapper.values()); - Function maker = s -> new State(counter.getAndIncrement(), false, false); diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index ea2591e7a..723223a80 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -356,7 +356,7 @@ public Tarsis concat(Tarsis other) { public Tarsis replace(Tarsis search, Tarsis repl) { if (isBottom() || search.isBottom() || repl.isBottom()) return bottom(); - + try { return new Tarsis(this.a.replace(search.a, repl.a)); } catch (CyclicAutomatonException e) { @@ -423,7 +423,9 @@ public Satisfiability containsChar(char c) throws SemanticException { * not finite */ public Tarsis repeat(Interval intv) throws MathNumberConversionException { - if (intv.isTop() || a.hasCycle()) + if (isBottom()) + return this; + else if (intv.isTop() || a.hasCycle()) return new Tarsis(a.star()); else if (intv.interval.isFinite()) { if (intv.interval.isSingleton()) @@ -447,6 +449,9 @@ else if (intv.interval.isFinite()) { * have been removed from {@code this} */ public Tarsis trim() { + if (isBottom() || isTop()) + return this; + return new Tarsis(this.a.trim()); } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index a876aef12..32194cfc0 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -452,6 +452,13 @@ public String toString() { */ public abstract RegularExpression[] explode(); + /** + * Checks whether this regular expression recognize a string made just of + * whitespaces. + * + * @return {@true} if this regular expression recognize a string made just + * of whitespaces, {@code false} otherwise. + */ protected abstract boolean readsWhiteSpaceString(); /** From dad9923329fd38f8ec0e6c6150975fa586b9336b Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Wed, 23 Aug 2023 12:13:01 +0200 Subject: [PATCH 16/18] Removed star test --- .../it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java | 1 - .../java/it/unive/lisa/analysis/string/fsa/TrimTest.java | 6 ------ 2 files changed, 7 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java index 29ca79951..bd7cb7962 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java @@ -466,7 +466,6 @@ private SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet(secondMapping.get(t.getSource()), firstMapping.get(s), diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java index 072993ae2..33ca8e085 100644 --- a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java @@ -23,10 +23,4 @@ public void test03() { SimpleAutomaton c = new SimpleAutomaton("a").union(new SimpleAutomaton(" b ")); assertTrue(c.trim().isEqualTo(new SimpleAutomaton("a").union(new SimpleAutomaton("b")))); } - - @Test - public void test04() { - SimpleAutomaton c = new SimpleAutomaton(" a ").star(); - assertTrue(c.trim().isEqualTo(new SimpleAutomaton("a").star())); - } } From 12d3f43109fc3252cf4f304b4b7bb943766e3790 Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Wed, 23 Aug 2023 12:16:02 +0200 Subject: [PATCH 17/18] Javadoc --- .../unive/lisa/util/datastructures/regex/RegularExpression.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index 32194cfc0..819de7af0 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -456,7 +456,7 @@ public String toString() { * Checks whether this regular expression recognize a string made just of * whitespaces. * - * @return {@true} if this regular expression recognize a string made just + * @return {@code true} if this regular expression recognize a string made just * of whitespaces, {@code false} otherwise. */ protected abstract boolean readsWhiteSpaceString(); From 83cc1c14ddb62944984b3a68e65003d36c3a6dba Mon Sep 17 00:00:00 2001 From: VincenzoArceri Date: Wed, 23 Aug 2023 12:20:08 +0200 Subject: [PATCH 18/18] Apply spotless --- .../lisa/util/datastructures/regex/RegularExpression.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index 819de7af0..d61a57486 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -456,8 +456,8 @@ public String toString() { * Checks whether this regular expression recognize a string made just of * whitespaces. * - * @return {@code true} if this regular expression recognize a string made just - * of whitespaces, {@code false} otherwise. + * @return {@code true} if this regular expression recognize a string made + * just of whitespaces, {@code false} otherwise. */ protected abstract boolean readsWhiteSpaceString();