From dbb0c48969ba9cc067a637053f0b99c7ea46260c Mon Sep 17 00:00:00 2001 From: "v.arceri" Date: Thu, 6 Jul 2023 16:43:33 +0200 Subject: [PATCH] Abstract semantics for repeat in Tarsis --- .../string/tarsis/RegexAutomaton.java | 11 +++++++++++ .../lisa/util/datastructures/regex/Atom.java | 19 ++++++++++++++++--- .../lisa/util/datastructures/regex/Comp.java | 17 +++++++++++++++-- .../util/datastructures/regex/EmptySet.java | 10 ++++++++-- .../lisa/util/datastructures/regex/Or.java | 10 ++++++++-- .../regex/RegularExpression.java | 7 +++++-- .../lisa/util/datastructures/regex/Star.java | 10 ++++++++-- .../util/datastructures/regex/TopAtom.java | 5 +++-- 8 files changed, 74 insertions(+), 15 deletions(-) diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index 33a522f0d..70a635483 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -455,4 +455,15 @@ private RegexAutomaton union(RegexAutomaton... automata) { return result; } + + /** + * Yields an automaton that corresponds to the {@code n}-time concatenation of {@code this}. + * @param n the number of repetitions + * @return an automaton that corresponds to the {@code n}-time concatenation of {@code this} + */ + public RegexAutomaton repeat(int n) { + if (n == 0) + return emptyString(); + return toRegex().repeat(n).toAutomaton(this); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index 05109736e..8c865188d 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing a single string. @@ -78,7 +79,7 @@ public RegularExpression simplify() { @Override public , - T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { + T extends TransitionSymbol> A toAutomaton(AutomataFactory factory) { return isEmpty() ? factory.emptyString() : factory.singleString(string); } @@ -181,4 +182,16 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return string.compareTo(other.asAtom().string); } + + @Override + public RegularExpression repeat(int n) { + if (n == 0) + return Atom.EPSILON; + + RegularExpression r = Atom.EPSILON; + for (int i = 0; i < n; i++) + r = new Comp(r, this); + r.simplify(); + return r; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 976822e88..e90e5dd5f 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing the sequential composition of two @@ -288,4 +289,16 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asComp().second); } + + @Override + public RegularExpression repeat(int n) { + if (n == 0) + return Atom.EPSILON; + + RegularExpression r = Atom.EPSILON; + for (int i = 0; i < n; i++) + r = new Comp(r, this); + r.simplify(); + return r; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index f9aff003b..5cc07edd7 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -1,10 +1,11 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.Collections; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; -import java.util.Collections; -import java.util.Set; /** * A {@link RegularExpression} representing the empty set of strings. @@ -131,4 +132,9 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return 0; } + + @Override + public RegularExpression repeat(int n) { + return this; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index f8ef614ef..d1d601557 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -1,10 +1,11 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing an or between two other regular @@ -277,4 +278,9 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asOr().second); } + + @Override + public RegularExpression repeat(int n) { + return new Or(first.repeat(n), second.repeat(n)); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index 111dbe9c1..abafb5a68 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.Set; +import java.util.stream.Collectors; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.Set; -import java.util.stream.Collectors; /** * A regular expression that can be recognized by an {@link Automaton}, or that @@ -310,6 +311,8 @@ public String toString() { */ protected abstract Set substringAux(int charsToSkip, int missingChars); + public abstract RegularExpression repeat(int n); + /** * Yields {@code true} if and only if this regular expression corresponds to * the empty string or to no strings at all. diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index d0ff6e2fd..8fd977fac 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing a loop, repeated an arbitrary number @@ -234,4 +235,9 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return op.compareTo(other.asStar().op); } + + @Override + public RegularExpression repeat(int n) { + return this; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java index 446c1a867..1c476303a 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/TopAtom.java @@ -1,11 +1,12 @@ package it.unive.lisa.util.datastructures.regex; +import java.util.HashSet; +import java.util.Set; + import it.unive.lisa.util.datastructures.automaton.AutomataFactory; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.TransitionSymbol; import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; -import java.util.HashSet; -import java.util.Set; /** * A {@link RegularExpression} representing a sequence of unknown characters of