diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java index 7433e4cce..77f7aab7f 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/FSA.java @@ -4,6 +4,7 @@ import it.unive.lisa.analysis.SemanticDomain.Satisfiability; import it.unive.lisa.analysis.SemanticException; import it.unive.lisa.analysis.nonrelational.value.BaseNonRelationalValueDomain; +import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.analysis.representation.DomainRepresentation; import it.unive.lisa.analysis.representation.StringRepresentation; import it.unive.lisa.analysis.string.ContainsCharProvider; @@ -21,6 +22,7 @@ import it.unive.lisa.util.datastructures.automaton.Transition; import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; import java.util.HashSet; import java.util.Objects; import java.util.Set; @@ -417,4 +419,35 @@ public Satisfiability containsChar(char c) throws SemanticException { return Satisfiability.NOT_SATISFIED; } + + /** + * Yields a new FSA where trailing and leading whitespaces have been removed + * from {@code this}. + * + * @return a new FSA where trailing and leading whitespaces have been + * removed from {@code this} + */ + public FSA trim() { + if (isBottom() || isTop()) + return this; + return new FSA(this.a.trim()); + } + + /** + * Yields a new FSA instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv}. + * + * @param i the interval + * + * @return a new FSA instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv} + * + * @throws MathNumberConversionException if {@code intv} is iterated but is + * not finite + */ + public FSA repeat(Interval i) throws MathNumberConversionException { + if (isBottom() || isTop()) + return this; + return new FSA(this.a.repeat(i)); + } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java index 76cda7128..bd7cb7962 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/fsa/SimpleAutomaton.java @@ -1,12 +1,18 @@ package it.unive.lisa.analysis.string.fsa; +import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.util.datastructures.automaton.Automaton; import it.unive.lisa.util.datastructures.automaton.CyclicAutomatonException; import it.unive.lisa.util.datastructures.automaton.State; import it.unive.lisa.util.datastructures.automaton.Transition; import it.unive.lisa.util.datastructures.regex.Atom; import it.unive.lisa.util.datastructures.regex.RegularExpression; +import it.unive.lisa.util.numeric.IntInterval; +import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.Optional; import java.util.Set; import java.util.SortedSet; @@ -180,4 +186,302 @@ public SimpleAutomaton replace(SimpleAutomaton toReplace, SimpleAutomaton str) t result = result.union(new SimpleAutomaton(a.replace(b, c))); return result; } + + /** + * Yields a new automaton where leading whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where leading whitespaces have been removed from + * {@code this} + */ + public SimpleAutomaton trimLeft() { + SimpleAutomaton result = copy(); + + Set> toAdd = new HashSet<>(); + Set> toRemove = new HashSet<>(); + + while (true) { + for (Transition t : result.getOutgoingTransitionsFrom(result.getInitialState())) + if (t.getSymbol().getSymbol().equals(" ")) { + toRemove.add(t); + toAdd.add(new Transition(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); + } + + result.removeTransitions(toRemove); + result.getTransitions().addAll(toAdd); + + result.minimize(); + + boolean b = false; + for (Transition t : result.getOutgoingTransitionsFrom(result.getInitialState())) { + if (t.getSymbol().getSymbol().equals(" ")) + b = true; + } + + if (!b) + break; + } + + return result; + } + + /** + * Yields a new automaton where trailing whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where trailing whitespaces have been removed from + * {@code this} + */ + public SimpleAutomaton trimRight() { + + SimpleAutomaton result = copy(); + + Set> toAdd = new HashSet<>(); + Set> toRemove = new HashSet<>(); + + while (true) { + + for (State qf : result.getFinalStates()) { + for (Transition t : result.getIngoingTransitionsFrom(qf)) + if (t.getSymbol().getSymbol().equals(" ")) { + toRemove.add(t); + toAdd.add( + new Transition(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); + } + } + + result.removeTransitions(toRemove); + result.getTransitions().addAll(toAdd); + + result.minimize(); + + boolean b = false; + for (State qf : result.getFinalStates()) { + + for (Transition t : result.getIngoingTransitionsFrom(qf)) { + if (t.getSymbol().equals(new StringSymbol(" "))) + b = true; + } + } + + if (!b) + break; + } + + return result; + } + + /** + * Yields a new automaton where leading and trailing whitespaces have been + * removed from {@code this}. + * + * @return a new automaton where leading trailing whitespaces have been + * removed from {@code this} + */ + public SimpleAutomaton trim() { + return this.toRegex().trimRight().simplify().trimLeft().simplify().toAutomaton(this); + } + + /** + * Yields a new automaton instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv}. + * + * @param i the interval + * + * @return a new automaton instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv} + * + * @throws MathNumberConversionException if {@code intv} is iterated but is + * not finite + */ + public SimpleAutomaton repeat(Interval i) throws MathNumberConversionException { + if (equals(emptyString())) + return this; + else if (hasCycle()) + return star(); + + MathNumber high = i.interval.getHigh(); + MathNumber low = i.interval.getLow(); + SimpleAutomaton epsilon = emptyString(); + + if (low.isMinusInfinity()) { + if (high.isPlusInfinity()) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + + if (high.isZero()) + return emptyString(); + else + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + } + + long lowInt = low.toLong(); + + if (high.isPlusInfinity()) { + // need exception + if (lowInt < 0) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + if (low.isZero()) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + if (lowInt > 0) + return epsilon.union(auxRepeat(i.interval, getInitialState(), new TreeSet>(), + emptyLanguage())); + } + + long highInt = high.toLong(); + + if (lowInt < 0) { + if (highInt < 0) + return emptyLanguage(); + + if (high.isZero()) + return emptyString(); + else + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + } + + if (low.isZero()) { + if (high.isZero()) + return emptyString(); + + if (highInt > 0) + return epsilon.union(auxRepeat(new IntInterval(MathNumber.ONE, high), getInitialState(), + new TreeSet>(), emptyLanguage())); + } + + if (lowInt > 0 && highInt > 0) + return auxRepeat(i.interval, getInitialState(), new TreeSet>(), emptyLanguage()); + + return emptyLanguage(); + } + + private SimpleAutomaton auxRepeat(IntInterval i, State currentState, SortedSet> delta, + SimpleAutomaton result) throws MathNumberConversionException { + + if (currentState.isFinal()) { + + SortedSet states = new TreeSet<>(); + + for (State s : getStates()) { + if (!s.equals(currentState)) + states.add(new State(s.getId(), s.isInitial(), false)); + else + states.add(new State(s.getId(), s.isInitial(), s.isFinal())); + } + + SimpleAutomaton temp = new SimpleAutomaton(states, delta); + SimpleAutomaton tempResult = temp.copy(); + + for (long k = 1; k < i.getLow().toLong(); k++) + tempResult = tempResult.connectAutomaton(temp, tempResult.getFinalStates(), false); + + if (i.getHigh().isPlusInfinity()) { + tempResult = tempResult.connectAutomaton(temp.copy().star(), tempResult.getFinalStates(), true); + } else { + for (long k = i.getLow().toLong(); k < i.getHigh().toLong(); k++) + tempResult = tempResult.connectAutomaton(temp, tempResult.getFinalStates(), true); + } + + tempResult = tempResult.minimize(); + result = result.union(tempResult); + + } + + for (Transition t : getOutgoingTransitionsFrom(currentState)) { + SortedSet> clone = new TreeSet>(delta); + clone.add(t); + result = auxRepeat(i, t.getDestination(), clone, result).union(result); + } + + return result; + } + + private SimpleAutomaton connectAutomaton(SimpleAutomaton second, SortedSet connectOn, boolean b) { + SortedSet> delta = new TreeSet<>(); + SortedSet states = new TreeSet<>(); + HashMap firstMapping = new HashMap<>(); + HashMap secondMapping = new HashMap<>(); + int c = 0; + + if (equals(emptyString())) { + return second; + } + + if (second.equals(emptyString())) { + return this; + } + + for (State s : getStates()) { + State newState = null; + if (b) { + newState = new State(c++, s.isInitial(), s.isFinal()); + } else { + if (connectOn.contains(s)) { + newState = new State(c++, s.isInitial(), false); + } else { + newState = new State(c++, s.isInitial(), s.isFinal()); + } + } + states.add(newState); + firstMapping.put(s, newState); + } + + for (Transition t : getTransitions()) { + delta.add(new Transition<>(firstMapping.get(t.getSource()), firstMapping.get(t.getDestination()), + t.getSymbol())); + } + + if (second.getStates().size() == 1 && second.getInitialState().isFinal()) { + for (Transition t : second.getOutgoingTransitionsFrom(second.getInitialState())) { + for (State s : connectOn) { + State newState = new State(firstMapping.get(s).getId(), s.isInitial(), true); + states.remove(firstMapping.get(s)); + states.add(newState); + delta.add(new Transition<>(newState, newState, t.getSymbol())); + } + second.minimize(); + } + } else { + for (State s : second.getStates()) { + State newState = new State(c++, s.isInitial(), s.isFinal()); + states.add(newState); + secondMapping.put(s, newState); + } + + states.remove(secondMapping.get(second.getInitialState())); + secondMapping.remove(second.getInitialState()); + + for (Transition t : second.getTransitions()) { + if (t.getSource().equals(second.getInitialState()) + || t.getDestination().equals(second.getInitialState()) + || !secondMapping.containsKey(t.getSource()) || !secondMapping.containsKey(t.getDestination())) + continue; + + if (!t.getSource().isInitial() && !t.getDestination().isInitial()) { + delta.add(new Transition<>(secondMapping.get(t.getSource()), secondMapping.get(t.getDestination()), + t.getSymbol())); + } + if (t.getSource().isInitial()) { + for (State s : connectOn) { + if (states.contains(secondMapping.get(t.getSource()))) + delta.add(new Transition<>(secondMapping.get(t.getSource()), firstMapping.get(s), + t.getSymbol())); + } + } + } + + for (Transition t : second.getOutgoingTransitionsFrom(second.getInitialState())) { + for (State s : connectOn) { + delta.add(new Transition<>(firstMapping.get(s), secondMapping.get(t.getDestination()), + t.getSymbol())); + } + } + } + + return new SimpleAutomaton(states, delta); + } } \ No newline at end of file diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java index 33a522f0d..fafe115a8 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/RegexAutomaton.java @@ -183,7 +183,7 @@ public RegexAutomaton unknownString() { @Override public RegexAutomaton emptyLanguage() { - return emptyLanguage(); + return emptyLang(); } @Override @@ -455,4 +455,52 @@ private RegexAutomaton union(RegexAutomaton... automata) { return result; } + + /** + * Yields an automaton that corresponds to the {@code n}-time concatenation + * of {@code this}. + * + * @param n the number of repetitions + * + * @return an automaton that corresponds to the {@code n}-time concatenation + * of {@code this} + */ + public RegexAutomaton repeat(long n) { + if (n == 0) + return emptyString(); + return toRegex().simplify().repeat(n).toAutomaton(this).minimize(); + } + + /** + * Yields a new automaton where leading whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where leading whitespaces have been removed from + * {@code this} + */ + public RegexAutomaton trimLeft() { + return this.toRegex().trimLeft().simplify().toAutomaton(this); + } + + /** + * Yields a new automaton where trailing whitespaces have been removed from + * {@code this}. + * + * @return a new automaton where trailing whitespaces have been removed from + * {@code this} + */ + public RegexAutomaton trimRight() { + return this.toRegex().trimRight().simplify().toAutomaton(this); + } + + /** + * Yields a new automaton where trailing and leading whitespaces have been + * removed from {@code this}. + * + * @return a new automaton where trailing and leading whitespaces have been + * removed from {@code this} + */ + public RegexAutomaton trim() { + return this.toRegex().trimRight().simplify().trimLeft().simplify().toAutomaton(this); + } } diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java index 07c7ef706..2229c9873 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/StringReplacer.java @@ -122,7 +122,8 @@ public RegexAutomaton replace(String toReplace, RegexAutomaton str, boolean must } private RegexAutomaton emptyStringReplace(RegexAutomaton str) { - AtomicInteger counter = new AtomicInteger(); + int maxId = origin.getStates().stream().mapToInt(s -> s.getId()).max().getAsInt(); + AtomicInteger counter = new AtomicInteger(maxId + 1); SortedSet states = new TreeSet<>(); SortedSet> delta = new TreeSet<>(); diff --git a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java index 7700eaee8..723223a80 100644 --- a/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java +++ b/lisa/lisa-analyses/src/main/java/it/unive/lisa/analysis/string/tarsis/Tarsis.java @@ -5,6 +5,7 @@ import it.unive.lisa.analysis.SemanticDomain.Satisfiability; import it.unive.lisa.analysis.SemanticException; import it.unive.lisa.analysis.nonrelational.value.BaseNonRelationalValueDomain; +import it.unive.lisa.analysis.numeric.Interval; import it.unive.lisa.analysis.representation.DomainRepresentation; import it.unive.lisa.analysis.representation.StringRepresentation; import it.unive.lisa.analysis.string.ContainsCharProvider; @@ -25,6 +26,7 @@ import it.unive.lisa.util.datastructures.regex.TopAtom; import it.unive.lisa.util.numeric.IntInterval; import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; import java.util.Objects; import java.util.SortedSet; import java.util.TreeSet; @@ -352,6 +354,9 @@ public Tarsis concat(Tarsis other) { * @return the domain instance containing the replaced automaton */ public Tarsis replace(Tarsis search, Tarsis repl) { + if (isBottom() || search.isBottom() || repl.isBottom()) + return bottom(); + try { return new Tarsis(this.a.replace(search.a, repl.a)); } catch (CyclicAutomatonException e) { @@ -385,8 +390,8 @@ public FSA toFSA() { new StringSymbol(t.getSymbol().toString()))); else { for (char c = 32; c <= 123; c++) - fsaDelta.add(new Transition<>(t.getSource(), t.getSource(), new StringSymbol(c))); - fsaDelta.add(new Transition<>(t.getSource(), t.getSource(), StringSymbol.EPSILON)); + fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(), new StringSymbol(c))); + fsaDelta.add(new Transition<>(t.getSource(), t.getDestination(), StringSymbol.EPSILON)); } } @@ -404,4 +409,49 @@ public Satisfiability containsChar(char c) throws SemanticException { return satisfiesBinaryExpression(StringContains.INSTANCE, this, new Tarsis(RegexAutomaton.string(String.valueOf(c))), null); } + + /** + * Yields a new Tarsis's instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv}. + * + * @param intv the interval + * + * @return a new Tarsis's instance recognizing each string of {@code this} + * automaton repeated k-times, with k belonging to {@code intv} + * + * @throws MathNumberConversionException if {@code intv} is iterated but is + * not finite + */ + public Tarsis repeat(Interval intv) throws MathNumberConversionException { + if (isBottom()) + return this; + else if (intv.isTop() || a.hasCycle()) + return new Tarsis(a.star()); + else if (intv.interval.isFinite()) { + if (intv.interval.isSingleton()) + return new Tarsis(a.repeat(intv.interval.getHigh().toLong())); + else { + RegexAutomaton result = a.emptyLanguage(); + + for (Long i : intv.interval) + result = result.union(a.repeat(i)); + return new Tarsis(result); + } + } else + return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star())); + } + + /** + * Yields a new Tarsis's instance where trailing and leading whitespaces + * have been removed from {@code this}. + * + * @return a new Tarsis's instance where trailing and leading whitespaces + * have been removed from {@code this} + */ + public Tarsis trim() { + if (isBottom() || isTop()) + return this; + + return new Tarsis(this.a.trim()); + } } diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java new file mode 100644 index 000000000..ff4abbc62 --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/RepeatTest.java @@ -0,0 +1,34 @@ +package it.unive.lisa.analysis.string.fsa; + +import static org.junit.Assert.assertEquals; + +import it.unive.lisa.analysis.numeric.Interval; +import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; + +public class RepeatTest { + + @Test + public void repeatTest001() throws MathNumberConversionException { + SimpleAutomaton a = new SimpleAutomaton("a"); + assertEquals(a.repeat(new Interval(2, 2)), new SimpleAutomaton("aa")); + } + + @Test + public void repeatTest002() throws MathNumberConversionException { + SimpleAutomaton a = new SimpleAutomaton("alpha"); + assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("alphaalphaalpha")); + } + + @Test + public void repeatTest003() throws MathNumberConversionException { + SimpleAutomaton a = new SimpleAutomaton("ab").union(new SimpleAutomaton("cd")); + assertEquals(a.repeat(new Interval(3, 3)), new SimpleAutomaton("ababab").union(new SimpleAutomaton("cdcdcd"))); + } + + @Test + public void repeatTest004() throws MathNumberConversionException { + SimpleAutomaton a = new SimpleAutomaton("ab").union(new SimpleAutomaton("cd")); + assertEquals(a.repeat(new Interval(0, 0)), a.emptyString()); + } +} diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java new file mode 100644 index 000000000..33ca8e085 --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/fsa/TrimTest.java @@ -0,0 +1,26 @@ +package it.unive.lisa.analysis.string.fsa; + +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +public class TrimTest { + + @Test + public void test01() { + SimpleAutomaton a = new SimpleAutomaton(" a "); + assertTrue(a.trim().isEqualTo(new SimpleAutomaton("a"))); + } + + @Test + public void test02() { + SimpleAutomaton b = new SimpleAutomaton(" a ").union(new SimpleAutomaton(" b ")); + assertTrue(b.trim().isEqualTo(new SimpleAutomaton("a").union(new SimpleAutomaton("b")))); + } + + @Test + public void test03() { + SimpleAutomaton c = new SimpleAutomaton("a").union(new SimpleAutomaton(" b ")); + assertTrue(c.trim().isEqualTo(new SimpleAutomaton("a").union(new SimpleAutomaton("b")))); + } +} diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java new file mode 100644 index 000000000..2e5c4ed01 --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/RepeatTest.java @@ -0,0 +1,72 @@ +package it.unive.lisa.analysis.string.tarsis; + +import static org.junit.Assert.assertTrue; + +import it.unive.lisa.analysis.numeric.Interval; +import it.unive.lisa.util.datastructures.regex.Atom; +import it.unive.lisa.util.datastructures.regex.Or; +import it.unive.lisa.util.numeric.MathNumber; +import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; + +public class RepeatTest { + + @Test + public void repeatSingleString() throws MathNumberConversionException { + RegexAutomaton abc = RegexAutomaton.string("abc"); + RegexAutomaton abcabc = RegexAutomaton.string("abcabc"); + RegexAutomaton abc_star = RegexAutomaton.string("abc").star(); + + Tarsis a = new Tarsis(abc); + + // "abc".repeat(1) = "abc" + assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(abc)); + + // "abc".repeat(2) = "abcabc" + assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abcabc)); + + // "abc".repeat(0) = "" + assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); + + // "abc".repeat([1,2]) = {"abc", "abcabc"} + assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(abc.union(abcabc))); + + // "abc".repeat([0,+infty]) = (abc)* + assertTrue( + a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star)); + + // "abc".repeat([1,+infty]) = abc(abc)* + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(abc.concat(abc_star))); + } + + @Test + public void repeatTwoStrings() throws MathNumberConversionException { + + RegexAutomaton ab_or_cd = new Or(new Atom("ab"), new Atom("cd")).toAutomaton(RegexAutomaton.emptyLang()); + RegexAutomaton abab_or_cdcd = new Or(new Atom("abab"), new Atom("cdcd")) + .toAutomaton(RegexAutomaton.emptyLang()); + + Tarsis a = new Tarsis(ab_or_cd); + + // {"ab", "cd"}.repeat(1) = {"ab", "cd"} + assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(ab_or_cd)); + + // {"ab", "cd"}.repeat(2) = {"abab", "cdcd"} + assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abab_or_cdcd)); + + // {"ab", "cd"}.repeat(0) = {""} + assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); + + // {"ab", "cd"}.repeat([1,2]) = {"ab", "cd", "abab", "cdcd"} + assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(ab_or_cd.union(abab_or_cdcd))); + + // {"ab", "cd"}.repeat([0,+infty]) = (ab|cd)* + assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(ab_or_cd.star())); + + // {"ab", "cd"}.repeat([1,+infty]) = (ab|cd)(ab|cd)* + assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton() + .isEqualTo(ab_or_cd.concat(ab_or_cd.star()))); + } +} diff --git a/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java new file mode 100644 index 000000000..992d15fe8 --- /dev/null +++ b/lisa/lisa-analyses/src/test/java/it/unive/lisa/analysis/string/tarsis/TrimTest.java @@ -0,0 +1,49 @@ +package it.unive.lisa.analysis.string.tarsis; + +import static org.junit.Assert.assertTrue; + +import it.unive.lisa.util.numeric.MathNumberConversionException; +import org.junit.Test; + +public class TrimTest { + + @Test + public void trimSingleString() throws MathNumberConversionException { + RegexAutomaton abc = RegexAutomaton.string("abc"); + RegexAutomaton ws = RegexAutomaton.string(" "); + RegexAutomaton epsilon = RegexAutomaton.string(""); + RegexAutomaton abc_with_ws = RegexAutomaton.string(" a b c "); + RegexAutomaton empty_star = RegexAutomaton.string(" ").star(); + RegexAutomaton comp1 = RegexAutomaton.string(" ").concat(RegexAutomaton.string(" abc")); + RegexAutomaton comp2 = RegexAutomaton.string(" a").concat(RegexAutomaton.string(" b ")); + RegexAutomaton comp3 = RegexAutomaton.string(" abc ").concat(RegexAutomaton.string(" ")); + + // "abc".trim() = "abc" + Tarsis a = new Tarsis(abc); + assertTrue(a.trim().getAutomaton().isEqualTo(abc)); + + // " ".trim() = "" + Tarsis b = new Tarsis(ws); + assertTrue(b.trim().getAutomaton().isEqualTo(epsilon)); + + // " a b c ".trim() = "a b c" + Tarsis c = new Tarsis(abc_with_ws); + assertTrue(c.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c"))); + + // (" ")*.trim() = "" + Tarsis d = new Tarsis(empty_star); + assertTrue(d.trim().getAutomaton().isEqualTo(RegexAutomaton.emptyStr())); + + // " " + " abc".trim() = "abc" + Tarsis e = new Tarsis(comp1); + assertTrue(e.trim().getAutomaton().isEqualTo(abc)); + + // " a" + " b ".trim() = "a b" + Tarsis f = new Tarsis(comp2); + assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b"))); + + // " abc " + " ".trim() = "abc" + Tarsis g = new Tarsis(comp3); + assertTrue(g.trim().getAutomaton().isEqualTo(RegexAutomaton.string("abc"))); + } +} diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java index 83484a8a5..2627c66be 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/automaton/Transition.java @@ -28,6 +28,8 @@ public final class Transition> implements Comparab * transition. */ public Transition(State source, State destination, T symbol) { + Objects.requireNonNull(source); + Objects.requireNonNull(destination); this.source = source; this.destination = destination; this.symbol = symbol; diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java index 05109736e..fd8f92ae7 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Atom.java @@ -6,6 +6,7 @@ import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString; import java.util.HashSet; import java.util.Set; +import org.apache.commons.lang3.StringUtils; /** * A {@link RegularExpression} representing a single string. @@ -181,4 +182,37 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return string.compareTo(other.asAtom().string); } + + @Override + public RegularExpression repeat(long n) { + if (n == 0) + return Atom.EPSILON; + + RegularExpression r = Atom.EPSILON; + for (long i = 0; i < n; i++) + r = new Comp(r, this); + r.simplify(); + return r; + } + + @Override + public RegularExpression trimLeft() { + String trimLeft = StringUtils.stripStart(this.string, null); + if (trimLeft.isEmpty()) + return Atom.EPSILON; + return new Atom(trimLeft); + } + + @Override + public RegularExpression trimRight() { + String trimRight = StringUtils.stripEnd(this.string, null); + if (trimRight.isEmpty()) + return Atom.EPSILON; + return new Atom(trimRight); + } + + @Override + protected boolean readsWhiteSpaceString() { + return string.trim().isEmpty(); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java index 976822e88..7555a8308 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Comp.java @@ -31,7 +31,7 @@ public final class Comp extends RegularExpression { * @param first the first regular expression * @param second the second regular expression */ - Comp(RegularExpression first, RegularExpression second) { + public Comp(RegularExpression first, RegularExpression second) { this.first = first; this.second = second; } @@ -288,4 +288,50 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asComp().second); } + + @Override + public RegularExpression repeat(long n) { + if (n == 0) + return Atom.EPSILON; + + RegularExpression r = Atom.EPSILON; + for (long i = 0; i < n; i++) + r = new Comp(r, this); + return r.simplify(); + } + + @Override + public RegularExpression trimLeft() { + RegularExpression trimLeftFirst = first.trimLeft().simplify(); + // it means that first reads just whitespaces strings (L(first) + // \subseteq { }*) + if (trimLeftFirst.isEmpty()) + return this.second.trimLeft(); + // first reads at least a whitespace string + else if (first.readsWhiteSpaceString()) + return new Or(new Comp(trimLeftFirst, second), second.trimLeft()); +// return new Comp(trimLeftFirst, new Or(second, second.trimLeft())); + else + return new Comp(trimLeftFirst, second); + } + + @Override + public RegularExpression trimRight() { + RegularExpression trimRightSecond = second.trimRight().simplify(); + // it means that second reads just whitespaces strings (L(second) + // \subseteq { }*) + if (trimRightSecond.isEmpty()) + return this.first.trimRight(); + // second reads at least a whitespace string + else if (second.readsWhiteSpaceString()) + return new Or(new Comp(first, trimRightSecond), first.trimRight()); +// return new Comp(new Or(first, first.trimRight()), trimRightSecond); + else + return new Comp(this.first, trimRightSecond); + } + + @Override + protected boolean readsWhiteSpaceString() { + return first.readsWhiteSpaceString() && second.readsWhiteSpaceString(); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java index f9aff003b..b5b07d41f 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/EmptySet.java @@ -131,4 +131,24 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return 0; } + + @Override + public RegularExpression repeat(long n) { + return this; + } + + @Override + public RegularExpression trimLeft() { + return this; + } + + @Override + public RegularExpression trimRight() { + return this; + } + + @Override + protected boolean readsWhiteSpaceString() { + return false; + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java index f8ef614ef..734c3740f 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Or.java @@ -30,7 +30,7 @@ public final class Or extends RegularExpression { * @param first the first regular expression * @param second the second regular expression */ - Or(RegularExpression first, RegularExpression second) { + public Or(RegularExpression first, RegularExpression second) { // make things deterministic: order the branches if (first.compareTo(second) <= 0) { this.first = first; @@ -277,4 +277,25 @@ protected int compareToAux(RegularExpression other) { return cmp; return second.compareTo(other.asOr().second); } + + @Override + public RegularExpression repeat(long n) { + return new Or(first.repeat(n), second.repeat(n)).simplify(); + } + + @Override + public RegularExpression trimLeft() { + return new Or(first.trimLeft(), second.trimLeft()); + + } + + @Override + public RegularExpression trimRight() { + return new Or(first.trimRight(), second.trimRight()); + } + + @Override + protected boolean readsWhiteSpaceString() { + return first.readsWhiteSpaceString() || second.readsWhiteSpaceString(); + } } diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java index 111dbe9c1..d61a57486 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/RegularExpression.java @@ -310,6 +310,35 @@ public String toString() { */ protected abstract Set substringAux(int charsToSkip, int missingChars); + /** + * Yields a new regular expression corresponding to the {@code n}-repetition + * of {@code this}. + * + * @param n number of repetitions + * + * @return a new regular expression corresponding to the + * {@code n}-repetition of {@code this} + */ + public abstract RegularExpression repeat(long n); + + /** + * Yields a new regular expression where leading whitespaces have been + * removed from {@code this}. + * + * @return a new regular expression where leading whitespaces have been + * removed from {@code this} + */ + public abstract RegularExpression trimLeft(); + + /** + * Yields a new regular expression where trailing whitespaces have been + * removed from {@code this}. + * + * @return a new regular expression where trailing whitespaces have been + * removed from {@code this} + */ + public abstract RegularExpression trimRight(); + /** * Yields {@code true} if and only if this regular expression corresponds to * the empty string or to no strings at all. @@ -423,6 +452,15 @@ public String toString() { */ public abstract RegularExpression[] explode(); + /** + * Checks whether this regular expression recognize a string made just of + * whitespaces. + * + * @return {@code true} if this regular expression recognize a string made + * just of whitespaces, {@code false} otherwise. + */ + protected abstract boolean readsWhiteSpaceString(); + /** * Builds the Kleene closure of this regular expression ({@link Star}), * recognizing {@code this} zero or more times. diff --git a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java index d0ff6e2fd..71a198f5d 100644 --- a/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java +++ b/lisa/lisa-sdk/src/main/java/it/unive/lisa/util/datastructures/regex/Star.java @@ -22,7 +22,7 @@ public final class Star extends RegularExpression { * * @param op the inner regular expression */ - Star(RegularExpression op) { + public Star(RegularExpression op) { this.op = op; } @@ -234,4 +234,30 @@ public RegularExpression[] explode() { protected int compareToAux(RegularExpression other) { return op.compareTo(other.asStar().op); } + + @Override + public RegularExpression repeat(long n) { + return this; + } + + @Override + public RegularExpression trimLeft() { + RegularExpression trimLeft = op.trimLeft().simplify(); + if (trimLeft.isEmpty()) + return Atom.EPSILON; + return new Or(Atom.EPSILON, new Comp(trimLeft, this)); + } + + @Override + public RegularExpression trimRight() { + RegularExpression trimRight = op.trimRight(); + if (trimRight.isEmpty()) + return Atom.EPSILON; + return new Or(Atom.EPSILON, new Comp(this, trimRight)); + } + + @Override + protected boolean readsWhiteSpaceString() { + return op.readsWhiteSpaceString(); + } }