Skip to content

Commit

Permalink
Fixed trim abstract semantics for Tarsis and added some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
VincenzoArceri committed Jul 19, 2023
1 parent e8b5c34 commit 3fac166
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -457,17 +457,21 @@ private RegexAutomaton union(RegexAutomaton... automata) {
}

/**
 * Yields an automaton that corresponds to the {@code n}-time concatenation
 * of {@code this}.
 *
 * @param n the number of repetitions
 *
 * @return an automaton that corresponds to the {@code n}-time concatenation
 *             of {@code this}
 */
public RegexAutomaton repeat(long n) {
    // zero repetitions: the result is the automaton built by emptyString()
    if (n == 0)
        return emptyString();
    // the repetition is computed on the simplified regex of this automaton,
    // and the resulting automaton is minimized before being returned
    return toRegex().simplify().repeat(n).toAutomaton(this).minimize();
}

/**
 * Yields an automaton obtained by trimming the regular expression that
 * corresponds to {@code this}.
 *
 * @return the automaton built from the trimmed regular expression
 */
public RegexAutomaton trim() {
    // the regex is simplified first, so that trimming works on the
    // simplified form rather than on the raw output of toRegex()
    return toRegex().simplify().trim().toAutomaton(this);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import it.unive.lisa.util.numeric.IntInterval;
import it.unive.lisa.util.numeric.MathNumber;
import it.unive.lisa.util.numeric.MathNumberConversionException;

import java.util.Objects;
import java.util.SortedSet;
import java.util.TreeSet;
Expand Down Expand Up @@ -424,7 +423,7 @@ else if (intv.interval.isFinite()) {
} else
return new Tarsis(a.repeat(intv.interval.getLow().toLong()).concat(a.star()));
}

/**
 * Yields a new {@code Tarsis} instance wrapping the trimmed version of the
 * automaton held by this one.
 *
 * @return the {@code Tarsis} instance built from the trimmed automaton
 */
public Tarsis trim() {
    return new Tarsis(a.trim());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,12 @@

import static org.junit.Assert.assertTrue;

import org.junit.Test;

import it.unive.lisa.analysis.numeric.Interval;
import it.unive.lisa.util.datastructures.regex.Atom;
import it.unive.lisa.util.datastructures.regex.Or;
import it.unive.lisa.util.numeric.MathNumber;
import it.unive.lisa.util.numeric.MathNumberConversionException;
import org.junit.Test;

public class RepeatTest {

Expand All @@ -22,50 +21,52 @@ public void repeatSingleString() throws MathNumberConversionException {

// "abc".repeat(1) = "abc"
assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(abc));

// "abc".repeat(2) = "abcabc"
assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abcabc));

// "abc".repeat(0) = ""
assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr()));


// "abc".repeat([1,2]) = {"abc", "abcabc"}
assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(abc.union(abcabc)));

// "abc".repeat([0,+infty]) = (abc)*
assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star));
assertTrue(
a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc_star));

// "abc".repeat([1,+infty]) = abc(abc)*
assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton().isEqualTo(abc.concat(abc_star)));
assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton()
.isEqualTo(abc.concat(abc_star)));
}

/**
 * Checks {@code repeat} on an automaton recognizing two strings, for
 * singleton, bounded and unbounded intervals of repetitions.
 */
@Test
public void repeatTwoStrings() throws MathNumberConversionException {

    RegexAutomaton ab_or_cd = new Or(new Atom("ab"), new Atom("cd")).toAutomaton(RegexAutomaton.emptyLang());
    RegexAutomaton abab_or_cdcd = new Or(new Atom("abab"), new Atom("cdcd"))
            .toAutomaton(RegexAutomaton.emptyLang());

    Tarsis a = new Tarsis(ab_or_cd);

    // {"ab", "cd"}.repeat(1) = {"ab", "cd"}
    assertTrue(a.repeat(new Interval(1, 1)).getAutomaton().isEqualTo(ab_or_cd));

    // {"ab", "cd"}.repeat(2) = {"abab", "cdcd"}
    assertTrue(a.repeat(new Interval(2, 2)).getAutomaton().isEqualTo(abab_or_cdcd));

    // {"ab", "cd"}.repeat(0) = {""}
    assertTrue(a.repeat(new Interval(0, 0)).getAutomaton().isEqualTo(RegexAutomaton.emptyStr()));

    // {"ab", "cd"}.repeat([1,2]) = {"ab", "cd", "abab", "cdcd"}
    assertTrue(a.repeat(new Interval(1, 2)).getAutomaton().isEqualTo(ab_or_cd.union(abab_or_cdcd)));

    // {"ab", "cd"}.repeat([0,+infty]) = (ab|cd)*
    assertTrue(a.repeat(new Interval(MathNumber.ZERO, MathNumber.PLUS_INFINITY)).getAutomaton()
            .isEqualTo(ab_or_cd.star()));

    // {"ab", "cd"}.repeat([1,+infty]) = (ab|cd)(ab|cd)*
    assertTrue(a.repeat(new Interval(MathNumber.ONE, MathNumber.PLUS_INFINITY)).getAutomaton()
            .isEqualTo(ab_or_cd.concat(ab_or_cd.star())));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,53 @@

import static org.junit.Assert.assertTrue;

import org.junit.Test;

import it.unive.lisa.util.numeric.MathNumberConversionException;
import org.junit.Test;

public class TrimTest {

@Test
public void repeatSingleString() throws MathNumberConversionException {
public void trimSingleString() throws MathNumberConversionException {
RegexAutomaton abc = RegexAutomaton.string("abc");
RegexAutomaton ws = RegexAutomaton.string(" ");
RegexAutomaton epsilon = RegexAutomaton.string("");
RegexAutomaton abc_with_ws = RegexAutomaton.string(" a b c ");
RegexAutomaton empty_star = RegexAutomaton.string(" ").star();
RegexAutomaton comp1 = RegexAutomaton.string(" ").concat(RegexAutomaton.string(" abc"));
RegexAutomaton comp2 = RegexAutomaton.string(" a").concat(RegexAutomaton.string(" b "));
RegexAutomaton comp3 = RegexAutomaton.string(" abc ").concat(RegexAutomaton.string(" "));

// "abc".trim() = "abc"
Tarsis a = new Tarsis(abc);
assertTrue(a.trim().getAutomaton().isEqualTo(abc));

// " ".trim() = ""
// " ".trim() = ""
Tarsis b = new Tarsis(ws);
assertTrue(b.trim().getAutomaton().isEqualTo(epsilon));

// " a b c ".trim() = "a b c"
// " a b c ".trim() = "a b c"
Tarsis c = new Tarsis(abc_with_ws);
assertTrue(c.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c")));

// (" ")*.trim() = ""
// (" ")*.trim() = ""
Tarsis d = new Tarsis(empty_star);
assertTrue(d.trim().getAutomaton().isEqualTo(RegexAutomaton.emptyStr()));
// " " + " abc".trim() = "abc"

// " " + " abc".trim() = "abc"
Tarsis e = new Tarsis(comp1);
assertTrue(e.trim().getAutomaton().isEqualTo(abc));
// " a" + " b ".trim() = "a b "

// " a" + " b ".trim() = "a b"
Tarsis f = new Tarsis(comp2);
assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b ")));
assertTrue(f.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b")));

// " a b c "*.trim() = "a b c " + (" a b c ")* + " a b c"
Tarsis g = new Tarsis(abc_with_ws.star());
assertTrue(g.trim().getAutomaton().isEqualTo(RegexAutomaton.string("a b c ").concat(abc_with_ws.star())
.concat(RegexAutomaton.string(" a b c")).star()));

// " abc " + " ".trim() = "abc"
Tarsis h = new Tarsis(comp3);
assertTrue(h.trim().getAutomaton().isEqualTo(RegexAutomaton.string("abc")));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* this class have on their transitions
*/
public abstract class Automaton<A extends Automaton<A, T>, T extends TransitionSymbol<T>>
implements AutomataFactory<A, T> {
implements AutomataFactory<A, T> {

/**
* The states of this automaton.
Expand Down Expand Up @@ -1053,7 +1053,7 @@ public String prettyPrint() {

for (Transition<T> t : transitions)
result.append("\t").append(st).append(" [").append(t.getSymbol()).append("] -> ")
.append(t.getDestination()).append("\n");
.append(t.getDestination()).append("\n");
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package it.unive.lisa.util.datastructures.regex;

import java.util.HashSet;
import java.util.Set;

import it.unive.lisa.util.datastructures.automaton.AutomataFactory;
import it.unive.lisa.util.datastructures.automaton.Automaton;
import it.unive.lisa.util.datastructures.automaton.TransitionSymbol;
import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;

/**
* A {@link RegularExpression} representing a single string.
Expand Down Expand Up @@ -79,7 +79,7 @@ public RegularExpression simplify() {

@Override
public <A extends Automaton<A, T>,
T extends TransitionSymbol<T>> A toAutomaton(AutomataFactory<A, T> factory) {
T extends TransitionSymbol<T>> A toAutomaton(AutomataFactory<A, T> factory) {
return isEmpty() ? factory.emptyString() : factory.singleString(string);
}

Expand Down Expand Up @@ -199,4 +199,14 @@ public RegularExpression repeat(long n) {
public RegularExpression trim() {
    // String.trim() drops both the leading and the trailing whitespace
    String trimmed = string.trim();
    return new Atom(trimmed);
}

/**
 * Yields the atom obtained by removing the leading whitespace from the
 * string of this one. A character is treated as whitespace if it is less
 * than or equal to {@code ' '} (U+0020): this is the same notion used by
 * {@link String#trim()}, and thus by {@link #trim()}, so that left, right
 * and full trimming of atoms agree on what gets removed.
 *
 * @return the left-trimmed atom
 */
@Override
public RegularExpression trimLeft() {
    int start = 0;
    // skip every leading character that String.trim() would remove
    while (start < string.length() && string.charAt(start) <= ' ')
        start++;
    return new Atom(string.substring(start));
}

/**
 * Yields the atom obtained by removing the trailing whitespace from the
 * string of this one. A character is treated as whitespace if it is less
 * than or equal to {@code ' '} (U+0020): this is the same notion used by
 * {@link String#trim()}, and thus by {@link #trim()}, so that left, right
 * and full trimming of atoms agree on what gets removed.
 *
 * @return the right-trimmed atom
 */
@Override
public RegularExpression trimRight() {
    int end = string.length();
    // drop every trailing character that String.trim() would remove
    while (end > 0 && string.charAt(end - 1) <= ' ')
        end--;
    return new Atom(string.substring(0, end));
}
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package it.unive.lisa.util.datastructures.regex;

import java.util.HashSet;
import java.util.Set;

import it.unive.lisa.util.datastructures.automaton.AutomataFactory;
import it.unive.lisa.util.datastructures.automaton.Automaton;
import it.unive.lisa.util.datastructures.automaton.TransitionSymbol;
import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString;
import java.util.HashSet;
import java.util.Set;

/**
* A {@link RegularExpression} representing the sequential composition of two
Expand Down Expand Up @@ -289,7 +288,7 @@ protected int compareToAux(RegularExpression other) {
return cmp;
return second.compareTo(other.asComp().second);
}

@Override
public RegularExpression repeat(long n) {
if (n == 0)
Expand All @@ -303,11 +302,32 @@ public RegularExpression repeat(long n) {

@Override
public RegularExpression trim() {

RegularExpression trimFirst = first.trim().simplify();
if (trimFirst.isEmpty())
RegularExpression trimLeftFirst = first.trimLeft().simplify();
if (trimLeftFirst.isEmpty())
return second.trim();

RegularExpression trimRightSecond = second.trimRight().simplify();
if (trimRightSecond.isEmpty())
return first.trim();

return new Comp(trimLeftFirst, trimRightSecond).simplify();
}

/**
 * Trims this concatenation on the left. The first operand is left-trimmed
 * and simplified; if nothing is left of it, the result is the left trim of
 * the second operand, otherwise the trimmed first operand is concatenated
 * with the untouched second one.
 *
 * @return the left-trimmed regular expression
 */
@Override
public RegularExpression trimLeft() {
    RegularExpression head = first.trimLeft().simplify();
    return head.isEmpty() ? second.trimLeft() : new Comp(head, second);
}

/**
 * Trims this concatenation on the right. The second operand is
 * right-trimmed and simplified; if nothing is left of it, the result is
 * the right trim of the first operand, otherwise the untouched first
 * operand is concatenated with the trimmed second one.
 *
 * @return the right-trimmed regular expression
 */
@Override
public RegularExpression trimRight() {
    RegularExpression trimRightSecond = this.second.trimRight().simplify();
    if (trimRightSecond.isEmpty())
        return this.first.trimRight();
    else
        return new Comp(this.first, trimRightSecond);
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
package it.unive.lisa.util.datastructures.regex;

import java.util.Collections;
import java.util.Set;

import it.unive.lisa.util.datastructures.automaton.AutomataFactory;
import it.unive.lisa.util.datastructures.automaton.Automaton;
import it.unive.lisa.util.datastructures.automaton.TransitionSymbol;
import java.util.Collections;
import java.util.Set;

/**
* A {@link RegularExpression} representing the empty set of strings.
Expand Down Expand Up @@ -142,4 +141,14 @@ public RegularExpression repeat(long n) {
public RegularExpression trim() {
// this expression is returned unchanged by trimming
return this;
}

/**
 * Left-trimming leaves this expression unchanged.
 *
 * @return this regular expression
 */
@Override
public RegularExpression trimLeft() {
return this;
}

/**
 * Right-trimming leaves this expression unchanged.
 *
 * @return this regular expression
 */
@Override
public RegularExpression trimRight() {
return this;
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
package it.unive.lisa.util.datastructures.regex;

import java.util.HashSet;
import java.util.Set;

import it.unive.lisa.util.datastructures.automaton.AutomataFactory;
import it.unive.lisa.util.datastructures.automaton.Automaton;
import it.unive.lisa.util.datastructures.automaton.TransitionSymbol;
import java.util.HashSet;
import java.util.Set;

/**
* A {@link RegularExpression} representing an or between two other regular
Expand Down Expand Up @@ -288,4 +287,15 @@ public RegularExpression repeat(long n) {
public RegularExpression trim() {
    // trimming is applied independently to each alternative
    RegularExpression left = first.trim();
    RegularExpression right = second.trim();
    return new Or(left, right);
}

/**
 * Trims this disjunction on the left by left-trimming each of its two
 * alternatives.
 *
 * @return the disjunction of the left-trimmed alternatives
 */
@Override
public RegularExpression trimLeft() {
    RegularExpression left = first.trimLeft();
    RegularExpression right = second.trimLeft();
    return new Or(left, right);
}

/**
 * Trims this disjunction on the right by right-trimming each of its two
 * alternatives.
 *
 * @return the disjunction of the right-trimmed alternatives
 */
@Override
public RegularExpression trimRight() {
    RegularExpression left = first.trimRight();
    RegularExpression right = second.trimRight();
    return new Or(left, right);
}
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package it.unive.lisa.util.datastructures.regex;

import java.util.Set;
import java.util.stream.Collectors;

import it.unive.lisa.util.datastructures.automaton.AutomataFactory;
import it.unive.lisa.util.datastructures.automaton.Automaton;
import it.unive.lisa.util.datastructures.automaton.TransitionSymbol;
import it.unive.lisa.util.datastructures.regex.symbolic.SymbolicString;
import java.util.Set;
import java.util.stream.Collectors;

/**
* A regular expression that can be recognized by an {@link Automaton}, or that
Expand Down Expand Up @@ -312,8 +311,13 @@ public String toString() {
protected abstract Set<PartialSubstring> substringAux(int charsToSkip, int missingChars);

/**
 * Yields a regular expression corresponding to the {@code n}-time
 * repetition of this one.
 *
 * @param n the number of repetitions
 *
 * @return the repeated regular expression
 */
public abstract RegularExpression repeat(long n);

/**
 * Yields a regular expression obtained by trimming this one on both sides,
 * that is, by removing leading and trailing whitespace.
 *
 * @return the trimmed regular expression
 */
public abstract RegularExpression trim();

/**
 * Yields a regular expression obtained by trimming this one on the left,
 * that is, by removing leading whitespace.
 *
 * @return the left-trimmed regular expression
 */
public abstract RegularExpression trimLeft();

/**
 * Yields a regular expression obtained by trimming this one on the right,
 * that is, by removing trailing whitespace.
 *
 * @return the right-trimmed regular expression
 */
public abstract RegularExpression trimRight();

/**
* Yields {@code true} if and only if this regular expression corresponds to
* the empty string or to no strings at all.
Expand Down
Loading

0 comments on commit 3fac166

Please sign in to comment.