Skip to content

Commit 8b21baf

Browse files
Refined String-escaping behaviour (#22)
## What is the goal of this PR? During the recent refactor of the Graql grammar, we accidentally made the lexer rule for `STRING_` too strict: we disallowed the `/` character. The reason for this was that the unescape-and-escape behaviour over `/` was not symmetrical: `unescape('/') -> '/'` and `escape('/') -> '\/'`, which means the user reads back characters that they did not write, and that's not acceptable. However, we need to bring back `/` characters in strings (issue #21) because they are widely used. During the investigation, we discovered that there is no need for Graql to be escaping/unescaping strings that are provided by the user in the first place. Given that the String that is parsed by ANTLR is already a valid String, we can store it "as is" (with the exception of regular expressions, which still require a special escape operation). I.e. what the user writes is what the user reads, for any Unicode character accepted by ANTLR. Fixing the above also resulted in fixing our tests to expect the correct behaviour from Graql. ## What are the changes implemented in this PR? 1) Relaxed the Graql grammar to accept `/` (fixes #21). 2) Removed `escapeString()` and `unescapeString()` from `StringUtil`, and all their usages. 3) Introduced `unescapeRegex()` and `escapeRegex()` in `StringUtil`, and used them in parsing and printing regex. 4) Fixed tests to comply with the newly refined behaviour of Graql string and regex parsing/printing. 5) Additionally, we reimplemented `StringUtils.repeat()` with a simple `spaces(int len)` in `SyntaxError`, so we can remove the dependency on `org.apache.commons.lang`.
1 parent 2dbf9b5 commit 8b21baf

File tree

14 files changed

+80
-61
lines changed

14 files changed

+80
-61
lines changed

dependencies/maven/artifacts/commons-lang/BUILD

-11
This file was deleted.

dependencies/maven/dependencies.bzl

-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ def list_dependencies():
7272
{"artifact": "com.google.errorprone:error_prone_annotations:2.0.18", "lang": "java", "sha1": "5f65affce1684999e2f4024983835efc3504012e", "sha256": "cb4cfad870bf563a07199f3ebea5763f0dec440fcda0b318640b1feaa788656b", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/com/google/errorprone/error_prone_annotations/2.0.18/error_prone_annotations-2.0.18.jar", "source": {"sha1": "220c1232fa6d13b427c10ccc1243a87f5f501d31", "sha256": "dbe7b49dd0584704d5c306b4ac7273556353ea3c0c6c3e50adeeca8df47047be", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/com/google/errorprone/error_prone_annotations/2.0.18/error_prone_annotations-2.0.18-sources.jar"} , "name": "com-google-errorprone-error_prone_annotations", "actual": "@com-google-errorprone-error_prone_annotations//jar", "bind": "jar/com/google/errorprone/error-prone-annotations"},
7373
{"artifact": "com.google.guava:guava:23.0", "lang": "java", "sha1": "c947004bb13d18182be60077ade044099e4f26f1", "sha256": "7baa80df284117e5b945b19b98d367a85ea7b7801bd358ff657946c3bd1b6596", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/com/google/guava/guava/23.0/guava-23.0.jar", "source": {"sha1": "ed233607c5c11e1a13a3fd760033ed5d9fe525c2", "sha256": "37fe8ba804fb3898c3c8f0cbac319cc9daa58400e5f0226a380ac94fb2c3ca14", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/com/google/guava/guava/23.0/guava-23.0-sources.jar"} , "name": "com-google-guava-guava", "actual": "@com-google-guava-guava//jar", "bind": "jar/com/google/guava/guava"},
7474
{"artifact": "com.google.j2objc:j2objc-annotations:1.1", "lang": "java", "sha1": "ed28ded51a8b1c6b112568def5f4b455e6809019", "sha256": "2994a7eb78f2710bd3d3bfb639b2c94e219cedac0d4d084d516e78c16dddecf6", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/com/google/j2objc/j2objc-annotations/1.1/j2objc-annotations-1.1.jar", "source": {"sha1": "1efdf5b737b02f9b72ebdec4f72c37ec411302ff", "sha256": "2cd9022a77151d0b574887635cdfcdf3b78155b602abc89d7f8e62aba55cfb4f", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/com/google/j2objc/j2objc-annotations/1.1/j2objc-annotations-1.1-sources.jar"} , "name": "com-google-j2objc-j2objc-annotations", "actual": "@com-google-j2objc-j2objc-annotations//jar", "bind": "jar/com/google/j2objc/j2objc-annotations"},
75-
{"artifact": "commons-lang:commons-lang:2.6", "lang": "java", "sha1": "0ce1edb914c94ebc388f086c6827e8bdeec71ac2", "sha256": "50f11b09f877c294d56f24463f47d28f929cf5044f648661c0f0cfbae9a2f49c", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/commons-lang/commons-lang/2.6/commons-lang-2.6.jar", "source": {"sha1": "67313d715fbf0ea4fd0bdb69217fb77f807a8ce5", "sha256": "66c2760945cec226f26286ddf3f6ffe38544c4a69aade89700a9a689c9b92380", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/commons-lang/commons-lang/2.6/commons-lang-2.6-sources.jar"} , "name": "commons-lang-commons-lang", "actual": "@commons-lang-commons-lang//jar", "bind": "jar/commons-lang/commons-lang"},
7675
{"artifact": "org.antlr:antlr4-runtime:4.7.1", "lang": "java", "sha1": "946f8aa9daa917dd81a8b818111bec7e288f821a", "sha256": "43516d19beae35909e04d06af6c0c58c17bc94e0070c85e8dc9929ca640dc91d", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/org/antlr/antlr4-runtime/4.7.1/antlr4-runtime-4.7.1.jar", "source": {"sha1": "1e68e18aa14f3229b95820d354a594846134af38", "sha256": "a33d52d0d64e68c60d5e3ae2c1098fe7200d57cff59032c19930fd9d487fc7d4", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/org/antlr/antlr4-runtime/4.7.1/antlr4-runtime-4.7.1-sources.jar"} , "name": "org-antlr-antlr4-runtime", "actual": "@org-antlr-antlr4-runtime//jar", "bind": "jar/org/antlr/antlr4-runtime"},
7776
{"artifact": "org.codehaus.mojo:animal-sniffer-annotations:1.14", "lang": "java", "sha1": "775b7e22fb10026eed3f86e8dc556dfafe35f2d5", "sha256": "2068320bd6bad744c3673ab048f67e30bef8f518996fa380033556600669905d", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/org/codehaus/mojo/animal-sniffer-annotations/1.14/animal-sniffer-annotations-1.14.jar", "source": {"sha1": "886474da3f761d39fcbb723d97ecc5089e731f42", "sha256": "d821ae1f706db2c1b9c88d4b7b0746b01039dac63762745ef3fe5579967dd16b", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/org/codehaus/mojo/animal-sniffer-annotations/1.14/animal-sniffer-annotations-1.14-sources.jar"} , "name": "org-codehaus-mojo-animal-sniffer-annotations", "actual": "@org-codehaus-mojo-animal-sniffer-annotations//jar", "bind": "jar/org/codehaus/mojo/animal-sniffer-annotations"},
7877
{"artifact": "org.hamcrest:hamcrest-core:1.3", "lang": "java", "sha1": "42a25dc3219429f0e5d060061f71acb49bf010a0", "sha256": "66fdef91e9739348df7a096aa384a5685f4e875584cce89386a7a47251c4d8e9", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar", "source": {"sha1": "1dc37250fbc78e23a65a67fbbaf71d2e9cbc3c0b", "sha256": "e223d2d8fbafd66057a8848cc94222d63c3cedd652cc48eddc0ab5c39c0f84df", "repository": "https://repo.maven.apache.org/maven2/", "url": "https://repo.maven.apache.org/maven2/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3-sources.jar"} , "name": "org-hamcrest-hamcrest-core", "actual": "@org-hamcrest-hamcrest-core//jar", "bind": "jar/org/hamcrest/hamcrest-core"},

dependencies/maven/dependencies.yaml

-5
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,6 @@ dependencies:
4343
version: "23.0"
4444
lang: java
4545

46-
commons-lang:
47-
commons-lang:
48-
version: "2.6"
49-
lang: java
50-
5146
org.antlr:
5247
antlr4-runtime:
5348
version: "4.7.1" # sync version with @antlr4_runtime//jar

grammar/Graql.g4

+2-2
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,8 @@ DATE : 'date' ;
302302
BOOLEAN_ : TRUE | FALSE ; // order of lexer declaration matters
303303
TRUE : 'true' ;
304304
FALSE : 'false' ;
305-
STRING_ : '"' (~["\\/] | ESCAPE_SEQ_ )* '"'
306-
| '\'' (~['\\/] | ESCAPE_SEQ_ )* '\'' ;
305+
STRING_ : '"' (~["\\] | ESCAPE_SEQ_ )* '"'
306+
| '\'' (~['\\] | ESCAPE_SEQ_ )* '\'' ;
307307
INTEGER_ : ('+' | '-')? [0-9]+ ;
308308
REAL_ : ('+' | '-')? [0-9]+ '.' [0-9]+ ;
309309
DATE_ : DATE_FRAGMENT_ ;

java/BUILD

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ java_library(
2626
srcs = ["//grammar:graql-java"] + glob(["**/*.java"], exclude = ["**/test/**"]),
2727
deps = [
2828
# External dependencies
29-
"//dependencies/maven/artifacts/commons-lang:commons-lang",
3029
"//dependencies/maven/artifacts/com/google/guava:guava",
3130
"//dependencies/maven/artifacts/com/google/code/findbugs:jsr305",
3231
"//dependencies/maven/artifacts/org/antlr:antlr4-runtime", # sync version with @antlr4_runtime//jar

java/parser/Parser.java

+3-5
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import static graql.lang.Graql.not;
6868
import static graql.lang.Graql.type;
6969
import static graql.lang.util.Collections.triple;
70+
import static graql.lang.util.StringUtil.unescapeRegex;
7071
import static java.util.stream.Collectors.toList;
7172

7273
/**
@@ -973,9 +974,7 @@ public ValueProperty.Operation<?> visitComparison(GraqlParser.ComparisonContext
973974

974975
@Override
975976
public String visitRegex(GraqlParser.RegexContext ctx) {
976-
// Remove surrounding /.../
977-
String unquoted = unquoteString(ctx.STRING_());
978-
return unquoted.replaceAll("\\\\/", "/");
977+
return unescapeRegex(unquoteString(ctx.STRING_()));
979978
}
980979

981980
@Override
@@ -1022,8 +1021,7 @@ public Object visitLiteral(GraqlParser.LiteralContext ctx) {
10221021

10231022
private String getString(TerminalNode string) {
10241023
// Remove surrounding quotes
1025-
String unquoted = unquoteString(string);
1026-
return StringUtil.unescapeString(unquoted);
1024+
return unquoteString(string);
10271025
}
10281026

10291027
private String unquoteString(TerminalNode string) {

java/parser/SyntaxError.java

+10-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
package graql.lang.parser;
2020

2121
import graql.lang.exception.ErrorMessage;
22-
import org.apache.commons.lang.StringUtils;
2322

2423
public class SyntaxError {
2524

@@ -38,6 +37,15 @@ public SyntaxError(String queryLine, int line, int charPositionInLine, String ms
3837
this.msg = msg;
3938
}
4039

40+
private String spaces(int len) {
41+
char ch = ' ';
42+
char[] output = new char[len];
43+
for (int i = len - 1; i >= 0; i--) {
44+
output[i] = ch;
45+
}
46+
return new String(output);
47+
}
48+
4149
@Override
4250
public String toString() {
4351
if (queryLine == null) {
@@ -49,7 +57,7 @@ public String toString() {
4957
// match $
5058
// ^
5159
// blah blah antlr blah
52-
String pointer = StringUtils.repeat(" ", charPositionInLine) + "^";
60+
String pointer = spaces(charPositionInLine) + "^";
5361
return ErrorMessage.SYNTAX_ERROR.getMessage(line, queryLine, pointer, msg);
5462
}
5563
}

java/parser/test/ParserTest.java

+42-9
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,15 @@ public void testSimpleQuery() {
9999
assertQueryEquals(expected, parsed, query);
100100
}
101101

102+
@Test
103+
public void testParseStringWithSlash() {
104+
String query = "match $x isa person, has name 'alice/bob'; get;";
105+
GraqlGet parsed = Graql.parse(query).asGet();
106+
GraqlGet expected = match(var("x").isa("person").has("name", "alice/bob")).get();
107+
108+
assertQueryEquals(expected, parsed, query.replace("'", "\""));
109+
}
110+
102111
@Test
103112
public void testRelationQuery() {
104113
String query = "match\n" +
@@ -664,12 +673,13 @@ public void testDefineDataTypeQuery() {
664673

665674
@Test
666675
public void testEscapeString() {
667-
String unescaped = "This has \"double quotes\" and a single-quoted backslash: '\\'";
668-
String escaped = "This has \\\"double quotes\\\" and a single-quoted backslash: \\'\\\\\\'";
676+
// ANTLR will see this as a string that looks like:
677+
// "This has \"double quotes\" and a single-quoted backslash: '\\'"
678+
String input = "This has \\\"double quotes\\\" and a single-quoted backslash: \\'\\\\\\'";
669679

670-
String query = "insert $_ isa movie, has title \"" + escaped + "\";";
680+
String query = "insert $_ isa movie, has title \"" + input + "\";";
671681
GraqlInsert parsed = Graql.parse(query).asInsert();
672-
GraqlInsert expected = insert(var().isa("movie").has("title", unescaped));
682+
GraqlInsert expected = insert(var().isa("movie").has("title", input));
673683

674684
assertQueryEquals(expected, parsed, query);
675685
}
@@ -1096,29 +1106,52 @@ public void whenParsingAggregateWithWrongName_Throw() {
10961106
parse("match $x isa name; get; hello $x;");
10971107
}
10981108

1109+
@Test
1110+
public void regexAttributeProperty() {
1111+
String query = "define digit sub attribute, regex '\\d';";
1112+
GraqlDefine parsed = parse(query);
1113+
GraqlDefine expected = define(type("digit").sub("attribute").regex("\\d"));
1114+
assertQueryEquals(expected, parsed, query.replace("'", "\""));
1115+
}
1116+
10991117
@Test
11001118
public void regexPredicateParsesCharacterClassesCorrectly() {
1101-
assertEquals(match(var("x").like("\\d")).get(), parse("match $x like '\\d'; get;"));
1119+
String query = "match $x like '\\d'; get;";
1120+
GraqlGet parsed = parse(query);
1121+
GraqlGet expected = match(var("x").like("\\d")).get();
1122+
assertQueryEquals(expected, parsed, query.replace("'", "\""));
11021123
}
11031124

11041125
@Test
11051126
public void regexPredicateParsesQuotesCorrectly() {
1106-
assertEquals(match(var("x").like("\"")).get(), parse("match $x like '\"'; get;"));
1127+
String query = "match $x like '\\\"'; get;";
1128+
GraqlGet parsed = parse(query);
1129+
GraqlGet expected = match(var("x").like("\\\"")).get();
1130+
assertQueryEquals(expected, parsed, query.replace("'", "\""));
11071131
}
11081132

11091133
@Test
11101134
public void regexPredicateParsesBackslashesCorrectly() {
1111-
assertEquals(match(var("x").like("\\\\")).get(), parse("match $x like '\\\\'; get;"));
1135+
String query = "match $x like '\\\\'; get;";
1136+
GraqlGet parsed = parse(query);
1137+
GraqlGet expected = match(var("x").like("\\\\")).get();
1138+
assertQueryEquals(expected, parsed, query.replace("'", "\""));
11121139
}
11131140

11141141
@Test
11151142
public void regexPredicateParsesNewlineCorrectly() {
1116-
assertEquals(match(var("x").like("\\n")).get(), parse("match $x like '\\n'; get;"));
1143+
String query = "match $x like '\\n'; get;";
1144+
GraqlGet parsed = parse(query);
1145+
GraqlGet expected = match(var("x").like("\\n")).get();
1146+
assertQueryEquals(expected, parsed, query.replace("'", "\""));
11171147
}
11181148

11191149
@Test
11201150
public void regexPredicateParsesForwardSlashesCorrectly() {
1121-
assertEquals(match(var("x").like("/")).get(), parse("match $x like '\\/'; get;"));
1151+
String query = "match $x like '\\/'; get;";
1152+
GraqlGet parsed = parse(query);
1153+
GraqlGet expected = match(var("x").like("/")).get();
1154+
assertQueryEquals(expected, parsed, query.replace("'", "\""));
11221155
}
11231156

11241157
@Test

java/pattern/Disjunction.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818

1919
package graql.lang.pattern;
2020

21-
import com.google.common.collect.ImmutableSet;
2221
import com.google.common.collect.Sets;
2322
import graql.lang.Graql;
2423
import graql.lang.statement.Statement;
2524
import graql.lang.statement.Variable;
25+
import graql.lang.util.Collections;
2626

2727
import javax.annotation.CheckReturnValue;
2828
import java.util.Iterator;
@@ -78,7 +78,7 @@ public Disjunction<Conjunction<Pattern>> getNegationDNF() {
7878

7979
@Override
8080
public Set<Variable> variables() {
81-
return getPatterns().stream().map(Pattern::variables).reduce(Sets::intersection).orElse(ImmutableSet.of());
81+
return getPatterns().stream().map(Pattern::variables).reduce(Sets::intersection).orElse(Collections.set());
8282
}
8383

8484
@Override

java/pattern/Pattern.java

+3
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,7 @@ default Set<Statement> statements() {
7979
*/
8080
@CheckReturnValue
8181
default Negation<?> asNegation(){ throw new UnsupportedOperationException(); }
82+
83+
@Override
84+
String toString();
8285
}

java/property/RegexProperty.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020

2121
import graql.lang.Graql;
2222
import graql.lang.statement.StatementType;
23-
import graql.lang.util.StringUtil;
23+
24+
import static graql.lang.util.StringUtil.escapeRegex;
25+
import static graql.lang.util.StringUtil.quoteString;
2426

2527
/**
2628
* Represents the {@code regex} property on a AttributeType. This property can be queried and inserted.
@@ -49,7 +51,7 @@ public String keyword() {
4951

5052
@Override
5153
public String property() {
52-
return "\"" + StringUtil.escapeString(regex()) + "\"";
54+
return quoteString(escapeRegex(regex()));
5355
}
5456

5557
@Override

java/property/ValueProperty.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
import java.time.LocalDateTime;
2828
import java.util.stream.Stream;
2929

30+
import static graql.lang.util.StringUtil.escapeRegex;
31+
import static graql.lang.util.StringUtil.quoteString;
32+
3033
/**
3134
* Represents the {@code value} property on an attribute.
3235
* This property can be queried or inserted.
@@ -243,9 +246,9 @@ public java.lang.String toString() {
243246

244247
operation.append(comparator()).append(Graql.Token.Char.SPACE);
245248
if (comparator().equals(Graql.Token.Comparator.LIKE)) {
246-
operation.append("\"").append(value().replaceAll("/", "\\\\/")).append("\"");
249+
operation.append(quoteString(escapeRegex(value())));
247250
} else {
248-
operation.append(StringUtil.quoteString(value()));
251+
operation.append(quoteString(value()));
249252
}
250253

251254
return operation.toString();

java/query/test/GraqlQueryTest.java

+4-5
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,14 @@ public void testInsertQueryToString() {
116116

117117
@Test
118118
public void testEscapeStrings() {
119-
assertEquals("insert $x \"hello\\nworld\";", Graql.insert(var("x").val("hello\nworld")).toString());
119+
assertEquals("insert $x \"hello\nworld\";", Graql.insert(var("x").val("hello\nworld")).toString());
120+
assertEquals("insert $x \"hello\\nworld\";", Graql.insert(var("x").val("hello\\nworld")).toString());
120121
}
121122

122123
@Test
123124
public void testQuoteIds() {
124-
assertEquals(
125-
"match $a (\"hello\\tworld\");",
126-
match(var("a").rel(type("hello\tworld"))).toString()
127-
);
125+
assertEquals("match $a (\"hello\tworld\");", match(var("a").rel(type("hello\tworld"))).toString());
126+
assertEquals("match $a (\"hello\\tworld\");", match(var("a").rel(type("hello\\tworld"))).toString());
128127
}
129128

130129
@Test

java/util/StringUtil.java

+5-14
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
import graql.grammar.GraqlLexer;
2222
import graql.lang.Graql;
23-
import org.apache.commons.lang.StringEscapeUtils;
2423

2524
import java.text.DecimalFormat;
2625
import java.text.DecimalFormatSymbols;
@@ -39,28 +38,20 @@ public class StringUtil {
3938
Arrays.asList("min", "max", "median", "mean", "std", "sum", "count", "path", "cluster", "degrees", "members", "persist")
4039
);
4140

42-
/**
43-
* @param string the string to unescape
44-
* @return the unescaped string, replacing any backslash escapes with the real characters
45-
*/
46-
public static String unescapeString(String string) {
47-
return StringEscapeUtils.unescapeJavaScript(string);
41+
public static String unescapeRegex(String regex) {
42+
return regex.replaceAll("\\\\/", "/");
4843
}
4944

50-
/**
51-
* @param string the string to escape
52-
* @return the escaped string, replacing any escapable characters with backslashes
53-
*/
54-
public static String escapeString(String string) {
55-
return StringEscapeUtils.escapeJavaScript(string);
45+
public static String escapeRegex(String regex) {
46+
return regex.replaceAll("/", "\\\\/");
5647
}
5748

5849
/**
5950
* @param string a string to quote and escape
6051
* @return a string, surrounded with double quotes and escaped
6152
*/
6253
public static String quoteString(String string) {
63-
return "\"" + escapeString(string) + "\"";
54+
return "\"" + string + "\"";
6455
}
6556

6657
public static String escapeLabelOrId(String value) {

0 commit comments

Comments
 (0)