Skip to content

Commit

Permalink
Add text type.
Browse files Browse the repository at this point in the history
Signed-off-by: Yury-Fridlyand <[email protected]>
  • Loading branch information
Yury-Fridlyand committed Jul 19, 2023
1 parent f4883c9 commit 2fb9a2b
Show file tree
Hide file tree
Showing 18 changed files with 124 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_LONG;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_SHORT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_STRING;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TEXT;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIME;
import static org.opensearch.sql.expression.function.BuiltinFunctionName.CAST_TO_TIMESTAMP;

Expand Down Expand Up @@ -45,6 +46,7 @@ public class Cast extends UnresolvedExpression {
private static final Map<String, FunctionName> CONVERTED_TYPE_FUNCTION_NAME_MAP =
new ImmutableMap.Builder<String, FunctionName>()
.put("string", CAST_TO_STRING.getName())
.put("text", CAST_TO_TEXT.getName()) // TODO do we need this?
.put("byte", CAST_TO_BYTE.getName())
.put("short", CAST_TO_SHORT.getName())
.put("int", CAST_TO_INT.getName())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ public enum ExprCoreType implements ExprType {
/**
* String.
*/
STRING(UNDEFINED),
TEXT(UNDEFINED),
STRING(TEXT),
// TODO why not
//STRING(UNDEFINED),
//TEXT(STRING),

/**
* Boolean.
Expand Down
10 changes: 10 additions & 0 deletions core/src/main/java/org/opensearch/sql/data/type/ExprType.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,14 @@ default List<ExprType> getParent() {
default String legacyTypeName() {
return typeName();
}

// TODO doc
default String convertFieldForSearchQuery(String fieldName) {
return fieldName;

Check warning on line 70 in core/src/main/java/org/opensearch/sql/data/type/ExprType.java

View check run for this annotation

Codecov / codecov/patch

core/src/main/java/org/opensearch/sql/data/type/ExprType.java#L70

Added line #L70 was not covered by tests
}

// TODO doc
default Object convertValueForSearchQuery(ExprValue value) {
return value.value();

Check warning on line 75 in core/src/main/java/org/opensearch/sql/data/type/ExprType.java

View check run for this annotation

Codecov / codecov/patch

core/src/main/java/org/opensearch/sql/data/type/ExprType.java#L75

Added line #L75 was not covered by tests
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ public static int distance(ExprType type1, ExprType type2) {
}

private static int distance(ExprType type1, ExprType type2, int distance) {
if (type1 == type2) {
if (type1.equals(type2)) {
return distance;
} else if (type1 == UNKNOWN) {
} else if (type1.equals(UNKNOWN)) {
return IMPOSSIBLE_WIDENING;
} else {
return type1.getParent().stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ public enum BuiltinFunctionName {
* Data Type Convert Function.
*/
CAST_TO_STRING(FunctionName.of("cast_to_string")),
CAST_TO_TEXT(FunctionName.of("cast_to_text")),
CAST_TO_BYTE(FunctionName.of("cast_to_byte")),
CAST_TO_SHORT(FunctionName.of("cast_to_short")),
CAST_TO_INT(FunctionName.of("cast_to_int")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import static org.opensearch.sql.data.type.ExprCoreType.LONG;
import static org.opensearch.sql.data.type.ExprCoreType.SHORT;
import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.TEXT;
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;
import static org.opensearch.sql.expression.function.FunctionDSL.impl;
Expand Down Expand Up @@ -51,6 +52,7 @@ public class TypeCastOperator {
*/
public static void register(BuiltinFunctionRepository repository) {
repository.register(castToString());
repository.register(castToText());
repository.register(castToByte());
repository.register(castToShort());
repository.register(castToInt());
Expand Down Expand Up @@ -78,6 +80,12 @@ private static DefaultFunctionResolver castToString() {
);
}

private static DefaultFunctionResolver castToText() {
return FunctionDSL.define(BuiltinFunctionName.CAST_TO_TEXT.getName(),
impl(nullMissingHandling(v -> v), TEXT, STRING)
);
}

private static DefaultFunctionResolver castToByte() {
return FunctionDSL.define(BuiltinFunctionName.CAST_TO_BYTE.getName(),
impl(nullMissingHandling(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ public void ifnullWithNullInputTest() {
+ " WHERE balance is null limit 2", "jdbc"));

verifySchema(response,
schema("IFNULL(null, firstname)", "IFNULL1", "keyword"),
schema("IFNULL(firstname, null)", "IFNULL2", "keyword"),
schema("IFNULL(null, firstname)", "IFNULL1", "text"),
schema("IFNULL(firstname, null)", "IFNULL2", "text"),
schema("IFNULL(null, null)", "IFNULL3", "byte"));
verifyDataRows(response,
rows("Hattie", "Hattie", LITERAL_NULL.value()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
import java.io.Serializable;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiConsumer;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.apache.commons.lang3.EnumUtils;
import org.opensearch.sql.data.type.ExprCoreType;
Expand All @@ -21,15 +22,51 @@
/**
* The extension of ExprType in OpenSearch.
*/
@EqualsAndHashCode
public class OpenSearchDataType implements ExprType, Serializable {

public boolean equals(final Object o) {
if (o == this) {
return true;
}
if (o instanceof ExprCoreType) {
return exprCoreType.equals(o);
}
if (!(o instanceof OpenSearchDataType)) {
return false;
}
return exprCoreType.equals(((OpenSearchDataType) o).exprCoreType);
}

public int hashCode() {
return 42 + exprCoreType.hashCode();
}

@Override
public List<ExprType> getParent() {
return exprCoreType == ExprCoreType.UNKNOWN
? List.of(ExprCoreType.UNKNOWN)
: exprCoreType.getParent();
}

@Override
public boolean shouldCast(ExprType other) {
ExprCoreType otherCoreType = other instanceof ExprCoreType ? (ExprCoreType) other
: (other instanceof OpenSearchDataType
? ((OpenSearchDataType) other).exprCoreType : ExprCoreType.UNKNOWN);
// TODO Copied from BuiltinFunctionRepository.isCastRequired
if (ExprCoreType.numberTypes().contains(exprCoreType)
&& ExprCoreType.numberTypes().contains(otherCoreType)) {
return false;
}
return exprCoreType == ExprCoreType.UNKNOWN || exprCoreType.shouldCast(other);
}

/**
* The mapping (OpenSearch engine) type.
*/
public enum MappingType {
Invalid(null, ExprCoreType.UNKNOWN),
Text("text", ExprCoreType.UNKNOWN),
Text("text", ExprCoreType.TEXT),
Keyword("keyword", ExprCoreType.STRING),
Ip("ip", ExprCoreType.UNKNOWN),
GeoPoint("geo_point", ExprCoreType.UNKNOWN),
Expand Down Expand Up @@ -64,7 +101,6 @@ public String toString() {
}
}

@EqualsAndHashCode.Exclude
@Getter
protected MappingType mappingType;

Expand Down Expand Up @@ -124,6 +160,9 @@ public static Map<String, OpenSearchDataType> parseMapping(Map<String, Object> i
return;
}
// create OpenSearchDataType

// TODO parse `fielddata`

result.put(k, OpenSearchDataType.of(
EnumUtils.getEnumIgnoreCase(OpenSearchDataType.MappingType.class, type),
innerMap)
Expand Down Expand Up @@ -212,15 +251,14 @@ protected OpenSearchDataType(ExprCoreType type) {
// For datatypes with properties (example: object and nested types)
// a read-only collection
@Getter
@EqualsAndHashCode.Exclude
Map<String, OpenSearchDataType> properties = ImmutableMap.of();

@Override
// Called when building TypeEnvironment and when serializing PPL response
public String typeName() {
// To avoid breaking changes return `string` for `typeName` call (PPL) and `text` for
// `legacyTypeName` call (SQL). See more: https://github.com/opensearch-project/sql/issues/1296
if (legacyTypeName().equals("TEXT")) {
if (legacyTypeName().equals("TEXT") || legacyTypeName().equals("KEYWORD")) {
return "STRING";
}
return legacyTypeName();
Expand Down Expand Up @@ -248,7 +286,7 @@ protected OpenSearchDataType cloneEmpty() {
/**
* Flattens mapping tree into a single layer list of objects (pairs of name-types actually),
* which don't have nested types.
* See {@link OpenSearchDataTypeTest#traverseAndFlatten() test} for example.
* See {@see OpenSearchDataTypeTest#traverseAndFlatten() test} for example.
* @param tree A list of `OpenSearchDataType`s - map between field name and its type.
* @return A list of all `OpenSearchDataType`s from given map on the same nesting level (1).
* Nested object names are prefixed by names of their host.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
import lombok.EqualsAndHashCode;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.time.FormatNames;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;

/**
* Date type with support for predefined and custom formats read from the index mapping.
*/
@EqualsAndHashCode(callSuper = true)
public class OpenSearchDateType extends OpenSearchDataType {

private static final OpenSearchDateType instance = new OpenSearchDateType();
Expand Down Expand Up @@ -403,4 +403,20 @@ protected OpenSearchDataType cloneEmpty() {
}
return OpenSearchDateType.of(String.join(" || ", formats));
}

@Override
public String typeName() {
return exprCoreType.toString();
}

@Override
public String legacyTypeName() {
return exprCoreType.toString();
}

@Override
public Object convertValueForSearchQuery(ExprValue value) {
// TODO fix for https://github.com/opensearch-project/sql/issues/1847
return value.timestampValue().toEpochMilli();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
package org.opensearch.sql.opensearch.data.type;

import static org.opensearch.sql.data.type.ExprCoreType.STRING;
import static org.opensearch.sql.data.type.ExprCoreType.UNKNOWN;
import static org.opensearch.sql.data.type.ExprCoreType.TEXT;

import com.google.common.collect.ImmutableMap;
import java.util.List;
import java.util.Map;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprType;

/**
Expand All @@ -24,18 +25,18 @@ public class OpenSearchTextType extends OpenSearchDataType {

// text could have fields
// a read-only collection
@EqualsAndHashCode.Exclude
@Getter
Map<String, OpenSearchDataType> fields = ImmutableMap.of();

private OpenSearchTextType() {
super(MappingType.Text);
exprCoreType = UNKNOWN;
exprCoreType = TEXT;
}

/**
* Constructs a Text Type using the passed in fields argument.
* @param fields The fields to be used to construct the text type.
* @return A new OpenSeachTextTypeObject
* @return A new OpenSearchTextType object
*/
public static OpenSearchTextType of(Map<String, OpenSearchDataType> fields) {
var res = new OpenSearchTextType();
Expand All @@ -57,24 +58,22 @@ public boolean shouldCast(ExprType other) {
return false;
}

public Map<String, OpenSearchDataType> getFields() {
return fields;
}

@Override
protected OpenSearchDataType cloneEmpty() {
return OpenSearchTextType.of(Map.copyOf(this.fields));
}

/**
* Text field doesn't have doc value (exception thrown even when you call "get")
* Limitation: assume inner field name is always "keyword".
*/
public static String convertTextToKeyword(String fieldName, ExprType fieldType) {
if (fieldType instanceof OpenSearchTextType
&& ((OpenSearchTextType) fieldType).getFields().size() > 0) {
return fieldName + ".keyword";
@Override
public String convertFieldForSearchQuery(String fieldName) {
if (fields.size() > 1) {
// TODO or pick first?
throw new RuntimeException("too many text fields");
}
if (fields.size() == 0) {
return fieldName;
}
return fieldName;
// TODO what if field is not a keyword
// https://github.com/opensearch-project/sql/issues/1112
return String.format("%s.%s", fieldName, fields.keySet().toArray()[0]);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ public Map<String, ExprType> getFieldTypes() {
cachedFieldTypes = OpenSearchDataType.traverseAndFlatten(cachedFieldOpenSearchTypes)
.entrySet().stream().collect(
LinkedHashMap::new,
(map, item) -> map.put(item.getKey(), item.getValue().getExprType()),
(map, item) -> map.put(item.getKey(), item.getValue()/*.getExprType()*/),
Map::putAll);
}
return cachedFieldTypes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ public <T> T build(Expression expression, Function<String, T> fieldBuilder,
Function<Script, T> scriptBuilder) {
if (expression instanceof ReferenceExpression) {
String fieldName = ((ReferenceExpression) expression).getAttr();
return fieldBuilder.apply(
OpenSearchTextType.convertTextToKeyword(fieldName, expression.type()));
return fieldBuilder.apply(expression.type().convertFieldForSearchQuery(fieldName));
} else if (expression instanceof FunctionExpression
|| expression instanceof LiteralExpression) {
return scriptBuilder.apply(new Script(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.stream.Stream;

import org.apache.commons.lang3.tuple.Triple;
import org.opensearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder;
import org.opensearch.search.aggregations.bucket.composite.DateHistogramValuesSourceBuilder;
Expand Down Expand Up @@ -71,7 +73,8 @@ private CompositeValuesSourceBuilder<?> buildCompositeValuesSourceBuilder(
.missingOrder(missingOrder)
.order(sortOrder);
// Time types values are converted to LONG in ExpressionAggregationScript::execute
if (List.of(TIMESTAMP, TIME, DATE, DATETIME).contains(expr.getDelegated().type())) {
if (Stream.of(TIMESTAMP, TIME, DATE, DATETIME)
.anyMatch(t -> expr.getDelegated().type().equals(t))) {
sourceBuilder.userValuetypeHint(ValueType.LONG);
}
return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ private Environment<Expression, ExprValue> buildValueEnv(

private Object getDocValue(ReferenceExpression field,
Supplier<Map<String, ScriptDocValues<?>>> docProvider) {
String fieldName = OpenSearchTextType.convertTextToKeyword(field.getAttr(), field.type());
String fieldName = field.type().convertFieldForSearchQuery(field.getAttr());
ScriptDocValues<?> docValue = docProvider.get().get(fieldName);
if (docValue == null || docValue.isEmpty()) {
return null; // No way to differentiate null and missing from doc value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
public class LikeQuery extends LuceneQuery {
@Override
public QueryBuilder doBuild(String fieldName, ExprType fieldType, ExprValue literal) {
String field = OpenSearchTextType.convertTextToKeyword(fieldName, fieldType);
return createBuilder(field, literal.stringValue());
return createBuilder(
fieldType.convertFieldForSearchQuery(fieldName),
literal.stringValue());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ private ExprValue cast(FunctionExpression castFunction) {
return expr.valueOf();
}
})
// TODO CAST_TO_TEXT
.put(BuiltinFunctionName.CAST_TO_BYTE.getName(), expr -> {
if (ExprCoreType.numberTypes().contains(expr.type())) {
return new ExprByteValue(expr.valueOf().byteValue());
Expand Down
Loading

0 comments on commit 2fb9a2b

Please sign in to comment.