diff --git a/pom.xml b/pom.xml index f502287ab..0469d0df7 100644 --- a/pom.xml +++ b/pom.xml @@ -91,7 +91,7 @@ vtl-engine vtl-envs vtl-meta - vtl-parser + vtl-parsers vtl-samples vtl-session vtl-transform @@ -141,7 +141,6 @@ 3.12.1 2.3.4 - 2.2.4 1.12.0 2.0.12 @@ -163,7 +162,7 @@ 4.0.5 3.1.2.RELEASE v20.12.2 - 4.13.1 + 4.9.3 1.0.57 5.3.31 30.1.1-jre @@ -303,10 +302,10 @@ it.bancaditalia.oss.vtl - vtl-parser - parser-js + vtl-jsparser ${project.version} zip + grammar it.bancaditalia.oss.vtl @@ -540,7 +539,7 @@ com.esotericsoftware kryo - 4.0.2 + 4.0.3 test @@ -587,11 +586,6 @@ reflow-velocity-tools ${reflow.version} - - org.asciidoctor - asciidoctor-maven-plugin - ${asciidoctor.version} - org.apache.maven.doxia doxia-module-markdown @@ -834,14 +828,10 @@ SCRIPT_STYLE - ASCIIDOC_STYLE JAVADOC_STYLE XML_STYLE true - - license-files/asciidoc-format.xml - ${project.organization.name} @@ -920,11 +910,6 @@ 18881 ${project.build.directory}/site - - - ${project.basedir}/vtl-bundles/vtl-rest/target/generated-snippets - - diff --git a/src/site/asciidoc/compile.adoc b/src/site/asciidoc/compile.adoc deleted file mode 100644 index 489238200..000000000 --- a/src/site/asciidoc/compile.adoc +++ /dev/null @@ -1,61 +0,0 @@ -//// - // Copyright © 2020 Banca D'Italia - // - // Licensed under the EUPL, Version 1.2 (the "License"); - // You may not use this work except in compliance with the - // License. - // You may obtain a copy of the License at: - // - // https://joinup.ec.europa.eu/sites/default/files/custom-page/attachment/2020-03/EUPL-1.2%20EN.txt - // - // Unless required by applicable law or agreed to in - // writing, software distributed under the License is - // distributed on an "AS IS" basis, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - // express or implied. - // - // See the License for the specific language governing - // permissions and limitations under the License. -//// -=== `/compile` POST Method === - -Compiles a snippet of VTL code in a new session. 
- -Returns the uuid of the created session. - -==== HTTP Request ==== - -===== Request fields ===== - -include::{rest-snippets}/compile/request-parameters.adoc[] - -==== HTTP Response ==== - -===== Response fields ===== - -include::{rest-snippets}/compile/response-fields.adoc[] - -===== Response cookies ===== - -include::{rest-snippets}/compile/response-headers.adoc[] - -===== Sample response body ===== - -include::{rest-snippets}/compile/response-body.adoc[] - -==== Invocation examples ==== - -===== Curl command ===== - -include::{rest-snippets}/compile/curl-request.adoc[] - -===== http command ===== - -include::{rest-snippets}/compile/httpie-request.adoc[] - -==== Sample HTTP dump ==== - -include::{rest-snippets}/compile/http-request.adoc[] - -include::{rest-snippets}/compile/http-response.adoc[] - diff --git a/src/site/asciidoc/metadata.adoc b/src/site/asciidoc/metadata.adoc deleted file mode 100644 index 619310b75..000000000 --- a/src/site/asciidoc/metadata.adoc +++ /dev/null @@ -1,59 +0,0 @@ -//// - // Copyright © 2020 Banca D'Italia - // - // Licensed under the EUPL, Version 1.2 (the "License"); - // You may not use this work except in compliance with the - // License. - // You may obtain a copy of the License at: - // - // https://joinup.ec.europa.eu/sites/default/files/custom-page/attachment/2020-03/EUPL-1.2%20EN.txt - // - // Unless required by applicable law or agreed to in - // writing, software distributed under the License is - // distributed on an "AS IS" basis, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - // express or implied. - // - // See the License for the specific language governing - // permissions and limitations under the License. -//// -=== `/metadata` GET Method === - -Returns the metadata associated with a given object defined in a given session. 
- -==== HTTP Request ==== - -===== Request fields ===== - -include::{rest-snippets}/metadata/request-parameters.adoc[] - -===== Request cookies ===== - -include::{rest-snippets}/metadata/request-headers.adoc[] - -==== HTTP Response ==== - -===== Response fields ===== - -include::{rest-snippets}/metadata/response-fields.adoc[] - -===== Sample response body ===== - -include::{rest-snippets}/metadata/response-body.adoc[] - -==== Invocation examples ==== - -===== Curl command ===== - -include::{rest-snippets}/metadata/curl-request.adoc[] - -===== http command ===== - -include::{rest-snippets}/metadata/httpie-request.adoc[] - -==== Sample request and response dump ==== - -include::{rest-snippets}/metadata/http-request.adoc[] - -include::{rest-snippets}/metadata/http-response.adoc[] - diff --git a/src/site/asciidoc/resolve.adoc b/src/site/asciidoc/resolve.adoc deleted file mode 100644 index c5028e2b9..000000000 --- a/src/site/asciidoc/resolve.adoc +++ /dev/null @@ -1,59 +0,0 @@ -//// - // Copyright © 2020 Banca D'Italia - // - // Licensed under the EUPL, Version 1.2 (the "License"); - // You may not use this work except in compliance with the - // License. - // You may obtain a copy of the License at: - // - // https://joinup.ec.europa.eu/sites/default/files/custom-page/attachment/2020-03/EUPL-1.2%20EN.txt - // - // Unless required by applicable law or agreed to in - // writing, software distributed under the License is - // distributed on an "AS IS" basis, - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - // express or implied. - // - // See the License for the specific language governing - // permissions and limitations under the License. -//// -=== `/resolve` GET Method === - -Returns the value of a given object defined in a given session. 
- -==== HTTP Request ==== - -===== Request fields ===== - -include::{rest-snippets}/resolve/request-parameters.adoc[] - -===== Request cookies ===== - -include::{rest-snippets}/resolve/request-headers.adoc[] - -==== HTTP Response ==== - -===== Response fields ===== - -include::{rest-snippets}/resolve/response-fields.adoc[] - -===== Sample response body ===== - -include::{rest-snippets}/resolve/response-body.adoc[] - -==== Invocation examples ==== - -===== Curl command ===== - -include::{rest-snippets}/resolve/curl-request.adoc[] - -===== http command ===== - -include::{rest-snippets}/resolve/httpie-request.adoc[] - -==== HTTP request and response dump ==== - -include::{rest-snippets}/resolve/http-request.adoc[] - -include::{rest-snippets}/resolve/http-response.adoc[] - diff --git a/src/site/markdown/rest-setup.md.vm b/src/site/markdown/rest-setup.md.vm deleted file mode 100644 index 9603742a6..000000000 --- a/src/site/markdown/rest-setup.md.vm +++ /dev/null @@ -1,109 +0,0 @@ -#* - * Copyright © 2020 Banca D'Italia - * - * Licensed under the EUPL, Version 1.2 (the "License"); - * You may not use this work except in compliance with the - * License. - * You may obtain a copy of the License at: - * - * https://joinup.ec.europa.eu/sites/default/files/custom-page/attachment/2020-03/EUPL-1.2%20EN.txt - * - * Unless required by applicable law or agreed to in - * writing, software distributed under the License is - * distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. - * - * See the License for the specific language governing - * permissions and limitations under the License. 
- *# -#set($h1 = '#') -#set($h2 = '##') -#set($h3 = '###') -#set($h4 = '####') -#set($h5 = '#####') -#set($RCODE = '```R') -#set($CSTART = '```') -#set($CEND = '```') - -$h1 Setting up VTL RESTful services - -$h2 Deploying the RESTful services web application - -To deploy the application, acquire the war package either by Maven or from the -[GitHub releases](https://github.com/vpinna80/VTL/releases/latest) page. - -To download the package with Maven into Tomcat deployment folder, use: - -$CSTART -$ mvn dependency:copy -Dmdep.stripVersion=true \ - -Dartifact=it.bancaditalia.oss.vtl:vtl-rest:${project.version}:war \ - -DoutputDirectory=/path/to/tomcat/deploy -$CEND - -After the package has been downloaded, Tomcat should unpack and start it automatically. - -Tomcat usually deploys applications under a path that starts with the war file name, so, if the -file was download by Maven, the entry point for the services should be in this case `/vtl-rest/`. - -$h2 System requirements - -The requirements of VTL RESTful services are: - -* Java 8+ -* [Web Container](https://en.wikipedia.org/wiki/Web_container#List_of_Servlet_containers) - compatible with Servlet 3.1 -* Internet connection (optional) - -Please note that [Jakarta EE technology](https://en.wikipedia.org/wiki/Jakarta_EE) is not required, -although the services application could be run in any of the compatible containers. - -$h2 Particular configurations - -Some of the VTL engine features require an internet connection. - -If you are behind a proxy server, you may need additional configuration to enable these features. - -$h3 CSV Environment - -Please note that CSV Environment accesses files located on the container host. - -Although it fails if the requested file is not in a CSV format, it may pose a security risk. 
- -If this would be the case to a production environment, it can be disabled by setting the -[Java system property](apidocs/it/bancaditalia/oss/vtl/config/VTLGeneralProperties.html#ENVIRONMENT_IMPLEMENTATION). - -This property must be set to a sequence of class names, separated by commas, which must implement -the [`Environment`](apidocs/it/bancaditalia/oss/vtl/environment/Environment.html) interface. - -Available implementation classes are found in the Java package -[`it.bancaditalia.oss.vtl.impl.environment`](apidocs/it/bancaditalia/oss/vtl/impl/environment/package-summary.html). - -**NOTE**: In order for the engine to work correctly, at least one of the chosen classes must -also implement the [`Workspace`](apidocs/it/bancaditalia/oss/vtl/environment/Workspace.html) interface. - -$h3 SDMX Environment - -To enable the use of SDMX environment by the web services, you must follow -[this document](https://github.com/amattioc/SDMX/wiki/System-Configuration#network-settings) -to setup a properties file, which should contain your proxy settings, and then link it with an -[environment variable](https://github.com/amattioc/SDMX/wiki/System-Configuration#configuration-file-location). - -$h3 Choose a VTL metadata repository implementation - -This [Java system property](apidocs/it/bancaditalia/oss/vtl/config/VTLGeneralProperties.html#METADATA_REPOSITORY) -allows you to choose which implementation class you want to use as a metadata repository. - -There are three available implementations, [described here](documentation.html#editor-config_toc_metadata-repository). - -The default implementation is the In-Memory repository. To choose a different one, you can add -a system property in your container configuration. 
- -It must point to a class that implements the [`MetadataRepository`](apidocs/it/bancaditalia/oss/vtl/session/MetadataRepository.html) -interface, such as: - -* [it.bancaditalia.oss.vtl.impl.domains.CSVMetadataRepository](apidocs/it/bancaditalia/oss/vtl/impl/domains/CSVMetadataRepository.html) -* [it.bancaditalia.oss.vtl.impl.domains.SDMXMetadataRepository](apidocs/it/bancaditalia/oss/vtl/impl/domains/CSVMetadataRepository.html) - -If you want to use the SDMX Metadata Repository, you might also need to set standard -[Java proxy properties](https://docs.oracle.com/javase/8/docs/technotes/guides/net/proxies.html). diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManager.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManager.java index fd1c9dc32..df4839f4c 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManager.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManager.java @@ -37,7 +37,7 @@ * Interface for the service used by the application to load and provide implementations * for the various VTL Engine components. * - * Instances of this interface are provided by {@link ConfigurationManagerFactory#getInstance()}. + * Instances of this interface are provided by {@link ConfigurationManagerFactory#newManager()}. * * @author Valentino Pinna */ diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManagerFactory.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManagerFactory.java index 94c5011a8..37726ddac 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManagerFactory.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/ConfigurationManagerFactory.java @@ -68,7 +68,7 @@ public static void loadConfiguration(Reader input) throws IOException /** * @return an application-wide {@link ConfigurationManager} instance. 
 */ - public static ConfigurationManager getInstance() + public static ConfigurationManager newManager() { if (INSTANCE != null) return INSTANCE; diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLGeneralProperties.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLGeneralProperties.java index 97bc28e2e..a238891d6 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLGeneralProperties.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLGeneralProperties.java @@ -22,6 +22,7 @@ import static java.util.stream.Collectors.joining; import java.util.Arrays; +import java.util.Objects; import it.bancaditalia.oss.vtl.engine.Engine; import it.bancaditalia.oss.vtl.environment.Environment; @@ -114,9 +115,9 @@ public String getValue() } @Override - public void setValue(String newValue) + public void setValue(Object newValue) { - value = newValue; + value = Objects.toString(newValue); } @Override diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLProperty.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLProperty.java index 69513bbfe..865a62ced 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLProperty.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/config/VTLProperty.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.List; +import java.util.Objects; /** * This interface provides access to the configuration properties of the VTL Engine components. @@ -46,11 +47,11 @@ public interface VTLProperty public String getValue(); /** - * Change the value for this property + * Change the value for this property. {@link Objects#toString(Object)} is used to determine the object representation. * - * @param newValue The new value for this property + * @param newValue The new value for this property. 
*/ - public void setValue(String newValue); + public void setValue(Object newValue); /** * @return A description of the property diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/DataSet.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/DataSet.java index b04488090..5810d8c38 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/DataSet.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/DataSet.java @@ -19,11 +19,6 @@ */ package it.bancaditalia.oss.vtl.model.data; -import static it.bancaditalia.oss.vtl.util.SerCollectors.toMapWithValues; -import static java.util.Collections.emptyMap; -import static java.util.Collections.singleton; -import static java.util.Collections.singletonMap; - import java.util.Collection; import java.util.Iterator; import java.util.List; @@ -216,63 +211,6 @@ public Stream streamByKeys(Set groupCollector, SerBiFunction, ScalarValue>, T> finisher); - /** - * Groups all the datapoints of this DataSet having the same values for the specified identifiers, - * and performs a mutable reduction - * over each of the groups, and applying a final transformation. - * - * The same as {@link #streamByKeys(Set, Map, SerCollector, SerBiFunction)} with an empty filter. - * - * @param the type of the result of the computation. - * @param keys the {@link Identifier}s used to group the datapoints - * @param groupCollector a {@link Collector} applied to each group to produce the result - * @param finisher a {@link BiFunction} to apply to the group key and result to produce the final result - * @return a {@link Stream} of {@code } objects containing the result of the computation for each group. 
- */ - public default Stream streamByKeys(Set> keys, - SerCollector groupCollector, - SerBiFunction, ScalarValue>, T> finisher) - { - return streamByKeys(keys, emptyMap(), groupCollector, finisher); - } - - /** - * Groups all the datapoints of this DataSet having the same values for the specified identifiers, - * and performs a mutable reduction - * over each of a chosen subset of the groups. - * - * The same as {@link #streamByKeys(Set, Map, SerCollector, SerBiFunction)} with an identity finisher. - * - * @param the type of the result of the computation. - * @param keys the {@link Identifier}s used to group the datapoints - * @param filter a {@code Map} of {@link Identifier}'s values used to exclude matching groups - * @param groupCollector a {@link Collector} applied to each group to produce the result - * @return a {@link Stream} of {@code } objects containing the result of the computation for each group. - */ - public default Stream streamByKeys(Set> keys, - Map, ScalarValue> filter, - SerCollector groupCollector) - { - return streamByKeys(keys, filter, groupCollector, (a, b) -> a); - } - - /** - * Groups all the datapoints of this DataSet having the same values for the specified identifiers, - * and performs a mutable reduction - * over each of the groups. - * - * The same as {@link #streamByKeys(Set, Map, SerCollector, SerBiFunction)} with an empty filter and an identity finisher. - * - * @param the type of the result of the computation. - * @param keys the {@link Identifier}s used to group the datapoints - * @param groupCollector a {@link Collector} applied to each group to produce the result - * @return a {@link Stream} of {@code } objects containing the result of the computation for each group. 
- */ - public default Stream streamByKeys(Set> keys, SerCollector groupCollector) - { - return streamByKeys(keys, emptyMap(), groupCollector); - } - /** * Perform a reduction over a dataset, producing a result for each group defined common values of the specified identifiers * @@ -290,77 +228,43 @@ public default Stream streamByKeys(Set The intermediate result of a single analytic function on one window * @param components A map from source components to result components - * @param windowSpec The clause specifying the window + * @param clause The clause specifying the window + * @param extractors extractors that extract a value from a datapoint * @param collectors Collectors that compute the intermediate results for each source component and window * @param finishers Finishers to transform intermediate results and partition keys into a collection of new values * - * @return The dataset result of the analytic invocation - */ - public DataSet analytic(SerFunction lineageOp, Map, ? extends DataStructureComponent> components, - WindowClause windowSpec, Map, SerCollector, ?, TT>> collectors, - Map, SerBiFunction, Collection>>> finishers); - - /** - * Creates a new DataSet by applying a window function over a single component of this DataSet. - * Each application can produce one or more results that will be exploded into multiple datapoints. 
- * - * @param The intermediate result of the analytic function on one window - * @param component The source component - * @param windowSpec The clause specifying the window - * @param collector Collector that compute the intermediate result for each window - * @param finisher Finisher to transform intermediate result and partition keys into a collection of new values - * - * @return The dataset result of the analytic invocation - */ - public default DataSet analytic(SerFunction lineageOp, DataStructureComponent component, - WindowClause windowSpec, SerCollector, ?, TT> collector, - SerBiFunction, Collection>> finisher) - { - return analytic(lineageOp, singletonMap(component, component), windowSpec, singletonMap(component, collector), singletonMap(component, finisher)); - } - - /** - * Creates a new DataSet by applying a window function over multiple source components of this DataSet. - * Each application can produce one or more results that will be exploded into multiple datapoints. - * * @param The intermediate result of a single analytic function on one window - * @param components The set of source components. Result components will be the same. - * @param windowSpec The clause specifying the window - * @param collectors Collectors that compute the intermediate results for each source component and window - * @param finishers Finishers to transform intermediate results and partition keys into a collection of new values - * * @return The dataset result of the analytic invocation */ - public default DataSet analytic(SerFunction lineageOp, Set> components, - WindowClause windowSpec, Map, SerCollector, ?, TT>> collectors, - Map, SerBiFunction, Collection>>> finishers) - { - return analytic(lineageOp, components.stream().collect(toMapWithValues(k -> k)), windowSpec, collectors, finishers); - } - + /** - * Creates a new DataSet by applying a window function over multiple source components of this DataSet. 
 - * Each window function will produce a single result value and the result will have the same cardinality as this dataset. - * This is the standard VTL analytic invocation. + * Creates a new DataSet by applying a window function over a component of this DataSet. + * Each application can produce one or more values that will be exploded into multiple datapoints. + * The result values are stored in the destination component of the resulting dataset. * - * @param components The set of source components. Result components will be the same. - * @param windowSpec The clause specifying the window - * @param collectors Collectors that compute the results for each source component and window + * @param The type of elements fed into the window + * @param The type of the result produced by the window function + * @param lineageOp A lineage definition for each result datapoint + * @param sourceComp The source component + * @param destComp The destination component + * @param clause The window clause + * @param extractor Extractor that feeds a value into the window. If null, the value fed + * will be the value assumed by the source component in each datapoint + * @param collector The collector to produce the results for each window + * @param finisher A final mapping from the accumulated result into a collection of values. + * If null, the collector result will be taken as-is, and it must be a collection of scalar values. * - * @return The dataset result of the analytic invocation + * @return A new dataset resulting from the application of the specified window function. 
*/ - public default DataSet analytic(SerFunction lineageOp, Set> components, WindowClause windowSpec, - Map, SerCollector, ?, ScalarValue>> collectors) + public DataSet analytic(SerFunction lineageOp, DataStructureComponent sourceComp, + DataStructureComponent destComp, WindowClause clause, SerFunction extractor, + SerCollector collector, SerBiFunction>> finisher); + + public default DataSet analytic(SerFunction lineageOp, DataStructureComponent component, + WindowClause clause, SerCollector, ?, ScalarValue> collector) { - Map, DataStructureComponent> measures = components.stream() - .collect(toMapWithValues(measure -> measure)); - Map, SerBiFunction, ScalarValue, Collection>>> finishers = components.stream() - .collect(toMapWithValues(measure -> (value, originalValue) -> singleton(value))); - - return analytic(lineageOp, measures, windowSpec, collectors, finishers); + return analytic(lineageOp, component, component, clause, null, collector, null); } /** diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/ScalarValue.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/ScalarValue.java index 759fa4d25..4d0eb99c2 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/ScalarValue.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/data/ScalarValue.java @@ -42,7 +42,7 @@ public interface ScalarValue, R extends Compar * @return the {@link ValueDomainSubset} of this ScalarValue. 
*/ public S getDomain(); - + @Override public ScalarValueMetadata getMetadata(); diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/domain/ValueDomain.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/domain/ValueDomain.java index cb93c800a..d74777bd6 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/domain/ValueDomain.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/model/domain/ValueDomain.java @@ -48,4 +48,10 @@ public interface ValueDomain extends Serializable * @return true if the comparison is possible */ public boolean isComparableWith(ValueDomain other); + + /** + * + * @return the representation type for this domain + */ + public Class getRepresentation(); } diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/GenericTuple.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/GenericTuple.java new file mode 100644 index 000000000..ca2a2419e --- /dev/null +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/GenericTuple.java @@ -0,0 +1,104 @@ +/* + * Copyright © 2020 Banca D'Italia + * + * Licensed under the EUPL, Version 1.2 (the "License"); + * You may not use this work except in compliance with the + * License. + * You may obtain a copy of the License at: + * + * https://joinup.ec.europa.eu/sites/default/files/custom-page/attachment/2020-03/EUPL-1.2%20EN.txt + * + * Unless required by applicable law or agreed to in + * writing, software distributed under the License is + * distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. + * + * See the License for the specific language governing + * permissions and limitations under the License. 
+ */ +package it.bancaditalia.oss.vtl.util; + +import java.io.Serializable; +import java.util.Arrays; + +public class GenericTuple implements Serializable, Cloneable +{ + private static final long serialVersionUID = 1L; + + private final Serializable[] values; + + public GenericTuple(Serializable[] values) + { + this.values = values; + } + + @Override + public int hashCode() + { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(values); + return result; + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) + return true; + if (!(obj instanceof GenericTuple)) + return false; + GenericTuple other = (GenericTuple) obj; + if (!Arrays.equals(values, other.values)) + return false; + return true; + } + + public int size() + { + return values.length; + } + + public Serializable get1() { return values[0]; } + public Serializable get2() { return values[1]; } + public Serializable get3() { return values[2]; } + public Serializable get4() { return values[3]; } + public Serializable get5() { return values[4]; } + public Serializable get6() { return values[5]; } + public Serializable get7() { return values[6]; } + public Serializable get8() { return values[7]; } + public Serializable get9() { return values[8]; } + public Serializable get10() { return values[9]; } + public Serializable get11() { return values[10]; } + public Serializable get12() { return values[11]; } + public Serializable get13() { return values[12]; } + public Serializable get14() { return values[13]; } + public Serializable get15() { return values[14]; } + public Serializable get16() { return values[15]; } + public Serializable get17() { return values[16]; } + public Serializable get18() { return values[17]; } + public Serializable get19() { return values[18]; } + public Serializable get20() { return values[19]; } + + public void set1(Serializable value) { values[0] = value; } + public void set2(Serializable value) { values[1] = value; } + public void 
set3(Serializable value) { values[2] = value; } + public void set4(Serializable value) { values[3] = value; } + public void set5(Serializable value) { values[4] = value; } + public void set6(Serializable value) { values[5] = value; } + public void set7(Serializable value) { values[6] = value; } + public void set8(Serializable value) { values[7] = value; } + public void set9(Serializable value) { values[8] = value; } + public void set10(Serializable value) { values[9] = value; } + public void set11(Serializable value) { values[10] = value; } + public void set12(Serializable value) { values[11] = value; } + public void set13(Serializable value) { values[12] = value; } + public void set14(Serializable value) { values[13] = value; } + public void set15(Serializable value) { values[14] = value; } + public void set16(Serializable value) { values[15] = value; } + public void set17(Serializable value) { values[16] = value; } + public void set18(Serializable value) { values[17] = value; } + public void set19(Serializable value) { values[18] = value; } + public void set20(Serializable value) { values[19] = value; } +} diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/OptionalBox.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/OptionalBox.java deleted file mode 100644 index 40db18ac6..000000000 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/OptionalBox.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright © 2020 Banca D'Italia - * - * Licensed under the EUPL, Version 1.2 (the "License"); - * You may not use this work except in compliance with the - * License. - * You may obtain a copy of the License at: - * - * https://joinup.ec.europa.eu/sites/default/files/custom-page/attachment/2020-03/EUPL-1.2%20EN.txt - * - * Unless required by applicable law or agreed to in - * writing, software distributed under the License is - * distributed on an "AS IS" basis, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. 
- * - * See the License for the specific language governing - * permissions and limitations under the License. - */ -package it.bancaditalia.oss.vtl.util; - -public class OptionalBox implements SerConsumer -{ - private static final long serialVersionUID = 1L; - - private final SerBinaryOperator op; - private T value = null; - private boolean present = false; - - public OptionalBox(SerBinaryOperator op) - { - this.op = op; - } - - @Override - public void accept(T t) - { - if (present) - value = op.apply(value, t); - else - { - value = t; - present = true; - } - } - - public boolean isPresent() - { - return present; - } - - public T get() - { - return value; - } -} \ No newline at end of file diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerBinaryOperator.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerBinaryOperator.java index cff40f78b..eb01a13d8 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerBinaryOperator.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerBinaryOperator.java @@ -27,14 +27,14 @@ public interface SerBinaryOperator extends BinaryOperator, SerBiFunction SerBinaryOperator minBy(Comparator comparator) { - SerIntBiFunction op = comparator::compare; - return (a, b) -> op.apply(a, b) <= 0 ? a : b; + SerToIntBiFunction fn = comparator::compare; + return (a, b) -> fn.applyAsInt(a, b) <= 0 ? a : b; } public static SerBinaryOperator maxBy(Comparator comparator) { - SerIntBiFunction op = comparator::compare; - return (a, b) -> op.apply(a, b) >= 0 ? a : b; + SerToIntBiFunction fn = comparator::compare; + return (a, b) -> fn.applyAsInt(a, b) >= 0 ? 
a : b; } public default SerBinaryOperator reverseIf(boolean condition) diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerCollectors.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerCollectors.java index 8338d7ef0..f973b6c77 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerCollectors.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerCollectors.java @@ -20,6 +20,7 @@ package it.bancaditalia.oss.vtl.util; import static it.bancaditalia.oss.vtl.util.SerFunction.identity; +import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import static java.lang.Boolean.TRUE; import static java.util.Collections.emptySet; import static java.util.Objects.requireNonNull; @@ -44,6 +45,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collector.Characteristics; import it.bancaditalia.oss.vtl.exceptions.VTLNestedException; @@ -107,6 +109,14 @@ public static SerCollector reducing(U identity, SerFunction { a[0] = op.apply(a[0], b[0]); return a; }, a -> a[0], emptySet()); } + + public static SerCollector> reducing(SerBinaryOperator op) + { + return new SerCollector<>(boxSupplier((T) null), + (a, t) -> { if (a[0] == null) a[0] = t; else a[0] = op.apply(a[0], t); }, + (a, b) -> { if (a[0] == null) a[0] = b[0]; else a[0] = op.apply(a[0], coalesce(b[0], a[0])); return a; }, + a -> Optional.ofNullable(a[0]), emptySet()); + } public static SerCollector mapping(SerFunction mapper, SerCollector downstream) { @@ -149,12 +159,18 @@ public static SerCollector reducing(U identity, SerFunction SerCollector, List> toList() { - return new SerCollector<>(ArrayList::new, List::add, (left, right) -> { left.addAll(right); return left; }, identity(), emptySet()); + return new SerCollector<>(ArrayList::new, List::add, (left, right) -> { left.addAll(right); return left; }, identity(), 
EnumSet.of(IDENTITY_FINISH, CONCURRENT)); + } + + public static SerCollector, List> toList(SerSupplier> supplier) + { + return new SerCollector<>(supplier::get, List::add, (left, right) -> { left.addAll(right); return left; }, identity(), EnumSet.of(IDENTITY_FINISH, CONCURRENT)); } public static SerCollector toArray(T[] result) { - return new SerCollector<>(() -> new ArrayHolder(result), ArrayHolder::accumulate, ArrayHolder::merge, acc -> result, EnumSet.of(CONCURRENT)); + AtomicInteger index = new AtomicInteger(0); + return new SerCollector<>(() -> result, (a, v) -> a[index.getAndIncrement()] = v, (a, b) -> a, identity(), EnumSet.of(CONCURRENT)); } public static SerCollector filtering(SerPredicate predicate, SerCollector downstream) @@ -277,13 +293,6 @@ public static SerCollector peeking(SerConsumer act return SerCollector.of(mangledFactory, accumulator, merger, finisher, EnumSet.of(UNORDERED)); } } - - public static SerCollector, Optional> reducing(SerBinaryOperator op) - { - return new SerCollector<>(() -> new OptionalBox(op), OptionalBox::accept, - (a, b) -> { if (b.isPresent()) a.accept(b.get()); return a; }, - a -> Optional.ofNullable(a.get()), emptySet()); - } public static SerCollector teeing(SerCollector downstream1, SerCollector downstream2, SerBiFunction merger) @@ -338,7 +347,7 @@ public R finish() @SuppressWarnings("unchecked") private static SerSupplier boxSupplier(T identity) { - return () -> (T[]) new Object[] { identity }; + return () -> (T[]) new Serializable[] { (Serializable) identity }; } private static > SerBinaryOperator mapMerger(SerBinaryOperator mergeFunction) @@ -432,27 +441,4 @@ private static > SerBinaryOperator throwingMerger() { return SerCollector.of(mapSupplier::get, throwingPutter(Entry::getKey, Entry::getValue), throwingMerger(), characteristics); } - - private static class ArrayHolder implements Serializable - { - private static final long serialVersionUID = 1L; - - private final T[] result; - private transient volatile int 
index; - - public ArrayHolder(T[] result) - { - this.result = result; - } - - public void accumulate(T v) - { - result[index++] = v; - } - - public ArrayHolder merge(ArrayHolder other) - { - throw new UnsupportedOperationException(); - } - } } diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerDoubleSumAvgCount.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerDoubleSumAvgCount.java index 775909cc4..ba28ec4c1 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerDoubleSumAvgCount.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerDoubleSumAvgCount.java @@ -78,16 +78,21 @@ public SerDoubleSumAvgCount combine(SerDoubleSumAvgCount other) private void sumWithCompensation(double value) { - double tmp = value - sums[1]; - double velvel = sums[0] + tmp; - sums[1] = (velvel - sums[0]) - tmp; - sums[0] = velvel; + double tmp1 = value - sums[1]; + double tmp2 = sums[0] + tmp1; + sums[1] = (tmp2 - sums[0]) - tmp1; + sums[0] = tmp2; } public final long getCount() { return count; } + + public final double[] getSums() + { + return sums; + } public final OptionalDouble getSum() { diff --git a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerIntBiFunction.java b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerIntPredicate.java similarity index 76% rename from vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerIntBiFunction.java rename to vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerIntPredicate.java index cff05dcda..7a8eaf2fa 100644 --- a/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerIntBiFunction.java +++ b/vtl-api/src/main/java/it/bancaditalia/oss/vtl/util/SerIntPredicate.java @@ -20,13 +20,9 @@ package it.bancaditalia.oss.vtl.util; import java.io.Serializable; +import java.util.function.IntPredicate; -public interface SerIntBiFunction extends Serializable +public interface SerIntPredicate extends IntPredicate, Serializable { - public int apply(T t, U u); - - public default SerBiFunction 
andThen(SerIntFunction mapper) - { - return (t, u) -> mapper.apply(apply(t, u)); - } + } diff --git a/vtl-bundles/vtl-cli/src/main/java/it/bancaditalia/oss/vtl/impl/cli/VTLShell.java b/vtl-bundles/vtl-cli/src/main/java/it/bancaditalia/oss/vtl/impl/cli/VTLShell.java index 136e75fb0..e57e6e7a0 100644 --- a/vtl-bundles/vtl-cli/src/main/java/it/bancaditalia/oss/vtl/impl/cli/VTLShell.java +++ b/vtl-bundles/vtl-cli/src/main/java/it/bancaditalia/oss/vtl/impl/cli/VTLShell.java @@ -77,7 +77,7 @@ public Void call() throws Exception ConfigurationManagerFactory.loadConfiguration(reader); } - ConfigurationManager manager = ConfigurationManagerFactory.getInstance(); + ConfigurationManager manager = ConfigurationManagerFactory.newManager(); VTLSession session; try (Reader reader = file != null ? Files.newBufferedReader(file.toPath(), UTF_8) : new BufferedReader(new InputStreamReader(System.in, UTF_8))) diff --git a/vtl-bundles/vtl-coverage/pom.xml b/vtl-bundles/vtl-coverage/pom.xml index 55e19ad55..1e3a22eab 100644 --- a/vtl-bundles/vtl-coverage/pom.xml +++ b/vtl-bundles/vtl-coverage/pom.xml @@ -36,47 +36,6 @@ vtl-coverage Coverage report aggregator - - - with-spark - - - it.bancaditalia.oss.vtl - vtl-spark - - - - - with-cli - - - it.bancaditalia.oss.vtl - vtl-cli - - - - - with-jupyter - - - it.bancaditalia.oss.vtl - vtl-jupyter - ${project.version} - - - - - with-r - - - it.bancaditalia.oss.vtl - vtl-java2r - ${project.version} - - - - - @@ -126,11 +85,20 @@ it.bancaditalia.oss.vtl vtl-session + + it.bancaditalia.oss.vtl + vtl-spark + org.junit.jupiter junit-jupiter test + + org.apache.spark + spark-sql_${scala.compat.version} + provided + org.junit.jupiter junit-jupiter-params diff --git a/vtl-bundles/vtl-coverage/src/test/java/it/bancaditalia/oss/vtl/coverage/tests/IntegrationTestSuite.java b/vtl-bundles/vtl-coverage/src/test/java/it/bancaditalia/oss/vtl/coverage/tests/IntegrationTestSuite.java index c2fda41b0..cb2db4f76 100644 --- 
a/vtl-bundles/vtl-coverage/src/test/java/it/bancaditalia/oss/vtl/coverage/tests/IntegrationTestSuite.java +++ b/vtl-bundles/vtl-coverage/src/test/java/it/bancaditalia/oss/vtl/coverage/tests/IntegrationTestSuite.java @@ -22,6 +22,8 @@ import static it.bancaditalia.oss.vtl.config.VTLGeneralProperties.ENVIRONMENT_IMPLEMENTATION; import static it.bancaditalia.oss.vtl.config.VTLGeneralProperties.METADATA_REPOSITORY; import static it.bancaditalia.oss.vtl.impl.environment.CSVPathEnvironment.VTL_CSV_ENVIRONMENT_SEARCH_PATH; +import static it.bancaditalia.oss.vtl.impl.environment.spark.SparkEnvironment.VTL_SPARK_SEARCH_PATH; +import static it.bancaditalia.oss.vtl.impl.environment.spark.SparkEnvironment.VTL_SPARK_UI_ENABLED; import static it.bancaditalia.oss.vtl.impl.meta.json.JsonMetadataRepository.JSON_METADATA_URL; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; import static java.nio.charset.StandardCharsets.UTF_8; @@ -38,6 +40,8 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.hamcrest.Matchers; @@ -48,6 +52,7 @@ import it.bancaditalia.oss.vtl.config.ConfigurationManagerFactory; import it.bancaditalia.oss.vtl.impl.environment.CSVPathEnvironment; import it.bancaditalia.oss.vtl.impl.environment.WorkspaceImpl; +import it.bancaditalia.oss.vtl.impl.environment.spark.SparkEnvironment; import it.bancaditalia.oss.vtl.impl.meta.json.JsonMetadataRepository; import it.bancaditalia.oss.vtl.model.data.DataPoint; import it.bancaditalia.oss.vtl.model.data.DataSet; @@ -67,16 +72,175 @@ public static Stream test() throws IOException, URISyntaxException root = Paths.get(IntegrationTestSuite.class.getResource("../tests").toURI()); METADATA_REPOSITORY.setValue(JsonMetadataRepository.class); - ENVIRONMENT_IMPLEMENTATION.setValues(CSVPathEnvironment.class, WorkspaceImpl.class); List tests = new ArrayList<>(); String testName; StringBuilder 
testCode = new StringBuilder(); + + Set skipped = Stream.of( + "abs", + "absolute_value_1", + "absolute_value_2", + "aggr", + "aggr_1", + "aggr_2", + "and_1", + "and_2", + "and_or", + "average", + "basic_arithmetic_1", + "basic_arithmetic_2", + "between_1", + "between_2", + "boolean_and_1", + "boolean_and_2", + "boolean_not_1", + "boolean_or_1", + "boolean_or_2", + "boolean_xor_1", + "boolean_xor_2", + "calc", + "calc_1", + "case_conversion_1", + "case_conversion_2", + "case_conversion_3", + "ceil", + "ceil_1", + "ceil_floor_1", + "count", + "div", + "drop", + "element_of_1", + "element_of_2", + "element_of_3", + "equal", + "equal_1", + "equal_2", + "exists_in_1", + "exists_in_2", + "exists_in_3", + "exp", + "exp_2", + "fill_timeseries_1", + "fill_timeseries_2", + "filter", + "filter_1", + "filter_2", + "first_value_1", + "first_value_2", + "floor", + "floor_1", + "flow_to_stock_1", + "flow_to_stock_2", + "greater", + "greatereq", + "greater_than_1", + "greater_than_2", + "if-then-else_1", + "if-then-else_2", + "intersection", + "is_null_1", + "is_null_2", + "keep", + "keep_1", + "lag_1", + "lag_2", + "last_value_1", + "last_value_2", + "lead_1", + "lead_2", + "lesser", + "lessereq", + "less_than_1", + "ln", + "ln_1", + "ln_2", + "log_1", + "log_2", + "match_characters_1", + "match_characters_2", + "max", + "median", + "membership_1", + "min", + "minus", + "mod", + "mod_1", + "mod_2", + "mult", + "notequal", + "not_1", + "not_2", + "not_equal_1", + "not_equal_2", + "not_xor", + "nvl_1", + "nvl_2", + "nvl_3", + "op_with_calculated_scalar", + "or_1", + "or_2", + "parentheses_1", + "parentheses_2", + "pattern_location_1", + "pattern_location_2", + "pattern_replacement_1", + "pattern_replacement_2", + "pattern_replacement_3", + "plus", + "pow", + "power_1", + "power_2", + "rank_1", + "ratio_to_report_1", + "ratio_to_report_2", + "rename", + "rename_1", + "round", + "round_1", + "round_2", + "self_defined_operator1", + "self_defined_operator2", + "set_difference_1", + 
"sqrt", + "sqrt_1", + "sqrt_2", + "standard_deviation_pop", + "standard_deviation_samp", + "stock_to_flow_1", + "stock_to_flow_2", + "string_concatenation_1", + "string_concatenation_2", + "string_concatenation_3", + "string_trim_1", + "string_trim_2", + "string_trim_3", + "string_trim_4", + "sub", + "substring_extraction_1", + "substring_extraction_2", + "sub_1", + "sum", + "sum_1", + "symmetric_difference_1", + "timeshift_1", + "timeshift_2", + "trunc_1", + "trunc_2", + "uminus", + "union_1", + "union_2", + "uplus", + "var_pop", + "var_samp", + "xor_1", + "xor_2" + ).collect(Collectors.toSet()); try (BufferedReader dirReader = new BufferedReader(new InputStreamReader(IntegrationTestSuite.class.getResourceAsStream("../tests"), UTF_8))) { while ((testName = dirReader.readLine()) != null) - if (!testName.endsWith(".class")) + if (!testName.endsWith(".class")/* && !skipped.contains(testName) */) { try (BufferedReader testReader = Files.newBufferedReader(root.resolve(testName).resolve(testName + ".vtl"))) { @@ -93,7 +257,7 @@ public static Stream test() throws IOException, URISyntaxException testCode.setLength(0); } } - + return tests.stream(); } @@ -108,9 +272,56 @@ public synchronized void test(final String testName, final String testCode) thro System.out.println(testCode); System.out.println("------------------------------------------------------------------------------------------------"); + ENVIRONMENT_IMPLEMENTATION.setValues(CSVPathEnvironment.class, WorkspaceImpl.class); VTL_CSV_ENVIRONMENT_SEARCH_PATH.setValues(root.resolve(testName).toString()); JSON_METADATA_URL.setValue(IntegrationTestSuite.class.getResource(testName + "/" + testName + ".json").toString()); - VTLSession session = ConfigurationManagerFactory.getInstance().createSession(testCode); + VTLSession session = ConfigurationManagerFactory.newManager().createSession(testCode); + + session.compile(); + + DataSet expected = session.resolve("expected", DataSet.class); + DataSet result = 
session.resolve("test_result", DataSet.class); + + for (DataStructureComponent comp: expected.getMetadata()) + assertTrue(result.getMetadata().contains(comp), "Expected component " + comp + " is missing in " + result.getMetadata()); + for (DataStructureComponent comp: result.getMetadata()) + assertTrue(expected.getMetadata().contains(comp), "Unexpected component " + comp + " in result."); + + List resDPs; + List expectedDPs; + try (Stream resStream = result.stream(); Stream expStream = expected.stream()) + { + resDPs = resStream.collect(toList()); + expectedDPs = expStream.collect(toList()); + } + + System.out.println("Expected:"); + expectedDPs.forEach(System.out::println); + System.out.println("Actual:"); + resDPs.forEach(System.out::println); + + for (DataPoint dp: resDPs) + assertThat(dp, anyOf(expectedDPs.stream().map(Matchers::equalTo).collect(toList()))); + for (DataPoint dp: expectedDPs) + assertThat(dp, anyOf(resDPs.stream().map(Matchers::equalTo).collect(toList()))); + } + + @ParameterizedTest(name = "{0} - Spark") + @MethodSource("test") + public synchronized void testSpark(final String testName, final String testCode) throws IOException, URISyntaxException + { + System.out.println("------------------------------------------------------------------------------------------------"); + System.out.println(" SPARK -- " + testName); + System.out.println("------------------------------------------------------------------------------------------------"); + System.out.println(); + System.out.println(testCode); + System.out.println("------------------------------------------------------------------------------------------------"); + + ENVIRONMENT_IMPLEMENTATION.setValues(SparkEnvironment.class, WorkspaceImpl.class); + VTL_SPARK_SEARCH_PATH.setValues(root.resolve(testName).toString()); + VTL_SPARK_UI_ENABLED.setValue(false); + JSON_METADATA_URL.setValue(IntegrationTestSuite.class.getResource(testName + "/" + testName + "-spark.json").toString()); + VTLSession 
session = ConfigurationManagerFactory.newManager().createSession(testCode); session.compile(); diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/abs/abs-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/abs/abs-spark.json new file mode 100644 index 000000000..0791ed574 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/abs/abs-spark.json @@ -0,0 +1,56 @@ +{ + "datasets": [ + { + "name": "number_test3", + "source": "spark:csv:number_test3.csv", + "structure": "number_test3_str" + }, + { + "name": "expected", + "structure": "abs_str", + "source": "spark:csv:abs.csv" + } + ], + "structures": [ + { + "name": "number_test3_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "abs_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/absolute_value_1/absolute_value_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/absolute_value_1/absolute_value_1-spark.json new file mode 100644 index 000000000..b14581b72 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/absolute_value_1/absolute_value_1-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "absolute_value_1_str", + "source": "spark:csv:absolute_value_1.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", 
+ "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "absolute_value_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/absolute_value_2/absolute_value_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/absolute_value_2/absolute_value_2-spark.json new file mode 100644 index 000000000..9596a4cb0 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/absolute_value_2/absolute_value_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "absolute_value_2_str", + "source": "spark:csv:absolute_value_2.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "absolute_value_2_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "abs_3", + "role": "Measure" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": 
"time_period", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "abs_3", + "domain": "number" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr/aggr-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr/aggr-spark.json new file mode 100644 index 000000000..9dc19567e --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr/aggr-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test5", + "source": "spark:csv:number_test5.csv", + "structure": "number_test5_str" + }, + { + "name": "expected", + "structure": "clauses_aggr_1_str", + "source": "spark:csv:aggr.csv" + } + ], + "structures": [ + { + "name": "number_test5_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "clauses_aggr_1_str", + "components": [ + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m2", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "m2", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr_1/aggr_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr_1/aggr_1-spark.json new file mode 100644 index 000000000..281068f92 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr_1/aggr_1-spark.json @@ -0,0 
+1,184 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + "name": "expected", + "structure": "aggr_1_str", + "source": "spark:csv:aggr_1.csv" + } + ], + "structures": [ + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + }, + { + "name": "aggr_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + "domain": "string" + }, + { + "name": "source_agency", + "domain": "string" + }, + { + "name": "exr_suffix", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + 
"name": "exr_type", + "domain": "string" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr_2/aggr_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr_2/aggr_2-spark.json new file mode 100644 index 000000000..d09643f70 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/aggr_2/aggr_2-spark.json @@ -0,0 +1,184 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + "name": "expected", + "structure": "aggr_2_str", + "source": "spark:csv:aggr_2.csv" + } + ], + "structures": [ + { + "name": "aggr_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + } + ] + }, + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" 
+ }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + "domain": "string" + }, + { + "name": "source_agency", + "domain": "string" + }, + { + "name": "exr_suffix", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + "name": "exr_type", + "domain": "string" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_1/and_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_1/and_1-spark.json new file mode 100644 index 000000000..9bcfa166e --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_1/and_1-spark.json @@ -0,0 +1,122 @@ +{ + "datasets": [ + { + "name": "boolean_test5", + "source": "spark:csv:boolean_test5.csv", + "structure": "boolean_test5_str" + }, + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "and_1_str", + "source": "spark:csv:and_1.csv" + } + ], + 
"structures": [ + { + "name": "and_1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test5_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_2/and_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_2/and_2-spark.json new file mode 100644 index 000000000..b48059e8f --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_2/and_2-spark.json @@ -0,0 +1,112 @@ +{ + "datasets": [ + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "and_2_str", + "source": "spark:csv:and_2.csv" + } + ], + "structures": [ + { + "name": "and_2_str", + "components": [ + { + "name": "id_2", + "role": 
"Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "andt", + "role": "Measure" + }, + { + "name": "andf", + "role": "Measure" + } + ] + }, + { + "name": "boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "andt", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "andf", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_or/and_or-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_or/and_or-spark.json new file mode 100644 index 000000000..837288287 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/and_or/and_or-spark.json @@ -0,0 +1,124 @@ +{ + "datasets": [ + { + "name": "boolean_test6", + "source": "spark:csv:boolean_test6.csv", + "structure": "boolean_test6_str" + }, + { + "name": "expected", + "structure": "and_or_str", + "source": "spark:csv:and_or.csv" + } + ], + "structures": [ + { + "name": "and_or_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_and", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": 
"Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_or", + "role": "Measure" + } + ] + }, + { + "name": "boolean_test6_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "me_or", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_and", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "me_2", + "domain": "boolean" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/average/average-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/average/average-spark.json new file mode 100644 index 000000000..90f718799 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/average/average-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "median_data", + "source": "spark:csv:median_variance.csv", + "structure": "median_str" + }, + { + "name": "expected", + "structure": "average_str", + "source": "spark:csv:average.csv" + } + ], + "structures": [ + { + "name": "average_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "median_str", + 
"components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/basic_arithmetic_1/basic_arithmetic_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/basic_arithmetic_1/basic_arithmetic_1-spark.json new file mode 100644 index 000000000..3608093e1 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/basic_arithmetic_1/basic_arithmetic_1-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "basic_arithmetic_1_str", + "source": "spark:csv:basic_arithmetic_1.csv" + } + ], + "structures": [ + { + "name": "basic_arithmetic_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" 
+ }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/basic_arithmetic_2/basic_arithmetic_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/basic_arithmetic_2/basic_arithmetic_2-spark.json new file mode 100644 index 000000000..c61ff6103 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/basic_arithmetic_2/basic_arithmetic_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "basic_arithmetic_2_str", + "source": "spark:csv:basic_arithmetic_2.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "basic_arithmetic_2_str", + "components": [ + { + "name": "num_3", + "role": "Measure" + }, + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "num_3", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/between_1/between_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/between_1/between_1-spark.json new file mode 100644 index 000000000..9b7739689 --- /dev/null +++ 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/between_1/between_1-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "between_test", + "source": "spark:csv:between_test.csv", + "structure": "between_test_str" + }, + { + "name": "expected", + "structure": "between_1_str", + "source": "spark:csv:between_1.csv" + } + ], + "structures": [ + { + "name": "between_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "between_1_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/between_2/between_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/between_2/between_2-spark.json new file mode 100644 index 000000000..ce8da8433 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/between_2/between_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "between_test", + "source": "spark:csv:between_test.csv", + "structure": "between_test_str" + }, + { + "name": "expected", + "structure": "between_2_str", + "source": "spark:csv:between_2.csv" + } + ], + "structures": [ + { + "name": "between_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "between_2_str", + "components": [ + { + "name": 
"id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + }, + { + "name": "btwn", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "btwn", + "domain": "boolean" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_and_1/boolean_and_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_and_1/boolean_and_1-spark.json new file mode 100644 index 000000000..e45a5762c --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_and_1/boolean_and_1-spark.json @@ -0,0 +1,74 @@ +{ + "datasets": [ + { + "name": "boolean_test2", + "source": "spark:csv:boolean_test2.csv", + "structure": "boolean_test2_str" + }, + { + "name": "boolean_test1", + "source": "spark:csv:boolean_test1.csv", + "structure": "boolean_test1_str" + }, + { + "name": "expected", + "structure": "boolean_and_1_str", + "source": "spark:csv:boolean_and_1.csv" + } + ], + "structures": [ + { + "name": "boolean_test2_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_and_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "m1", + "domain": "boolean" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_and_2/boolean_and_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_and_2/boolean_and_2-spark.json new file mode 100644 index 000000000..5ff4a3405 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_and_2/boolean_and_2-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "boolean_test3", + "source": "spark:csv:boolean_test3.csv", + "structure": "boolean_test3_str" + }, + { + "name": "expected", + "structure": "boolean_and_2_str", + "source": "spark:csv:boolean_and_2.csv" + } + ], + "structures": [ + { + "name": "boolean_test3_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_and_2_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "m3", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id1", + "domain": "string" + }, + { + "name": "m3", + "domain": "boolean" + }, + { + "name": "m2", + "domain": "boolean" + }, + { + "name": "m1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_not_1/boolean_not_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_not_1/boolean_not_1-spark.json new file mode 100644 index 000000000..ac6b7c122 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_not_1/boolean_not_1-spark.json @@ -0,0 +1,56 @@ +{ + "datasets": [ + { + "name": "boolean_test1", + "source": "spark:csv:boolean_test1.csv", + "structure": "boolean_test1_str" + }, + { + "name": 
"expected", + "structure": "boolean_not_1_str", + "source": "spark:csv:boolean_not_1.csv" + } + ], + "structures": [ + { + "name": "boolean_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_not_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "m1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_or_1/boolean_or_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_or_1/boolean_or_1-spark.json new file mode 100644 index 000000000..dcb6c9865 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_or_1/boolean_or_1-spark.json @@ -0,0 +1,74 @@ +{ + "datasets": [ + { + "name": "boolean_test2", + "source": "spark:csv:boolean_test2.csv", + "structure": "boolean_test2_str" + }, + { + "name": "boolean_test1", + "source": "spark:csv:boolean_test1.csv", + "structure": "boolean_test1_str" + }, + { + "name": "expected", + "structure": "boolean_or_1_str", + "source": "spark:csv:boolean_or_1.csv" + } + ], + "structures": [ + { + "name": "boolean_test2_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_or_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id1", + "domain": "string" + }, + { + "name": "bool_var", + 
"domain": "boolean" + }, + { + "name": "m1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_or_2/boolean_or_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_or_2/boolean_or_2-spark.json new file mode 100644 index 000000000..bb6f62f36 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_or_2/boolean_or_2-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "boolean_test3", + "source": "spark:csv:boolean_test3.csv", + "structure": "boolean_test3_str" + }, + { + "name": "expected", + "structure": "boolean_or_2_str", + "source": "spark:csv:boolean_or_2.csv" + } + ], + "structures": [ + { + "name": "boolean_test3_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_or_2_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "m3", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id1", + "domain": "string" + }, + { + "name": "m3", + "domain": "boolean" + }, + { + "name": "m2", + "domain": "boolean" + }, + { + "name": "m1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_xor_1/boolean_xor_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_xor_1/boolean_xor_1-spark.json new file mode 100644 index 000000000..aa46d76a4 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_xor_1/boolean_xor_1-spark.json @@ -0,0 +1,74 @@ +{ + "datasets": [ + { + "name": "boolean_test2", + "source": 
"spark:csv:boolean_test2.csv", + "structure": "boolean_test2_str" + }, + { + "name": "boolean_test1", + "source": "spark:csv:boolean_test1.csv", + "structure": "boolean_test1_str" + }, + { + "name": "expected", + "structure": "boolean_xor_1_str", + "source": "spark:csv:boolean_xor_1.csv" + } + ], + "structures": [ + { + "name": "boolean_test2_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_xor_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "m1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_xor_2/boolean_xor_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_xor_2/boolean_xor_2-spark.json new file mode 100644 index 000000000..234bae8bb --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/boolean_xor_2/boolean_xor_2-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "boolean_test3", + "source": "spark:csv:boolean_test3.csv", + "structure": "boolean_test3_str" + }, + { + "name": "expected", + "structure": "boolean_xor_2_str", + "source": "spark:csv:boolean_xor_2.csv" + } + ], + "structures": [ + { + "name": "boolean_test3_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "boolean_xor_2_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": 
"id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "m3", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id1", + "domain": "string" + }, + { + "name": "m3", + "domain": "boolean" + }, + { + "name": "m2", + "domain": "boolean" + }, + { + "name": "m1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/calc/calc-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/calc/calc-spark.json new file mode 100644 index 000000000..9e161d755 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/calc/calc-spark.json @@ -0,0 +1,51 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "number_test1_str", + "source": "spark:csv:calc.csv" + } + ], + "structures": [ + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/calc_1/calc_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/calc_1/calc_1-spark.json new file mode 100644 index 000000000..48db334b7 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/calc_1/calc_1-spark.json @@ -0,0 +1,252 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + 
"name": "expected", + "structure": "calc_1_str", + "source": "spark:csv:calc_1.csv" + } + ], + "structures": [ + { + "name": "calc_1_str", + "components": [ + { + "name": "me_sum", + "role": "Measure" + }, + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + }, + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": 
"Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + "domain": "string" + }, + { + "name": "source_agency", + "domain": "string" + }, + { + "name": "exr_suffix", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + "name": "exr_type", + "domain": "string" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "year", + "domain": "integer" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + }, + { + "name": "me_2", + "domain": "integer" + }, + { + "name": "me_sum", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_1/case_conversion_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_1/case_conversion_1-spark.json new file mode 100644 index 000000000..9683ead48 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_1/case_conversion_1-spark.json @@ -0,0 +1,112 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "case_conversion_1_str", + "source": "spark:csv:case_conversion_1.csv" + } + ], + "structures": [ + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + 
"name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "case_conversion_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "text_low", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "text_up", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "text_low", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "text_up", + "domain": "string" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_2/case_conversion_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_2/case_conversion_2-spark.json new file mode 100644 index 000000000..564686c65 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_2/case_conversion_2-spark.json @@ -0,0 +1,96 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "case_conversion_2_str", + "source": "spark:csv:case_conversion_2.csv" + } + ], + "structures": [ + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": 
"text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "case_conversion_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_3/case_conversion_3-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_3/case_conversion_3-spark.json new file mode 100644 index 000000000..86e4af248 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/case_conversion_3/case_conversion_3-spark.json @@ -0,0 +1,96 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "case_conversion_3_str", + "source": "spark:csv:case_conversion_3.csv" + } + ], + "structures": [ + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "case_conversion_3_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + 
"name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil/ceil-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil/ceil-spark.json new file mode 100644 index 000000000..5740e8d0a --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil/ceil-spark.json @@ -0,0 +1,43 @@ +{ + "datasets": [ + { + "name": "number_test2", + "source": "spark:csv:number_test3.csv", + "structure": "number_test3_str" + }, + { + "name": "expected", + "structure": "number_test3_str", + "source": "spark:csv:ceil.csv" + } + ], + "structures": [ + { + "name": "number_test3_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil_1/ceil_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil_1/ceil_1-spark.json new file mode 100644 index 000000000..84c0f7a47 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil_1/ceil_1-spark.json @@ -0,0 +1,72 @@ +{ + 
"datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "ceil_1_str", + "source": "spark:csv:ceil_1.csv" + } + ], + "structures": [ + { + "name": "ceil_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil_floor_1/ceil_floor_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil_floor_1/ceil_floor_1-spark.json new file mode 100644 index 000000000..88433395c --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ceil_floor_1/ceil_floor_1-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "ceil_floor_1_str", + "source": "spark:csv:ceil_floor_1.csv" + } + ], + "structures": [ + { + "name": "ceil_floor_1_str", + "components": [ + { + "name": "num_ceil", + "role": "Measure" + }, + { + "name": "num_floor", + "role": "Measure" + }, + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": 
"number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "num_ceil", + "domain": "number" + }, + { + "name": "num_floor", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/count/count-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/count/count-spark.json new file mode 100644 index 000000000..c53aba84c --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/count/count-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "count_str", + "source": "spark:csv:count.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "count_str", + "components": [ + { + "name": "integer_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "integer_var", + "domain": "integer" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": 
"me_2", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/div/div-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/div/div-spark.json new file mode 100644 index 000000000..4df0bce21 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/div/div-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "number_div_1_str", + "source": "spark:csv:div.csv" + } + ], + "structures": [ + { + "name": "number_div_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/drop/drop-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/drop/drop-spark.json new file mode 100644 index 000000000..98f6d0002 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/drop/drop-spark.json @@ -0,0 +1,84 @@ +{ + "datasets": [ + { + "name": "number_test6", + "source": "spark:csv:number_test6.csv", + "structure": "number_test6_str" + }, + { + "name": "expected", + "structure": "clauses_drop_1_str", + "source": 
"spark:csv:drop.csv" + } + ], + "structures": [ + { + "name": "number_test6_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "clauses_drop_1_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "id2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "m2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_1/element_of_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_1/element_of_1-spark.json new file mode 100644 index 000000000..e03a31661 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_1/element_of_1-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "between_test", + "source": "spark:csv:between_test.csv", + "structure": "between_test_str" + }, + { + "name": "expected", + "structure": "element_of_1_str", + "source": "spark:csv:element_of_1.csv" + } + ], + "structures": [ + { + "name": "element_of_1_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + } + ] + }, + { + "name": "between_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + 
} + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "year", + "domain": "integer" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_2/element_of_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_2/element_of_2-spark.json new file mode 100644 index 000000000..a33b3c7cd --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_2/element_of_2-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "between_test", + "source": "spark:csv:between_test.csv", + "structure": "between_test_str" + }, + { + "name": "expected", + "structure": "element_of_2_str", + "source": "spark:csv:element_of_2.csv" + } + ], + "structures": [ + { + "name": "element_of_2_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + } + ] + }, + { + "name": "between_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_3/element_of_3-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_3/element_of_3-spark.json new file mode 100644 index 000000000..5639ad30a --- /dev/null +++ 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/element_of_3/element_of_3-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "between_test", + "source": "spark:csv:between_test.csv", + "structure": "between_test_str" + }, + { + "name": "expected", + "structure": "element_of_3_str", + "source": "spark:csv:element_of_3.csv" + } + ], + "structures": [ + { + "name": "between_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "element_of_3_str", + "components": [ + { + "name": "isleap", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "isleap", + "domain": "boolean" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal/equal-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal/equal-spark.json new file mode 100644 index 000000000..dd302513a --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal/equal-spark.json @@ -0,0 +1,100 @@ +{ + "datasets": [ + { + "name": "number_test4", + "source": "spark:csv:number_test4.csv", + "structure": "number_test4_str" + }, + { + "name": "comparison_eq_1", + "source": "spark:csv:comparison_eq_1.csv", + "structure": "comparison_eq_1_str" + }, + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "equal_str", + "source": 
"spark:csv:equal.csv" + } + ], + "structures": [ + { + "name": "equal_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + } + ] + }, + { + "name": "number_test4_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "comparison_eq_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal_1/equal_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal_1/equal_1-spark.json new file mode 100644 index 000000000..b41560dd9 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal_1/equal_1-spark.json @@ -0,0 +1,152 @@ +{ + "datasets": [ + { + "name": "dataset_comparison_test2", + "source": "spark:csv:dataset_comparison_test2.csv", + "structure": "dataset_comparison_test2_str" + }, + { + "name": "eq_1", + "source": "spark:csv:eq_1.csv", + "structure": "eq_1_str" + }, + { + "name": "dataset_comparison_test1", + "source": "spark:csv:dataset_comparison_test1.csv", + "structure": "dataset_comparison_test1_str" + }, + { + "name": "expected", + "structure": "equal_1_str", + "source": "spark:csv:equal_1.csv" + } + ], + "structures": [ + { + "name": "equal_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": 
"id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "dataset_comparison_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "eq_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "dataset_comparison_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal_2/equal_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal_2/equal_2-spark.json new file mode 100644 index 000000000..f553cc4ba --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/equal_2/equal_2-spark.json @@ -0,0 +1,130 @@ +{ + "datasets": [ + { + "name": "dataset_comparison_test2", + "source": 
"spark:csv:dataset_comparison_test2.csv", + "structure": "dataset_comparison_test2_str" + }, + { + "name": "dataset_comparison_test1", + "source": "spark:csv:dataset_comparison_test1.csv", + "structure": "dataset_comparison_test1_str" + }, + { + "name": "expected", + "structure": "eq_2_str", + "source": "spark:csv:equal_2.csv" + } + ], + "structures": [ + { + "name": "dataset_comparison_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "eq_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "eq", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "eq", + "domain": "boolean" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_1/exists_in_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_1/exists_in_1-spark.json new file mode 100644 index 
000000000..bac5e1ca5 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_1/exists_in_1-spark.json @@ -0,0 +1,90 @@ +{ + "datasets": [ + { + "name": "match_characters_test", + "source": "spark:csv:match_characters_test.csv", + "structure": "match_characters_test_str" + }, + { + "name": "exists_in_test2", + "source": "spark:csv:exists_in_test2.csv", + "structure": "exists_in_test2_str" + }, + { + "name": "expected", + "structure": "exists_in_1_str", + "source": "spark:csv:exists_in_1.csv" + } + ], + "structures": [ + { + "name": "match_characters_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "exists_in_1_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "exists_in_test2_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_2/exists_in_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_2/exists_in_2-spark.json new file mode 100644 index 000000000..53ccc6626 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_2/exists_in_2-spark.json @@ -0,0 +1,112 @@ +{ + "datasets": [ + { + "name": "match_characters_test", + "source": 
"spark:csv:match_characters_test.csv", + "structure": "match_characters_test_str" + }, + { + "name": "exists_in_1", + "source": "spark:csv:exists_in_1.csv", + "structure": "exists_in_1_str" + }, + { + "name": "exists_in_test2", + "source": "spark:csv:exists_in_test2.csv", + "structure": "exists_in_test2_str" + }, + { + "name": "expected", + "structure": "exists_in_2_str", + "source": "spark:csv:exists_in_2.csv" + } + ], + "structures": [ + { + "name": "exists_in_2_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "match_characters_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "exists_in_1_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "exists_in_test2_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_3/exists_in_3-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_3/exists_in_3-spark.json new file mode 100644 index 000000000..ec4858119 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exists_in_3/exists_in_3-spark.json @@ -0,0 +1,112 @@ +{ + "datasets": [ + { + 
"name": "match_characters_test", + "source": "spark:csv:match_characters_test.csv", + "structure": "match_characters_test_str" + }, + { + "name": "exists_in_1", + "source": "spark:csv:exists_in_1.csv", + "structure": "exists_in_1_str" + }, + { + "name": "exists_in_test2", + "source": "spark:csv:exists_in_test2.csv", + "structure": "exists_in_test2_str" + }, + { + "name": "expected", + "structure": "exists_in_3_str", + "source": "spark:csv:exists_in_3.csv" + } + ], + "structures": [ + { + "name": "exists_in_3_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "match_characters_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "exists_in_1_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "exists_in_test2_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exp/exp-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exp/exp-spark.json new file mode 100644 index 000000000..237b88106 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exp/exp-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + 
"name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "number_exp_1_str", + "source": "spark:csv:exp.csv" + } + ], + "structures": [ + { + "name": "number_exp_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exp_2/exp_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exp_2/exp_2-spark.json new file mode 100644 index 000000000..212b2ac5a --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/exp_2/exp_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "exp_2_str", + "source": "spark:csv:exp_2.csv" + } + ], + "structures": [ + { + "name": "exp_2_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "exp_3", + "role": "Measure" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + 
"variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "exp_3", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/fill_timeseries_1/fill_timeseries_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/fill_timeseries_1/fill_timeseries_1-spark.json new file mode 100644 index 000000000..15883e953 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/fill_timeseries_1/fill_timeseries_1-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "fill_timeseries_test", + "source": "spark:csv:fill_timeseries_test.csv", + "structure": "fill_timeseries_test_str" + }, + { + "name": "expected", + "structure": "fill_timeseries_1_str", + "source": "spark:csv:fill_timeseries_1.csv" + } + ], + "structures": [ + { + "name": "fill_timeseries_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_date", + "role": "Identifier" + } + ] + }, + { + "name": "fill_timeseries_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_date", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "id_date", + "domain": "date" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + 
} + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/fill_timeseries_2/fill_timeseries_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/fill_timeseries_2/fill_timeseries_2-spark.json new file mode 100644 index 000000000..d68f91c87 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/fill_timeseries_2/fill_timeseries_2-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "fill_timeseries_test", + "source": "spark:csv:fill_timeseries_test.csv", + "structure": "fill_timeseries_test_str" + }, + { + "name": "expected", + "structure": "fill_timeseries_2_str", + "source": "spark:csv:fill_timeseries_2.csv" + } + ], + "structures": [ + { + "name": "fill_timeseries_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_date", + "role": "Identifier" + } + ] + }, + { + "name": "fill_timeseries_2_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_date", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "id_date", + "domain": "date" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter/filter-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter/filter-spark.json new file mode 100644 index 
000000000..2bacfdade --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter/filter-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "clauses_filter_1_str", + "source": "spark:csv:filter.csv" + } + ], + "structures": [ + { + "name": "clauses_filter_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "obs_agg", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "obs_agg", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter_1/filter_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter_1/filter_1-spark.json new file mode 100644 index 000000000..4b99de5ab --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter_1/filter_1-spark.json @@ -0,0 +1,252 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + "name": "expected", + "structure": "filter_1_str", + "source": "spark:csv:filter_1.csv" + } + ], + "structures": [ + { + "name": "filter_1_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": 
"Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + }, + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + "domain": "string" + }, + { + "name": "source_agency", + "domain": "string" + }, + { + "name": 
"exr_suffix", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + "name": "exr_type", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter_2/filter_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter_2/filter_2-spark.json new file mode 100644 index 000000000..193fed84b --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/filter_2/filter_2-spark.json @@ -0,0 +1,252 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + "name": "expected", + "structure": "filter_2_str", + "source": "spark:csv:filter_2.csv" + } + ], + "structures": [ + { + "name": "filter_2_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + 
"name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + }, + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + "domain": "string" + }, + { + "name": "source_agency", + "domain": "string" + }, + { + "name": "exr_suffix", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "id_1", + "domain": "string" 
+ }, + { + "name": "id", + "domain": "string" + }, + { + "name": "exr_type", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/first_value_1/first_value_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/first_value_1/first_value_1-spark.json new file mode 100644 index 000000000..1e6e8aed8 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/first_value_1/first_value_1-spark.json @@ -0,0 +1,96 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "first_value_1_str", + "source": "spark:csv:first_value_1.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "first_value_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + 
"domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/first_value_2/first_value_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/first_value_2/first_value_2-spark.json new file mode 100644 index 000000000..b8c0b0931 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/first_value_2/first_value_2-spark.json @@ -0,0 +1,108 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "first_value_2_str", + "source": "spark:csv:first_value_2.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "first_value_2_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "me_3", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_3", + 
"domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/floor/floor-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/floor/floor-spark.json new file mode 100644 index 000000000..19c0067cf --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/floor/floor-spark.json @@ -0,0 +1,56 @@ +{ + "datasets": [ + { + "name": "number_test3", + "source": "spark:csv:number_test3.csv", + "structure": "number_test3_str" + }, + { + "name": "expected", + "structure": "number_floor_1_str", + "source": "spark:csv:floor.csv" + } + ], + "structures": [ + { + "name": "number_test3_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_floor_1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/floor_1/floor_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/floor_1/floor_1-spark.json new file mode 100644 index 000000000..de87f4ec9 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/floor_1/floor_1-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "floor_1_str", + "source": "spark:csv:floor_1.csv" + } + ], + "structures": [ + { + "name": "floor_1_str", + "components": [ + { + "name": "time_period", + "role": 
"Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/flow_to_stock_1/flow_to_stock_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/flow_to_stock_1/flow_to_stock_1-spark.json new file mode 100644 index 000000000..956350584 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/flow_to_stock_1/flow_to_stock_1-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "flow_test", + "source": "spark:csv:flow_test.csv", + "structure": "flow_test_str" + }, + { + "name": "expected", + "structure": "flow_to_stock_1_str", + "source": "spark:csv:flow_to_stock_1.csv" + } + ], + "structures": [ + { + "name": "flow_to_stock_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_date", + "role": "Identifier" + } + ] + }, + { + "name": "flow_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_date", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": 
"res3", + "domain": "boolean" + }, + { + "name": "id_date", + "domain": "date" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/flow_to_stock_2/flow_to_stock_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/flow_to_stock_2/flow_to_stock_2-spark.json new file mode 100644 index 000000000..40217796a --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/flow_to_stock_2/flow_to_stock_2-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "flow_annual_test", + "source": "spark:csv:flow_annual_test.csv", + "structure": "flow_annual_test_str" + }, + { + "name": "expected", + "structure": "flow_to_stock_2_str", + "source": "spark:csv:flow_to_stock_2.csv" + } + ], + "structures": [ + { + "name": "flow_to_stock_2_str", + "components": [ + { + "name": "id_annual", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "flow_annual_test_str", + "components": [ + { + "name": "id_annual", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_annual", + "domain": "date" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater/greater-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater/greater-spark.json new file mode 100644 index 000000000..bd9f735d6 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater/greater-spark.json @@ -0,0 +1,82 @@ +{ + "datasets": [ + { + "name": "number_test4", + "source": "spark:csv:number_test4.csv", + "structure": "number_test4_str" + }, + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "greater_str", + "source": "spark:csv:greater.csv" + } + ], + "structures": [ + { + "name": "greater_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + } + ] + }, + { + "name": "number_test4_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater_than_1/greater_than_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater_than_1/greater_than_1-spark.json new file mode 100644 index 000000000..29fc6675f --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater_than_1/greater_than_1-spark.json @@ -0,0 +1,152 @@ +{ + 
"datasets": [ + { + "name": "gr_1", + "source": "spark:csv:gr_1.csv", + "structure": "gr_1_str" + }, + { + "name": "dataset_comparison_test2", + "source": "spark:csv:dataset_comparison_test2.csv", + "structure": "dataset_comparison_test2_str" + }, + { + "name": "dataset_comparison_test1", + "source": "spark:csv:dataset_comparison_test1.csv", + "structure": "dataset_comparison_test1_str" + }, + { + "name": "expected", + "structure": "greater_than_1_str", + "source": "spark:csv:greater_than_1.csv" + } + ], + "structures": [ + { + "name": "gr_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "greater_than_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "dataset_comparison_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + 
{ + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater_than_2/greater_than_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater_than_2/greater_than_2-spark.json new file mode 100644 index 000000000..cd8da0430 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greater_than_2/greater_than_2-spark.json @@ -0,0 +1,130 @@ +{ + "datasets": [ + { + "name": "dataset_comparison_test2", + "source": "spark:csv:dataset_comparison_test2.csv", + "structure": "dataset_comparison_test2_str" + }, + { + "name": "dataset_comparison_test1", + "source": "spark:csv:dataset_comparison_test1.csv", + "structure": "dataset_comparison_test1_str" + }, + { + "name": "expected", + "structure": "gr_2_str", + "source": "spark:csv:greater_than_2.csv" + } + ], + "structures": [ + { + "name": "gr_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "gr", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } 
+ ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "gr", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greatereq/greatereq-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greatereq/greatereq-spark.json new file mode 100644 index 000000000..5b98289b8 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/greatereq/greatereq-spark.json @@ -0,0 +1,82 @@ +{ + "datasets": [ + { + "name": "number_test4", + "source": "spark:csv:number_test4.csv", + "structure": "number_test4_str" + }, + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "greatereq_str", + "source": "spark:csv:greatereq.csv" + } + ], + "structures": [ + { + "name": "number_test4_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "greatereq_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/if-then-else_1/if-then-else_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/if-then-else_1/if-then-else_1-spark.json new file mode 100644 index 000000000..168b9db41 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/if-then-else_1/if-then-else_1-spark.json @@ -0,0 +1,152 @@ +{ + "datasets": [ + { + "name": "if_test1", + "source": "spark:csv:if_test1.csv", + "structure": "if_test1_str" + }, + { + "name": "if_test2", + "source": "spark:csv:if_test2.csv", + "structure": "if_test2_str" + }, + { + "name": "if_test3", + "source": "spark:csv:if_test3.csv", + "structure": "if_test3_str" + }, + { + "name": "expected", + "structure": "if_1_str", + "source": "spark:csv:if-then-else_1.csv" + } + ], + "structures": [ + { + "name": "if_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "if_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "if_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "if_test3_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" 
+ }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/if-then-else_2/if-then-else_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/if-then-else_2/if-then-else_2-spark.json new file mode 100644 index 000000000..8bd68fdf6 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/if-then-else_2/if-then-else_2-spark.json @@ -0,0 +1,96 @@ +{ + "datasets": [ + { + "name": "if_test1", + "source": "spark:csv:if_test1.csv", + "structure": "if_test1_str" + }, + { + "name": "expected", + "structure": "if_2_str", + "source": "spark:csv:if-then-else_2.csv" + } + ], + "structures": [ + { + "name": "if_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "if_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": 
"integer" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/intersection/intersection-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/intersection/intersection-spark.json new file mode 100644 index 000000000..8336fa2de --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/intersection/intersection-spark.json @@ -0,0 +1,146 @@ +{ + "datasets": [ + { + "name": "intersection_test1", + "source": "spark:csv:intersection_test1.csv", + "structure": "intersection_test1_str" + }, + { + "name": "intersection_test2", + "source": "spark:csv:intersection_test2.csv", + "structure": "intersection_test2_str" + }, + { + "name": "expected", + "structure": "intersection_1_str", + "source": "spark:csv:intersection.csv" + } + ], + "structures": [ + { + "name": "intersection_1_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "edf2", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": "edf3", + "role": "Measure" + }, + { + "name": "quality_status", + "role": "Attribute" + } + ] + }, + { + "name": "intersection_test1_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "edf2", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": "edf3", + "role": "Measure" + }, + { + "name": "quality_status", + "role": "Attribute" + } + ] + }, + { + "name": "intersection_test2_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "edf2", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": 
"edf3", + "role": "Measure" + }, + { + "name": "quality_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "quality_status", + "domain": "string" + }, + { + "name": "edfdate", + "domain": "date" + }, + { + "name": "edf2", + "domain": "number" + }, + { + "name": "edf3", + "domain": "number" + }, + { + "name": "edf1", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "mkmvid", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/is_null_1/is_null_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/is_null_1/is_null_1-spark.json new file mode 100644 index 000000000..e58f39c4d --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/is_null_1/is_null_1-spark.json @@ -0,0 +1,122 @@ +{ + "datasets": [ + { + "name": "isnull_1", + "source": "spark:csv:isnull_1.csv", + "structure": "isnull_1_str" + }, + { + "name": "isnull_test1", + "source": "spark:csv:isnull_test1.csv", + "structure": "isnull_test1_str" + }, + { + "name": "expected", + "structure": "is_null_1_str", + "source": "spark:csv:is_null_1.csv" + } + ], + "structures": [ + { + "name": "is_null_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "isnull_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": 
"isnull_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/is_null_2/is_null_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/is_null_2/is_null_2-spark.json new file mode 100644 index 000000000..8e7afb63c --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/is_null_2/is_null_2-spark.json @@ -0,0 +1,100 @@ +{ + "datasets": [ + { + "name": "isnull_test1", + "source": "spark:csv:isnull_test1.csv", + "structure": "isnull_test1_str" + }, + { + "name": "expected", + "structure": "isnull_2_str", + "source": "spark:csv:is_null_2.csv" + } + ], + "structures": [ + { + "name": "isnull_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "isnull_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "isnl", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": 
"id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "isnl", + "domain": "boolean" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/keep/keep-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/keep/keep-spark.json new file mode 100644 index 000000000..84f426cf0 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/keep/keep-spark.json @@ -0,0 +1,84 @@ +{ + "datasets": [ + { + "name": "number_test6", + "source": "spark:csv:number_test6.csv", + "structure": "number_test6_str" + }, + { + "name": "expected", + "structure": "clauses_keep_1_str", + "source": "spark:csv:keep.csv" + } + ], + "structures": [ + { + "name": "number_test6_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "clauses_keep_1_str", + "components": [ + { + "name": "m2", + "role": "Measure" + }, + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "id2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "m2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/keep_1/keep_1-spark.json 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/keep_1/keep_1-spark.json new file mode 100644 index 000000000..b43f3cd80 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/keep_1/keep_1-spark.json @@ -0,0 +1,196 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + "name": "expected", + "structure": "keep_str", + "source": "spark:csv:keep_1.csv" + } + ], + "structures": [ + { + "name": "keep_str", + "components": [ + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "year", + "role": "Identifier" + } + ] + }, + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + "domain": "string" + }, + { + "name": "source_agency", 
+ "domain": "string" + }, + { + "name": "exr_suffix", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + "name": "exr_type", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lag_1/lag_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lag_1/lag_1-spark.json new file mode 100644 index 000000000..281f7d7ce --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lag_1/lag_1-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "lag_data", + "source": "spark:csv:lag.csv", + "structure": "lag_str" + }, + { + "name": "expected", + "structure": "lag_1_str", + "source": "spark:csv:lag_1.csv" + } + ], + "structures": [ + { + "name": "lag_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + } + ] + }, + { + "name": "lag_1_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "edfdate", + "domain": "date" + }, + { + "name": "edf1", + "domain": 
"number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "mkmvid", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lag_2/lag_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lag_2/lag_2-spark.json new file mode 100644 index 000000000..b073fcdb3 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lag_2/lag_2-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "lag_data", + "source": "spark:csv:lag.csv", + "structure": "lag_str" + }, + { + "name": "expected", + "structure": "lag_2_str", + "source": "spark:csv:lag_2.csv" + } + ], + "structures": [ + { + "name": "lag_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + } + ] + }, + { + "name": "lag_2_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": "previous", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "previous", + "domain": "number" + }, + { + "name": "edfdate", + "domain": "date" + }, + { + "name": "edf1", + "domain": "number" + }, + { + "name": "mkmvid", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/last_value_1/last_value_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/last_value_1/last_value_1-spark.json new file mode 100644 index 000000000..a4654e0d1 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/last_value_1/last_value_1-spark.json @@ -0,0 +1,96 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": 
"spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "last_value_1_str", + "source": "spark:csv:last_value_1.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "last_value_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/last_value_2/last_value_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/last_value_2/last_value_2-spark.json new file mode 100644 index 000000000..515d1c408 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/last_value_2/last_value_2-spark.json @@ -0,0 +1,108 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "last_value_2_str", + "source": "spark:csv:last_value_2.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + 
{ + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "last_value_2_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "me_3", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_3", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lead_1/lead_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lead_1/lead_1-spark.json new file mode 100644 index 000000000..5135d3e8b --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lead_1/lead_1-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "lag_data", + "source": "spark:csv:lag.csv", + "structure": "lag_str" + }, + { + "name": "expected", + "structure": "lead_1_str", + "source": "spark:csv:lead_1.csv" + } + ], + "structures": [ + { + "name": "lag_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + } + ] + }, + { + "name": "lead_1_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "mkmvid", + "role": 
"Identifier" + }, + { + "name": "edf1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "edfdate", + "domain": "date" + }, + { + "name": "edf1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "mkmvid", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lead_2/lead_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lead_2/lead_2-spark.json new file mode 100644 index 000000000..026ade31c --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lead_2/lead_2-spark.json @@ -0,0 +1,80 @@ +{ + "datasets": [ + { + "name": "lag_data", + "source": "spark:csv:lag.csv", + "structure": "lag_str" + }, + { + "name": "expected", + "structure": "lead_2_str", + "source": "spark:csv:lead_2.csv" + } + ], + "structures": [ + { + "name": "lag_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + } + ] + }, + { + "name": "lead_2_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "next", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "next", + "domain": "number" + }, + { + "name": "edfdate", + "domain": "date" + }, + { + "name": "edf1", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "mkmvid", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/less_than_1/less_than_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/less_than_1/less_than_1-spark.json new file mode 100644 
index 000000000..b10d6b4b3 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/less_than_1/less_than_1-spark.json @@ -0,0 +1,130 @@ +{ + "datasets": [ + { + "name": "dataset_comparison_test2", + "source": "spark:csv:dataset_comparison_test2.csv", + "structure": "dataset_comparison_test2_str" + }, + { + "name": "dataset_comparison_test1", + "source": "spark:csv:dataset_comparison_test1.csv", + "structure": "dataset_comparison_test1_str" + }, + { + "name": "expected", + "structure": "less_than_1_str", + "source": "spark:csv:less_than_1.csv" + } + ], + "structures": [ + { + "name": "dataset_comparison_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "less_than_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "le", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "le", + "domain": "boolean" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lesser/lesser-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lesser/lesser-spark.json new file mode 100644 index 000000000..bd95c69f0 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lesser/lesser-spark.json @@ -0,0 +1,81 @@ +{ + "datasets": [ + { + "name": "number_test4", + "source": "spark:csv:number_test4.csv", + "structure": "number_test4_str" + }, + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "lesser_str", + "source": "spark:csv:lesser.csv" + } + ], + "structures": [ + { + "name": "number_test4_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "lesser_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + } + ] + } ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lessereq/lessereq-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lessereq/lessereq-spark.json new file mode 100644 index 000000000..14d821f73 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/lessereq/lessereq-spark.json @@ -0,0 +1,82 @@ +{ + "datasets": [ + { + "name": "number_test4", + "source": 
"spark:csv:number_test4.csv", + "structure": "number_test4_str" + }, + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "lessereq_str", + "source": "spark:csv:lessereq.csv" + } + ], + "structures": [ + { + "name": "number_test4_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "lessereq_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln/ln-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln/ln-spark.json new file mode 100644 index 000000000..c52150ebc --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln/ln-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "ln_str", + "source": "spark:csv:ln.csv" + } + ], + "structures": [ + { + "name": "ln_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": 
"Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln_1/ln_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln_1/ln_1-spark.json new file mode 100644 index 000000000..818b0e123 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln_1/ln_1-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "ln_1_str", + "source": "spark:csv:ln_1.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "ln_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln_2/ln_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln_2/ln_2-spark.json new file mode 100644 index 000000000..98ef8f78d --- /dev/null +++ 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ln_2/ln_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "ln_2_str", + "source": "spark:csv:ln_2.csv" + } + ], + "structures": [ + { + "name": "ln_2_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "ln_3", + "role": "Measure" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "ln_3", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/log_1/log_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/log_1/log_1-spark.json new file mode 100644 index 000000000..88bfc9dd4 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/log_1/log_1-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "log_1_str", + "source": "spark:csv:log_1.csv" + } + ], + "structures": [ + { + "name": "log_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": 
"number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/log_2/log_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/log_2/log_2-spark.json new file mode 100644 index 000000000..75bfcd739 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/log_2/log_2-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "log_2_str", + "source": "spark:csv:log_2.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "log_2_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "log_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + }, + { + "name": "log_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "log_1", + "domain": "number" + }, + { + "name": "num_1", + "domain": "number" + }, + { + "name": "log_2", 
+ "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/match_characters_1/match_characters_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/match_characters_1/match_characters_1-spark.json new file mode 100644 index 000000000..5cb21ba52 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/match_characters_1/match_characters_1-spark.json @@ -0,0 +1,68 @@ +{ + "datasets": [ + { + "name": "match_characters_test", + "source": "spark:csv:match_characters_test.csv", + "structure": "match_characters_test_str" + }, + { + "name": "expected", + "structure": "match_characters_1_str", + "source": "spark:csv:match_characters_1.csv" + } + ], + "structures": [ + { + "name": "match_characters_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "match_characters_1_str", + "components": [ + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/match_characters_2/match_characters_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/match_characters_2/match_characters_2-spark.json new file mode 100644 index 000000000..39ebaa2c2 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/match_characters_2/match_characters_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": 
[ + { + "name": "match_characters_test", + "source": "spark:csv:match_characters_test.csv", + "structure": "match_characters_test_str" + }, + { + "name": "expected", + "structure": "match_characters_2_str", + "source": "spark:csv:match_characters_2.csv" + } + ], + "structures": [ + { + "name": "match_characters_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "match_characters_2_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "is_isin", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "is_isin", + "domain": "boolean" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/max/max-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/max/max-spark.json new file mode 100644 index 000000000..068c2659a --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/max/max-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "max_str", + "source": "spark:csv:max.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + 
{ + "name": "max_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/median/median-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/median/median-spark.json new file mode 100644 index 000000000..91714b2b2 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/median/median-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "median_data", + "source": "spark:csv:median_variance.csv", + "structure": "median_str" + }, + { + "name": "expected", + "structure": "median_1_str", + "source": "spark:csv:median.csv" + } + ], + "structures": [ + { + "name": "median_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "median_1_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": 
"me_2", + "domain": "integer" + }, + { + "name": "me_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/membership_1/membership_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/membership_1/membership_1-spark.json new file mode 100644 index 000000000..9cd54543b --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/membership_1/membership_1-spark.json @@ -0,0 +1,104 @@ +{ + "datasets": [ + { + "name": "gen_operators_test1", + "source": "spark:csv:gen_operators_test1.csv", + "structure": "gen_operators_test1_str" + }, + { + "name": "expected", + "structure": "membership_1_str", + "source": "spark:csv:membership_1.csv" + } + ], + "structures": [ + { + "name": "gen_operators_test1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "membership_1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "string_var", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + }, + { + "name": "string_var", + "domain": "string" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/min/min-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/min/min-spark.json new file mode 100644 index 000000000..cfd287e3a --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/min/min-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "min_str", + "source": "spark:csv:min.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "min_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/minus/minus-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/minus/minus-spark.json new file mode 100644 index 000000000..d6d80661f --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/minus/minus-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": 
"spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "minus_str", + "source": "spark:csv:minus.csv" + } + ], + "structures": [ + { + "name": "minus_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod/mod-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod/mod-spark.json new file mode 100644 index 000000000..9582870ea --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod/mod-spark.json @@ -0,0 +1,78 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "number_test2", + "source": "spark:csv:number_test2.csv", + "structure": "number_test2_str" + }, + { + "name": "expected", + "structure": "mod_str", + "source": "spark:csv:mod.csv" + } + ], + "structures": [ + { + "name": "mod_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "number_test2_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": 
"Measure" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod_1/mod_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod_1/mod_1-spark.json new file mode 100644 index 000000000..322bb6420 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod_1/mod_1-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "mod_test", + "source": "spark:csv:mod_test.csv", + "structure": "mod_test_str" + }, + { + "name": "expected", + "structure": "mod_1_str", + "source": "spark:csv:mod_1.csv" + } + ], + "structures": [ + { + "name": "mod_test_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "mod_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "integer" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "num_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod_2/mod_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod_2/mod_2-spark.json new file mode 100644 index 000000000..7f48de3fd --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mod_2/mod_2-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "mod_test", + "source": "spark:csv:mod_test.csv", + "structure": "mod_test_str" + }, + { + "name": 
"expected", + "structure": "mod_2_str", + "source": "spark:csv:mod_2.csv" + } + ], + "structures": [ + { + "name": "mod_test_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "mod_2_str", + "components": [ + { + "name": "mod_1", + "role": "Measure" + }, + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "integer" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "mod_1", + "domain": "integer" + }, + { + "name": "num_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mult/mult-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mult/mult-spark.json new file mode 100644 index 000000000..59ad541ad --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/mult/mult-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "mult_str", + "source": "spark:csv:mult.csv" + } + ], + "structures": [ + { + "name": "mult_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + 
"domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_1/not_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_1/not_1-spark.json new file mode 100644 index 000000000..35482e93b --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_1/not_1-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "not_1_str", + "source": "spark:csv:not_1.csv" + } + ], + "structures": [ + { + "name": "not_1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_2/not_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_2/not_2-spark.json new file mode 100644 index 000000000..46a8649d9 --- /dev/null +++ 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_2/not_2-spark.json @@ -0,0 +1,100 @@ +{ + "datasets": [ + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "not_2_str", + "source": "spark:csv:not_2.csv" + } + ], + "structures": [ + { + "name": "not_2_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_not", + "role": "Measure" + } + ] + }, + { + "name": "boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "me_not", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_equal_1/not_equal_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_equal_1/not_equal_1-spark.json new file mode 100644 index 000000000..106206cae --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_equal_1/not_equal_1-spark.json @@ -0,0 +1,152 @@ +{ + "datasets": [ + { + "name": "neq_1", + "source": "spark:csv:neq_1.csv", + "structure": "neq_1_str" + }, + { + "name": 
"dataset_comparison_test2", + "source": "spark:csv:dataset_comparison_test2.csv", + "structure": "dataset_comparison_test2_str" + }, + { + "name": "dataset_comparison_test1", + "source": "spark:csv:dataset_comparison_test1.csv", + "structure": "dataset_comparison_test1_str" + }, + { + "name": "expected", + "structure": "not_equal_1_str", + "source": "spark:csv:not_equal_1.csv" + } + ], + "structures": [ + { + "name": "not_equal_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "neq_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "dataset_comparison_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_equal_2/not_equal_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_equal_2/not_equal_2-spark.json new file mode 100644 index 000000000..2fd05f1ab --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_equal_2/not_equal_2-spark.json @@ -0,0 +1,130 @@ +{ + "datasets": [ + { + "name": "dataset_comparison_test2", + "source": "spark:csv:dataset_comparison_test2.csv", + "structure": "dataset_comparison_test2_str" + }, + { + "name": "dataset_comparison_test1", + "source": "spark:csv:dataset_comparison_test1.csv", + "structure": "dataset_comparison_test1_str" + }, + { + "name": "expected", + "structure": "not_equal_2_str", + "source": "spark:csv:not_equal_2.csv" + } + ], + "structures": [ + { + "name": "not_equal_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "neq", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "dataset_comparison_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + 
"name": "id_4", + "domain": "string" + }, + { + "name": "neq", + "domain": "boolean" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_xor/not_xor-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_xor/not_xor-spark.json new file mode 100644 index 000000000..5398dc833 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/not_xor/not_xor-spark.json @@ -0,0 +1,124 @@ +{ + "datasets": [ + { + "name": "boolean_test6", + "source": "spark:csv:boolean_test6.csv", + "structure": "boolean_test6_str" + }, + { + "name": "expected", + "structure": "not_xor_str", + "source": "spark:csv:not_xor.csv" + } + ], + "structures": [ + { + "name": "not_xor_str", + "components": [ + { + "name": "bool_not", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "bool_xor", + "role": "Measure" + } + ] + }, + { + "name": "boolean_test6_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "bool_not", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + 
}, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "me_2", + "domain": "boolean" + }, + { + "name": "bool_xor", + "domain": "boolean" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/notequal/notequal-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/notequal/notequal-spark.json new file mode 100644 index 000000000..41f256c0d --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/notequal/notequal-spark.json @@ -0,0 +1,82 @@ +{ + "datasets": [ + { + "name": "number_test4", + "source": "spark:csv:number_test4.csv", + "structure": "number_test4_str" + }, + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "notequal_str", + "source": "spark:csv:notequal.csv" + } + ], + "structures": [ + { + "name": "number_test4_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "notequal_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "bool_var", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_1/nvl_1-spark.json 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_1/nvl_1-spark.json new file mode 100644 index 000000000..b04a0c184 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_1/nvl_1-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "nvl_test1", + "source": "spark:csv:nvl_test1.csv", + "structure": "nvl_test1_str" + }, + { + "name": "expected", + "structure": "nvl_1_str", + "source": "spark:csv:nvl_1.csv" + } + ], + "structures": [ + { + "name": "nvl_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "nvl_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_2/nvl_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_2/nvl_2-spark.json new file mode 100644 index 000000000..eb52bc4e0 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_2/nvl_2-spark.json @@ -0,0 +1,122 @@ +{ + "datasets": [ + { + "name": "nvl_test1", + "source": "spark:csv:nvl_test1.csv", + "structure": "nvl_test1_str" + }, 
+ { + "name": "nvl_test2", + "source": "spark:csv:nvl_test2.csv", + "structure": "nvl_test2_str" + }, + { + "name": "expected", + "structure": "nvl_2_str", + "source": "spark:csv:nvl_2.csv" + } + ], + "structures": [ + { + "name": "nvl_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "nvl_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "nvl_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_3/nvl_3-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_3/nvl_3-spark.json new file mode 100644 index 000000000..7aff1bba9 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/nvl_3/nvl_3-spark.json @@ -0,0 +1,104 @@ +{ + "datasets": [ + { + "name": "nvl_test1", + "source": "spark:csv:nvl_test1.csv", + "structure": "nvl_test1_str" + }, + { + "name": 
"expected", + "structure": "nvl_3_str", + "source": "spark:csv:nvl_3.csv" + } + ], + "structures": [ + { + "name": "nvl_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "nvl_3_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "nullrepl", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "nullrepl", + "domain": "number" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_4", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/op_with_calculated_scalar/op_with_calculated_scalar-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/op_with_calculated_scalar/op_with_calculated_scalar-spark.json new file mode 100644 index 000000000..9d6fe5c0b --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/op_with_calculated_scalar/op_with_calculated_scalar-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "op_with_calculated_scalar_str", + "source": "spark:csv:op_with_calculated_scalar.csv" + } + ], + "structures": [ + { 
+ "name": "op_with_calculated_scalar_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/or_1/or_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/or_1/or_1-spark.json new file mode 100644 index 000000000..03e1d2dd2 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/or_1/or_1-spark.json @@ -0,0 +1,122 @@ +{ + "datasets": [ + { + "name": "boolean_test5", + "source": "spark:csv:boolean_test5.csv", + "structure": "boolean_test5_str" + }, + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "or_1_str", + "source": "spark:csv:or_1.csv" + } + ], + "structures": [ + { + "name": "or_1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test5_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": 
"boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/or_2/or_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/or_2/or_2-spark.json new file mode 100644 index 000000000..90d3dafa4 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/or_2/or_2-spark.json @@ -0,0 +1,112 @@ +{ + "datasets": [ + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "or_2_str", + "source": "spark:csv:or_2.csv" + } + ], + "structures": [ + { + "name": "or_2_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "orf", + "role": "Measure" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "ort", + "role": "Measure" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + 
"name": "id_1", + "domain": "string" + }, + { + "name": "ort", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "orf", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/parentheses_1/parentheses_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/parentheses_1/parentheses_1-spark.json new file mode 100644 index 000000000..db7aac5b8 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/parentheses_1/parentheses_1-spark.json @@ -0,0 +1,100 @@ +{ + "datasets": [ + { + "name": "gen_operators_test1", + "source": "spark:csv:gen_operators_test1.csv", + "structure": "gen_operators_test1_str" + }, + { + "name": "expected", + "structure": "parentheses_1_str", + "source": "spark:csv:parentheses_1.csv" + } + ], + "structures": [ + { + "name": "gen_operators_test1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "parentheses_1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "num_2", + "domain": "number" + }, + { + "name": "id_3", + 
"domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/parentheses_2/parentheses_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/parentheses_2/parentheses_2-spark.json new file mode 100644 index 000000000..abe2ee1e1 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/parentheses_2/parentheses_2-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "gen_operators_test1", + "source": "spark:csv:gen_operators_test1.csv", + "structure": "gen_operators_test1_str" + }, + { + "name": "expected", + "structure": "parentheses_2_str", + "source": "spark:csv:parentheses_2.csv" + } + ], + "structures": [ + { + "name": "gen_operators_test1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "parentheses_2_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "integer" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "integer" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_location_1/pattern_location_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_location_1/pattern_location_1-spark.json new file mode 100644 index 000000000..e431df57d --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_location_1/pattern_location_1-spark.json @@ -0,0 +1,84 @@ +{ + "datasets": [ + { + "name": "pattern_test", + "source": "spark:csv:pattern_test.csv", + "structure": "pattern_test_str" + }, + { + "name": "expected", + "structure": "pattern_location_1_str", + "source": "spark:csv:pattern_location_1.csv" + } + ], + "structures": [ + { + "name": "pattern_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "pattern_location_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "integer_var", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "integer_var", + "domain": "integer" + }, + { + "name": "id_3", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_location_2/pattern_location_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_location_2/pattern_location_2-spark.json new file mode 100644 index 000000000..98440fc19 --- /dev/null +++ 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_location_2/pattern_location_2-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "pattern_test", + "source": "spark:csv:pattern_test.csv", + "structure": "pattern_test_str" + }, + { + "name": "expected", + "structure": "pattern_location_2_str", + "source": "spark:csv:pattern_location_2.csv" + } + ], + "structures": [ + { + "name": "pattern_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "pattern_location_2_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "a_location", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "a_location", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_1/pattern_replacement_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_1/pattern_replacement_1-spark.json new file mode 100644 index 000000000..4ab2adc36 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_1/pattern_replacement_1-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "substring_test", + "source": "spark:csv:substring_test.csv", + "structure": 
"substring_test_str" + }, + { + "name": "expected", + "structure": "pattern_replacement_1_str", + "source": "spark:csv:pattern_replacement_1.csv" + } + ], + "structures": [ + { + "name": "substring_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "pattern_replacement_1_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/pattern_replacement_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/pattern_replacement_2-spark.json new file mode 100644 index 000000000..07917691a --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/pattern_replacement_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "substring_test", + "source": "spark:csv:substring_test.csv", + "structure": "substring_test_str" + }, + { + "name": "expected", + "structure": "pattern_replacement_2_str", + "source": "spark:csv:pattern_replacement_2.csv" + } + ], + "structures": [ + { + "name": "substring_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": 
"pattern_replacement_2_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/pattern_replacement_2.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/pattern_replacement_2.csv index 05bf6821c..925f079f7 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/pattern_replacement_2.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/pattern_replacement_2.csv @@ -1,9 +1,9 @@ id_1,id_2,text_1 -2001-12-14,B,"""-L-o-r-e-m- -i-p-s-u-m- -d-o-l-o-r- -s-i-t- -a-m-e-t-,- -c-o-n-s-e-c-t-e-t-u-r- -a-d-i-p-i-s-c-i-n-g- -e-l-i-t-.- -P-h-a-s-e-l-l-u-s- -a- -v-u-l-p-u-t-a-t-e- -u-r-n-a-.- -I-n- -f-e-r-m-e-n-t-u-m- -d-u-i- -e-g-e-t- -n-i-s-i- -p-l-a-c-e-r-a-t-,- -e-t-.-""" -2001-12-15,B,"""-N-u-l-l-a-m- -h-e-n-d-r-e-r-i-t- -m-o-l-e-s-t-i-e- -c-o-n-g-u-e-.- -U-t- -l-o-r-e-m- -l-o-r-e-m-,- -s-u-s-c-i-p-i-t- -q-u-i-s- -f-r-i-n-g-i-l-l-a- -e-t-,- -t-e-m-p-u-s- -r-u-t-r-u-m- -e-x-.- -E-t-i-a-m- -s-c-e-l-e-r-i-s-q-u-e- -d-i-a-m- -u-t- -s-a-p-i-e-n-.-""" -2001-12-14,A,"""-D-o-n-e-c- -v-e-n-e-n-a-t-i-s- -c-o-n-d-i-m-e-n-t-u-m- -j-u-s-t-o-,- -n-o-n- -i-a-c-u-l-i-s- -e-r-a-t- -f-i-n-i-b-u-s- -q-u-i-s-.- -A-l-i-q-u-a-m- -e-u-i-s-m-o-d-,- -l-a-c-u-s- -e-g-e-t- -l-a-c-i-n-i-a- -v-e-h-i-c-u-l-a-,- -n-u-l-l-a- -d-o-l-o-r- -d-a-p-i-b-u-s- -e-n-i-m-.-""" -2001-12-15,A,"""-N-u-n-c- -m-o-l-l-i-s- -v-i-t-a-e- 
-n-i-s-i- -v-a-r-i-u-s- -t-i-n-c-i-d-u-n-t-.- -V-i-v-a-m-u-s- -u-t- -v-e-l-i-t- -l-a-c-u-s-.- -M-a-u-r-i-s- -e-s-t- -u-r-n-a-,- -v-o-l-u-t-p-a-t- -a- -t-i-n-c-i-d-u-n-t- -s-e-d-,- -c-o-n-g-u-e- -u-t-.-""" -2001-12-14,D,"""-""" -2001-12-15,D,"""-V-i-v-a-m-u-s- -d-u-i- -a-r-c-u-,- -b-i-b-e-n-d-u-m- -n-o-n- -m-a-u-r-i-s- -n-e-c-,- -v-e-n-e-n-a-t-i-s- -e-g-e-s-t-a-s- -l-i-b-e-r-o-.- -P-r-o-i-n- -u-l-t-r-i-c-e-s- -l-e-c-t-u-s- -m-o-l-e-s-t-i-e- -p-u-r-u-s- -g-r-a-v-i-d-a-,- -e-t- -b-i-b-e-n-d-u-m- -a-r-c-u-.-""" -2001-12-14,G,"""-A-l-i-q-u-a-m- -c-u-r-s-u-s-,- -t-e-l-l-u-s- -u-t- -p-o-r-t-a- -i-n-t-e-r-d-u-m-,- -m-i- -l-a-c-u-s- -l-u-c-t-u-s- -l-o-r-e-m-,- -e-g-e-t- -c-o-n-g-u-e- -n-e-q-u-e- -l-e-o- -e-t- -a-n-t-e-.- -I-n- -c-o-m-m-o-d-o- -p-r-e-t-i-u-m-.-""" -2001-12-15,G,"""-P-r-a-e-s-e-n-t- -v-e-l- -a-n-t-e- -a- -s-e-m- -e-l-e-i-f-e-n-d- -e-g-e-s-t-a-s-.- -""" +2001-12-14,B,"-L-o-r-e-m- -i-p-s-u-m- -d-o-l-o-r- -s-i-t- -a-m-e-t-,- -c-o-n-s-e-c-t-e-t-u-r- -a-d-i-p-i-s-c-i-n-g- -e-l-i-t-.- -P-h-a-s-e-l-l-u-s- -a- -v-u-l-p-u-t-a-t-e- -u-r-n-a-.- -I-n- -f-e-r-m-e-n-t-u-m- -d-u-i- -e-g-e-t- -n-i-s-i- -p-l-a-c-e-r-a-t-,- -e-t-.-" +2001-12-15,B,"-N-u-l-l-a-m- -h-e-n-d-r-e-r-i-t- -m-o-l-e-s-t-i-e- -c-o-n-g-u-e-.- -U-t- -l-o-r-e-m- -l-o-r-e-m-,- -s-u-s-c-i-p-i-t- -q-u-i-s- -f-r-i-n-g-i-l-l-a- -e-t-,- -t-e-m-p-u-s- -r-u-t-r-u-m- -e-x-.- -E-t-i-a-m- -s-c-e-l-e-r-i-s-q-u-e- -d-i-a-m- -u-t- -s-a-p-i-e-n-.-" +2001-12-14,A,"-D-o-n-e-c- -v-e-n-e-n-a-t-i-s- -c-o-n-d-i-m-e-n-t-u-m- -j-u-s-t-o-,- -n-o-n- -i-a-c-u-l-i-s- -e-r-a-t- -f-i-n-i-b-u-s- -q-u-i-s-.- -A-l-i-q-u-a-m- -e-u-i-s-m-o-d-,- -l-a-c-u-s- -e-g-e-t- -l-a-c-i-n-i-a- -v-e-h-i-c-u-l-a-,- -n-u-l-l-a- -d-o-l-o-r- -d-a-p-i-b-u-s- -e-n-i-m-.-" +2001-12-15,A,"-N-u-n-c- -m-o-l-l-i-s- -v-i-t-a-e- -n-i-s-i- -v-a-r-i-u-s- -t-i-n-c-i-d-u-n-t-.- -V-i-v-a-m-u-s- -u-t- -v-e-l-i-t- -l-a-c-u-s-.- -M-a-u-r-i-s- -e-s-t- -u-r-n-a-,- -v-o-l-u-t-p-a-t- -a- -t-i-n-c-i-d-u-n-t- -s-e-d-,- -c-o-n-g-u-e- -u-t-.-" +2001-12-14,D, 
+2001-12-15,D,"-V-i-v-a-m-u-s- -d-u-i- -a-r-c-u-,- -b-i-b-e-n-d-u-m- -n-o-n- -m-a-u-r-i-s- -n-e-c-,- -v-e-n-e-n-a-t-i-s- -e-g-e-s-t-a-s- -l-i-b-e-r-o-.- -P-r-o-i-n- -u-l-t-r-i-c-e-s- -l-e-c-t-u-s- -m-o-l-e-s-t-i-e- -p-u-r-u-s- -g-r-a-v-i-d-a-,- -e-t- -b-i-b-e-n-d-u-m- -a-r-c-u-.-" +2001-12-14,G,"-A-l-i-q-u-a-m- -c-u-r-s-u-s-,- -t-e-l-l-u-s- -u-t- -p-o-r-t-a- -i-n-t-e-r-d-u-m-,- -m-i- -l-a-c-u-s- -l-u-c-t-u-s- -l-o-r-e-m-,- -e-g-e-t- -c-o-n-g-u-e- -n-e-q-u-e- -l-e-o- -e-t- -a-n-t-e-.- -I-n- -c-o-m-m-o-d-o- -p-r-e-t-i-u-m-.-" +2001-12-15,G,"-P-r-a-e-s-e-n-t- -v-e-l- -a-n-t-e- -a- -s-e-m- -e-l-e-i-f-e-n-d- -e-g-e-s-t-a-s-.- -" diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/substring_test.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/substring_test.csv index 359fb614c..bd040ff67 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/substring_test.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_2/substring_test.csv @@ -1,9 +1,9 @@ id_1,id_2,num_1,text_1 -2001-12-14,B,2.3,"""Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus a vulputate urna. In fermentum dui eget nisi placerat, et.""" -2001-12-15,B,,"""Nullam hendrerit molestie congue. Ut lorem lorem, suscipit quis fringilla et, tempus rutrum ex. Etiam scelerisque diam ut sapien.""" -2001-12-14,A,5.443,"""Donec venenatis condimentum justo, non iaculis erat finibus quis. Aliquam euismod, lacus eget lacinia vehicula, nulla dolor dapibus enim.""" -2001-12-15,A,2,"""Nunc mollis vitae nisi varius tincidunt. Vivamus ut velit lacus. Mauris est urna, volutpat a tincidunt sed, congue ut.""" -2001-12-14,D,4,"""""" -2001-12-15,D,6,"""Vivamus dui arcu, bibendum non mauris nec, venenatis egestas libero. 
Proin ultrices lectus molestie purus gravida, et bibendum arcu.""" -2001-12-14,G,4,"""Aliquam cursus, tellus ut porta interdum, mi lacus luctus lorem, eget congue neque leo et ante. In commodo pretium.""" -2001-12-15,G,1.1,"""Praesent vel ante a sem eleifend egestas. """ +2001-12-14,B,2.3,"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus a vulputate urna. In fermentum dui eget nisi placerat, et." +2001-12-15,B,,"Nullam hendrerit molestie congue. Ut lorem lorem, suscipit quis fringilla et, tempus rutrum ex. Etiam scelerisque diam ut sapien." +2001-12-14,A,5.443,"Donec venenatis condimentum justo, non iaculis erat finibus quis. Aliquam euismod, lacus eget lacinia vehicula, nulla dolor dapibus enim." +2001-12-15,A,2,"Nunc mollis vitae nisi varius tincidunt. Vivamus ut velit lacus. Mauris est urna, volutpat a tincidunt sed, congue ut." +2001-12-14,D,4,"" +2001-12-15,D,6,"Vivamus dui arcu, bibendum non mauris nec, venenatis egestas libero. Proin ultrices lectus molestie purus gravida, et bibendum arcu." +2001-12-14,G,4,"Aliquam cursus, tellus ut porta interdum, mi lacus luctus lorem, eget congue neque leo et ante. In commodo pretium." +2001-12-15,G,1.1,"Praesent vel ante a sem eleifend egestas. 
" diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_3/pattern_replacement_3-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_3/pattern_replacement_3-spark.json new file mode 100644 index 000000000..56a2ab94b --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pattern_replacement_3/pattern_replacement_3-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "substring_test", + "source": "spark:csv:substring_test.csv", + "structure": "substring_test_str" + }, + { + "name": "expected", + "structure": "pattern_replacement_3_str", + "source": "spark:csv:pattern_replacement_3.csv" + } + ], + "structures": [ + { + "name": "pattern_replacement_3_str", + "components": [ + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "substring_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "num_1", + "domain": "number" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/plus/plus-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/plus/plus-spark.json new file mode 100644 index 
000000000..3caee9e67 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/plus/plus-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "plus_str", + "source": "spark:csv:plus.csv" + } + ], + "structures": [ + { + "name": "plus_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pow/pow-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pow/pow-spark.json new file mode 100644 index 000000000..9719b2f1b --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/pow/pow-spark.json @@ -0,0 +1,51 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "pow_str" + }, + { + "name": "expected", + "structure": "pow_str", + "source": "spark:csv:pow.csv" + } + ], + "structures": [ + { + "name": "pow_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + 
"domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/power_1/power_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/power_1/power_1-spark.json new file mode 100644 index 000000000..57a33b790 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/power_1/power_1-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:power_1_in.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "power_1_str", + "source": "spark:csv:power_1.csv" + } + ], + "structures": [ + { + "name": "power_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "num_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/power_2/power_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/power_2/power_2-spark.json new file mode 100644 index 000000000..bc4addb05 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/power_2/power_2-spark.json @@ -0,0 +1,80 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:power_2_in.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "power_2_str", + "source": "spark:csv:power_2.csv" + } + ], + "structures": [ + { + 
"name": "power_2_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "pow_1", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + }, + { + "name": "pow_2", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "pow_1", + "domain": "integer" + }, + { + "name": "pow_2", + "domain": "number" + }, + { + "name": "num_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rank_1/rank_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rank_1/rank_1-spark.json new file mode 100644 index 000000000..9f5517c44 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rank_1/rank_1-spark.json @@ -0,0 +1,96 @@ +{ + "datasets": [ + { + "name": "rank_data", + "source": "spark:csv:rank.csv", + "structure": "rank_str" + }, + { + "name": "expected", + "structure": "rank_1_str", + "source": "spark:csv:rank_1.csv" + } + ], + "structures": [ + { + "name": "rank_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "me_3", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "rank_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": 
"id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "me_3", + "domain": "integer" + }, + { + "name": "me_2", + "domain": "integer" + }, + { + "name": "me_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ratio_to_report_1/ratio_to_report_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ratio_to_report_1/ratio_to_report_1-spark.json new file mode 100644 index 000000000..57c1796ed --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ratio_to_report_1/ratio_to_report_1-spark.json @@ -0,0 +1,96 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "ratio_to_report_1_str", + "source": "spark:csv:ratio_to_report_1.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "ratio_to_report_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { 
+ "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ratio_to_report_2/ratio_to_report_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ratio_to_report_2/ratio_to_report_2-spark.json new file mode 100644 index 000000000..4a273a962 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/ratio_to_report_2/ratio_to_report_2-spark.json @@ -0,0 +1,108 @@ +{ + "datasets": [ + { + "name": "ratiotoreport", + "source": "spark:csv:ratiotoreport.csv", + "structure": "ratiotoreport_str" + }, + { + "name": "expected", + "structure": "ratio_to_report_2_str", + "source": "spark:csv:ratio_to_report_2.csv" + } + ], + "structures": [ + { + "name": "ratiotoreport_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "ratio_to_report_2_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "me_3", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_3", + "domain": "number" + }, + { + "name": "me_1", 
+ "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rename/rename-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rename/rename-spark.json new file mode 100644 index 000000000..9f30c3a85 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rename/rename-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + "structure": "rename_str", + "source": "spark:csv:rename.csv" + } + ], + "structures": [ + { + "name": "rename_str", + "components": [ + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m2", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "m2", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rename_1/rename_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rename_1/rename_1-spark.json new file mode 100644 index 000000000..789468905 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/rename_1/rename_1-spark.json @@ -0,0 +1,284 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + 
"name": "expected", + "structure": "rename_str", + "source": "spark:csv:rename_1.csv" + } + ], + "structures": [ + { + "name": "rename_str", + "components": [ + { + "name": "me_4", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "me_3", + "role": "Measure" + }, + { + "name": "me_6", + "role": "Measure" + }, + { + "name": "me_5", + "role": "Measure" + }, + { + "name": "me_7", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + }, + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + 
}, + { + "name": "obs_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "source_agency", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "id_1", + "domain": "string" + }, + { + "name": "res5", + "domain": "boolean" + }, + { + "name": "me_6", + "domain": "string" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "me_7", + "domain": "string" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "me_3", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "me_4", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "me_5", + "domain": "string" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + }, + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + "domain": "string" + }, + { + "name": "exr_suffix", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + "name": "exr_type", + "domain": "string" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round/round-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round/round-spark.json new file mode 100644 index 000000000..223ca07e4 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round/round-spark.json @@ -0,0 +1,39 @@ +{ + "datasets": [ + { + "name": "number_test3", + "source": 
"spark:csv:number_test3.csv", + "structure": "round_str" + }, + { + "name": "expected", + "structure": "round_str", + "source": "spark:csv:round.csv" + } + ], + "structures": [ + { + "name": "round_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round_1/round_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round_1/round_1-spark.json new file mode 100644 index 000000000..c3b66b7a1 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round_1/round_1-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "round_1_str", + "source": "spark:csv:round_1.csv" + } + ], + "structures": [ + { + "name": "round_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round_2/round_2-spark.json 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round_2/round_2-spark.json new file mode 100644 index 000000000..56acee562 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/round_2/round_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "round_2_str", + "source": "spark:csv:round_2.csv" + } + ], + "structures": [ + { + "name": "round_2_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "round_1", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "round_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator1/self_defined_operator1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator1/self_defined_operator1-spark.json new file mode 100644 index 000000000..3e9e2f341 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator1/self_defined_operator1-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "number_test1_str" + }, + { + "name": "expected", + 
"structure": "self_defined_operator1_str", + "source": "spark:csv:self_defined_operator1.csv" + } + ], + "structures": [ + { + "name": "self_defined_operator1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + }, + { + "name": "number_test1_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test1.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test1.csv index c691b32ff..f62e2e033 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test1.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test1.csv @@ -1,9 +1,9 @@ id_1,id_2,id_3,text_1 -2012,B,Total,""" this is text""" -2011,B,Total,"""sample """ -2012,G,Total,"""third example""" -2011,G,Total,""" left space""" -2012,S,Total,"""right space """ -2011,S,Total,""" """ -2012,F,Total,""" 12-12-2001 """ -2011,F,Total,"""F """ +2012,B,Total," this is text" +2011,B,Total,"sample " +2012,G,Total,"third example" +2011,G,Total," left space" +2012,S,Total,"right space " +2011,S,Total," " +2012,F,Total," 12-12-2001 " +2011,F,Total,"F " diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test2.csv 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test2.csv index b57cfd8a4..e8a415db5 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test2.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/concat_test2.csv @@ -1,9 +1,9 @@ id_1,id_2,id_3,text_1 -2012,B,Total,"""abc def gh""" -2011,B,Total,""" comment!!""" -2012,G,Total,"""abc """ -2011,G,Total,"""""" +2012,B,Total,"abc def gh" +2011,B,Total," comment!!" +2012,G,Total,"abc " +2011,G,Total,"" 2012,S,Total, -2011,S,Total,"""nulls included above for testing """ -2012,F,Total,"""two spaces in between""" -2011,F,Total,"""123:) """ +2011,S,Total,"nulls included above for testing " +2012,F,Total,"two spaces in between" +2011,F,Total,"123:) " diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/self_defined_operator2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/self_defined_operator2-spark.json new file mode 100644 index 000000000..2fb0ceadd --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/self_defined_operator2-spark.json @@ -0,0 +1,102 @@ +{ + "datasets": [ + { + "name": "concat_test2", + "source": "spark:csv:concat_test2.csv", + "structure": "concat_test2_str" + }, + { + "name": "concat_test1", + "source": "spark:csv:concat_test1.csv", + "structure": "concat_test1_str" + }, + { + "name": "expected", + "structure": "self_defined_operator2_str", + "source": "spark:csv:self_defined_operator2.csv" + } + ], + "structures": [ + { + "name": "self_defined_operator2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + 
"role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "concat_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "concat_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/self_defined_operator2.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/self_defined_operator2.csv index 5608bd879..afb2bc459 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/self_defined_operator2.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/self_defined_operator2/self_defined_operator2.csv @@ -3,7 +3,7 @@ id_1,id_2,id_3,text_1 2011,B,Total,"sample - comment!!" 
2012,G,Total,"third example - abc " 2011,G,Total," left space - " -2012,S,Total, +2012,S,Total,"right space - " 2011,S,Total," - nulls included above for testing " 2012,F,Total," 12-12-2001 - two spaces in between" 2011,F,Total,"F - 123:) " diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/set_difference_1/set_difference_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/set_difference_1/set_difference_1-spark.json new file mode 100644 index 000000000..20db7c34f --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/set_difference_1/set_difference_1-spark.json @@ -0,0 +1,146 @@ +{ + "datasets": [ + { + "name": "intersection_test1", + "source": "spark:csv:intersection_test1.csv", + "structure": "intersection_test1_str" + }, + { + "name": "intersection_test2", + "source": "spark:csv:intersection_test2.csv", + "structure": "intersection_test2_str" + }, + { + "name": "expected", + "structure": "set_difference_1_str", + "source": "spark:csv:set_difference_1.csv" + } + ], + "structures": [ + { + "name": "set_difference_1_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "edf2", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": "edf3", + "role": "Measure" + }, + { + "name": "quality_status", + "role": "Attribute" + } + ] + }, + { + "name": "intersection_test1_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "edf2", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": "edf3", + "role": "Measure" + }, + { + "name": "quality_status", + "role": "Attribute" + } + ] + }, + { + "name": "intersection_test2_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + 
"name": "edf2", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": "edf3", + "role": "Measure" + }, + { + "name": "quality_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "quality_status", + "domain": "string" + }, + { + "name": "edfdate", + "domain": "date" + }, + { + "name": "edf2", + "domain": "number" + }, + { + "name": "edf3", + "domain": "number" + }, + { + "name": "edf1", + "domain": "number" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "mkmvid", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt/sqrt-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt/sqrt-spark.json new file mode 100644 index 000000000..d9d03b88d --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt/sqrt-spark.json @@ -0,0 +1,51 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "sqrt_str" + }, + { + "name": "expected", + "structure": "sqrt_str", + "source": "spark:csv:sqrt.csv" + } + ], + "structures": [ + { + "name": "sqrt_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt_1/sqrt_1-spark.json 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt_1/sqrt_1-spark.json new file mode 100644 index 000000000..1ff29501d --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt_1/sqrt_1-spark.json @@ -0,0 +1,64 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "sqrt_1_str", + "source": "spark:csv:sqrt_1.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "sqrt_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt_2/sqrt_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt_2/sqrt_2-spark.json new file mode 100644 index 000000000..ee9329395 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sqrt_2/sqrt_2-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "sqrt_2_str", + "source": "spark:csv:sqrt_2.csv" + } + ], + "structures": [ + { + "name": "sqrt_2_str", + "components": [ + { + "name": "sqrt_3", + "role": "Measure" + }, + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + 
"role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "integer" + }, + { + "name": "sqrt_3", + "domain": "number" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/standard_deviation_pop/standard_deviation_pop-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/standard_deviation_pop/standard_deviation_pop-spark.json new file mode 100644 index 000000000..fd66aaafe --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/standard_deviation_pop/standard_deviation_pop-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "median_data", + "source": "spark:csv:median_variance.csv", + "structure": "median_str" + }, + { + "name": "expected", + "structure": "standard_deviation_pop_str", + "source": "spark:csv:standard_deviation_pop.csv" + } + ], + "structures": [ + { + "name": "median_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "standard_deviation_pop_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { 
+ "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/standard_deviation_samp/standard_deviation_samp-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/standard_deviation_samp/standard_deviation_samp-spark.json new file mode 100644 index 000000000..512645d89 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/standard_deviation_samp/standard_deviation_samp-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "stdev", + "source": "spark:csv:stdev.csv", + "structure": "stdev_str" + }, + { + "name": "expected", + "structure": "standard_deviation_samp_str", + "source": "spark:csv:standard_deviation_samp.csv" + } + ], + "structures": [ + { + "name": "standard_deviation_samp_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "stdev_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/stock_to_flow_1/stock_to_flow_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/stock_to_flow_1/stock_to_flow_1-spark.json new file mode 100644 index 000000000..d966d52dc --- /dev/null 
+++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/stock_to_flow_1/stock_to_flow_1-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "stock_daily_test", + "source": "spark:csv:stock_daily_test.csv", + "structure": "stock_daily_test_str" + }, + { + "name": "expected", + "structure": "stock_to_flow_1_str", + "source": "spark:csv:stock_to_flow_1.csv" + } + ], + "structures": [ + { + "name": "stock_daily_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_daily", + "role": "Identifier" + } + ] + }, + { + "name": "stock_to_flow_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_daily", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_daily", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/stock_to_flow_2/stock_to_flow_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/stock_to_flow_2/stock_to_flow_2-spark.json new file mode 100644 index 000000000..48b756884 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/stock_to_flow_2/stock_to_flow_2-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "stock_annual_test", + "source": "spark:csv:stock_annual_test.csv", + "structure": "stock_annual_test_str" + }, + { 
+ "name": "expected", + "structure": "stock_to_flow_2_str", + "source": "spark:csv:stock_to_flow_2.csv" + } + ], + "structures": [ + { + "name": "stock_annual_test_str", + "components": [ + { + "name": "id_annual", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "stock_to_flow_2_str", + "components": [ + { + "name": "id_annual", + "role": "Identifier" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_annual", + "domain": "date" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_1/string_concatenation_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_1/string_concatenation_1-spark.json new file mode 100644 index 000000000..7f276a3e2 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_1/string_concatenation_1-spark.json @@ -0,0 +1,100 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "string_concatenation_1_str", + "source": "spark:csv:string_concatenation_1.csv" + } + ], + "structures": [ + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + 
"role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "string_concatenation_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_3", + "role": "Measure" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "number" + }, + { + "name": "text_3", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_2/string_concatenation_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_2/string_concatenation_2-spark.json new file mode 100644 index 000000000..462594841 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_2/string_concatenation_2-spark.json @@ -0,0 +1,100 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "string_concatenation_2_str", + "source": "spark:csv:string_concatenation_2.csv" + } + ], + "structures": [ + { + "name": "string_concatenation_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_3", + "role": "Measure" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + 
"role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "text_3", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_2/string_concatenation_2.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_2/string_concatenation_2.csv index 9e774671a..a49cd42f5 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_2/string_concatenation_2.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_2/string_concatenation_2.csv @@ -2,8 +2,8 @@ id_1,id_2,id_3,text_1,text_2,text_3 2012,B,Total," this is text","ABC def gh"," this is textABC def gh" 2011,B,Total,"sample "," comment!!","sample comment!!", 2012,G,Total,"third example","abc ","third exampleabc " -2011,G,Total," left space",, -2012,S,Total,"right space ",, +2011,G,Total," left space",," left space" +2012,S,Total,"right space ",,"right space " 2011,S,Total," ","NULLS included above for testing "," NULLS included above for testing " 2012,F,Total," 12-12-2001 ","two spaces in between"," 12-12-2001 two spaces in between" 2011,F,Total,"F ","123:) Aa ","F 123:) Aa " diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test1.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test1.csv index c691b32ff..f62e2e033 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test1.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test1.csv @@ -1,9 +1,9 @@ id_1,id_2,id_3,text_1 -2012,B,Total,""" this is text""" -2011,B,Total,"""sample """ -2012,G,Total,"""third example""" -2011,G,Total,""" left space""" -2012,S,Total,"""right space """ -2011,S,Total,""" """ -2012,F,Total,""" 12-12-2001 """ -2011,F,Total,"""F """ +2012,B,Total," this is text" +2011,B,Total,"sample " +2012,G,Total,"third example" +2011,G,Total," left space" +2012,S,Total,"right space " +2011,S,Total," " +2012,F,Total," 12-12-2001 " +2011,F,Total,"F " diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test2.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test2.csv index b57cfd8a4..384240a00 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test2.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/concat_test2.csv @@ -1,9 +1,9 @@ id_1,id_2,id_3,text_1 -2012,B,Total,"""abc def gh""" -2011,B,Total,""" comment!!""" -2012,G,Total,"""abc """ -2011,G,Total,"""""" +2012,B,Total,"abc def gh" +2011,B,Total," comment!!" 
+2012,G,Total,"abc " +2011,G,Total, 2012,S,Total, -2011,S,Total,"""nulls included above for testing """ -2012,F,Total,"""two spaces in between""" -2011,F,Total,"""123:) """ +2011,S,Total,"nulls included above for testing " +2012,F,Total,"two spaces in between" +2011,F,Total,"123:) " diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/string_concatenation_3-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/string_concatenation_3-spark.json new file mode 100644 index 000000000..7e7dca07f --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/string_concatenation_3-spark.json @@ -0,0 +1,102 @@ +{ + "datasets": [ + { + "name": "concat_test2", + "source": "spark:csv:concat_test2.csv", + "structure": "concat_test2_str" + }, + { + "name": "concat_test1", + "source": "spark:csv:concat_test1.csv", + "structure": "concat_test1_str" + }, + { + "name": "expected", + "structure": "string_concatenation_3_str", + "source": "spark:csv:string_concatenation_3.csv" + } + ], + "structures": [ + { + "name": "string_concatenation_3_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "concat_test2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "concat_test1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + 
"variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "text_1", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/string_concatenation_3.csv b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/string_concatenation_3.csv index 5608bd879..afb2bc459 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/string_concatenation_3.csv +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_concatenation_3/string_concatenation_3.csv @@ -3,7 +3,7 @@ id_1,id_2,id_3,text_1 2011,B,Total,"sample - comment!!" 2012,G,Total,"third example - abc " 2011,G,Total," left space - " -2012,S,Total, +2012,S,Total,"right space - " 2011,S,Total," - nulls included above for testing " 2012,F,Total," 12-12-2001 - two spaces in between" 2011,F,Total,"F - 123:) " diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_1/string_trim_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_1/string_trim_1-spark.json new file mode 100644 index 000000000..6774356a5 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_1/string_trim_1-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "string_trim_1_str", + "source": "spark:csv:string_trim_1.csv" + } + ], + "structures": [ + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + 
"name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "string_trim_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_2/string_trim_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_2/string_trim_2-spark.json new file mode 100644 index 000000000..ccb6bc78c --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_2/string_trim_2-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "string_trim_2_str", + "source": "spark:csv:string_trim_2.csv" + } + ], + "structures": [ + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "string_trim_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": 
"Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_3/string_trim_3-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_3/string_trim_3-spark.json new file mode 100644 index 000000000..7a9e608b9 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_3/string_trim_3-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "string_trim_3_str", + "source": "spark:csv:string_trim_3.csv" + } + ], + "structures": [ + { + "name": "string_trim_3_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_4/string_trim_4-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_4/string_trim_4-spark.json new file mode 100644 index 000000000..99d8c45db --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/string_trim_4/string_trim_4-spark.json @@ -0,0 +1,104 @@ +{ + "datasets": [ + { + "name": "trim_test", + "source": "spark:csv:trim_test.csv", + "structure": "trim_test_str" + }, + { + "name": "expected", + "structure": "string_trim_4_str", + "source": "spark:csv:string_trim_4.csv" + } + ], + "structures": [ + { + "name": "trim_test_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "string_trim_4_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "text_3", + "role": "Measure" + }, + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "text_4", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_2", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_1", + "domain": "integer" + }, + { + "name": "text_3", + "domain": "string" + }, + { + "name": "text_4", + "domain": "string" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sub/sub-spark.json 
b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sub/sub-spark.json new file mode 100644 index 000000000..99653d658 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sub/sub-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test5", + "source": "spark:csv:number_test5.csv", + "structure": "number_test5_str" + }, + { + "name": "expected", + "structure": "sub_str", + "source": "spark:csv:sub.csv" + } + ], + "structures": [ + { + "name": "number_test5_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "sub_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sub_1/sub_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sub_1/sub_1-spark.json new file mode 100644 index 000000000..167ed2842 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sub_1/sub_1-spark.json @@ -0,0 +1,248 @@ +{ + "datasets": [ + { + "name": "ecbexrusd_vtl", + "source": "spark:csv:ecbexrusd_vtl.csv", + "structure": "ecbexrusd_vtl_str" + }, + { + "name": "expected", + "structure": "sub_str", + "source": "spark:csv:sub_1.csv" + } + ], + "structures": [ + { + "name": "sub_str", + "components": [ + { + "name": "exr_suffix", + "role": 
"Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + }, + { + "name": "ecbexrusd_vtl_str", + "components": [ + { + "name": "exr_suffix", + "role": "Measure" + }, + { + "name": "time_format", + "role": "Attribute" + }, + { + "name": "freq", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id", + "role": "Attribute" + }, + { + "name": "title", + "role": "Attribute" + }, + { + "name": "unit_mult", + "role": "Attribute" + }, + { + "name": "year", + "role": "Identifier" + }, + { + "name": "unit", + "role": "Attribute" + }, + { + "name": "currency", + "role": "Measure" + }, + { + "name": "currency_denom", + "role": "Measure" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "source_agency", + "role": "Attribute" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "exr_type", + "role": "Measure" + }, + { + "name": "decimals", + "role": "Attribute" + }, + { + "name": "collection", + "role": "Attribute" + }, + { + "name": "obs_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "unit", + "domain": "string" + }, + { + "name": "decimals", + "domain": "string" + }, + { + "name": "currency_denom", + 
"domain": "string" + }, + { + "name": "source_agency", + "domain": "string" + }, + { + "name": "exr_suffix", + "domain": "string" + }, + { + "name": "collection", + "domain": "string" + }, + { + "name": "title", + "domain": "string" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "freq", + "domain": "string" + }, + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id", + "domain": "string" + }, + { + "name": "exr_type", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "time_format", + "domain": "string" + }, + { + "name": "obs_status", + "domain": "string" + }, + { + "name": "year", + "domain": "number" + }, + { + "name": "unit_mult", + "domain": "string" + }, + { + "name": "currency", + "domain": "string" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/substring_extraction_1/substring_extraction_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/substring_extraction_1/substring_extraction_1-spark.json new file mode 100644 index 000000000..606351905 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/substring_extraction_1/substring_extraction_1-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "substring_test", + "source": "spark:csv:substring_test.csv", + "structure": "substring_test_str" + }, + { + "name": "expected", + "structure": "substring_extraction_1_str", + "source": "spark:csv:substring_extraction_1.csv" + } + ], + "structures": [ + { + "name": "substring_extraction_1_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + } + ] + }, + { + "name": "substring_test_str", + "components": [ + { + "name": "text_1", + 
"role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/substring_extraction_2/substring_extraction_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/substring_extraction_2/substring_extraction_2-spark.json new file mode 100644 index 000000000..7fb2f216c --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/substring_extraction_2/substring_extraction_2-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "substring_test", + "source": "spark:csv:substring_test.csv", + "structure": "substring_test_str" + }, + { + "name": "expected", + "structure": "substring_extraction_2_str", + "source": "spark:csv:substring_extraction_2.csv" + } + ], + "structures": [ + { + "name": "substring_extraction_2_str", + "components": [ + { + "name": "text_2", + "role": "Measure" + }, + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "substring_test_str", + "components": [ + { + "name": "text_1", + "role": "Measure" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" 
+ }, + { + "name": "text_1", + "domain": "string" + }, + { + "name": "num_1", + "domain": "number" + }, + { + "name": "text_2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sum/sum-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sum/sum-spark.json new file mode 100644 index 000000000..ccf761505 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sum/sum-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "number_test5", + "source": "spark:csv:number_test5.csv", + "structure": "number_test5_str" + }, + { + "name": "expected", + "structure": "sum_str", + "source": "spark:csv:sum.csv" + } + ], + "structures": [ + { + "name": "number_test5_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + }, + { + "name": "sum_str", + "components": [ + { + "name": "id2", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "m2", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + }, + { + "name": "id2", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sum_1/sum_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sum_1/sum_1-spark.json new file mode 100644 index 000000000..7f756ea70 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/sum_1/sum_1-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "median_data", + "source": 
"spark:csv:median_variance.csv", + "structure": "median_str" + }, + { + "name": "expected", + "structure": "sum_str", + "source": "spark:csv:sum_1.csv" + } + ], + "structures": [ + { + "name": "median_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "sum_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/symmetric_difference_1/symmetric_difference_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/symmetric_difference_1/symmetric_difference_1-spark.json new file mode 100644 index 000000000..0f5b8b458 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/symmetric_difference_1/symmetric_difference_1-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "intersection_test1", + "source": "spark:csv:intersection_test1.csv", + "structure": "symmetric_difference_1_str" + }, + { + "name": "intersection_test2", + "source": "spark:csv:intersection_test2.csv", + "structure": "symmetric_difference_1_str" + }, + { + "name": "expected", + "structure": "symmetric_difference_1_str", + "source": "spark:csv:symmetric_difference_1.csv" + } + ], + "structures": [ + { + 
"name": "symmetric_difference_1_str", + "components": [ + { + "name": "edfdate", + "role": "Identifier" + }, + { + "name": "edf2", + "role": "Measure" + }, + { + "name": "mkmvid", + "role": "Identifier" + }, + { + "name": "edf1", + "role": "Measure" + }, + { + "name": "edf3", + "role": "Measure" + }, + { + "name": "quality_status", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "quality_status", + "domain": "string" + }, + { + "name": "edfdate", + "domain": "date" + }, + { + "name": "edf2", + "domain": "number" + }, + { + "name": "edf3", + "domain": "number" + }, + { + "name": "edf1", + "domain": "number" + }, + { + "name": "mkmvid", + "domain": "string" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/timeshift_1/timeshift_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/timeshift_1/timeshift_1-spark.json new file mode 100644 index 000000000..bac8efb74 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/timeshift_1/timeshift_1-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "stock_daily_test", + "source": "spark:csv:stock_daily_test.csv", + "structure": "stock_daily_test_str" + }, + { + "name": "expected", + "structure": "timeshift_1_str", + "source": "spark:csv:timeshift_1.csv" + } + ], + "structures": [ + { + "name": "stock_daily_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_daily", + "role": "Identifier" + } + ] + }, + { + "name": "timeshift_1_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_daily", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": 
"string" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_daily", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/timeshift_2/timeshift_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/timeshift_2/timeshift_2-spark.json new file mode 100644 index 000000000..9992d69e4 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/timeshift_2/timeshift_2-spark.json @@ -0,0 +1,92 @@ +{ + "datasets": [ + { + "name": "stock_daily_test", + "source": "spark:csv:stock_daily_test.csv", + "structure": "stock_daily_test_str" + }, + { + "name": "expected", + "structure": "timeshift_2_str", + "source": "spark:csv:timeshift_2.csv" + } + ], + "structures": [ + { + "name": "stock_daily_test_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_daily", + "role": "Identifier" + } + ] + }, + { + "name": "timeshift_2_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_daily", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "res4", + "domain": "boolean" + }, + { + "name": "res3", + "domain": "boolean" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "id_daily", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_1", + 
"domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/trunc_1/trunc_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/trunc_1/trunc_1-spark.json new file mode 100644 index 000000000..6fb15a800 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/trunc_1/trunc_1-spark.json @@ -0,0 +1,72 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + "structure": "trunc_1_str", + "source": "spark:csv:trunc_1.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "trunc_1_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/trunc_2/trunc_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/trunc_2/trunc_2-spark.json new file mode 100644 index 000000000..6ab889048 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/trunc_2/trunc_2-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "number_test7", + "source": "spark:csv:number_test7.csv", + "structure": "number_test7_str" + }, + { + "name": "expected", + 
"structure": "trunc_2_str", + "source": "spark:csv:trunc_2.csv" + } + ], + "structures": [ + { + "name": "number_test7_str", + "components": [ + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "trunc_2_str", + "components": [ + { + "name": "trun", + "role": "Measure" + }, + { + "name": "roun", + "role": "Measure" + }, + { + "name": "time_period", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "num_2", + "domain": "number" + }, + { + "name": "time_period", + "domain": "number" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "roun", + "domain": "number" + }, + { + "name": "num_1", + "domain": "number" + }, + { + "name": "trun", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/uminus/uminus-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/uminus/uminus-spark.json new file mode 100644 index 000000000..29f5485fd --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/uminus/uminus-spark.json @@ -0,0 +1,51 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "uminus_str" + }, + { + "name": "expected", + "structure": "uminus_str", + "source": "spark:csv:uminus.csv" + } + ], + "structures": [ + { + "name": "uminus_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" 
+ }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/union_1/union_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/union_1/union_1-spark.json new file mode 100644 index 000000000..73628e5c2 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/union_1/union_1-spark.json @@ -0,0 +1,110 @@ +{ + "datasets": [ + { + "name": "union_test_2", + "source": "spark:csv:union_test_2.csv", + "structure": "union_test_2_str" + }, + { + "name": "union_test_1", + "source": "spark:csv:union_test_1.csv", + "structure": "union_test_1_str" + }, + { + "name": "expected", + "structure": "union_1_str", + "source": "spark:csv:union_1.csv" + } + ], + "structures": [ + { + "name": "union_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "union_test_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "union_test_1_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "num_2", + "domain": "number" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "num_1", + "domain": "number" + } + ] +} diff --git 
a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/union_2/union_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/union_2/union_2-spark.json new file mode 100644 index 000000000..649558406 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/union_2/union_2-spark.json @@ -0,0 +1,76 @@ +{ + "datasets": [ + { + "name": "union_test_2", + "source": "spark:csv:union_test_2.csv", + "structure": "union_test_2_str" + }, + { + "name": "expected", + "structure": "union_2_str", + "source": "spark:csv:union_2.csv" + } + ], + "structures": [ + { + "name": "union_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + }, + { + "name": "union_test_2_str", + "components": [ + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "num_2", + "role": "Measure" + }, + { + "name": "num_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "date" + }, + { + "name": "num_2", + "domain": "number" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "num_1", + "domain": "integer" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/uplus/uplus-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/uplus/uplus-spark.json new file mode 100644 index 000000000..69cf0da57 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/uplus/uplus-spark.json @@ -0,0 +1,51 @@ +{ + "datasets": [ + { + "name": "number_test1", + "source": "spark:csv:number_test1.csv", + "structure": "uplus_str" + }, + { + "name": "expected", + "structure": "uplus_str", + 
"source": "spark:csv:uplus.csv" + } + ], + "structures": [ + { + "name": "uplus_str", + "components": [ + { + "name": "id1", + "role": "Identifier" + }, + { + "name": "m1", + "role": "Measure" + }, + { + "name": "att1", + "role": "Attribute" + } + ] + } + ], + "variables": [ + { + "name": "m1", + "domain": "number" + }, + { + "name": "id1", + "domain": "string" + }, + { + "name": "att1", + "domain": "string" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/var_pop/var_pop-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/var_pop/var_pop-spark.json new file mode 100644 index 000000000..8bdfccb55 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/var_pop/var_pop-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "median_data", + "source": "spark:csv:median_variance.csv", + "structure": "median_str" + }, + { + "name": "expected", + "structure": "var_pop_str", + "source": "spark:csv:var_pop.csv" + } + ], + "structures": [ + { + "name": "median_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "var_pop_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" 
+ } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/var_samp/var_samp-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/var_samp/var_samp-spark.json new file mode 100644 index 000000000..7dc9bffb6 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/var_samp/var_samp-spark.json @@ -0,0 +1,88 @@ +{ + "datasets": [ + { + "name": "median_data", + "source": "spark:csv:median_variance.csv", + "structure": "median_str" + }, + { + "name": "expected", + "structure": "var_samp_str", + "source": "spark:csv:var_samp.csv" + } + ], + "structures": [ + { + "name": "median_str", + "components": [ + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + }, + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + }, + { + "name": "var_samp_str", + "components": [ + { + "name": "me_2", + "role": "Measure" + }, + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_2", + "domain": "string" + }, + { + "name": "res2", + "domain": "boolean" + }, + { + "name": "id_3", + "domain": "date" + }, + { + "name": "res1", + "domain": "boolean" + }, + { + "name": "me_2", + "domain": "number" + }, + { + "name": "me_1", + "domain": "number" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/xor_1/xor_1-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/xor_1/xor_1-spark.json new file mode 100644 index 000000000..0bbd7bce9 --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/xor_1/xor_1-spark.json @@ -0,0 +1,122 @@ +{ + "datasets": [ + { + 
"name": "boolean_test5", + "source": "spark:csv:boolean_test5.csv", + "structure": "boolean_test5_str" + }, + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "xor_1_str", + "source": "spark:csv:xor_1.csv" + } + ], + "structures": [ + { + "name": "xor_1_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test5_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "number" + }, + { + "name": "id_4", + "domain": "number" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/xor_2/xor_2-spark.json b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/xor_2/xor_2-spark.json new file mode 100644 index 000000000..8dab72eff --- /dev/null +++ b/vtl-bundles/vtl-coverage/src/test/resources/it/bancaditalia/oss/vtl/coverage/tests/xor_2/xor_2-spark.json @@ -0,0 +1,100 @@ +{ + 
"datasets": [ + { + "name": "boolean_test4", + "source": "spark:csv:boolean_test4.csv", + "structure": "boolean_test4_str" + }, + { + "name": "expected", + "structure": "xor_2_str", + "source": "spark:csv:xor_2.csv" + } + ], + "structures": [ + { + "name": "xor_2_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "xorf", + "role": "Measure" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + }, + { + "name": "boolean_test4_str", + "components": [ + { + "name": "id_2", + "role": "Identifier" + }, + { + "name": "id_4", + "role": "Identifier" + }, + { + "name": "me_1", + "role": "Measure" + }, + { + "name": "id_3", + "role": "Identifier" + }, + { + "name": "id_1", + "role": "Identifier" + } + ] + } + ], + "variables": [ + { + "name": "id_1", + "domain": "string" + }, + { + "name": "id_3", + "domain": "string" + }, + { + "name": "me_1", + "domain": "boolean" + }, + { + "name": "id_2", + "domain": "integer" + }, + { + "name": "id_4", + "domain": "integer" + }, + { + "name": "xorf", + "domain": "boolean" + }, + { + "name": "bool_var", + "domain": "boolean" + } + ] +} diff --git a/vtl-bundles/vtl-coverage/src/test/resources/log4j2.xml b/vtl-bundles/vtl-coverage/src/test/resources/log4j2.xml index cfee69254..95e7c39ed 100644 --- a/vtl-bundles/vtl-coverage/src/test/resources/log4j2.xml +++ b/vtl-bundles/vtl-coverage/src/test/resources/log4j2.xml @@ -43,8 +43,8 @@ - - + + diff --git a/vtl-bundles/vtl-jupyter/pom.xml b/vtl-bundles/vtl-jupyter/pom.xml index be2e9351f..a0e503ed4 100644 --- a/vtl-bundles/vtl-jupyter/pom.xml +++ b/vtl-bundles/vtl-jupyter/pom.xml @@ -109,9 +109,9 @@ it.bancaditalia.oss.vtl - vtl-parser - parser-js + vtl-jsparser zip + grammar diff --git a/vtl-bundles/vtl-jupyter/src/main/java/it/bancaditalia/oss/vtl/impl/jupyter/MessageReplies.java 
b/vtl-bundles/vtl-jupyter/src/main/java/it/bancaditalia/oss/vtl/impl/jupyter/MessageReplies.java index 4511f22a6..aa15763f2 100644 --- a/vtl-bundles/vtl-jupyter/src/main/java/it/bancaditalia/oss/vtl/impl/jupyter/MessageReplies.java +++ b/vtl-bundles/vtl-jupyter/src/main/java/it/bancaditalia/oss/vtl/impl/jupyter/MessageReplies.java @@ -188,7 +188,7 @@ private static void compile(MessageChannel iopub, JupyterMessage request, Jupyte VTLSession oldSession = SESSIONS.get(sessionName); VTLSession vtlSession = SESSIONS.compute(sessionName, (n, v) -> { String mergedCode = v == null ? code : v.getOriginalCode() + "\n\n" + code; - return ConfigurationManagerFactory.getInstance().createSession(mergedCode); + return ConfigurationManagerFactory.newManager().createSession(mergedCode); }); Map compiled = new HashMap<>(vtlSession.compile()); diff --git a/vtl-bundles/vtl-r/vtl-editor/pom.xml b/vtl-bundles/vtl-r/vtl-editor/pom.xml index 85fc56e71..475776a51 100644 --- a/vtl-bundles/vtl-r/vtl-editor/pom.xml +++ b/vtl-bundles/vtl-r/vtl-editor/pom.xml @@ -38,9 +38,9 @@ it.bancaditalia.oss.vtl - vtl-parser - parser-js + vtl-jsparser zip + grammar @@ -52,14 +52,9 @@ index.ts package.json - package-lock.json tsconfig.json test.html - - node_modules - Vtl* - @@ -68,6 +63,7 @@ maven-dependency-plugin + unpack-grammar generate-resources unpack-dependencies @@ -76,9 +72,7 @@ ${project.build.outputDirectory} - vtl-parser - parser-js - zip + vtl-jsparser diff --git a/vtl-envs/vtl-spark/pom.xml b/vtl-envs/vtl-spark/pom.xml index 279033743..bcdc821e0 100644 --- a/vtl-envs/vtl-spark/pom.xml +++ b/vtl-envs/vtl-spark/pom.xml @@ -53,7 +53,6 @@ org.apache.spark spark-sql_${scala.compat.version} provided - true org.slf4j diff --git a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/DataPointEncoder.java b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/DataPointEncoder.java index 1b5f7e2ac..ef35d2a62 100644 --- 
a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/DataPointEncoder.java +++ b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/DataPointEncoder.java @@ -45,11 +45,9 @@ import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; -import it.bancaditalia.oss.vtl.impl.types.dataset.DataStructureBuilder; import it.bancaditalia.oss.vtl.model.data.Component; import it.bancaditalia.oss.vtl.model.data.Component.NonIdentifier; import it.bancaditalia.oss.vtl.model.data.DataPoint; -import it.bancaditalia.oss.vtl.model.data.DataSetMetadata; import it.bancaditalia.oss.vtl.model.data.DataStructureComponent; import it.bancaditalia.oss.vtl.model.data.Lineage; import it.bancaditalia.oss.vtl.model.data.ScalarValue; @@ -60,14 +58,12 @@ public class DataPointEncoder implements Serializable { private static final long serialVersionUID = 1L; public final DataStructureComponent[] components; - public final DataSetMetadata structure; public final StructType schema; public final Encoder rowEncoder; public final Encoder rowEncoderNoLineage; - public DataPointEncoder(Set> dataStructure) + public DataPointEncoder(Set> structure) { - structure = dataStructure instanceof DataSetMetadata ? 
(DataSetMetadata) dataStructure : new DataStructureBuilder(dataStructure).build(); components = structure.toArray(new DataStructureComponent[structure.size()]); Arrays.sort(components, DataStructureComponent::byNameAndRole); List fields = new ArrayList<>(createStructFromComponents(components)); @@ -103,7 +99,7 @@ public DataPointImpl decode(Row row, int start) { ScalarValue[] vals = new ScalarValue[components.length]; for (int i = 0; i < components.length; i++) - vals[i] = getScalarFor(components[i], row.get(i + start)); + vals[i] = getScalarFor(components[i], (Serializable) row.get(i + start)); Object lineage = row.get(components.length + start); if (lineage instanceof byte[]) @@ -131,11 +127,6 @@ public Encoder getRowEncoderNoLineage() { return rowEncoderNoLineage; } - - public DataSetMetadata getStructure() - { - return structure; - } public static class DataPointImpl extends AbstractMap, ScalarValue> implements DataPoint, Serializable { diff --git a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkDataSet.java b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkDataSet.java index aa01cbc03..86a316a41 100644 --- a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkDataSet.java +++ b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkDataSet.java @@ -29,13 +29,18 @@ import static it.bancaditalia.oss.vtl.impl.environment.spark.SparkUtils.getNamesFromComponents; import static it.bancaditalia.oss.vtl.impl.environment.spark.SparkUtils.getScalarFor; import static it.bancaditalia.oss.vtl.impl.types.dataset.DataPointBuilder.toDataPoint; +import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.NULLDS; import static it.bancaditalia.oss.vtl.model.transform.analytic.LimitCriterion.LimitDirection.PRECEDING; import static it.bancaditalia.oss.vtl.model.transform.analytic.SortCriterion.SortingMethod.ASC; import static 
it.bancaditalia.oss.vtl.model.transform.analytic.WindowCriterion.LimitType.RANGE; import static it.bancaditalia.oss.vtl.util.SerCollectors.collectingAndThen; +import static it.bancaditalia.oss.vtl.util.SerCollectors.mapping; import static it.bancaditalia.oss.vtl.util.SerCollectors.toArray; +import static it.bancaditalia.oss.vtl.util.SerCollectors.toConcurrentMap; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; import static it.bancaditalia.oss.vtl.util.SerCollectors.toMapWithValues; +import static it.bancaditalia.oss.vtl.util.SerUnaryOperator.identity; +import static java.lang.Boolean.TRUE; import static java.util.Collections.emptyMap; import static java.util.Spliterator.ORDERED; import static java.util.Spliterators.spliteratorUnknownSize; @@ -48,7 +53,6 @@ import static org.apache.spark.sql.functions.udaf; import static org.apache.spark.sql.functions.udf; import static org.apache.spark.sql.types.DataTypes.createArrayType; -import static scala.collection.JavaConverters.asJava; import static scala.collection.JavaConverters.asScala; import java.io.Serializable; @@ -56,13 +60,17 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.Set; +import java.util.SortedMap; +import java.util.concurrent.ConcurrentSkipListMap; import java.util.stream.IntStream; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -88,24 +96,22 @@ import org.apache.spark.sql.expressions.Window; import org.apache.spark.sql.expressions.Window$; import org.apache.spark.sql.expressions.WindowSpec; +import org.apache.spark.sql.types.ArrayType; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.io.Input; - import it.bancaditalia.oss.vtl.exceptions.VTLInvariantIdentifiersException; import it.bancaditalia.oss.vtl.impl.environment.spark.DataPointEncoder.DataPointImpl; +import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.impl.types.dataset.AbstractDataSet; import it.bancaditalia.oss.vtl.impl.types.dataset.DataPointBuilder; import it.bancaditalia.oss.vtl.impl.types.dataset.DataStructureBuilder; import it.bancaditalia.oss.vtl.impl.types.lineage.LineageExternal; import it.bancaditalia.oss.vtl.impl.types.lineage.LineageNode; import it.bancaditalia.oss.vtl.model.data.Component.Identifier; -import it.bancaditalia.oss.vtl.model.data.Component.NonIdentifier; import it.bancaditalia.oss.vtl.model.data.DataPoint; import it.bancaditalia.oss.vtl.model.data.DataSet; import it.bancaditalia.oss.vtl.model.data.DataSetMetadata; @@ -113,6 +119,7 @@ import it.bancaditalia.oss.vtl.model.data.Lineage; import it.bancaditalia.oss.vtl.model.data.ScalarValue; import it.bancaditalia.oss.vtl.model.data.Variable; +import it.bancaditalia.oss.vtl.model.domain.ValueDomainSubset; import it.bancaditalia.oss.vtl.model.transform.analytic.LimitCriterion; import it.bancaditalia.oss.vtl.model.transform.analytic.WindowClause; import it.bancaditalia.oss.vtl.util.SerBiFunction; @@ -123,8 +130,6 @@ import it.bancaditalia.oss.vtl.util.SerPredicate; import it.bancaditalia.oss.vtl.util.SerUnaryOperator; import it.bancaditalia.oss.vtl.util.Utils; -import scala.collection.immutable.Seq; -import scala.collection.immutable.SeqOps; public class SparkDataSet extends AbstractDataSet { @@ -135,11 +140,11 @@ public class SparkDataSet extends AbstractDataSet private final DataPointEncoder encoder; private final Dataset dataFrame; - public SparkDataSet(SparkSession session, DataPointEncoder encoder, Dataset dataFrame) + public SparkDataSet(SparkSession session, DataSetMetadata 
structure, DataPointEncoder encoder, Dataset dataFrame) { - super(encoder.getStructure()); + super(structure); - for (DataStructureComponent component: encoder.getStructure()) + for (DataStructureComponent component: encoder.components) { String name = component.getVariable().getName(); dataFrame = dataFrame.withColumn(name, dataFrame.col(name), getMetadataFor(component)); @@ -153,9 +158,9 @@ public SparkDataSet(SparkSession session, DataPointEncoder encoder, Dataset logInfo(toString()); } - public SparkDataSet(SparkSession session, DataSetMetadata dataStructure, Dataset dataFrame) + public SparkDataSet(SparkSession session, DataSetMetadata structure, Dataset dataFrame) { - this(session, new DataPointEncoder(dataStructure), dataFrame); + this(session, structure, new DataPointEncoder(structure), dataFrame); } public SparkDataSet(SparkSession session, DataSetMetadata dataStructure, DataSet toWrap) @@ -187,14 +192,16 @@ public Dataset getDataFrame() @Override public DataSet membership(String alias) { - final DataSetMetadata membershipStructure = getMetadata().membership(alias); + DataSetMetadata membershipStructure = getMetadata().membership(alias); LOGGER.debug("Creating dataset by membership on {} from {} to {}", alias, getMetadata(), membershipStructure); - DataStructureComponent membershipMeasure = membershipStructure.getMeasures().iterator().next(); - Dataset newDF = dataFrame; - if (!getMetadata().contains(membershipMeasure)) - newDF = dataFrame.withColumnRenamed(alias, membershipMeasure.getVariable().getName()); + Optional> idMembership = getMetadata().getComponent(alias).filter(d -> d.is(Identifier.class)); + if (idMembership.isPresent()) + { + String defaultName = idMembership.get().getVariable().getDomain().getDefaultVariable().getName(); + newDF = dataFrame.withColumn(defaultName, dataFrame.col(alias)); + } Column[] columns = getColumnsFromComponents(membershipStructure).toArray(new Column[membershipStructure.size()]); @@ -247,7 +254,7 @@ public DataSet 
subspace(Map LineageNode.of("sub " + keyValues, (Lineage) dpLin), LineageSparkUDT) .apply(newDf.col("$lineage$"))); - return new SparkDataSet(session, newEncoder, newDf); + return new SparkDataSet(session, newMetadata, newEncoder, newDf); } @Override @@ -340,7 +347,7 @@ public DataSet flatmapKeepingKeys(DataSetMetadata metadata, SerFunction leftDataframe = dataFrame.as("a"); Dataset rightDataframe = sparkOther.dataFrame.as("b"); - Column joinKeys = null; - for (String commonID: commonIDs) - { - Column newCond = leftDataframe.col(commonID).equalTo(rightDataframe.col(commonID)); - joinKeys = joinKeys == null ? newCond : joinKeys.and(newCond); - } - int leftSize = getMetadata().size(); // TODO: Try to execute filter with a null datapoint? @@ -371,6 +371,13 @@ public DataSet filteredMappedJoin(DataSetMetadata metadata, DataSet other, SerBi return predicate.test(encoder.decode(row), right); }; + Column joinKeys = null; + for (String commonID: commonIDs) + { + Column newCond = leftDataframe.col(commonID).equalTo(rightDataframe.col(commonID)); + joinKeys = joinKeys == null ? newCond : joinKeys.and(newCond); + } + DataPointEncoder newEncoder = new DataPointEncoder(metadata); MapPartitionsFunction sparkMerge = iterator -> new Iterator() { @Override @@ -388,19 +395,23 @@ public Row next() } }; - Dataset joined = leftDataframe.join(rightDataframe, joinKeys, leftJoin ? "left" : "inner"); + Dataset joined; + if (commonIDs.isEmpty()) + joined = leftDataframe.crossJoin(rightDataframe); + else + joined = leftDataframe.join(rightDataframe, joinKeys, leftJoin ? "left" : "inner"); + if (!predicate.equals(DataSet.ALL)) joined = joined.filter(sparkFilter); return new SparkDataSet(session, metadata, joined.mapPartitions(sparkMerge, newEncoder.getRowEncoder())); } + @Override - public DataSet analytic(SerFunction lineageOp, - Map, ? 
extends DataStructureComponent> components, - WindowClause clause, - Map, SerCollector, ?, TT>> collectors, - Map, SerBiFunction, Collection>>> finishers) + public DataSet analytic(SerFunction lineageOp, DataStructureComponent sourceComp, + DataStructureComponent destComp, WindowClause clause, SerFunction extractor, + SerCollector collector2, SerBiFunction>> finisher) { // Convert a VTL window clause to a Spark Window Specification WindowSpec windowSpec = null; @@ -432,90 +443,102 @@ public DataSet analytic(SerFunction lineageOp, windowSpec = windowSpec == null ? Window.rangeBetween(inf, sup) : windowSpec.rangeBetween(inf, sup); else windowSpec = windowSpec == null ? Window.rowsBetween(inf, sup) : windowSpec.rowsBetween(inf, sup); - WindowSpec finalWindowSpec = windowSpec == null ? Window$.MODULE$.spec() : windowSpec; + + SerCollector collector; + if (extractor == null) + collector = (SerCollector) collector2; + else + collector = mapping(extractor.compose(encoder::decode), collector2); + + Serializable accum = (Serializable) collector.supplier().get(); + @SuppressWarnings("unchecked") + Serializable tag = ((SerFunction) collector.finisher()).apply(accum); - // Sort by dest component + DataSetMetadata structure = getMetadata(); + ValueDomainSubset domain = sourceComp.getVariable().getDomain(); + Encoder accEncoder = SparkUtils.getEncoderFor(accum, domain, structure); @SuppressWarnings("unchecked") - Entry, DataStructureComponent>[] compArray = (Entry, DataStructureComponent>[]) components.entrySet().toArray(new Entry[components.size()]); - Arrays.sort(compArray, (e1, e2) -> DataStructureComponent.byNameAndRole(e1.getValue(), e2.getValue())); + Encoder ttEncoder = (Encoder) SparkUtils.getEncoderFor(tag, domain, structure); + + Column[] columns = new Column[getMetadata().size() + 1]; + int l = 0; + for (String col: dataFrame.columns()) + columns[l++] = dataFrame.col(col); - // Create the udafs to generate each dest component - Map destComponents = new HashMap<>(); - 
for (int i = 0; i < compArray.length; i++) + Column udaf, input; + UDF2 udf; + if (extractor == null) // then T is ScalarValue { - DataStructureComponent oldC = (DataStructureComponent) compArray[i].getKey(); - - @SuppressWarnings("unchecked") - // Safe as all scalars are Serializable - Encoder measureEncoder = (Encoder) getEncoderFor(oldC); - Column column = dataFrame.col(oldC.getVariable().getName()); - Column udaf = udaf(new VTLSparkAggregator<>(oldC, compArray[i].getValue(), collectors.get(oldC), session), measureEncoder) - .apply(column) + Encoder inputEncoder = (Encoder) getEncoderFor(NullValue.instance(NULLDS), domain, structure); + udaf = udaf(new VTLSparkAggregator<>(collector2, accEncoder, ttEncoder), inputEncoder) + .apply(dataFrame.col(sourceComp.getVariable().getName())) .over(finalWindowSpec); - - // Apply the finisher (as an udf) to the result of the window function; the result will be exploded - Column udf = udf(udfForComponent(oldC, finishers.get(oldC)), createArrayType(getDataTypeFor(oldC))) - .apply(udaf, column); - - destComponents.put(compArray[i].getValue().getVariable().getName(), udf); - }; - - // apply all the udfs - Dataset withColumns = dataFrame.withColumns(destComponents); + udf = nonExtractingUDF(sourceComp, finisher); + input = dataFrame.col(sourceComp.getVariable().getName()); + } + else + { + udaf = udaf(new VTLSparkAggregator<>(collector, accEncoder, ttEncoder), encoder.rowEncoder) + .apply(columns) + .over(finalWindowSpec); + udf = extractorUDF(sourceComp, extractor, finisher); + input = struct(columns); + } - // explode each column that is the result of the analytic invocation - for (String name: destComponents.keySet()) - withColumns = withColumns.withColumn(name, explode(withColumns.col(name))); - Dataset exploded = withColumns; + String destName = destComp.getVariable().getName(); + ArrayType arrayType = createArrayType(getDataTypeFor(sourceComp)); + Dataset analyticResult = dataFrame.withColumn(destName, udf(udf, 
arrayType).apply(udaf, input)); + Dataset exploded = analyticResult.withColumn(destName, explode(analyticResult.col(destName))); // Structure of the result DataSetMetadata newStructure = new DataStructureBuilder(getMetadata()) - .removeComponents(components.keySet()) - .addComponents(components.values()) + .removeComponent(sourceComp) + .addComponent(destComp) .build(); DataPointEncoder resultEncoder = new DataPointEncoder(newStructure); Column[] cols = getColumnsFromComponents(newStructure).toArray(new Column[newStructure.size() + 1]); cols[cols.length - 1] = exploded.col("$lineage$"); - return new SparkDataSet(session, resultEncoder, exploded.select(cols)); + return new SparkDataSet(session, newStructure, resultEncoder, exploded.select(cols)); } - private UDF2 udfForComponent(DataStructureComponent comp, - SerBiFunction, Collection>> finisher) + private UDF2 extractorUDF(DataStructureComponent comp, SerFunction extractor, SerBiFunction>> finisher) { - return (newV, oldV) -> { - // The result should be TT, but it must be constructed back if it isn't - // ClassCastException may happen if the result type is not supported by spark - Object result; - if (newV instanceof byte[]) - { - // This is the decoder for Encoders.kryo(ArrayList.class) used in VTLSparkAggregator - final Kryo kryo = new Kryo(); - Serializable[] source = (Serializable[]) kryo.readClassAndObject(new Input((byte[]) newV)); - result = Arrays.stream(source) - .map(v -> v instanceof byte[] ? 
kryo.readClassAndObject(new Input((byte[]) v)) : v) - .map(v -> getScalarFor(comp, v)) - .collect(toList()); - } - else if (newV instanceof Seq) - { - @SuppressWarnings("unchecked") - SeqOps, Seq> ravV = (SeqOps, Seq>) newV; - result = asJava(ravV.map(serialized -> getScalarFor(comp, serialized))); - } + return (serAcc, input) -> { + serAcc = SparkUtils.reinterpret(comp, serAcc); + + Collection> finished; + if (finisher != null) + finished = finisher.apply((TT) serAcc, extractor.apply(encoder.decode(input))); else - result = getScalarFor(comp, newV); + finished = List.of((ScalarValue) serAcc); - Collection> finished = finisher.apply((TT) result, getScalarFor(comp, oldV)); - return finished.stream() .map(ScalarValue::get) - .collect(toArray(new Object[finished.size()])); + .collect(toArray(new Serializable[finished.size()])); }; } - + + private UDF2 nonExtractingUDF(DataStructureComponent comp, SerBiFunction>> finisher) + { + return (serAcc, input) -> { + serAcc = SparkUtils.reinterpret(comp, serAcc); + input = SparkUtils.reinterpret(comp, input); + + Collection> finished; + if (finisher != null) + finished = finisher.apply((TT) serAcc, (T) input); + else + finished = List.of((ScalarValue) serAcc); + + return finished.stream() + .map(ScalarValue::get) + .collect(toArray(new Serializable[finished.size()])); + }; + } + @Override public , ScalarValue>> DataSet aggregate(DataSetMetadata structure, Set> keys, SerCollector groupCollector, @@ -538,10 +561,9 @@ else if (newV instanceof Seq) else { DataPointEncoder keyEncoder = new DataPointEncoder(keys); - Column[] keyNames = keys.stream() - .map(DataStructureComponent::getVariable).map(Variable::getName) - .map(dataFrame::col) - .collect(toArray(new Column[keys.size()])); + Column[] keyNames = new Column[keys.size()]; + for (int i = 0; i < keyNames.length; i++) + keyNames[i] = dataFrame.col(keyEncoder.components[i].getVariable().getName()); MapGroupsFunction aggregator = (keyRow, s) -> { Map, ScalarValue> keyValues = 
keys.stream() @@ -583,37 +605,78 @@ public Stream streamByKeys(Set sample = dataFrame.limit(1).collectAsList().stream().map(encoder::decode).collect(toList()); T sampleResult = finisher.apply(sample.stream().collect(groupCollector), sample.get(0).getValues(keys, Identifier.class)); - + if (sampleResult instanceof List && !((List) sampleResult).isEmpty() && (((List) sampleResult).get(0) instanceof DataPoint)) + return processAsList(groupCollector, finisher, sortedKeys, groupingCols, keyEncoder); + else if (sampleResult instanceof SortedMap && !((SortedMap) sampleResult).isEmpty() && + ((SortedMap) sampleResult).firstKey() instanceof DataPoint) { - LOGGER.warn("An unsupported transformation will move data into the driver. OutOfMemoryError may occur."); - - // case: supports decoding into a List for fill_time_series - List> resultComponents = getMetadata().stream() - .sorted(DataStructureComponent::byNameAndRole) - .collect(toList()); - - // Use kryo encoder hoping that the class has been registered beforehand - Encoder resultEncoder = Encoders.kryo(Serializable[][].class); - - Dataset result = dataFrame.groupBy(groupingCols).as(keyEncoder, encoder.getRowEncoder()) - .mapGroups(groupMapper(groupCollector, finisher, sortedKeys, encoder), resultEncoder); - - return StreamSupport.stream(spliteratorUnknownSize(result.toLocalIterator(), 0), !Utils.SEQUENTIAL) - // decode Row[] from the UDF into List - .map(group -> Arrays.stream(group) + @SuppressWarnings("unchecked") + Comparator comparator = (Comparator) ((SortedMap) sampleResult).comparator(); + return processAsBoolMap(groupCollector, finisher, sortedKeys, groupingCols, keyEncoder, comparator); + } + else + // Other cases not supported + throw new UnsupportedOperationException(sampleResult.getClass().getSimpleName() + " not supported in Spark datasets"); + } + + private Stream processAsList(SerCollector groupCollector, + SerBiFunction, ScalarValue>, T> finisher, + DataStructureComponent[] sortedKeys, Column[] 
groupingCols, Encoder keyEncoder) + { + LOGGER.warn("An unsupported transformation will move data into the driver. OutOfMemoryError may occur."); + + // case: supports decoding into a List for fill_time_series + List> resultComponents = getMetadata().stream() + .sorted(DataStructureComponent::byNameAndRole) + .collect(toList()); + + // Use kryo encoder hoping that the class has been registered beforehand + Encoder resultEncoder = Encoders.kryo(Serializable[][].class); + + Dataset result = dataFrame.groupBy(groupingCols).as(keyEncoder, encoder.getRowEncoder()) + .mapGroups(groupToListMapper(groupCollector, finisher, sortedKeys, encoder), resultEncoder); + + return StreamSupport.stream(spliteratorUnknownSize(result.toLocalIterator(), 0), !Utils.SEQUENTIAL) + // decode Row[] from the UDF into List + .map(group -> Arrays.stream(group) + .map(array -> IntStream.range(0, array.length - 1) + .mapToObj(i -> new SimpleEntry<>(resultComponents.get(i), getScalarFor(resultComponents.get(i), array[i]))) + .collect(toDataPoint((Lineage) array[array.length - 1], getMetadata()))) + .collect(toList())) + .map(out -> (T) out); + } + + private Stream processAsBoolMap(SerCollector groupCollector, + SerBiFunction, ScalarValue>, T> finisher, + DataStructureComponent[] sortedKeys, Column[] groupingCols, Encoder keyEncoder, + Comparator comparator) + { + LOGGER.warn("An unsupported transformation will move data into the driver. 
OutOfMemoryError may occur."); + + // case: supports decoding into a List for fill_time_series + List> resultComponents = getMetadata().stream() + .sorted(DataStructureComponent::byNameAndRole) + .collect(toList()); + + // Use kryo encoder hoping that the class has been registered beforehand + Encoder resultEncoder = Encoders.kryo(Serializable[][].class); + + Dataset result = dataFrame.groupBy(groupingCols).as(keyEncoder, encoder.getRowEncoder()) + .mapGroups(groupToSkipListMapper(groupCollector, finisher, sortedKeys, encoder), resultEncoder); + + // Rebuild a stream of ConcurrentSkipListMap + return StreamSupport.stream(spliteratorUnknownSize(result.toLocalIterator(), 0), !Utils.SEQUENTIAL) + // decode Row[] from the UDF into ConcurrentSkipListMap + .map(group -> Arrays.stream(group) .map(array -> IntStream.range(0, array.length - 1) .mapToObj(i -> new SimpleEntry<>(resultComponents.get(i), getScalarFor(resultComponents.get(i), array[i]))) .collect(toDataPoint((Lineage) array[array.length - 1], getMetadata()))) - .collect(toList())) + .collect(toConcurrentMap(identity(), k -> TRUE, (a, b) -> a, () -> new ConcurrentSkipListMap<>(comparator)))) .map(out -> (T) out); - } - else - // Other cases not supported - throw new UnsupportedOperationException(sampleResult.getClass().getName() + " not supported in Spark datasets"); } - + @Override public DataSet union(SerFunction lineageOp, List others) { @@ -647,10 +710,10 @@ public DataSet union(SerFunction lineageOp, List ot .select(cols) .withColumn("$lineage$", lineage); - return new SparkDataSet(session, encoder, result); + return new SparkDataSet(session, getMetadata(), encoder, result); } - private static MapGroupsFunction groupMapper(SerCollector groupCollector, + private static MapGroupsFunction groupToListMapper(SerCollector groupCollector, SerBiFunction, ScalarValue>, T> finisher, DataStructureComponent[] sortedKeys, DataPointEncoder encoder) { @@ -661,13 +724,48 @@ private static MapGroupsFunction groupMap Map, 
ScalarValue> keyMap = new HashMap<>(); for (int i = 0; i < keyRow.size(); i++) - keyMap.put(sortedKeys[i], getScalarFor(sortedKeys[i], keyRow.get(i))); + keyMap.put(sortedKeys[i], getScalarFor(sortedKeys[i], (Serializable) keyRow.get(i))); // Each group is mapped to an array of rows where each row is an array of values Serializable[][] array = ((Collection) finisher.apply(before, keyMap)).stream() .map(DataPoint.class::cast) .map(encoder::encode) - .collect(collectingAndThen(toList(), l -> l.toArray(new Serializable[l.size()][]))); + .map(row -> { + int size = row.length(); + Serializable[] arrayRow = new Serializable[size]; + for (int i = 0; i < size; i++) + arrayRow[i] = (Serializable) row.get(i); + return arrayRow; + }).collect(collectingAndThen(toList(), l -> l.toArray(new Serializable[l.size()][]))); + + return array; + }; + } + + private static MapGroupsFunction groupToSkipListMapper(SerCollector groupCollector, + SerBiFunction, ScalarValue>, T> finisher, + DataStructureComponent[] sortedKeys, DataPointEncoder encoder) + { + return (keyRow, values) -> { + TT before = StreamSupport.stream(spliteratorUnknownSize(values, ORDERED), !Utils.SEQUENTIAL) + .map(encoder::decode) + .collect(groupCollector); + + Map, ScalarValue> keyMap = new HashMap<>(); + for (int i = 0; i < keyRow.size(); i++) + keyMap.put(sortedKeys[i], getScalarFor(sortedKeys[i], (Serializable) keyRow.get(i))); + + // Each group is mapped to an array of rows where each row is an array of values + Serializable[][] array = ((ConcurrentSkipListMap) finisher.apply(before, keyMap)).keySet().stream() + .map(DataPoint.class::cast) + .map(encoder::encode) + .map(row -> { + int size = row.length(); + Serializable[] arrayRow = new Serializable[size]; + for (int i = 0; i < size; i++) + arrayRow[i] = (Serializable) row.get(i); + return arrayRow; + }).collect(collectingAndThen(toList(), l -> l.toArray(new Serializable[l.size()][]))); return array; }; diff --git 
a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkEnvironment.java b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkEnvironment.java index 601e4d02e..06b034557 100644 --- a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkEnvironment.java +++ b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkEnvironment.java @@ -34,8 +34,13 @@ import static org.apache.spark.sql.functions.lit; import static org.apache.spark.sql.functions.to_date; import static org.apache.spark.sql.functions.udf; +import static org.apache.spark.sql.types.DataTypes.BooleanType; +import static org.apache.spark.sql.types.DataTypes.DoubleType; import static org.apache.spark.sql.types.DataTypes.LongType; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.security.InvalidParameterException; import java.util.AbstractMap.SimpleEntry; import java.util.Arrays; import java.util.EnumSet; @@ -82,6 +87,11 @@ import it.bancaditalia.oss.vtl.model.data.DataStructureComponent; import it.bancaditalia.oss.vtl.model.data.Lineage; import it.bancaditalia.oss.vtl.model.data.VTLValue; +import it.bancaditalia.oss.vtl.model.domain.BooleanDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.DateDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.IntegerDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.NumberDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.ValueDomainSubset; import it.bancaditalia.oss.vtl.session.MetadataRepository; public class SparkEnvironment implements Environment @@ -96,12 +106,14 @@ public class SparkEnvironment implements Environment new VTLPropertyImpl("vtl.spark.ui.port", "Indicates which port the Spark web UI should be listening to", "4040", EnumSet.of(REQUIRED), "4040"); public static final VTLProperty VTL_SPARK_PAGE_SIZE = new VTLPropertyImpl("vtl.spark.page.size", "Indicates the buffer size when 
retrieving datapoints from Spark", "1000", EnumSet.of(REQUIRED), "1000"); + public static final VTLProperty VTL_SPARK_SEARCH_PATH = + new VTLPropertyImpl("vtl.spark.search.path", "Path to search for spark files", System.getenv("VTL_PATH"), EnumSet.of(REQUIRED), System.getenv("VTL_PATH")); /* package */ static final LineageSparkUDT LineageSparkUDT = new LineageSparkUDT(); static { - registerSupportedProperties(SparkEnvironment.class, VTL_SPARK_MASTER_CONNECTION, VTL_SPARK_UI_ENABLED, VTL_SPARK_UI_PORT, VTL_SPARK_PAGE_SIZE); + registerSupportedProperties(SparkEnvironment.class, VTL_SPARK_MASTER_CONNECTION, VTL_SPARK_UI_ENABLED, VTL_SPARK_UI_PORT, VTL_SPARK_PAGE_SIZE, VTL_SPARK_SEARCH_PATH); if (!UDTRegistration.exists(Lineage.class.getName())) { List> lClasses = List.of(LineageExternal.class, LineageCall.class, LineageNode.class, LineageImpl.class, LineageSet.class, Lineage.class); @@ -115,6 +127,7 @@ public static class VTLKryoRegistrator implements KryoRegistrator @Override public void registerClasses(Kryo kryo) { + com.esotericsoftware.minlog.Log.DEBUG(); LineageSerializer lineageSerializer = new LineageSerializer(); kryo.register(LineageExternal.class, lineageSerializer); kryo.register(LineageCall.class, lineageSerializer); @@ -128,6 +141,7 @@ public void registerClasses(Kryo kryo) private final SparkSession session; private final Map frames = new ConcurrentHashMap<>(); private final DataFrameReader reader; + private final List paths; public SparkEnvironment() { @@ -142,7 +156,11 @@ public SparkEnvironment() .set("spark.sql.catalyst.dateType", "Instant") .set("spark.executor.instances", "4") .set("spark.executor.cores", "2") -// .set("spark.sql.codegen.wholeStage", "false") + /* enable for DEBUG + .set("spark.sql.codegen.wholeStage", "false") + .set("spark.sql.codegen", "false") + .set("spark.sql.codegen.factoryMode", "NO_CODEGEN") + //*/ .set("spark.sql.windowExec.buffer.in.memory.threshold", "16384") .set("spark.sql.caseSensitive", "true") 
.set("spark.executor.extraClassPath", System.getProperty("java.class.path")) @@ -151,6 +169,7 @@ public SparkEnvironment() // Set SEQUENTIAL to avoid creating new threads while inside the executor System.setProperty("vtl.sequential", "true"); + paths = VTL_SPARK_SEARCH_PATH.getValues(); session = SparkSession .builder() @@ -163,6 +182,7 @@ public SparkEnvironment() public SparkEnvironment(SparkContext sc) { + paths = VTL_SPARK_SEARCH_PATH.getValues(); session = SparkSession .builder() .config(sc.getConf()) @@ -174,7 +194,6 @@ public SparkEnvironment(SparkContext sc) @Override public boolean contains(String name) { - name = name.matches("'.*'") ? name.replaceAll("'(.*)'", "$1") : name.toLowerCase(); if (!name.startsWith("spark:")) return false; name = name.substring(6); @@ -190,25 +209,41 @@ public boolean contains(String name) } @Override - public Optional getValue(MetadataRepository repo, String name) + public Optional getValue(MetadataRepository repo, String alias) { - if (!contains(name)) + if (frames.containsKey(alias)) + return Optional.of(frames.get(alias)); + + String source = repo.getDatasetSource(alias); + if (!contains(source)) return Optional.empty(); - + SparkDataSet dataset = null; - String[] parts = name.split(":"); + String[] parts = source.substring(6).split(":"); + if (parts.length != 2) + throw new InvalidParameterException("Invalid source format: " + source); + + String file = paths.stream() + .map(path -> Paths.get(path, parts[1])) + .filter(Files::exists) + .limit(1) + .peek(path -> LOGGER.info("Found {} in {}", parts[1], path)) + .findAny() + .orElseThrow(() -> new InvalidParameterException("Cannot find " + parts[0] + " file in Spark search path: " + parts[1])) + .toString(); + switch (parts[0]) { case "csv": - dataset = inferSchema(repo, reader.format("csv").option("header", "true").load(parts[1]), name); + dataset = inferSchema(repo, reader.format("csv").option("header", "true").load(file), alias); break; default: - dataset = 
inferSchema(repo, reader.format(parts[0]).load(parts[1]), name); + dataset = inferSchema(repo, reader.format(parts[0]).load(file), source); break; } - frames.put(name, dataset); - return Optional.of(frames.get(name)); + frames.put(alias, dataset); + return Optional.of(frames.get(alias)); } @Override @@ -258,15 +293,37 @@ public boolean store(VTLValue value, String alias) private SparkDataSet inferSchema(MetadataRepository repo, Dataset sourceDataFrame, String alias) { + Column lineage = new Column(Literal.create(LineageSparkUDT.serialize(LineageExternal.of("spark:" + alias)), LineageSparkUDT)); StructType schema = sourceDataFrame.schema(); - if (schema.forall(field -> !(field.dataType() instanceof StructType || field.dataType() instanceof ArrayType) && field.metadata().contains("Role"))) + if (repo.getStructure(alias) != null) + { + DataSetMetadata structure = repo.getStructure(alias); + Column[] names = getColumnsFromComponents(structure).toArray(new Column[structure.size()]); + + Dataset sourceDataFrame2 = sourceDataFrame; + for (DataStructureComponent comp: structure) + { + ValueDomainSubset domain = comp.getVariable().getDomain(); + String name = comp.getVariable().getName(); + if (domain instanceof IntegerDomainSubset) + sourceDataFrame2 = sourceDataFrame2.withColumn(name, sourceDataFrame2.col(name).cast(LongType)); + else if (domain instanceof NumberDomainSubset) + sourceDataFrame2 = sourceDataFrame2.withColumn(name, sourceDataFrame2.col(name).cast(DoubleType)); + else if (domain instanceof BooleanDomainSubset) + sourceDataFrame2 = sourceDataFrame2.withColumn(name, sourceDataFrame2.col(name).cast(BooleanType)); + else if (domain instanceof DateDomainSubset) + sourceDataFrame2 = sourceDataFrame2.withColumn(name, to_date(sourceDataFrame2.col(name))); + } + + return new SparkDataSet(session, structure, new DataPointEncoder(structure), sourceDataFrame2.select(names).withColumn("$lineage$", lineage)); + } + else if (schema.forall(field -> !(field.dataType() 
instanceof StructType || field.dataType() instanceof ArrayType) && field.metadata().contains("Role"))) { // infer structure from the schema metadata DataSetMetadata structure = new DataStructureBuilder().addComponents(getComponentsFromStruct(repo, schema)).build(); Column[] names = getColumnsFromComponents(structure).toArray(new Column[structure.size()]); - Column lineage = new Column(Literal.create(LineageSparkUDT.serialize(LineageExternal.of("spark:" + alias)), LineageSparkUDT)); - return new SparkDataSet(session, new DataPointEncoder(structure), sourceDataFrame.select(names).withColumn("$lineage$", lineage)); + return new SparkDataSet(session, structure, new DataPointEncoder(structure), sourceDataFrame.select(names).withColumn("$lineage$", lineage)); } else if (!schema.forall(field -> field.dataType() instanceof StringType)) { @@ -282,9 +339,8 @@ else if (field.dataType() instanceof IntegerType) DataSetMetadata structure = new DataStructureBuilder().addComponents(getComponentsFromStruct(repo, sourceDataFrame2.schema())).build(); Column[] names = getColumnsFromComponents(structure).toArray(new Column[structure.size()]); - Column lineage = new Column(Literal.create(LineageSparkUDT.serialize(LineageExternal.of("spark:" + alias)), LineageSparkUDT)); Dataset enriched = sourceDataFrame2.select(names).withColumn("$lineage$", lineage); - return new SparkDataSet(session, new DataPointEncoder(structure), enriched); + return new SparkDataSet(session, structure, new DataPointEncoder(structure), enriched); } else { diff --git a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkUtils.java b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkUtils.java index 163883f71..9b162c5e1 100644 --- a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkUtils.java +++ b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/SparkUtils.java @@ -20,6 +20,7 @@ 
package it.bancaditalia.oss.vtl.impl.environment.spark; import static it.bancaditalia.oss.vtl.config.VTLGeneralProperties.isUseBigDecimal; +import static it.bancaditalia.oss.vtl.impl.environment.spark.SparkEnvironment.LineageSparkUDT; import static it.bancaditalia.oss.vtl.impl.types.data.NumberValueImpl.createNumberValue; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.BOOLEANDS; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.DATEDS; @@ -28,12 +29,7 @@ import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.STRINGDS; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.TIMEDS; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; -import static java.util.Objects.requireNonNull; -import static org.apache.spark.sql.Encoders.BOOLEAN; -import static org.apache.spark.sql.Encoders.DOUBLE; -import static org.apache.spark.sql.Encoders.LOCALDATE; -import static org.apache.spark.sql.Encoders.LONG; -import static org.apache.spark.sql.Encoders.STRING; +import static org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.STRICT_LOCAL_DATE_ENCODER; import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.types.DataTypes.BooleanType; import static org.apache.spark.sql.types.DataTypes.DateType; @@ -41,12 +37,16 @@ import static org.apache.spark.sql.types.DataTypes.LongType; import static org.apache.spark.sql.types.DataTypes.StringType; import static org.apache.spark.sql.types.DataTypes.createDecimalType; +import static scala.collection.JavaConverters.asJava; +import static scala.jdk.javaapi.CollectionConverters.asScala; import java.io.Serializable; import java.sql.Date; import java.time.LocalDate; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -56,15 +56,26 @@ import org.apache.spark.sql.Column; import org.apache.spark.sql.Encoder; +import 
org.apache.spark.sql.Row; +import org.apache.spark.sql.catalyst.JavaTypeInference; +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder; +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders; +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.EncoderField; +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.IterableEncoder; +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.MapEncoder; +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.ProductEncoder; +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.MetadataBuilder; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; +import it.bancaditalia.oss.vtl.impl.types.data.BaseScalarValue; import it.bancaditalia.oss.vtl.impl.types.data.BooleanValue; import it.bancaditalia.oss.vtl.impl.types.data.DateValue; import it.bancaditalia.oss.vtl.impl.types.data.IntegerValue; +import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.impl.types.data.StringValue; import it.bancaditalia.oss.vtl.model.data.Component; import it.bancaditalia.oss.vtl.model.data.Component.Attribute; @@ -72,36 +83,41 @@ import it.bancaditalia.oss.vtl.model.data.Component.Measure; import it.bancaditalia.oss.vtl.model.data.Component.NonIdentifier; import it.bancaditalia.oss.vtl.model.data.Component.ViralAttribute; +import it.bancaditalia.oss.vtl.model.data.DataSetMetadata; import it.bancaditalia.oss.vtl.model.data.DataStructureComponent; import it.bancaditalia.oss.vtl.model.data.ScalarValue; import it.bancaditalia.oss.vtl.model.domain.StringDomain; import it.bancaditalia.oss.vtl.model.domain.ValueDomainSubset; import it.bancaditalia.oss.vtl.session.MetadataRepository; +import it.bancaditalia.oss.vtl.util.GenericTuple; +import it.bancaditalia.oss.vtl.util.SerDoubleSumAvgCount; import 
it.bancaditalia.oss.vtl.util.SerFunction; +import scala.Option; +import scala.Tuple2; +import scala.collection.JavaConverters; +import scala.collection.immutable.Seq; +import scala.reflect.ClassTag; public class SparkUtils { - private static final Map, Encoder> DOMAIN_ENCODERS = new ConcurrentHashMap<>(); + private static final Map, AgnosticEncoder> DOMAIN_ENCODERS = new ConcurrentHashMap<>(); private static final Map, DataType> DOMAIN_DATATYPES = new ConcurrentHashMap<>(); - private static final Map, SerFunction>> DOMAIN_BUILDERS = new ConcurrentHashMap<>(); + private static final Map, SerFunction>> DOMAIN_BUILDERS = new ConcurrentHashMap<>(); + static final Map, SerFunction>> PRIM_BUILDERS = new ConcurrentHashMap<>(); static { - DOMAIN_ENCODERS.put(BOOLEANDS, BOOLEAN()); - DOMAIN_ENCODERS.put(STRINGDS, STRING()); - DOMAIN_ENCODERS.put(INTEGERDS, LONG()); - DOMAIN_ENCODERS.put(NUMBERDS, DOUBLE()); - DOMAIN_ENCODERS.put(TIMEDS, LOCALDATE()); - DOMAIN_ENCODERS.put(DATEDS, LOCALDATE()); + DOMAIN_ENCODERS.put(BOOLEANDS, AgnosticEncoders.BoxedBooleanEncoder$.MODULE$); + DOMAIN_ENCODERS.put(STRINGDS, AgnosticEncoders.StringEncoder$.MODULE$); + DOMAIN_ENCODERS.put(INTEGERDS, AgnosticEncoders.BoxedLongEncoder$.MODULE$); + DOMAIN_ENCODERS.put(NUMBERDS, AgnosticEncoders.BoxedDoubleEncoder$.MODULE$); + DOMAIN_ENCODERS.put(TIMEDS, STRICT_LOCAL_DATE_ENCODER()); + DOMAIN_ENCODERS.put(DATEDS, STRICT_LOCAL_DATE_ENCODER()); DOMAIN_DATATYPES.put(BOOLEANDS, BooleanType); DOMAIN_DATATYPES.put(STRINGDS, StringType); DOMAIN_DATATYPES.put(INTEGERDS, LongType); - - if (isUseBigDecimal()) - DOMAIN_DATATYPES.put(NUMBERDS, createDecimalType()); - else - DOMAIN_DATATYPES.put(NUMBERDS, DoubleType); + DOMAIN_DATATYPES.put(NUMBERDS, isUseBigDecimal() ? 
createDecimalType() : DoubleType); DOMAIN_DATATYPES.put(TIMEDS, DateType); DOMAIN_DATATYPES.put(DATEDS, DateType); @@ -111,6 +127,12 @@ public class SparkUtils DOMAIN_BUILDERS.put(NUMBERDS, v -> createNumberValue((Number) v)); DOMAIN_BUILDERS.put(TIMEDS, v -> DateValue.of((LocalDate) v)); DOMAIN_BUILDERS.put(DATEDS, v -> DateValue.of((LocalDate) v)); + + PRIM_BUILDERS.put(Boolean.class, v -> BooleanValue.of((Boolean) v)); + PRIM_BUILDERS.put(String.class, v -> StringValue.of((String) v)); + PRIM_BUILDERS.put(Long.class, v -> IntegerValue.of((Long) v)); + PRIM_BUILDERS.put(Double.class, v -> createNumberValue((Number) v)); + PRIM_BUILDERS.put(LocalDate.class, v -> DateValue.of((LocalDate) v)); } public static Set> getComponentsFromStruct(MetadataRepository repo, StructType schema) @@ -137,9 +159,9 @@ public class SparkUtils return repo.getVariable(field.name()).as(role); } - public static ScalarValue getScalarFor(DataStructureComponent component, Object serialized) + public static ScalarValue getScalarFor(DataStructureComponent component, Serializable serialized) { - SerFunction> builder = null; + SerFunction> builder = null; ValueDomainSubset domain; for (domain = component.getVariable().getDomain(); domain != null && builder == null; domain = (ValueDomainSubset) domain.getParentDomain()) builder = DOMAIN_BUILDERS.get(domain); @@ -180,9 +202,10 @@ public static StructField getFieldFor(DataStructureComponent component) return new StructField(component.getVariable().getName(), type, component.is(NonIdentifier.class), metadata); } - public static Encoder getEncoderFor(DataStructureComponent component) + public static EncoderField getEncoderFieldFor(DataStructureComponent component) { - return requireNonNull(DOMAIN_ENCODERS.get(component.getVariable().getDomain()), "Unsupported serialization for domain " + component.getVariable().getDomain()); + return new EncoderField(component.getVariable().getName(), DOMAIN_ENCODERS.get(component.getVariable().getDomain()), + true, 
getMetadataFor(component), Option.empty(), Option.empty()); } public static DataType getDataTypeFor(DataStructureComponent component) @@ -201,10 +224,15 @@ public static List createStructFromComponents(DataStructureComponen return structHelper(Arrays.stream(components), SparkUtils::getFieldFor); } + public static List createFieldFromComponents(DataStructureComponent[] components) + { + return structHelper(Arrays.stream(components), SparkUtils::getEncoderFieldFor); + } + public static List createStructFromComponents(Collection> components) { return structHelper(components.stream(), SparkUtils::getFieldFor); - } + } public static List getNamesFromComponents(Collection> components) { @@ -224,6 +252,125 @@ public static List structHelper(Stream getEncoderFor(Serializable instance, ValueDomainSubset domain, DataSetMetadata structure) + { + AgnosticEncoder resultEncoder; + + String className = instance.getClass().getSimpleName(); + if ("PartitionToRank".equals(className)) + { + DataStructureComponent[] components = structure.toArray(new DataStructureComponent[structure.size()]); + Arrays.sort(components, DataStructureComponent::byNameAndRole); + List fields = new ArrayList<>(createFieldFromComponents(components)); + fields.add(new EncoderField("$lineage$", new AgnosticEncoders.UDTEncoder<>(LineageSparkUDT, LineageSparkUDT.class), false, Metadata.empty(), Option.empty(), Option.empty())); + resultEncoder = new AgnosticEncoders.RowEncoder(asScala((Iterable) fields).toSeq()); + } + else if ("RankedPartition".equals(className)) + { + try + { + Class[] repr = (Class[]) instance.getClass().getField("repr").get(instance); + List fieldEncoders = new ArrayList<>(); + for (int i = 0; i < repr.length; i++) + fieldEncoders.add(new EncoderField("get" + (i + 1), JavaTypeInference.encoderFor(repr[i]), + true, new Metadata(), Option.apply("get" + (i + 1)), Option.apply("set" + (i + 1)))); + + Seq seq = asScala((Iterable) fieldEncoders).toSeq(); + ProductEncoder keyEncoder = new 
ProductEncoder<>(ClassTag.apply(GenericTuple.class), seq, Option.empty()); + AgnosticEncoder longEncoder = JavaTypeInference.encoderFor(Long.class); + resultEncoder = new MapEncoder<>(ClassTag.apply(HashMap.class), keyEncoder, longEncoder, true); + } + catch (IllegalArgumentException | IllegalAccessException | NoSuchFieldException | SecurityException e) + { + throw new IllegalStateException(e); + } + } + else if ("SerDoubleSumAvgCount".equals(className)) + { + List fieldEncoders = new ArrayList<>(); + fieldEncoders.add(new EncoderField("getCount", JavaTypeInference.encoderFor(int.class), true, new Metadata(), Option.empty(), Option.empty())); + fieldEncoders.add(new EncoderField("getSums", JavaTypeInference.encoderFor(double[].class), true, new Metadata(), Option.empty(), Option.empty())); + Seq seq = asScala((Iterable) fieldEncoders).toSeq(); + resultEncoder = new ProductEncoder<>(ClassTag.apply(SerDoubleSumAvgCount.class), seq, Option.empty()); + } + else if ("ListOfDateValues".equals(className)) + { + List fieldEncoders = new ArrayList<>(); + fieldEncoders.add(new EncoderField("get", STRICT_LOCAL_DATE_ENCODER(), true, new Metadata(), Option.empty(), Option.empty())); + Seq seq = asScala((Iterable) fieldEncoders).toSeq(); + resultEncoder = new IterableEncoder<>(ClassTag.apply(ArrayList.class), new ProductEncoder<>(ClassTag.apply(DateValue.class), seq, Option.empty()), true, false); + } + else if ("Holder".equals(className)) + { + try + { + List fieldEncoders = new ArrayList<>(); + AgnosticEncoder vEncoder = JavaTypeInference.encoderFor((Class) instance.getClass().getField("reprType").get(instance)); + fieldEncoders.add(new EncoderField("get", vEncoder, true, new Metadata(), Option.empty(), Option.empty())); + Seq seq = asScala((Iterable) fieldEncoders).toSeq(); + resultEncoder = new ProductEncoder<>(ClassTag.apply(SerDoubleSumAvgCount.class), seq, Option.empty()); + } + catch (IllegalArgumentException | IllegalAccessException | NoSuchFieldException | 
SecurityException e) + { + throw new IllegalStateException(e); + } + } + else if (instance instanceof BaseScalarValue) + { + resultEncoder = DOMAIN_ENCODERS.get(domain); + if (resultEncoder == null) + throw new UnsupportedOperationException(domain.toString()); + } + else if (instance instanceof Object[]) + { + resultEncoder = JavaTypeInference.encoderFor(instance.getClass()); + } + else + throw new IllegalStateException(instance.getClass().getName()); + + return ExpressionEncoder.apply(resultEncoder); + } + + public static Serializable reinterpret(DataStructureComponent comp, Serializable serAcc) + { + if (serAcc == null) + { + return NullValue.instanceFrom(comp); + } + else if (serAcc instanceof Row) + { + return serAcc; + } + else if (serAcc instanceof scala.collection.immutable.Map) + { + Map rankedPartition = new HashMap<>(); + scala.collection.Iterator> iterator = ((scala.collection.immutable.Map) serAcc).iterator(); + while (iterator.hasNext()) + { + Tuple2 entry = iterator.next(); + + Serializable[] values = JavaConverters.asJava(((Row) entry._1).toSeq()).toArray(Serializable[]::new); + rankedPartition.put(new GenericTuple(values), (Long) entry._2); + } + + serAcc = (Serializable) rankedPartition; + } + else if (serAcc instanceof scala.collection.immutable.ArraySeq) + { + SerFunction> builder = DOMAIN_BUILDERS.getOrDefault(comp.getVariable().getDomain(), ScalarValue.class::cast); + ArrayList list = new ArrayList<>(); + for (Serializable value: asJava((scala.collection.Iterable) serAcc)) + list.add(builder.apply((Serializable) ((Row) value).get(0))); + serAcc = list; + } + else if (PRIM_BUILDERS.containsKey(serAcc.getClass())) + { + serAcc = PRIM_BUILDERS.get(serAcc.getClass()).apply(serAcc); + } + + return serAcc; + } + private SparkUtils() { } diff --git a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/VTLSparkAggregator.java 
b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/VTLSparkAggregator.java index ef50dbd76..e62118c9c 100644 --- a/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/VTLSparkAggregator.java +++ b/vtl-envs/vtl-spark/src/main/java/it/bancaditalia/oss/vtl/impl/environment/spark/VTLSparkAggregator.java @@ -19,160 +19,81 @@ */ package it.bancaditalia.oss.vtl.impl.environment.spark; -import static it.bancaditalia.oss.vtl.util.SerCollectors.toArray; -import static org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.STRICT_LOCAL_DATE_ENCODER; +import static it.bancaditalia.oss.vtl.impl.environment.spark.SparkUtils.PRIM_BUILDERS; import java.io.Serializable; -import java.time.LocalDate; -import java.util.ArrayList; -import java.util.Arrays; import org.apache.spark.sql.Encoder; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.ArrayEncoder; -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedDoubleEncoder$; -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedLongEncoder$; -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.PrimitiveDoubleEncoder$; -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.StringEncoder$; -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder; import org.apache.spark.sql.expressions.Aggregator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import it.bancaditalia.oss.vtl.model.data.DataStructureComponent; import it.bancaditalia.oss.vtl.model.data.ScalarValue; -import it.bancaditalia.oss.vtl.model.domain.IntegerDomainSubset; -import it.bancaditalia.oss.vtl.model.domain.NumberDomainSubset; -import it.bancaditalia.oss.vtl.model.domain.StringDomainSubset; -import it.bancaditalia.oss.vtl.model.domain.TimeDomainSubset; -import it.bancaditalia.oss.vtl.util.OptionalBox; import it.bancaditalia.oss.vtl.util.SerCollector; 
-import it.bancaditalia.oss.vtl.util.SerDoubleSumAvgCount; -public class VTLSparkAggregator extends Aggregator +public class VTLSparkAggregator extends Aggregator { private static final long serialVersionUID = 1L; + private static final Logger LOGGER = LoggerFactory.getLogger(VTLSparkAggregator.class); - private final Encoder accEncoder; - private final Encoder resultEncoder; - private final SerCollector, A, A> coll; - private final DataStructureComponent oldComp; - private final Object[] array; + private final Encoder accEncoder; + private final Encoder resultEncoder; + private final SerCollector collector; @SuppressWarnings("unchecked") - public VTLSparkAggregator(DataStructureComponent oldComp, DataStructureComponent newComp, - SerCollector, ?, A> collector, SparkSession session) + public VTLSparkAggregator(SerCollector collector, Encoder accEncoder, Encoder resultEncoder) { - try - { - this.coll = (SerCollector, A, A>) collector; - this.oldComp = oldComp; - - A zero = zero(); - if (zero instanceof ArrayList) - { - accEncoder = (Encoder) Encoders.kryo(ArrayList.class); - if (newComp.getVariable().getDomain() instanceof TimeDomainSubset) - { - resultEncoder = ExpressionEncoder.apply(new ArrayEncoder<>(STRICT_LOCAL_DATE_ENCODER(), false)); - array = new LocalDate[0]; - } - else if (newComp.getVariable().getDomain() instanceof IntegerDomainSubset) - { - resultEncoder = ExpressionEncoder.apply(new ArrayEncoder<>(BoxedLongEncoder$.MODULE$, false)); - array = new Double[0]; - } - else if (newComp.getVariable().getDomain() instanceof NumberDomainSubset) - { - resultEncoder = ExpressionEncoder.apply(new ArrayEncoder<>(BoxedDoubleEncoder$.MODULE$, false)); - array = new Double[0]; - } - else if (newComp.getVariable().getDomain() instanceof StringDomainSubset) - { - resultEncoder = ExpressionEncoder.apply(new ArrayEncoder<>(StringEncoder$.MODULE$, false)); - array = new String[0]; - } - else - throw new UnsupportedOperationException("Spark aggregation on domain " + 
newComp.getVariable().getDomain()); - } - else - { - array = null; - - if (zero instanceof double[]) - { - accEncoder = (Encoder) session.implicits().newDoubleArrayEncoder(); - resultEncoder = SparkUtils.getEncoderFor(oldComp); - } - else if (zero instanceof OptionalBox) - { - accEncoder = (Encoder) Encoders.kryo(OptionalBox.class); - resultEncoder = SparkUtils.getEncoderFor(oldComp); - } - else if (zero instanceof SerDoubleSumAvgCount) - { - accEncoder = (Encoder) Encoders.tuple(Encoders.LONG(), ExpressionEncoder.apply(new ArrayEncoder<>(PrimitiveDoubleEncoder$.MODULE$, false))); - resultEncoder = Encoders.DOUBLE(); - } - else - throw new UnsupportedOperationException("Spark encoder not found for " + zero.getClass().getName()); - } - } - catch (RuntimeException e) - { - throw e; - } + this.collector = (SerCollector) collector; + this.resultEncoder = resultEncoder; + this.accEncoder = (Encoder) accEncoder; } @Override - public A zero() + public Object zero() { - return coll.supplier().get(); + return collector.supplier().get(); } @Override - public Encoder bufferEncoder() + public Encoder bufferEncoder() { - return (Encoder) accEncoder; + return accEncoder; } @Override - public A reduce(A acc, Serializable value) + public Object reduce(Object acc, I value) { - coll.accumulator().accept(acc, SparkUtils.getScalarFor(oldComp, value)); + // For performance reasons scalars are encoded as boxed primitive types, and must be rebuilt + if (value != null && PRIM_BUILDERS.containsKey(value.getClass())) + value = (I) PRIM_BUILDERS.get(value.getClass()).apply((Serializable) value); + + collector.accumulator().accept(acc, value); return acc; } @Override - public A merge(A acc1, A acc2) + public Object merge(Object acc1, Object acc2) { - return coll.combiner().apply(acc1, acc2); + return collector.combiner().apply(acc1, acc2); } @Override - public Serializable finish(A reduction) + public TT finish(Object reduction) { - final A result = coll.finisher().apply(reduction); - if 
(result instanceof ArrayList) - return ((ArrayList) result).stream() - .map(ScalarValue.class::cast) - .map(ScalarValue::get) - .collect(toArray(Arrays.copyOf(array, ((ArrayList) result).size()))); - else if (result instanceof ScalarValue) - return ((ScalarValue) result).get(); - else - throw new UnsupportedOperationException("Class not implemented as finished value in spark aggregator: " + result.getClass().getName()); + Object apply = collector.finisher().apply(reduction); + if (apply instanceof ScalarValue) + apply = ((ScalarValue) apply).get(); + + LOGGER.debug("Finished Spark aggregation: {} of {}", apply, apply == null ? null : apply.getClass()); + + @SuppressWarnings("unchecked") + TT result = (TT) apply; + return result; } - @SuppressWarnings("unchecked") - @Override - public Encoder outputEncoder() - { - return (Encoder) resultEncoder; - } - @Override - public String toString() + public Encoder outputEncoder() { - return "VTLSparkAggregator(" + oldComp + ")"; + return resultEncoder; } -}; +} diff --git a/vtl-meta/vtl-metabase/src/main/java/it/bancaditalia/oss/vtl/impl/meta/subsets/IntegerCodeList.java b/vtl-meta/vtl-metabase/src/main/java/it/bancaditalia/oss/vtl/impl/meta/subsets/IntegerCodeList.java index bc2ca3dee..ea793ec98 100644 --- a/vtl-meta/vtl-metabase/src/main/java/it/bancaditalia/oss/vtl/impl/meta/subsets/IntegerCodeList.java +++ b/vtl-meta/vtl-metabase/src/main/java/it/bancaditalia/oss/vtl/impl/meta/subsets/IntegerCodeList.java @@ -154,4 +154,10 @@ public Variable getDefaultVariable() { return new DefaultVariable<>(this); } + + @Override + public Class getRepresentation() + { + return INTEGERDS.getRepresentation(); + } } diff --git a/vtl-meta/vtl-metasdmx/src/main/java/it/bancaditalia/oss/vtl/impl/meta/sdmx/SDMXRepository.java b/vtl-meta/vtl-metasdmx/src/main/java/it/bancaditalia/oss/vtl/impl/meta/sdmx/SDMXRepository.java index db45ee7ed..4d0a8addd 100644 --- 
a/vtl-meta/vtl-metasdmx/src/main/java/it/bancaditalia/oss/vtl/impl/meta/sdmx/SDMXRepository.java +++ b/vtl-meta/vtl-metasdmx/src/main/java/it/bancaditalia/oss/vtl/impl/meta/sdmx/SDMXRepository.java @@ -397,7 +397,7 @@ public SdmxBeanRetrievalManager getBeanRetrievalManager() public TransformationScheme getTransformationScheme(String alias) { return Optional.ofNullable(schemes.get(alias)) - .map(ConfigurationManagerFactory.getInstance()::createSession) + .map(ConfigurationManagerFactory.newManager()::createSession) .orElseThrow(() -> new VTLException("Transformation scheme " + alias + " not found.")); } diff --git a/vtl-parsers/pom.xml b/vtl-parsers/pom.xml new file mode 100644 index 000000000..3f419c088 --- /dev/null +++ b/vtl-parsers/pom.xml @@ -0,0 +1,58 @@ + + + + 4.0.0 + + vtl-parsers + pom + + + it.bancaditalia.oss.vtl + vtl + ${revision} + + + vtl-parsers + VTL parsers for language targets + + + + with-r + + vtl-jsparser + + + + with-jupyter + + vtl-jsparser + + + + + + vtl-parser + + diff --git a/vtl-parsers/vtl-jsparser/.gitignore b/vtl-parsers/vtl-jsparser/.gitignore new file mode 100644 index 000000000..247507bb0 --- /dev/null +++ b/vtl-parsers/vtl-jsparser/.gitignore @@ -0,0 +1 @@ +/vtl-parser/ diff --git a/vtl-parser/javadoc/README.md b/vtl-parsers/vtl-jsparser/javadoc/README.md similarity index 100% rename from vtl-parser/javadoc/README.md rename to vtl-parsers/vtl-jsparser/javadoc/README.md diff --git a/vtl-parsers/vtl-jsparser/pom.xml b/vtl-parsers/vtl-jsparser/pom.xml new file mode 100644 index 000000000..71c4088ae --- /dev/null +++ b/vtl-parsers/vtl-jsparser/pom.xml @@ -0,0 +1,93 @@ + + + + 4.0.0 + vtl-jsparser + pom + + + it.bancaditalia.oss.vtl + vtl-parsers + ${revision} + + + vtl-jsparser + VTL grammar and generated parser for TypeScript + + + + + org.codehaus.mojo + flatten-maven-plugin + + + org.antlr + antlr4-maven-plugin + + 4.13.1 + + + generate-sources + + antlr4 + + + + -Dlanguage=TypeScript + + 
${project.build.directory}/generated-sources/js + + + + + true + false + + + + org.apache.maven.plugins + maven-assembly-plugin + + + package + + single + + + + + + src/assembly/parser-js.xml + + true + + + + org.apache.maven.plugins + maven-gpg-plugin + + + + diff --git a/vtl-parsers/vtl-jsparser/src/assembly/parser-js.xml b/vtl-parsers/vtl-jsparser/src/assembly/parser-js.xml new file mode 100644 index 000000000..7203d8ba7 --- /dev/null +++ b/vtl-parsers/vtl-jsparser/src/assembly/parser-js.xml @@ -0,0 +1,42 @@ + + + + + grammar + + + zip + + + false + + + + ${project.build.directory}/generated-sources/js/it/bancaditalia/oss/vtl/grammar + / + + **/*.?s + + + + \ No newline at end of file diff --git a/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/Vtl.g4 b/vtl-parsers/vtl-jsparser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/Vtl.g4 similarity index 100% rename from vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/Vtl.g4 rename to vtl-parsers/vtl-jsparser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/Vtl.g4 diff --git a/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/VtlTokens.g4 b/vtl-parsers/vtl-jsparser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/VtlTokens.g4 similarity index 100% rename from vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/VtlTokens.g4 rename to vtl-parsers/vtl-jsparser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/VtlTokens.g4 diff --git a/vtl-parsers/vtl-parser/.gitignore b/vtl-parsers/vtl-parser/.gitignore new file mode 100644 index 000000000..247507bb0 --- /dev/null +++ b/vtl-parsers/vtl-parser/.gitignore @@ -0,0 +1 @@ +/vtl-parser/ diff --git a/vtl-parsers/vtl-parser/javadoc/README.md b/vtl-parsers/vtl-parser/javadoc/README.md new file mode 100644 index 000000000..94f46d358 --- /dev/null +++ b/vtl-parsers/vtl-parser/javadoc/README.md @@ -0,0 +1 @@ +All sources are generated. 
diff --git a/vtl-parser/pom.xml b/vtl-parsers/vtl-parser/pom.xml similarity index 86% rename from vtl-parser/pom.xml rename to vtl-parsers/vtl-parser/pom.xml index cc843666a..a0c662e48 100644 --- a/vtl-parser/pom.xml +++ b/vtl-parsers/vtl-parser/pom.xml @@ -29,12 +29,12 @@ it.bancaditalia.oss.vtl - vtl + vtl-parsers ${revision} vtl-parser - VTL grammar and generated parser + VTL grammar and generated parser for Java @@ -65,19 +65,6 @@ antlr4 - - js - generate-sources - - antlr4 - - - - -Dlanguage=TypeScript - - ${project.build.directory}/generated-sources/js - - true @@ -98,7 +85,6 @@ src/assembly/grammar.xml - src/assembly/parser-js.xml true diff --git a/vtl-parser/src/assembly/grammar.xml b/vtl-parsers/vtl-parser/src/assembly/grammar.xml similarity index 100% rename from vtl-parser/src/assembly/grammar.xml rename to vtl-parsers/vtl-parser/src/assembly/grammar.xml diff --git a/vtl-parser/src/assembly/parser-js.xml b/vtl-parsers/vtl-parser/src/assembly/parser-js.xml similarity index 100% rename from vtl-parser/src/assembly/parser-js.xml rename to vtl-parsers/vtl-parser/src/assembly/parser-js.xml diff --git a/vtl-parsers/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/Vtl.g4 b/vtl-parsers/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/Vtl.g4 new file mode 100644 index 000000000..149c2fcdf --- /dev/null +++ b/vtl-parsers/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/Vtl.g4 @@ -0,0 +1,676 @@ +parser grammar Vtl; +options { tokenVocab=VtlTokens; } + +start: + (statement EOL)* EOF +; + +/* statement */ +statement: + varID ASSIGN expr # temporaryAssignment + | varID PUT_SYMBOL expr # persistAssignment + | defOperators # defineExpression +; + +/* expression */ +expr: + LPAREN expr RPAREN # parenthesisExpr + | functions # functionsExpression + | dataset=expr QLPAREN clause=datasetClause QRPAREN # clauseExpr + | expr MEMBERSHIP simpleComponentId # membershipExpr + | op=(PLUS|MINUS|NOT) right=expr # unaryExpr + | left=expr op=(MUL|DIV) 
right=expr # arithmeticExpr + | left=expr op=(PLUS|MINUS|CONCAT) right=expr # arithmeticExprOrConcat + | left=expr op=comparisonOperand right=expr # comparisonExpr + | left=expr op=(IN|NOT_IN) right=inexpr # inNotInExpr + | left=expr op=AND right=expr # booleanExpr + | left=expr op=(OR|XOR) right=expr # booleanExpr + | IF conditionalExpr=expr THEN thenExpr=expr ELSE elseExpr=expr # ifExpr + | constant # constantExpr + | varID # varIdExpr +; + +exprComponent: + LPAREN exprComponent RPAREN # parenthesisExprComp + | functionsComponents # functionsExpressionComp + | op=(PLUS|MINUS|NOT) right=exprComponent # unaryExprComp + | left=exprComponent op=(MUL|DIV) right=exprComponent # arithmeticExprComp + | left=exprComponent op=(PLUS|MINUS|CONCAT) right=exprComponent # arithmeticExprOrConcatComp + | left=exprComponent comparisonOperand right=exprComponent # comparisonExprComp + | left=exprComponent op=(IN|NOT_IN) right=inexpr # inNotInExprComp + | left=exprComponent op=AND right=exprComponent # booleanExprComp + | left=exprComponent op=(OR|XOR) right=exprComponent # booleanExprComp + | IF conditionalExpr=exprComponent THEN thenExpr=exprComponent ELSE elseExpr=exprComponent # ifExprComp + | constant # constantExprComp + | componentID # compId +; + +functionsComponents: + genericOperatorsComponent # genericFunctionsComponents + | stringOperatorsComponent # stringFunctionsComponents + | numericOperatorsComponent # numericFunctionsComponents + | comparisonOperatorsComponent # comparisonFunctionsComponents + | timeOperatorsComponent # timeFunctionsComponents + | conditionalOperatorsComponent # conditionalFunctionsComponents + | aggrOperators # aggregateFunctionsComponents + | anFunctionComponent # analyticFunctionsComponents + +; + +/* functions */ +functions: + joinOperators # joinFunctions + | genericOperators # genericFunctions + | stringOperators # stringFunctions + | numericOperators # numericFunctions + | comparisonOperators # comparisonFunctions + | timeOperators # 
timeFunctions + | setOperators # setFunctions + | hierarchyOperators # hierarchyFunctions + | validationOperators # validationFunctions + | conditionalOperators # conditionalFunctions + | aggrOperatorsGrouping # aggregateFunctions + | anFunction # analyticFunctions +; + + +/*------------------------------------------------------ Clauses----------------------------------------------- */ +datasetClause: + renameClause + | aggrClause + | filterClause + | calcClause + | keepOrDropClause + | pivotOrUnpivotClause + // | customPivotClause + | subspaceClause +; + +renameClause: + RENAME renameClauseItem (COMMA renameClauseItem)* +; + +aggrClause: + AGGREGATE aggregateClause (groupingClause havingClause?)? +; + +filterClause: + FILTER exprComponent +; + +calcClause: + CALC calcClauseItem (COMMA calcClauseItem)* +; + +keepOrDropClause: + op = (KEEP | DROP) componentID (COMMA componentID)* +; + + +pivotOrUnpivotClause: + op=(PIVOT|UNPIVOT) id_=componentID COMMA mea=componentID +; + +/*customPivotClause: + CUSTOMPIVOT id_=componentID COMMA mea=componentID IN constant (COMMA constant)* +;*/ + +subspaceClause: + SUBSPACE subspaceClauseItem (COMMA subspaceClauseItem)* +; + +/*------------------------------------------------------End Clauses----------------------------------------------- */ + +/************************************************** JOIN FUNCITONS -------------------------------------------*/ + +joinOperators: + joinKeyword=(INNER_JOIN | LEFT_JOIN) LPAREN joinClause joinBody RPAREN # joinExpr + | joinKeyword=(FULL_JOIN | CROSS_JOIN) LPAREN joinClauseWithoutUsing joinBody RPAREN # joinExpr +; + +/************************************************** END JOIN FUNCITONS -------------------------------------------*/ +/* --------------------------------------------Define Functions------------------------------------------------- */ +defOperators: + DEFINE OPERATOR operatorID LPAREN (parameterItem (COMMA parameterItem)*)? RPAREN (RETURNS outputParameterType)? 
IS (expr) END OPERATOR # defOperator + | DEFINE DATAPOINT RULESET rulesetID LPAREN rulesetSignature RPAREN IS ruleClauseDatapoint END DATAPOINT RULESET # defDatapointRuleset + | DEFINE HIERARCHICAL RULESET rulesetID LPAREN hierRuleSignature RPAREN IS ruleClauseHierarchical END HIERARCHICAL RULESET # defHierarchical +; + +/* --------------------------------------------END DEFINE FUNCTIONS------------------------------------------------- */ + +/*---------------------------------------------------FUNCTIONS-------------------------------------------------*/ +genericOperators: + operatorID LPAREN (parameter (COMMA parameter)*)? RPAREN # callDataset + | EVAL LPAREN routineName LPAREN (varID|constant)? (COMMA (varID|constant))* RPAREN (LANGUAGE STRING_CONSTANT)? (RETURNS datasetType)? RPAREN # evalAtom + | CAST LPAREN expr COMMA (basicScalarType|valueDomainName) (COMMA STRING_CONSTANT)? RPAREN # castExprDataset +; + +genericOperatorsComponent: + operatorID LPAREN (parameterComponent (COMMA parameterComponent)*)? RPAREN # callComponent + | CAST LPAREN exprComponent COMMA (basicScalarType|valueDomainName) (COMMA STRING_CONSTANT)? RPAREN # castExprComponent + | EVAL LPAREN routineName LPAREN (componentID|constant)? (COMMA (componentID|constant))* RPAREN (LANGUAGE STRING_CONSTANT)? (RETURNS outputParameterTypeComponent)? RPAREN # evalAtomComponent + +; + + +parameterComponent: + componentID + | constant + | OPTIONAL +; + +parameter: + varID + | constant + | OPTIONAL +; + +stringOperators: + op=(TRIM | LTRIM | RTRIM | UCASE | LCASE | LEN) LPAREN expr RPAREN # unaryStringFunction + | SUBSTR LPAREN expr (((COMMA startParameter=optionalExpr) (COMMA endParameter=optionalExpr))? | COMMA startParameter=optionalExpr ) RPAREN # substrAtom + | REPLACE LPAREN expr COMMA param=expr ( COMMA optionalExpr)? RPAREN # replaceAtom + | INSTR LPAREN expr COMMA pattern=expr ( COMMA startParameter=optionalExpr)? (COMMA occurrenceParameter=optionalExpr)? 
RPAREN # instrAtom +; + +stringOperatorsComponent: + op=(TRIM | LTRIM | RTRIM | UCASE | LCASE | LEN) LPAREN exprComponent RPAREN # unaryStringFunctionComponent + | SUBSTR LPAREN exprComponent (((COMMA startParameter=optionalExprComponent) (COMMA endParameter=optionalExprComponent))? | COMMA startParameter=optionalExprComponent ) RPAREN # substrAtomComponent + | REPLACE LPAREN exprComponent COMMA param=exprComponent ( COMMA optionalExprComponent)? RPAREN # replaceAtomComponent + | INSTR LPAREN exprComponent COMMA pattern=exprComponent ( COMMA startParameter=optionalExprComponent)? (COMMA occurrenceParameter=optionalExprComponent)? RPAREN # instrAtomComponent +; + +numericOperators: + op=(CEIL | FLOOR | ABS | EXP | LN | SQRT) LPAREN expr RPAREN # unaryNumeric + | op=(ROUND | TRUNC) LPAREN expr (COMMA optionalExpr)? RPAREN # unaryWithOptionalNumeric + | op=(MOD | POWER|LOG) LPAREN left=expr COMMA right=expr RPAREN # binaryNumeric +; + +numericOperatorsComponent: + op=(CEIL | FLOOR | ABS | EXP | LN | SQRT) LPAREN exprComponent RPAREN # unaryNumericComponent + | op=(ROUND | TRUNC) LPAREN exprComponent (COMMA optionalExprComponent)? RPAREN # unaryWithOptionalNumericComponent + | op=(MOD | POWER | LOG) LPAREN left=exprComponent COMMA right=exprComponent RPAREN # binaryNumericComponent +; + +comparisonOperators: + BETWEEN LPAREN op=expr COMMA from_=expr COMMA to_=expr RPAREN # betweenAtom + | CHARSET_MATCH LPAREN op=expr COMMA pattern=expr RPAREN # charsetMatchAtom + | ISNULL LPAREN expr RPAREN # isNullAtom + | EXISTS_IN LPAREN left=expr COMMA right=expr (COMMA retainType)? RPAREN # existInAtom +; + +comparisonOperatorsComponent: + BETWEEN LPAREN op=exprComponent COMMA from_=exprComponent COMMA to_=exprComponent RPAREN # betweenAtomComponent + | CHARSET_MATCH LPAREN op=exprComponent COMMA pattern=exprComponent RPAREN # charsetMatchAtomComponent + | ISNULL LPAREN exprComponent RPAREN # isNullAtomComponent +; + +timeOperators: + PERIOD_INDICATOR LPAREN expr? 
RPAREN # periodAtom + | FILL_TIME_SERIES LPAREN expr (COMMA op=(SINGLE|ALL))? RPAREN # fillTimeAtom + | op=(FLOW_TO_STOCK | STOCK_TO_FLOW) LPAREN expr RPAREN # flowAtom + | TIMESHIFT LPAREN expr COMMA signedInteger RPAREN # timeShiftAtom + | TIME_AGG LPAREN periodIndTo=TIME_UNIT (COMMA periodIndFrom=(TIME_UNIT | OPTIONAL))? (COMMA op=optionalExpr)? (COMMA delim=(FIRST|LAST))? RPAREN # timeAggAtom + | CURRENT_DATE LPAREN RPAREN # currentDateAtom +; + +timeOperatorsComponent: + PERIOD_INDICATOR LPAREN exprComponent? RPAREN # periodAtomComponent + | FILL_TIME_SERIES LPAREN exprComponent (COMMA (SINGLE|ALL))? RPAREN # fillTimeAtomComponent + | op=(FLOW_TO_STOCK | STOCK_TO_FLOW) LPAREN exprComponent RPAREN # flowAtomComponent + | TIMESHIFT LPAREN exprComponent COMMA signedInteger RPAREN # timeShiftAtomComponent + | TIME_AGG LPAREN periodIndTo=TIME_UNIT (COMMA periodIndFrom=(TIME_UNIT | OPTIONAL ))? (COMMA op=optionalExprComponent)? (COMMA delim=(FIRST|LAST))? RPAREN # timeAggAtomComponent + | CURRENT_DATE LPAREN RPAREN # currentDateAtomComponent +; + +setOperators: + UNION LPAREN left=expr (COMMA expr)+ RPAREN # unionAtom + | INTERSECT LPAREN left=expr (COMMA expr)+ RPAREN # intersectAtom + | op=(SETDIFF|SYMDIFF) LPAREN left=expr COMMA right=expr RPAREN # setOrSYmDiffAtom +; +/* hierarchy */ +hierarchyOperators: + HIERARCHY LPAREN op=expr COMMA hrName=IDENTIFIER (conditionClause)? (RULE ruleComponent=componentID)? (validationMode)? (inputModeHierarchy)? outputModeHierarchy? RPAREN +; + +validationOperators: + CHECK_DATAPOINT LPAREN op=expr COMMA dpName=IDENTIFIER (COMPONENTS componentID (COMMA componentID)*)? validationOutput? RPAREN # validateDPruleset + | CHECK_HIERARCHY LPAREN op=expr COMMA hrName=IDENTIFIER conditionClause? (RULE componentID)? validationMode? inputMode? validationOutput? RPAREN # validateHRruleset + | CHECK LPAREN op=expr (codeErr=erCode)? (levelCode=erLevel)? imbalanceExpr? output=(INVALID|ALL)? 
RPAREN # validationSimple +; + +conditionalOperators: + NVL LPAREN left=expr COMMA right = expr RPAREN # nvlAtom +; + +conditionalOperatorsComponent: + NVL LPAREN left=exprComponent COMMA right = exprComponent RPAREN # nvlAtomComponent +; + +aggrOperators: + op =(SUM + | AVG + | COUNT + | MEDIAN + | MIN + | MAX + | STDDEV_POP + | STDDEV_SAMP + | VAR_POP + | VAR_SAMP) LPAREN exprComponent RPAREN # aggrComp + | COUNT LPAREN RPAREN # countAggrComp + +; + + +aggrOperatorsGrouping: + op =(SUM + | AVG + | COUNT + | MEDIAN + | MIN + | MAX + | STDDEV_POP + | STDDEV_SAMP + | VAR_POP + | VAR_SAMP) LPAREN expr (groupingClause havingClause?)? RPAREN #aggrDataset + +; + + anFunction: + op = ( SUM + | AVG + | COUNT + | MEDIAN + | MIN + | MAX + | STDDEV_POP + | STDDEV_SAMP + | VAR_POP + | VAR_SAMP + | FIRST_VALUE + | LAST_VALUE) + LPAREN expr OVER LPAREN (partition=partitionByClause? orderBy=orderByClause? windowing=windowingClause?) RPAREN RPAREN # anSimpleFunction + | op=(LAG |LEAD) LPAREN expr (COMMA offet=signedInteger(defaultValue=constant)?)? OVER LPAREN (partition=partitionByClause? orderBy=orderByClause) RPAREN RPAREN # lagOrLeadAn + | op=RATIO_TO_REPORT LPAREN expr OVER LPAREN (partition=partitionByClause) RPAREN RPAREN # ratioToReportAn +; + + anFunctionComponent: + op = ( SUM + | AVG + | COUNT + | MEDIAN + | MIN + | MAX + | STDDEV_POP + | STDDEV_SAMP + | VAR_POP + | VAR_SAMP + | FIRST_VALUE + | LAST_VALUE) + LPAREN exprComponent OVER LPAREN (partition=partitionByClause? orderBy=orderByClause? windowing=windowingClause?) RPAREN RPAREN # anSimpleFunctionComponent + | op=(LAG |LEAD) LPAREN exprComponent (COMMA offet=signedInteger(defaultValue=constant)?)? OVER LPAREN (partition=partitionByClause? orderBy=orderByClause) RPAREN RPAREN # lagOrLeadAnComponent + | op=RANK LPAREN OVER LPAREN (partition=partitionByClause? 
orderBy=orderByClause) RPAREN RPAREN # rankAnComponent + | op=RATIO_TO_REPORT LPAREN exprComponent OVER LPAREN (partition=partitionByClause) RPAREN RPAREN # ratioToReportAnComponent +; +/*---------------------------------------------------END FUNCTIONS-------------------------------------------------*/ + +/*-------------------------------------------------CLAUSE EXPRESSION------------------------------------------------*/ +/*RENAME CLAUSE */ +renameClauseItem: + fromName=componentID TO toName=componentID +; + +/*END RENAME CLAUSE*/ + +/*AGGR CLAUSE*/ +aggregateClause: + aggrFunctionClause (COMMA aggrFunctionClause)* +; + +aggrFunctionClause: + (componentRole)? componentID ASSIGN aggrOperators +; +/*END AGGR CLAUSE*/ + +/*CALC CLAUSE*/ +calcClauseItem: + (componentRole)? componentID ASSIGN exprComponent +; +/*END CALC CLAUSE*/ + +/*SUBSPACE CLAUSE*/ +subspaceClauseItem + : + componentID EQ constant + ; +/*END SUBSPACE CLAUSE*/ +/*----------------------------------------------END CLAUSE EXPRESSION--------------------------------------*/ + +/*---------------------------------------------JOIN CLAUSE EXPRESSION---------------------------------------*/ + +joinClauseWithoutUsing: + joinClauseItem (COMMA joinClauseItem)* +; + +joinClause: + joinClauseItem (COMMA joinClauseItem)* (USING componentID (COMMA componentID)*)? +; + +joinClauseItem: + expr (AS alias)? +; + +joinBody: + filterClause? (calcClause|joinApplyClause|aggrClause)? (keepOrDropClause)? renameClause? +; + +/* JOIN APPLY CLAUSE*/ +joinApplyClause: + APPLY expr +; +/* END JOIN APPLY CLAUSE*/ + +/*---------------------------------------------END JOIN CLAUSE EXPRESSION---------------------------------------*/ + +/*-----------------------------------------ANALYTIC CLAUSE -----------------------------------------------*/ + +partitionByClause: + PARTITION BY componentID (COMMA componentID)* +; + +orderByClause: + ORDER BY orderByItem (COMMA orderByItem)* +; + +orderByItem: + componentID (ASC|DESC)? 
+; + +windowingClause: + ((DATA POINTS)|RANGE) BETWEEN from_=limitClauseItem AND to_=limitClauseItem +; + +signedInteger: + INTEGER_CONSTANT +; + +limitClauseItem: + INTEGER_CONSTANT dir=PRECEDING + | INTEGER_CONSTANT dir=FOLLOWING + | CURRENT DATA POINT + | UNBOUNDED dir=PRECEDING + | UNBOUNDED dir=FOLLOWING +; + +/*--------------------------------------------END ANALYTIC CLAUSE -----------------------------------------------*/ +/* ------------------------------------------------------------ GROUPING CLAUSE ------------------------------------*/ +groupingClause: + GROUP op=(BY | EXCEPT) componentID (COMMA componentID)* ( TIME_AGG LPAREN TIME_UNIT (COMMA delim=(FIRST|LAST))? RPAREN )? # groupByOrExcept + | GROUP ALL exprComponent ( TIME_AGG LPAREN TIME_UNIT RPAREN )? # groupAll + ; + +havingClause: + HAVING exprComponent + ; +/*-------------------------------------------END GROUPING CLAUSE-----------------------------------------------------*/ + +/*------------------------------------------------DEFINE OPERATOR ---------------------------------------------------*/ + +parameterItem: + varID inputParameterType (DEFAULT constant)? +; + +outputParameterType: + scalarType + | datasetType + | componentType +; + +outputParameterTypeComponent: + componentType + |scalarType +; + +inputParameterType: + scalarType + | datasetType + | scalarSetType + | rulesetType + | componentType +; + +rulesetType: + RULESET + | dpRuleset + | hrRuleset +; + +scalarType: + (basicScalarType|valueDomainName)scalarTypeConstraint?((NOT)? NULL_CONSTANT)? +; + +componentType: + componentRole ( LT scalarType MT )? +; + +datasetType: + DATASET ( GLPAREN compConstraint (COMMA compConstraint)* GRPAREN )? +; + + +scalarSetType: + SET ( LT scalarType MT )? +; + +dpRuleset: + DATAPOINT # dataPoint + | DATAPOINT_ON_VD (GLPAREN valueDomainName (MUL valueDomainName)* GRPAREN )? # dataPointVd + | DATAPOINT_ON_VAR (GLPAREN varID (MUL varID)* GRPAREN )? 
# dataPointVar +; + +hrRuleset: + HIERARCHICAL # hrRulesetType + | HIERARCHICAL_ON_VD ( GLPAREN vdName=IDENTIFIER (LPAREN valueDomainName (MUL valueDomainName)* RPAREN)? GRPAREN )? # hrRulesetVdType + | HIERARCHICAL_ON_VAR ( GLPAREN varName=varID (LPAREN varID (MUL varID)* RPAREN)? GRPAREN )? # hrRulesetVarType +; + +valueDomainName: + IDENTIFIER +; + +rulesetID: + IDENTIFIER +; + +rulesetSignature: + (VALUE_DOMAIN|VARIABLE) signature (COMMA signature)* +; + +signature: + varID (AS alias)? +; + +ruleClauseDatapoint: + ruleItemDatapoint ( EOL ruleItemDatapoint)* +; + +ruleItemDatapoint: + (ruleName=IDENTIFIER COLON )? ( WHEN antecedentContiditon=exprComponent THEN )? consequentCondition=exprComponent (erCode)? (erLevel)? +; + +ruleClauseHierarchical: + ruleItemHierarchical ( EOL ruleItemHierarchical)* + ; + +ruleItemHierarchical: + (ruleName=IDENTIFIER COLON )? codeItemRelation (erCode)? (erLevel)? + ; + + hierRuleSignature: + (VALUE_DOMAIN|VARIABLE) (CONDITION valueDomainSignature)? RULE IDENTIFIER + ; + + valueDomainSignature: + signature (COMMA signature)* + ; + +codeItemRelation: + ( WHEN exprComponent THEN )? codeItemRef=valueDomainValue comparisonOperand? codeItemRelationClause (codeItemRelationClause)* +; + +codeItemRelationClause: + (opAdd=( PLUS | MINUS ))? rightCodeItem=valueDomainValue ( QLPAREN rightCondition=exprComponent QRPAREN )? +; + +valueDomainValue: + IDENTIFIER + |INTEGER_CONSTANT + |NUMBER_CONSTANT +; + +scalarTypeConstraint: + QLPAREN exprComponent QRPAREN # conditionConstraint + | GLPAREN constant (COMMA constant)* GRPAREN # rangeConstraint +; + + +compConstraint: + componentType (componentID|multModifier) +; + +multModifier: + OPTIONAL ( PLUS | MUL )? 
+; + +/*--------------------------------------------END DEFINE OPERATOR ---------------------------------------------------*/ + + +/*------------------------------------------VALIDATION OPERATOR ---------------------------------------------------*/ +validationOutput: + INVALID|ALL_MEASURES|ALL +; + +validationMode: + NON_NULL|NON_ZERO|PARTIAL_NULL|PARTIAL_ZERO|ALWAYS_NULL|ALWAYS_ZERO +; + +conditionClause: + CONDITION componentID (COMMA componentID)* +; + +inputMode: + DATASET|DATASET_PRIORITY +; + +imbalanceExpr: + IMBALANCE expr +; + +inputModeHierarchy: + RULE|DATASET|RULE_PRIORITY +; + +outputModeHierarchy: + COMPUTED|ALL +; +/*--------------------------------------END VALIDATION OPERATOR ---------------------------------------------------*/ +alias: + IDENTIFIER +; + +varID: + IDENTIFIER +; + +simpleComponentId: + IDENTIFIER +; + +componentID: + IDENTIFIER (MEMBERSHIP IDENTIFIER)? +; + +inexpr: + GLPAREN constant (COMMA constant)* GRPAREN # setExpr + | valueDomainID # valueDomainExpr +; + +erCode: + ERRORCODE constant +; + +erLevel: + ERRORLEVEL constant +; + +comparisonOperand: + MT + | ME + | LE + | LT + | EQ + | NEQ +; + +/* Conditional */ +optionalExpr: + expr + | OPTIONAL +; + +optionalExprComponent: + exprComponent + | OPTIONAL +; +/* Role name*/ +componentRole: + MEASURE + | COMPONENT + | DIMENSION + | ATTRIBUTE + | viralAttribute +; + +viralAttribute: + VIRAL ATTRIBUTE +; + +valueDomainID: + IDENTIFIER + ; + +operatorID: + IDENTIFIER +; + +routineName: + IDENTIFIER +; + +constant: + INTEGER_CONSTANT + | NUMBER_CONSTANT + | BOOLEAN_CONSTANT + | STRING_CONSTANT + | NULL_CONSTANT +; + +basicScalarType: + STRING + | INTEGER + | NUMBER + | BOOLEAN + | DATE + | TIME + | TIME_PERIOD + | DURATION + | SCALAR + +; + +retainType: + BOOLEAN_CONSTANT + | ALL +; + + diff --git a/vtl-parsers/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/VtlTokens.g4 b/vtl-parsers/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/VtlTokens.g4 new file mode 100644 
index 000000000..65c5f8bf3 --- /dev/null +++ b/vtl-parsers/vtl-parser/src/main/antlr4/it/bancaditalia/oss/vtl/grammar/VtlTokens.g4 @@ -0,0 +1,300 @@ +lexer grammar VtlTokens; + + + LPAREN:'('; + RPAREN:')'; + + QLPAREN: '['; + QRPAREN: ']'; + + GLPAREN:'{'; + GRPAREN:'}'; + + EQ : '='; + LT : '<'; + MT : '>'; + ME : '>='; + NEQ : '<>'; + LE : '<='; + PLUS : '+'; + MINUS: '-'; + MUL: '*'; + DIV: '/'; + COMMA : ','; + POINTER : '->'; + + ASSIGN : ':='; + MEMBERSHIP : '#'; + COLON : ':'; + EVAL : 'eval'; + IF : 'if'; + THEN : 'then'; + ELSE : 'else'; + USING : 'using'; + WITH : 'with'; + CURRENT_DATE : 'current_date'; + ON : 'on'; + DROP : 'drop'; + KEEP : 'keep'; + CALC : 'calc'; + ATTRCALC : 'attrcalc'; + RENAME : 'rename'; + AS : 'as'; + AND : 'and'; + OR : 'or'; + XOR : 'xor'; + NOT : 'not'; + BETWEEN : 'between'; + IN : 'in'; + NOT_IN : 'not_in'; + NULL_CONSTANT :'null'; + ISNULL : 'isnull'; + EX : 'ex'; + UNION : 'union'; + DIFF : 'diff'; + SYMDIFF : 'symdiff'; + INTERSECT : 'intersect'; + KEYS : 'keys'; + INTYEAR : 'intyear'; + INTMONTH : 'intmonth'; + INTDAY : 'intday'; + CHECK : 'check'; + EXISTS_IN : 'exists_in'; + TO : 'to'; + RETURN : 'return'; + IMBALANCE : 'imbalance'; + ERRORCODE : 'errorcode'; + ALL : 'all'; + AGGREGATE : 'aggr'; + ERRORLEVEL : 'errorlevel'; + ORDER : 'order'; + BY : 'by'; + RANK : 'rank'; + ASC : 'asc'; + DESC : 'desc'; + MIN : 'min'; + MAX : 'max'; + FIRST : 'first'; + LAST : 'last'; + INDEXOF : 'indexof'; + ABS : 'abs'; + KEY : 'key'; + LN : 'ln'; + LOG : 'log'; + TRUNC : 'trunc'; + ROUND : 'round'; + POWER : 'power'; + MOD : 'mod'; + LEN : 'length'; + CONCAT : '||'; + TRIM : 'trim'; + UCASE : 'upper'; + LCASE : 'lower'; + SUBSTR : 'substr'; + SUM : 'sum'; + AVG : 'avg'; + MEDIAN : 'median'; + COUNT : 'count'; + DIMENSION : 'identifier'; + MEASURE : 'measure'; + ATTRIBUTE : 'attribute'; + FILTER : 'filter'; + MERGE : 'merge'; + EXP : 'exp'; + ROLE : 'componentRole'; + VIRAL : 'viral'; + CHARSET_MATCH : 'match_characters'; + TYPE : 
'type'; + NVL : 'nvl'; + HIERARCHY : 'hierarchy'; + OPTIONAL : '_'; + INVALID : 'invalid'; + + VALUE_DOMAIN : 'valuedomain'; + VARIABLE : 'variable'; + DATA : 'data'; + STRUCTURE : 'structure'; + DATASET : 'dataset'; + OPERATOR : 'operator'; + DEFINE : 'define'; + PUT_SYMBOL : '<-'; + DATAPOINT : 'datapoint'; + HIERARCHICAL : 'hierarchical'; + RULESET : 'ruleset'; + RULE : 'rule'; + END : 'end'; + ALTER_DATASET : 'alterDataset'; + LTRIM : 'ltrim'; + RTRIM : 'rtrim'; + INSTR : 'instr'; + REPLACE : 'replace'; + CEIL : 'ceil'; + FLOOR : 'floor'; + SQRT : 'sqrt'; + ANY : 'any'; + SETDIFF : 'setdiff'; + STDDEV_POP : 'stddev_pop'; + STDDEV_SAMP : 'stddev_samp'; + VAR_POP : 'var_pop'; + VAR_SAMP : 'var_samp'; + GROUP : 'group'; + EXCEPT : 'except'; + HAVING : 'having'; + FIRST_VALUE : 'first_value'; + LAST_VALUE : 'last_value'; + LAG : 'lag'; + LEAD : 'lead'; + RATIO_TO_REPORT : 'ratio_to_report'; + OVER : 'over'; + PRECEDING : 'preceding'; + FOLLOWING : 'following'; + UNBOUNDED : 'unbounded'; + PARTITION : 'partition'; + ROWS : 'rows'; + RANGE : 'range'; + CURRENT : 'current'; + VALID : 'valid'; + FILL_TIME_SERIES : 'fill_time_series'; + FLOW_TO_STOCK : 'flow_to_stock'; + STOCK_TO_FLOW : 'stock_to_flow'; + TIMESHIFT : 'timeshift'; + MEASURES : 'measures'; + NO_MEASURES : 'no_measures'; + CONDITION : 'condition'; + BOOLEAN : 'boolean'; + DATE : 'date'; + TIME_PERIOD :'time_period'; + NUMBER : 'number'; + STRING : 'string'; + TIME : 'time'; + INTEGER : 'integer'; + FLOAT : 'float'; + LIST : 'list'; + RECORD : 'record'; + RESTRICT : 'restrict'; + YYYY : 'yyyy'; + MM : 'mm'; + DD : 'dd'; + MAX_LENGTH : 'maxLength'; + REGEXP : 'regexp'; + IS : 'is'; + WHEN : 'when'; + FROM : 'from'; + AGGREGATES : 'aggregates'; + POINTS : 'points'; + POINT : 'point'; + TOTAL : 'total'; + PARTIAL : 'partial'; + ALWAYS : 'always'; + INNER_JOIN : 'inner_join'; + LEFT_JOIN : 'left_join'; + CROSS_JOIN : 'cross_join'; + FULL_JOIN : 'full_join'; + MAPS_FROM : 'maps_from'; + MAPS_TO : 'maps_to'; + 
MAP_TO : 'map_to'; + MAP_FROM : 'map_from'; + RETURNS : 'returns'; + PIVOT : 'pivot'; + CUSTOMPIVOT : 'customPivot'; + UNPIVOT : 'unpivot'; + SUBSPACE : 'sub'; + APPLY : 'apply'; + CONDITIONED : 'conditioned'; + PERIOD_INDICATOR : 'period_indicator'; + SINGLE : 'single'; + DURATION : 'duration'; + TIME_AGG : 'time_agg'; + UNIT : 'unit'; + VALUE : 'Value'; + VALUEDOMAINS : 'valuedomains'; + VARIABLES : 'variables'; + INPUT : 'input'; + OUTPUT : 'output'; + CAST : 'cast'; + RULE_PRIORITY : 'rule_priority'; + DATASET_PRIORITY : 'dataset_priority'; + DEFAULT : 'default'; + CHECK_DATAPOINT : 'check_datapoint'; + CHECK_HIERARCHY : 'check_hierarchy'; + COMPUTED : 'computed'; + NON_NULL : 'non_null'; + NON_ZERO : 'non_zero'; + PARTIAL_NULL : 'partial_null'; + PARTIAL_ZERO : 'partial_zero'; + ALWAYS_NULL : 'always_null'; + ALWAYS_ZERO : 'always_zero'; + COMPONENTS : 'components'; + ALL_MEASURES : 'all_measures'; + SCALAR : 'scalar'; + COMPONENT : 'component'; + DATAPOINT_ON_VD : 'datapoint_on_valuedomains'; + DATAPOINT_ON_VAR : 'datapoint_on_variables'; + HIERARCHICAL_ON_VD : 'hierarchical_on_valuedomains'; + HIERARCHICAL_ON_VAR : 'hierarchical_on_variables'; + SET : 'set'; + LANGUAGE : 'language'; + +fragment +LETTER: + [a-zA-Z] +; + +fragment +DIGITS0_9: + '0'..'9' +; + +INTEGER_CONSTANT + : + MINUS?DIGITS0_9+ + ; + +NUMBER_CONSTANT + : + MINUS?DIGITS0_9+ '.' DIGITS0_9+ /* one optional sign, then plain digit runs: reusing INTEGER_CONSTANT here allowed a second sign on either side of the point ("--1.5", "1.-5") */ + ; + +BOOLEAN_CONSTANT + : + 'true' + | 'false' + ; + +STRING_CONSTANT + : + '"' (~'"')* '"' + ; + +TIME_UNIT + : + 'A' + |'S' + |'Q' + |'M' + |'W' + |'D' + |'T' + ; + +IDENTIFIER + : + LETTER ([A-Za-z0-9_.])* + | '\'' (.)*? '\'' + ; + +WS: + [ \t\r\n\u000C]+ ->channel(1) + ; + +EOL + : ';' + ; + +ML_COMMENT + : + ('/*' (.)*? '*/')-> channel(2); + +SL_COMMENT + : + 
('//' ~[\r\n]*) /* ends before the line break (left to WS), so a comment on the last line of a script with no trailing newline is still a comment */ ->channel(2); diff --git a/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/GrammarTest.java b/vtl-parsers/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/GrammarTest.java similarity index 100% rename from vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/GrammarTest.java rename to vtl-parsers/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/GrammarTest.java diff --git a/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxError.java b/vtl-parsers/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxError.java similarity index 100% rename from vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxError.java rename to vtl-parsers/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxError.java diff --git a/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxErrorListener.java b/vtl-parsers/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxErrorListener.java similarity index 100% rename from vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxErrorListener.java rename to vtl-parsers/vtl-parser/src/test/java/it/bancaditalia/oss/vtl/grammar/SyntaxErrorListener.java diff --git a/vtl-parser/src/test/resources/it/bancaditalia/oss/vtl/grammar/PositiveTests.vtl b/vtl-parsers/vtl-parser/src/test/resources/it/bancaditalia/oss/vtl/grammar/PositiveTests.vtl similarity index 100% rename from vtl-parser/src/test/resources/it/bancaditalia/oss/vtl/grammar/PositiveTests.vtl rename to vtl-parsers/vtl-parser/src/test/resources/it/bancaditalia/oss/vtl/grammar/PositiveTests.vtl diff --git a/vtl-samples/src/main/java/it/bancaditalia/oss/vtl/impl/data/samples/SampleDataSets.java b/vtl-samples/src/main/java/it/bancaditalia/oss/vtl/impl/data/samples/SampleDataSets.java index a9ef109cd..9c5d099d3 100644 --- 
a/vtl-samples/src/main/java/it/bancaditalia/oss/vtl/impl/data/samples/SampleDataSets.java +++ 
b/vtl-samples/src/main/java/it/bancaditalia/oss/vtl/impl/data/samples/SampleDataSets.java @@ -217,13 +217,10 @@ public Stream streamByKeys(Set DataSet analytic(SerFunction lineageOp, - Map, ? extends DataStructureComponent> components, - WindowClause clause, - Map, SerCollector, ?, TT>> collectors, - Map, SerBiFunction, Collection>>> finishers) + public DataSet analytic(SerFunction lineageOp, DataStructureComponent sourceComp, DataStructureComponent destComp, WindowClause clause, + SerFunction extractor, SerCollector collector, SerBiFunction>> finisher) { - return dataset.analytic(DataPoint::getLineage, components, clause, collectors, finishers); + return dataset.analytic(lineageOp, sourceComp, destComp, clause, extractor, collector, finisher); } @Override diff --git a/vtl-session/src/main/java/it/bancaditalia/oss/vtl/impl/session/VTLSessionImpl.java b/vtl-session/src/main/java/it/bancaditalia/oss/vtl/impl/session/VTLSessionImpl.java index 1fbbebd56..edce30b59 100644 --- a/vtl-session/src/main/java/it/bancaditalia/oss/vtl/impl/session/VTLSessionImpl.java +++ b/vtl-session/src/main/java/it/bancaditalia/oss/vtl/impl/session/VTLSessionImpl.java @@ -89,7 +89,7 @@ public class VTLSessionImpl implements VTLSession private static final long serialVersionUID = 1L; private static final Logger LOGGER = LoggerFactory.getLogger(VTLSessionImpl.class); - private final ConfigurationManager config = ConfigurationManagerFactory.getInstance(); + private final ConfigurationManager config = ConfigurationManagerFactory.newManager(); private final Engine engine; private final List environments; private final Workspace workspace; @@ -134,9 +134,9 @@ public VTLValue resolve(String alias) } else return cacheHelper(alias, cache, n -> acquireValue(alias, (e, a) -> e.getValue(repository, a)) - .orElseThrow(() -> new VTLUnboundAliasException(alias))); + .orElseThrow(() -> buildUnboundException(alias, "resolve"))); } - + @Override public VTLValueMetadata getMetadata(String alias) { @@ -155,9 
+155,16 @@ public VTLValueMetadata getMetadata(String alias) } else return cacheHelper(alias, metacache, n -> acquireValue(n, Environment::getValueMetadata) - .orElseThrow(() -> new VTLUnboundAliasException(alias))); + .orElseThrow(() -> buildUnboundException(alias, "getMetadata"))); } + private VTLUnboundAliasException buildUnboundException(String alias, String op) + { + for (Environment env: environments) + LOGGER.warn("Environment {} reported empty value for operation {} with {}", env.getClass().getSimpleName(), op, alias); + return new VTLUnboundAliasException(alias); + } + @Override public boolean contains(String alias) { diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RankTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RankTransformation.java index ac837c91e..871fbe142 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RankTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RankTransformation.java @@ -20,41 +20,41 @@ package it.bancaditalia.oss.vtl.impl.transform.aggregation; import static it.bancaditalia.oss.vtl.impl.transform.scope.ThisScope.THIS; +import static it.bancaditalia.oss.vtl.impl.transform.util.WindowCriterionImpl.DATAPOINTS_UNBOUNDED_PRECEDING_TO_UNBOUNDED_FOLLOWING; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.INTEGERDS; import static it.bancaditalia.oss.vtl.model.transform.analytic.SortCriterion.SortingMethod.DESC; -import static it.bancaditalia.oss.vtl.util.ConcatSpliterator.concatenating; +import static it.bancaditalia.oss.vtl.util.SerCollectors.collectingAndThen; +import static it.bancaditalia.oss.vtl.util.SerCollectors.toArray; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; -import static it.bancaditalia.oss.vtl.util.SerCollectors.toMapWithValues; import static it.bancaditalia.oss.vtl.util.SerCollectors.toSet; 
+import static it.bancaditalia.oss.vtl.util.SerUnaryOperator.identity; import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import static it.bancaditalia.oss.vtl.util.Utils.toEntryWithValue; -import static java.lang.Boolean.TRUE; import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; +import static java.util.Collections.singleton; import static java.util.stream.Collectors.joining; +import java.io.Serializable; import java.util.ArrayList; -import java.util.Comparator; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Optional; import java.util.Set; -import java.util.stream.Stream; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import it.bancaditalia.oss.vtl.exceptions.VTLException; import it.bancaditalia.oss.vtl.exceptions.VTLIncompatibleRolesException; import it.bancaditalia.oss.vtl.exceptions.VTLInvalidParameterException; import it.bancaditalia.oss.vtl.exceptions.VTLMissingComponentsException; import it.bancaditalia.oss.vtl.impl.transform.TransformationImpl; +import it.bancaditalia.oss.vtl.impl.transform.util.SortClause; +import it.bancaditalia.oss.vtl.impl.transform.util.WindowClauseImpl; import it.bancaditalia.oss.vtl.impl.types.data.IntegerValue; -import it.bancaditalia.oss.vtl.impl.types.dataset.DataPointBuilder; import it.bancaditalia.oss.vtl.impl.types.dataset.DataStructureBuilder; -import it.bancaditalia.oss.vtl.impl.types.dataset.FunctionDataSet; import it.bancaditalia.oss.vtl.impl.types.domain.EntireIntegerDomainSubset; import it.bancaditalia.oss.vtl.impl.types.lineage.LineageNode; import it.bancaditalia.oss.vtl.model.data.Component.Identifier; @@ -71,14 +71,35 @@ import it.bancaditalia.oss.vtl.model.domain.IntegerDomain; import it.bancaditalia.oss.vtl.model.transform.LeafTransformation; import 
it.bancaditalia.oss.vtl.model.transform.TransformationScheme; -import it.bancaditalia.oss.vtl.util.Utils; +import it.bancaditalia.oss.vtl.model.transform.analytic.SortCriterion; +import it.bancaditalia.oss.vtl.model.transform.analytic.WindowClause; +import it.bancaditalia.oss.vtl.util.GenericTuple; +import it.bancaditalia.oss.vtl.util.SerCollector; public class RankTransformation extends TransformationImpl implements AnalyticTransformation, LeafTransformation { private static final long serialVersionUID = 1L; private static final DataStructureComponent RANK_MEASURE = INTEGERDS.getDefaultVariable().as(Measure.class); - private final static Logger LOGGER = LoggerFactory.getLogger(RankTransformation.class); +// private final static Logger LOGGER = LoggerFactory.getLogger(RankTransformation.class); + + public static final class RankedPartition extends HashMap + { + private static final long serialVersionUID = 1L; + public final Class[] repr; + public RankedPartition(Class[] repr, int size) + { + super(size); + + this.repr = repr; + } + } + + private static final class PartitionToRank extends ArrayList + { + private static final long serialVersionUID = 1L; + } + private final List partitionBy; private final List orderByClause; private final String lineageDescriptor; @@ -95,16 +116,9 @@ public VTLValue eval(TransformationScheme scheme) { DataSet dataset = (DataSet) scheme.resolve(THIS); - Map, Boolean> ordering; - - if (orderByClause.isEmpty()) - ordering = dataset.getMetadata().getIDs().stream().collect(toMapWithValues(c -> TRUE)); - else - { - ordering = new LinkedHashMap<>(); - for (OrderByItem orderByComponent: orderByClause) - ordering.put(dataset.getComponent(orderByComponent.getName()).get(), DESC != orderByComponent.getMethod()); - } + List ordering = new ArrayList<>(orderByClause.size()); + for (OrderByItem orderByComponent: orderByClause) + ordering.add(new SortClause(dataset.getComponent(orderByComponent.getName()).get(), orderByComponent.getMethod())); Set> 
partitionIDs; if (partitionBy != null) @@ -114,65 +128,64 @@ public VTLValue eval(TransformationScheme scheme) .map(c -> c.asRole(Identifier.class)) .collect(toSet()); else - partitionIDs = dataset.getMetadata().getIDs().stream() - .filter(partitionID -> !ordering.containsKey(partitionID)) - .collect(toSet()); + { + partitionIDs = new HashSet<>(dataset.getMetadata().getIDs()); + for (SortCriterion clause: ordering) + partitionIDs.remove(clause.getComponent()); + } - for (DataStructureComponent orderingComponent: ordering.keySet()) - if (partitionIDs.contains(orderingComponent)) - throw new VTLException("Cannot order by " + orderingComponent.getVariable().getName() + " because the component is used in partition by " + partitionBy); + List> ids = new ArrayList<>(dataset.getMetadata().getIDs()); + WindowClause window = new WindowClauseImpl(partitionIDs, ordering, DATAPOINTS_UNBOUNDED_PRECEDING_TO_UNBOUNDED_FOLLOWING); + DataStructureComponent measure = dataset.getMetadata().getMeasures().iterator().next(); + Class[] repr = ids.stream().map(id -> id.getVariable().getDomain().getRepresentation()) + .collect(toArray(new Class[ids.size()])); + SerCollector, Map> collector = + collectingAndThen(toList(PartitionToRank::new), l -> rankPartition(l, ids, repr)); - // The ordering of the dataset - final Comparator comparator = comparator(ordering); - - // sort each partition with the comparator and then perform the analytic computation on each partition - return new FunctionDataSet<>((DataSetMetadata) getMetadata(scheme), ds -> ds.streamByKeys( - partitionIDs, toList(), (partition, keyValues) -> rankPartition(scheme, partition, keyValues, comparator) - ).collect(concatenating(Utils.ORDERED)), dataset); + return dataset.analytic(dp -> LineageNode.of(lineageDescriptor, dp.getLineage()), measure, + INTEGERDS.getDefaultVariable().as(Measure.class), window, identity(), collector, (r, dp) -> finisher(r, ids, repr, dp)) + .membership("integer_var"); } - private Stream 
rankPartition(TransformationScheme scheme, List partition, Map, ScalarValue> keyValues, Comparator comparator) + private Collection> finisher(Map ranks, List> ids, Class[] repr, DataPoint dp) { - LOGGER.debug("Analytic invocation on partition {}", keyValues); - partition.sort(comparator); - long rank = 1, position = 1; - Map, ScalarValue> oldValues, measureValues = emptyMap(); - List result = new ArrayList<>(partition.size()); + Serializable[] tuple = new Serializable[ids.size()]; + for (int i = 0; i < tuple.length; i++) + tuple[i] = dp.get(ids.get(i)).get(); + return singleton(IntegerValue.of(ranks.get(new GenericTuple(tuple)))); + } + + private RankedPartition rankPartition(List partition, List> ids, + Class[] repr) + { + long rank = 1, position = 1; + Map, ScalarValue> oldValues, orderByValues = emptyMap(); + // Workaround to allow creating a spark encoder to correctly process the tuple + RankedPartition ranks = new RankedPartition(repr, partition.size()); + + // Cannot use streams (perhaps gatherers when java version becomes high enough) for (DataPoint dp: partition) { - oldValues = measureValues; - measureValues = dp.getValuesByNames(orderByClause.stream().map(OrderByItem::getName).collect(toSet())); + oldValues = orderByValues; + orderByValues = dp.getValuesByNames(orderByClause.stream().map(OrderByItem::getName).collect(toSet())); - ScalarValue rankResult; - if (measureValues.equals(oldValues)) - rankResult = IntegerValue.of(rank); + long rankResult; + if (orderByValues.equals(oldValues)) + rankResult = rank; else - // update rank if the new measures are different from the old - rankResult = IntegerValue.of(rank = position); + // update rank if the new measures in the order by clause are different from the old + rankResult = rank = position; position++; - - result.add(new DataPointBuilder(dp.getValues(Identifier.class)) - .add(RANK_MEASURE, rankResult) - .build(LineageNode.of(lineageDescriptor, dp.getLineage()), (DataSetMetadata) getMetadata(scheme))); + + 
Serializable[] tuple = new Serializable[ids.size()]; + for (int i = 0; i < tuple.length; i++) + tuple[i] = dp.get(ids.get(i)).get(); + + ranks.put(new GenericTuple(tuple), rankResult); } - return result.stream(); - } - - - private static Comparator comparator(Map, Boolean> sortMethods) - { - return (dp1, dp2) -> { - for (Entry, Boolean> sortID: sortMethods.entrySet()) - { - int res = dp1.get(sortID.getKey()).compareTo(dp2.get(sortID.getKey())); - if (res != 0) - return sortID.getValue() ? res : -res; - } - - return 0; - }; + return ranks; } protected VTLValueMetadata computeMetadata(TransformationScheme scheme) diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RatioToReportTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RatioToReportTransformation.java index 6be207f25..eb31c8299 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RatioToReportTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/RatioToReportTransformation.java @@ -23,7 +23,6 @@ import static it.bancaditalia.oss.vtl.impl.transform.util.WindowCriterionImpl.DATAPOINTS_UNBOUNDED_PRECEDING_TO_UNBOUNDED_FOLLOWING; import static it.bancaditalia.oss.vtl.impl.types.data.NumberValueImpl.createNumberValue; import static it.bancaditalia.oss.vtl.impl.types.operators.AnalyticOperator.SUM; -import static it.bancaditalia.oss.vtl.util.SerCollectors.toMapWithValues; import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import static java.util.Collections.emptyList; import static java.util.Collections.singleton; @@ -32,7 +31,6 @@ import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Set; import org.slf4j.Logger; @@ -46,9 +44,11 @@ import it.bancaditalia.oss.vtl.impl.types.lineage.LineageNode; import it.bancaditalia.oss.vtl.model.data.Component.Identifier; import 
it.bancaditalia.oss.vtl.model.data.Component.Measure; +import it.bancaditalia.oss.vtl.model.data.DataPoint; import it.bancaditalia.oss.vtl.model.data.DataSet; import it.bancaditalia.oss.vtl.model.data.DataSetMetadata; import it.bancaditalia.oss.vtl.model.data.DataStructureComponent; +import it.bancaditalia.oss.vtl.model.data.Lineage; import it.bancaditalia.oss.vtl.model.data.NumberValue; import it.bancaditalia.oss.vtl.model.data.ScalarValue; import it.bancaditalia.oss.vtl.model.data.ScalarValueMetadata; @@ -60,7 +60,7 @@ import it.bancaditalia.oss.vtl.model.transform.analytic.WindowClause; import it.bancaditalia.oss.vtl.session.MetadataRepository; import it.bancaditalia.oss.vtl.util.SerBiFunction; -import it.bancaditalia.oss.vtl.util.SerCollector; +import it.bancaditalia.oss.vtl.util.SerFunction; public class RatioToReportTransformation extends UnaryTransformation implements AnalyticTransformation { @@ -87,22 +87,23 @@ protected VTLValue evalOnScalar(MetadataRepository repo, ScalarValue protected VTLValue evalOnDataset(MetadataRepository repo, DataSet dataset, VTLValueMetadata metadata) { Set> partitionIDs = dataset.getMetadata().matchIdComponents(partitionBy, "partition by"); + Set> measures = dataset.getMetadata().getMeasures(); + SerFunction lineageOp = dp -> LineageNode.of(this, dp.getLineage()); WindowClause clause = new WindowClauseImpl(partitionIDs, null, DATAPOINTS_UNBOUNDED_PRECEDING_TO_UNBOUNDED_FOLLOWING); + SerBiFunction, ScalarValue, Collection>> finisher = (newV, oldV) -> { + if (newV instanceof NullValue || oldV instanceof NullValue) + return singleton(newV instanceof NullValue ? 
newV : oldV); + else if (newV instanceof NumberValue && oldV instanceof NumberValue) + return singleton(createNumberValue(((NumberValue) oldV).get().doubleValue() / ((NumberValue) newV).get().doubleValue())); + else + throw new UnsupportedOperationException(); + }; + + for (DataStructureComponent measure: measures) + dataset = dataset.analytic(lineageOp, measure, measure, clause, null, SUM.getReducer(measure), finisher); - Map, SerCollector, ?, ScalarValue>> collectors = dataset.getMetadata().getMeasures().stream() - .collect(toMapWithValues(measure -> SUM.getReducer())); - Map, SerBiFunction, ScalarValue, Collection>>> finishers = dataset.getMetadata().getMeasures().stream() - .collect(toMapWithValues(measure -> (newV, oldV) -> { - if (newV instanceof NullValue || oldV instanceof NullValue) - return singleton(newV instanceof NullValue ? newV : oldV); - else if (newV instanceof NumberValue && oldV instanceof NumberValue) - return singleton(createNumberValue(((NumberValue) oldV).get().doubleValue() / ((NumberValue) newV).get().doubleValue())); - else - throw new UnsupportedOperationException(); - })); - - return dataset.analytic(dp -> LineageNode.of(this, dp.getLineage()), dataset.getMetadata().getMeasures(), clause, collectors, finishers); + return dataset; } @Override diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/SimpleAnalyticTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/SimpleAnalyticTransformation.java index edee739c6..823c04a86 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/SimpleAnalyticTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/aggregation/SimpleAnalyticTransformation.java @@ -20,12 +20,12 @@ package it.bancaditalia.oss.vtl.impl.transform.aggregation; import static it.bancaditalia.oss.vtl.impl.transform.scope.ThisScope.THIS; +import static 
it.bancaditalia.oss.vtl.impl.transform.util.WindowCriterionImpl.DATAPOINTS_UNBOUNDED_PRECEDING_TO_UNBOUNDED_FOLLOWING; +import static it.bancaditalia.oss.vtl.impl.transform.util.WindowCriterionImpl.RANGE_UNBOUNDED_PRECEDING_TO_CURRENT; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.INTEGERDS; import static it.bancaditalia.oss.vtl.impl.types.operators.AnalyticOperator.COUNT; -import static it.bancaditalia.oss.vtl.model.transform.analytic.SortCriterion.SortingMethod.ASC; import static it.bancaditalia.oss.vtl.model.transform.analytic.SortCriterion.SortingMethod.DESC; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; -import static it.bancaditalia.oss.vtl.util.SerCollectors.toMapWithValues; import static it.bancaditalia.oss.vtl.util.SerCollectors.toSet; import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import static it.bancaditalia.oss.vtl.util.Utils.keepingValue; @@ -36,7 +36,6 @@ import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Set; @@ -52,7 +51,6 @@ import it.bancaditalia.oss.vtl.impl.types.operators.AnalyticOperator; import it.bancaditalia.oss.vtl.model.data.Component.Identifier; import it.bancaditalia.oss.vtl.model.data.Component.Measure; -import it.bancaditalia.oss.vtl.model.data.Component.NonIdentifier; import it.bancaditalia.oss.vtl.model.data.DataSet; import it.bancaditalia.oss.vtl.model.data.DataSetMetadata; import it.bancaditalia.oss.vtl.model.data.DataStructureComponent; @@ -67,7 +65,6 @@ import it.bancaditalia.oss.vtl.model.transform.analytic.WindowClause; import it.bancaditalia.oss.vtl.model.transform.analytic.WindowCriterion; import it.bancaditalia.oss.vtl.session.MetadataRepository; -import it.bancaditalia.oss.vtl.util.SerCollector; public class SimpleAnalyticTransformation extends UnaryTransformation implements AnalyticTransformation { @@ -101,7 +98,7 @@ protected VTLValue evalOnDataset(MetadataRepository repo, DataSet dataset, VTLVa List 
ordering; if (orderByClause.isEmpty()) ordering = dataset.getMetadata().getIDs().stream() - .map(c -> new SortClause(c, ASC)) + .map(SortClause::new) .collect(toList()); else ordering = orderByClause.stream() @@ -118,12 +115,14 @@ protected VTLValue evalOnDataset(MetadataRepository repo, DataSet dataset, VTLVa if (partitionIDs.contains(orderingComponent)) throw new VTLException("Cannot order by " + orderingComponent.getVariable().getName() + " because the component is used in partition by " + partitionBy); - WindowClause clause = new WindowClauseImpl(partitionIDs, ordering, windowCriterion); - Set> nonIDs = dataset.getMetadata().getComponents(NonIdentifier.class); - Map, SerCollector, ?, ScalarValue>> collectors = nonIDs.stream() - .collect(toMapWithValues(k -> aggregation.getReducer())); + WindowCriterion criterion = windowCriterion != null ? windowCriterion : orderByClause.isEmpty() + ? RANGE_UNBOUNDED_PRECEDING_TO_CURRENT : DATAPOINTS_UNBOUNDED_PRECEDING_TO_UNBOUNDED_FOLLOWING; + WindowClause clause = new WindowClauseImpl(partitionIDs, ordering, criterion); - return dataset.analytic(dp -> LineageNode.of(this, dp.getLineage()), nonIDs, clause, collectors); + for (DataStructureComponent measure: dataset.getMetadata().getMeasures()) + dataset = dataset.analytic(dp -> LineageNode.of(this, dp.getLineage()), measure, clause, aggregation.getReducer(measure)); + + return dataset; } @Override diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/bool/ComparisonTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/bool/ComparisonTransformation.java index bb2161dc1..76deb4ec2 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/bool/ComparisonTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/bool/ComparisonTransformation.java @@ -23,8 +23,6 @@ import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.BOOLEANDS; import static 
java.util.Collections.singletonMap; -import java.util.function.Function; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,6 +47,7 @@ import it.bancaditalia.oss.vtl.model.domain.ValueDomainSubset; import it.bancaditalia.oss.vtl.model.transform.Transformation; import it.bancaditalia.oss.vtl.util.SerBinaryOperator; +import it.bancaditalia.oss.vtl.util.SerFunction; public class ComparisonTransformation extends BinaryTransformation { @@ -96,7 +95,7 @@ protected DataSet evalDatasetWithScalar(VTLValueMetadata metadata, boolean datas else castedScalar = scalar; - Function> extractor; + SerFunction> extractor; if (castToLeft) if (datasetIsLeftOp) extractor = dp -> operator.apply(dp.get(measure), castedScalar); diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/dataset/FlowStockTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/dataset/FlowStockTransformation.java index e64aa02fb..f679f24ef 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/dataset/FlowStockTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/dataset/FlowStockTransformation.java @@ -26,6 +26,7 @@ import static it.bancaditalia.oss.vtl.util.SerCollectors.toConcurrentMap; import static it.bancaditalia.oss.vtl.util.SerCollectors.toMapWithValues; import static it.bancaditalia.oss.vtl.util.SerCollectors.toSet; +import static java.util.Collections.emptyMap; import java.util.HashSet; import java.util.Map; @@ -101,7 +102,7 @@ public Stream apply(DataSet ds, DataStructureComponent> ids = new HashSet<>(ds.getMetadata().getIDs()); ids.remove(timeid); - return ds.streamByKeys(ids, toConcurrentMap(i -> i, i -> true, (a, b) -> a, () -> new ConcurrentSkipListMap<>(DataPoint.compareBy(timeid)))) + return ds.streamByKeys(ids, emptyMap(), toConcurrentMap(i -> i, i -> true, (a, b) -> a, () -> new ConcurrentSkipListMap<>(DataPoint.compareBy(timeid))), (a, b) -> a) 
.map(Map::keySet) .map(group -> { Map, ScalarValue> acc = new ConcurrentHashMap<>(); diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/number/NumericUnaryTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/number/NumericUnaryTransformation.java index bfcc72606..1abe2b0f1 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/number/NumericUnaryTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/number/NumericUnaryTransformation.java @@ -130,14 +130,12 @@ protected VTLValue evalOnScalar(MetadataRepository repo, ScalarValue @Override protected VTLValue evalOnDataset(MetadataRepository repo, DataSet dataset, VTLValueMetadata metadata) { - Set> components = dataset.getMetadata().getMeasures(); - return dataset.mapKeepingKeys(dataset.getMetadata(), dp -> LineageNode.of(this, dp.getLineage()), dp -> { - Map, ScalarValue> map = new HashMap<>(dp.getValues(components)); - map.replaceAll((c, v) -> operator.apply(NUMBERDS.cast(v))); - map.putAll(dp.getValues(Attribute.class)); - return map; - }); + Map, ScalarValue> map = new HashMap<>(dp.getValues(Measure.class)); + map.replaceAll((c, v) -> operator.apply(NUMBERDS.cast(v))); + map.putAll(dp.getValues(Attribute.class)); + return map; + }); } @Override diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/ops/CheckHierarchyTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/ops/CheckHierarchyTransformation.java index 95c13e77d..6c133c11a 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/ops/CheckHierarchyTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/ops/CheckHierarchyTransformation.java @@ -40,6 +40,7 @@ import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import static 
java.lang.Double.NaN; +import static java.util.Collections.emptyMap; import static java.util.Objects.requireNonNull; import java.io.Serializable; @@ -149,7 +150,7 @@ public VTLValue eval(TransformationScheme scheme) DataSetMetadata newStructure = (DataSetMetadata) this.getMetadata(scheme); var finisher = new Finisher(allRules, idComp, measure, newStructure); - List results = dataset.streamByKeys(ids, toConcurrentMap(dp -> dp.get(idComp), dp -> dp.get(measure)), finisher::finisher) + List results = dataset.streamByKeys(ids, emptyMap(), toConcurrentMap(dp -> dp.get(idComp), dp -> dp.get(measure)), finisher::finisher) .map(Utils::getStream) .collect(concatenating(false)) .collect(toList()); diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformation.java index a751ccb74..727898201 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformation.java @@ -22,6 +22,7 @@ import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.STRING; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.STRINGDS; import static it.bancaditalia.oss.vtl.util.SerCollectors.entriesToMap; +import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import static java.util.Collections.singletonMap; import java.util.AbstractMap.SimpleEntry; @@ -33,7 +34,6 @@ import it.bancaditalia.oss.vtl.exceptions.VTLException; import it.bancaditalia.oss.vtl.exceptions.VTLIncompatibleTypesException; import it.bancaditalia.oss.vtl.impl.transform.BinaryTransformation; -import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.impl.types.data.StringValue; import it.bancaditalia.oss.vtl.impl.types.dataset.DataPointBuilder; import 
it.bancaditalia.oss.vtl.impl.types.dataset.DataStructureBuilder; @@ -60,9 +60,12 @@ public class ConcatTransformation extends BinaryTransformation { private static final long serialVersionUID = 1L; - private static final SerBinaryOperator> CONCAT = (l, r) -> l instanceof NullValue || r instanceof NullValue - ? NullValue.instance(STRINGDS) - : StringValue.of(l.get().toString() + r.get().toString()); + private static final SerBinaryOperator> CONCAT = ConcatTransformation::concat; + + private static ScalarValue concat(ScalarValue l, ScalarValue r) + { + return StringValue.of(new StringBuilder().append(coalesce(l.get(), "")).append(coalesce(r.get(), "")).toString()); + } public ConcatTransformation(Transformation left, Transformation right) { @@ -72,7 +75,7 @@ public ConcatTransformation(Transformation left, Transformation right) @Override protected ScalarValue evalTwoScalars(VTLValueMetadata metadata, ScalarValue left, ScalarValue right) { - return CONCAT.apply(STRINGDS.cast(left), STRINGDS.cast(right)); + return concat(STRINGDS.cast(left), STRINGDS.cast(right)); } @Override diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ReplaceTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ReplaceTransformation.java index c2f65bd30..f9e206683 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ReplaceTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/string/ReplaceTransformation.java @@ -42,9 +42,11 @@ import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.STRINGDS; import static it.bancaditalia.oss.vtl.util.SerCollectors.entriesToMap; import static it.bancaditalia.oss.vtl.util.SerCollectors.toSet; +import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import java.util.AbstractMap.SimpleEntry; import java.util.HashSet; +import java.util.Optional; import java.util.Set; import java.util.regex.Pattern; @@ 
-55,7 +57,6 @@ import it.bancaditalia.oss.vtl.impl.transform.TransformationImpl; import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.impl.types.data.StringValue; -import it.bancaditalia.oss.vtl.impl.types.domain.EntireStringDomainSubset; import it.bancaditalia.oss.vtl.impl.types.lineage.LineageNode; import it.bancaditalia.oss.vtl.model.data.Component.Measure; import it.bancaditalia.oss.vtl.model.data.DataSet; @@ -77,8 +78,6 @@ public class ReplaceTransformation extends TransformationImpl private final Transformation exprOperand; private final Transformation patternOperand; private final Transformation replaceOperand; - - private transient Pattern storedPattern; public ReplaceTransformation(Transformation expr, Transformation pattern, Transformation replace) { @@ -91,13 +90,10 @@ public ReplaceTransformation(Transformation expr, Transformation pattern, Transf public VTLValue eval(TransformationScheme session) { VTLValue left = exprOperand.eval(session); - ScalarValue replace = STRINGDS.cast((ScalarValue) replaceOperand.eval(session)); - - if (storedPattern == null) - { - ScalarValue pattern = STRINGDS.cast((ScalarValue) patternOperand.eval(session)); - storedPattern = pattern instanceof NullValue ? 
null : Pattern.compile(STRINGDS.cast(pattern).get().toString()); - } + + String replace = (String) coalesce(STRINGDS.cast((ScalarValue) replaceOperand.eval(session)).get(), ""); + String pattern = (String) coalesce(STRINGDS.cast((ScalarValue) patternOperand.eval(session)).get(), ""); + Pattern storedPattern = Pattern.compile(pattern); if (left instanceof DataSet) { @@ -106,24 +102,31 @@ public VTLValue eval(TransformationScheme session) Set> measures = dataset.getMetadata().getMeasures(); String lineageString = "replace " + storedPattern + " with " + replace; - return dataset.mapKeepingKeys(structure, dp -> LineageNode.of(lineageString, dp.getLineage()), - dp -> measures.stream() - .map(measure -> new SimpleEntry<>(measure, (storedPattern == null || dp.get(measure) instanceof NullValue) - ? STRINGDS.cast(NullValue.instance(STRINGDS)) - : ((StringValue) dp.get(measure)).map(value -> storedPattern.matcher(value).replaceAll(replace.get().toString())) - )).collect(entriesToMap()) + return dataset.mapKeepingKeys(structure, dp -> LineageNode.of(lineageString, dp.getLineage()), dp -> measures.stream() + .map(measure -> new SimpleEntry<>(measure, replaceSingle(replace, storedPattern, dp.get(measure)))) + .collect(entriesToMap()) ); } else { - ScalarValue scalar = (ScalarValue) left; - if (left instanceof NullValue || storedPattern == null) + String scalar = (String) ((ScalarValue) left).get(); + if (scalar == null || storedPattern == null) return NullValue.instance(STRINGDS); - return StringValue.of(storedPattern.matcher(scalar.get().toString()).replaceAll(replace.get().toString())); + return StringValue.of(storedPattern.matcher(scalar).replaceAll(replace)); } } + private ScalarValue replaceSingle(String replace, Pattern storedPattern, ScalarValue scalar) + { + Optional> replaced = Optional.ofNullable(scalar.get()) + .map(Object::toString) + .map(s -> storedPattern.matcher(s).replaceAll(replace)) + .map(StringValue::of); + + return replaced.orElseGet(() -> 
NullValue.instance(STRINGDS)); + } + @Override protected VTLValueMetadata computeMetadata(TransformationScheme session) { diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformation.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformation.java index 30215847c..8e6e2f9c3 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformation.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformation.java @@ -28,13 +28,17 @@ import static it.bancaditalia.oss.vtl.model.transform.analytic.SortCriterion.SortingMethod.ASC; import static it.bancaditalia.oss.vtl.model.transform.analytic.WindowCriterion.LimitType.DATAPOINTS; import static it.bancaditalia.oss.vtl.util.ConcatSpliterator.concatenating; +import static it.bancaditalia.oss.vtl.util.SerCollectors.mapping; import static it.bancaditalia.oss.vtl.util.SerCollectors.toCollection; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; import static it.bancaditalia.oss.vtl.util.SerCollectors.toMapWithValues; import static it.bancaditalia.oss.vtl.util.SerPredicate.not; +import static it.bancaditalia.oss.vtl.util.Utils.coalesce; +import static java.time.temporal.ChronoUnit.DAYS; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -98,6 +102,11 @@ public String toString() return name; } } + + private static class ListOfDateValues extends ArrayList> + { + private static final long serialVersionUID = 1L; + } private final FillMode mode; @@ -130,7 +139,7 @@ protected VTLValue evalOnDataset(MetadataRepository repo, DataSet ds, VTLValueMe return new FunctionDataSet<>(structure, dataset -> { String alias = ds instanceof NamedDataSet ? 
((NamedDataSet) ds).getAlias() : "Unnamed data set"; LOGGER.debug("Filling time series for {}", alias); - Stream result = dataset.streamByKeys(ids, toCollection(() -> new ConcurrentSkipListSet<>(compareBy(timeID))), + Stream result = dataset.streamByKeys(ids, emptyMap(), toCollection(() -> new ConcurrentSkipListSet<>(compareBy(timeID))), seriesFiller(structure, timeID, nullFiller, min.get(), max.get())) .map(Utils::getStream) .collect(concatenating(Utils.ORDERED)); @@ -141,16 +150,16 @@ protected VTLValue evalOnDataset(MetadataRepository repo, DataSet ds, VTLValueMe else { // a function that fills holes between two dates (old is ignored) - final SerBiFunction>, ScalarValue, Collection>> timeFinisher = (pair, old) -> { - if (pair.size() == 1) - return pair; + final SerBiFunction>, ScalarValue, Collection>> timeFinisher = (pair, old) -> { + if (pair.size() < 2) + return List.of(pair.get(0)); List> result = new ArrayList<>(); // TODO: Cast exception if not date - DateValue end = (DateValue) pair.get(1); + LocalDate end = pair.get(1).get(); // End-exclusive - for (DateValue start = (DateValue) pair.get(0); start.compareTo(end) < 0; start = start.increment(1)) - result.add(start); + for (LocalDate start = pair.get(0).get(); start.compareTo(end) < 0; start = start.plus(1, DAYS)) + result.add(DateValue.of(start)); return result; }; @@ -163,8 +172,9 @@ protected VTLValue evalOnDataset(MetadataRepository repo, DataSet ds, VTLValueMe // Remove all measures and attributes then left-join the time-filled dataset with the old one return ds.mapKeepingKeys(timeStructure, DataPoint::getLineage, dp -> emptyMap()) - .analytic(dp -> LineageNode.of(this, dp.getLineage()), timeID, windowClause, toList(), timeFinisher) - .mappedJoin(structure, ds, (a, b) -> Utils.coalesce(b, fill(a, structure)), true); + .analytic(dp -> LineageNode.of(this, dp.getLineage()), timeID, timeID, windowClause, null, + mapping(v -> (DateValue) v, toList(ListOfDateValues::new)), timeFinisher) + 
.mappedJoin(structure, ds, (a, b) -> coalesce(b, fill(a, structure)), true); } } diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/SortClause.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/SortClause.java index 9a19891ea..4d4f66562 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/SortClause.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/SortClause.java @@ -34,6 +34,11 @@ public class SortClause implements SortCriterion, Serializable private final DataStructureComponent component; private final SortingMethod method; + public SortClause(DataStructureComponent component) + { + this(component, ASC); + } + public SortClause(DataStructureComponent component, SortingMethod method) { this.component = component; diff --git a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/WindowCriterionImpl.java b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/WindowCriterionImpl.java index 1914d4038..b008e2463 100644 --- a/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/WindowCriterionImpl.java +++ b/vtl-transform/src/main/java/it/bancaditalia/oss/vtl/impl/transform/util/WindowCriterionImpl.java @@ -32,8 +32,8 @@ public class WindowCriterionImpl implements WindowCriterion, Serializable { - public static final WindowCriterion DATAPOINTS_UNBOUNDED_PRECEDING_TO_CURRENT = - new WindowCriterionImpl(DATAPOINTS, UNBOUNDED_PRECEDING, CURRENT_DATA_POINT); + public static final WindowCriterion RANGE_UNBOUNDED_PRECEDING_TO_CURRENT = + new WindowCriterionImpl(RANGE, UNBOUNDED_PRECEDING, CURRENT_DATA_POINT); public static final WindowCriterion DATAPOINTS_UNBOUNDED_PRECEDING_TO_UNBOUNDED_FOLLOWING = new WindowCriterionImpl(DATAPOINTS, UNBOUNDED_PRECEDING, UNBOUNDED_FOLLOWING); diff --git a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformationTest.java 
b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformationTest.java index e661274cb..c745c5966 100644 --- a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformationTest.java +++ b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/ConcatTransformationTest.java @@ -51,9 +51,9 @@ public static Stream test() // "Filled String", " Leading spaces", "Trailing spaces ", " Leading and trailing ", "\"Quoted\" 'String'", "\t\b \n\r\f" return Stream.of( Arguments.of(SAMPLE13, SAMPLE14, new String[] { "Filled StringA", " Leading spacesC", "Trailing spaces E", " Leading and trailing G", "\"Quoted\" 'String'I", "\t\b \n\r\fK" }), - Arguments.of(SAMPLE13, SAMPLE15, new String[] { "Filled StringK", " Leading spacesC", "Trailing spaces G", null, "\"Quoted\" 'String'A", "\t\b \n\r\fE" }), - Arguments.of(SAMPLE14, SAMPLE15, new String[] { "AK", "CC", "EG", null, "IA", "KE" }) - ); + Arguments.of(SAMPLE13, SAMPLE15, new String[] { "Filled StringK", " Leading spacesC", "Trailing spaces G", " Leading and trailing ", "\"Quoted\" 'String'A", "\t\b \n\r\fE" }), + Arguments.of(SAMPLE14, SAMPLE15, new String[] { "AK", "CC", "EG", "G", "IA", "KE" }) + ); } @ParameterizedTest(name = "{0} || {1}") diff --git a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/SubstrTransformationTest.java b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/SubstrTransformationTest.java index ca4f1da86..5182f73ca 100644 --- a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/SubstrTransformationTest.java +++ b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/string/SubstrTransformationTest.java @@ -53,8 +53,8 @@ public static Stream test() return Stream.of( Arguments.of(SAMPLE13, null, null, new String[] { "Filled String", " Leading spaces", "Trailing spaces ", " Leading and trailing ", "\"Quoted\" 'String'", "\t\b \n\r\f" }), 
Arguments.of(SAMPLE13, null, 5L, new String[] { "Fille", " L", "Trail", " L", "\"Quot", "\t\b \n\r" }), - Arguments.of(SAMPLE13, 13L, null, new String[] { "g", "spaces", "ces ", "and trailing ", "ring'", "" }), - Arguments.of(SAMPLE13, 13L, 5L, new String[] { "g", "space", "ces ", "and t", "ring'", "" }) + Arguments.of(SAMPLE13, 13L, null, new String[] { "g", "spaces", "ces ", "and trailing ", "ring'", null }), + Arguments.of(SAMPLE13, 13L, 5L, new String[] { "g", "space", "ces ", "and t", "ring'", null }) ); } diff --git a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/testutils/TestUtils.java b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/testutils/TestUtils.java index 4f18ab474..e31d3face 100644 --- a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/testutils/TestUtils.java +++ b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/testutils/TestUtils.java @@ -73,10 +73,7 @@ public static DataSet concat(DataSet... 
samples) @Override protected Stream streamDataPoints() { - return Arrays.stream(samples) - .map(DataSet::stream) - .reduce(Stream::concat) - .get(); + return Arrays.stream(samples).flatMap(DataSet::stream); } }; } diff --git a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformationTest.java b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformationTest.java index c1eccd183..cb17458e4 100644 --- a/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformationTest.java +++ b/vtl-transform/src/test/java/it/bancaditalia/oss/vtl/impl/transform/time/FillTimeSeriesTransformationTest.java @@ -31,6 +31,7 @@ import static it.bancaditalia.oss.vtl.util.SerCollectors.groupingByConcurrent; import static it.bancaditalia.oss.vtl.util.SerCollectors.maxBy; import static it.bancaditalia.oss.vtl.util.SerCollectors.minBy; +import static it.bancaditalia.oss.vtl.util.SerPredicate.not; import static java.util.stream.Collectors.groupingBy; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -52,6 +53,7 @@ import it.bancaditalia.oss.vtl.impl.transform.testutils.TestUtils; import it.bancaditalia.oss.vtl.impl.transform.time.FillTimeSeriesTransformation.FillMode; import it.bancaditalia.oss.vtl.impl.types.data.DateValue; +import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.model.data.Component.Identifier; import it.bancaditalia.oss.vtl.model.data.DataPoint; import it.bancaditalia.oss.vtl.model.data.DataSet; @@ -91,13 +93,12 @@ public void test(String name, DataSet sample, FillMode mode, int expectedSize) session = TestUtils.mockSession(map); FillTimeSeriesTransformation ftsTransformation = new FillTimeSeriesTransformation(new VarIDOperand("operand"), mode); - DataSet computedResult = (DataSet) ftsTransformation.eval(session); - assertEquals(expectedSize, 
computedResult.size(), "Dataset size"); - DataStructureComponent time_id = computedResult.getMetadata().getComponent("date_1", Identifier.class, DATEDS).get(); DataStructureComponent string_id = computedResult.getMetadata().getComponent("string_1", Identifier.class, STRINGDS).get(); - + + assertEquals(expectedSize, computedResult.size(), "Number of datapoints"); + if (mode == SINGLE) { Collection> splitResult = computedResult.stream().sequential() @@ -128,8 +129,10 @@ public void test(String name, DataSet sample, FillMode mode, int expectedSize) long nSeries = results.values().stream().mapToLong(Long::longValue).max().getAsLong(); ScalarValue min = Utils.getStream(results.keySet()) + .filter(not(NullValue.class::isInstance)) .collect(collectingAndThen(minBy(ScalarValue::compareTo), Optional::get)); ScalarValue max = Utils.getStream(results.keySet()) + .filter(not(NullValue.class::isInstance)) .collect(collectingAndThen(maxBy(ScalarValue::compareTo), Optional::get)); DateValue curr = (DateValue) min; diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/config/VTLPropertyImpl.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/config/VTLPropertyImpl.java index d63bea617..dd73c3142 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/config/VTLPropertyImpl.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/config/VTLPropertyImpl.java @@ -27,6 +27,7 @@ import java.security.InvalidParameterException; import java.util.Arrays; import java.util.EnumSet; +import java.util.Objects; import it.bancaditalia.oss.vtl.config.VTLProperty; @@ -77,9 +78,9 @@ public String getValue() } @Override - public void setValue(String newValue) + public void setValue(Object newValue) { - value = newValue; + value = Objects.toString(newValue); hasValue = true; } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/DateValue.java 
b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/DateValue.java index 863cda83b..8fdbb5c93 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/DateValue.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/DateValue.java @@ -61,7 +61,7 @@ public int compareTo(ScalarValue o) if (o instanceof DateValue) return get().compareTo(((DateValue) o).get()); else - throw new UnsupportedOperationException(); + throw new UnsupportedOperationException("Cannot compare DATE to " + o.getClass()); } @Override diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/StringValue.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/StringValue.java index a491cee99..aa93c0228 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/StringValue.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/data/StringValue.java @@ -35,10 +35,15 @@ public class StringValue, S extends StringDomainSubs public static ScalarValue of(String value) { - return value == null ? NULL_INSTANCE : new StringValue<>(value, STRINGDS); + return value == null || value.isEmpty() ? NULL_INSTANCE : new StringValue<>(value, STRINGDS); } - public StringValue(String value, S domain) + public static > ScalarValue of(String value, S domain) + { + return value == null || value.isEmpty() ? 
domain.cast(NULL_INSTANCE) : new StringValue<>(value, domain); + } + + protected StringValue(String value, S domain) { super(value, domain); } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AbstractDataSet.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AbstractDataSet.java index 443b0739a..3d180d6c1 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AbstractDataSet.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AbstractDataSet.java @@ -302,21 +302,18 @@ private synchronized void createCache(Set DataSet analytic(SerFunction lineageOp, - Map, ? extends DataStructureComponent> components, - WindowClause clause, - Map, SerCollector, ?, TT>> collectors, - Map, SerBiFunction, Collection>>> finishers) + public DataSet analytic(SerFunction lineageOp, DataStructureComponent sourceComp, DataStructureComponent destComp, WindowClause clause, + SerFunction extractor, SerCollector collector, SerBiFunction>> finisher) { if (clause.getWindowCriterion() != null && clause.getWindowCriterion().getType() == RANGE) throw new UnsupportedOperationException("Range windows are not implemented in analytic invocation"); DataSetMetadata newStructure = new DataStructureBuilder(getMetadata()) - .removeComponents(components.keySet()) - .addComponents(components.values()) + .removeComponent(sourceComp) + .addComponents(destComp) .build(); - return new AnalyticDataSet<>(this, newStructure, lineageOp, clause, collectors, finishers, components); + return new AnalyticDataSet<>(this, newStructure, lineageOp, clause, sourceComp, destComp, extractor, collector, finisher); } @Override diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AnalyticDataSet.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AnalyticDataSet.java index e298f2a41..b6ff91b73 100644 --- 
a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AnalyticDataSet.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/AnalyticDataSet.java @@ -23,27 +23,20 @@ import static it.bancaditalia.oss.vtl.model.transform.analytic.SortCriterion.SortingMethod.ASC; import static it.bancaditalia.oss.vtl.util.ConcatSpliterator.concatenating; import static it.bancaditalia.oss.vtl.util.SerCollectors.collectingAndThen; -import static it.bancaditalia.oss.vtl.util.SerCollectors.entriesToMap; import static it.bancaditalia.oss.vtl.util.SerCollectors.groupingByConcurrent; import static it.bancaditalia.oss.vtl.util.SerCollectors.teeing; -import static it.bancaditalia.oss.vtl.util.SerCollectors.toConcurrentSet; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; import static it.bancaditalia.oss.vtl.util.Utils.ORDERED; +import static it.bancaditalia.oss.vtl.util.Utils.coalesce; import static it.bancaditalia.oss.vtl.util.Utils.splitting; -import static it.bancaditalia.oss.vtl.util.Utils.toEntryWithValue; import static java.lang.Math.max; import static java.lang.Math.min; -import static java.util.Collections.singletonMap; -import static java.util.stream.Collector.Characteristics.UNORDERED; import java.lang.ref.SoftReference; import java.util.AbstractMap.SimpleEntry; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -72,7 +65,7 @@ import it.bancaditalia.oss.vtl.util.SerFunction; import it.bancaditalia.oss.vtl.util.Utils; -public final class AnalyticDataSet extends AbstractDataSet +public final class AnalyticDataSet extends AbstractDataSet { private static final long serialVersionUID = 1L; private static final Logger LOGGER = LoggerFactory.getLogger(AnalyticDataSet.class); @@ -82,28 +75,33 @@ public final class AnalyticDataSet extends 
AbstractDataSet private final DataSet source; private final Set> partitionIds; private final Comparator orderBy; - private final Map, SerCollector, ?, TT>> collectors; - private final Map, SerBiFunction, Collection>>> finishers; - private final Map, ? extends DataStructureComponent> components; private final int inf; private final int sup; private final SerFunction lineageOp; - + private final DataStructureComponent srcComponent; + private final DataStructureComponent destComponent; + private final SerFunction extractor; + private final SerCollector collector; + private final SerBiFunction>> finisher; + private final transient WeakHashMap>> cache; - public AnalyticDataSet(DataSet source, DataSetMetadata newStructure, SerFunction lineageOp, WindowClause clause, - Map, SerCollector, ?, TT>> collectors, - Map, SerBiFunction, Collection>>> finishers, - Map, ? extends DataStructureComponent> components) + public AnalyticDataSet(DataSet source, DataSetMetadata structure, SerFunction lineageOp, WindowClause clause, + DataStructureComponent srcComponent, DataStructureComponent destComponent, + SerFunction extractor, + SerCollector collector, + SerBiFunction>> finisher) { - super(newStructure); + super(structure); this.source = source; this.lineageOp = lineageOp; + this.srcComponent = srcComponent; + this.destComponent = destComponent; + this.extractor = coalesce(extractor, dp -> (T) dp.get(srcComponent)); + this.collector = collector; + this.finisher = coalesce(finisher, (a, b) -> Set.of((ScalarValue) a)); this.partitionIds = clause.getPartitioningIds(); - this.collectors = collectors; - this.finishers = finishers; - this.components = components; this.orderBy = (dp1, dp2) -> { for (SortCriterion criterion: clause.getSortCriteria()) @@ -130,7 +128,7 @@ public AnalyticDataSet(DataSet source, DataSetMetadata newStructure, SerFunction } this.cache = CACHES.computeIfAbsent(new SimpleEntry<>(partitionIds, clause.getSortCriteria()), c -> { - LOGGER.info("Creating cache for 
partitioning {}@{} with clause #{}", source.getClass().getSimpleName(), source.hashCode(), clause.hashCode()); + LOGGER.info("Creating cache for partitioning {}{} with clause #{}", source.getClass().getSimpleName(), source.getMetadata(), clause.hashCode()); return new WeakHashMap<>(); }); } @@ -201,107 +199,54 @@ private Stream applyToPartition(DataPoint[] partition) return indexes.mapToObj(index -> applyToWindow(partition, index)) .map(splitting(AnalyticDataSet::explode)) .collect(concatenating(ORDERED)) - .map(splitting((values, dp) -> new DataPointBuilder(dp) - .delete(components.keySet()) - .addAll(values) + .map(splitting((value, dp) -> new DataPointBuilder(dp) + .delete(srcComponent) + .add(destComponent, value) .build(lineageOp.apply(dp), getMetadata()))); } // Explode the collections resulting from the application of the window function to single components - private static Stream, ScalarValue>, DataPoint>> explode( - Map, Collection>> colls, DataPoint original) + private static Stream, DataPoint>> explode( + Collection> collected, DataPoint original) { // Shortcut when analytic mapping is 1:1 - if (Utils.getStream(colls.values()).allMatch(coll -> coll.size() == 1)) - { - return Stream.of(new SimpleEntry<>(Utils.getStream(colls) - .map(Utils.keepingKey(coll -> coll.iterator().next())) - .collect(entriesToMap()), original)); - } - - final Stream, Collection>>> stream = Utils.getStream(colls); - Set, ScalarValue>> collected = stream.collect( - SerCollector.of(() -> (Set, ScalarValue>>) new HashSet, ScalarValue>>(), - (acc, cEntry) -> { - DataStructureComponent comp = cEntry.getKey(); - if (!acc.isEmpty()) - { - Set, ScalarValue>> accBefore = new HashSet<>(acc); - acc.clear(); - for (ScalarValue cVal: cEntry.getValue()) - accBefore.forEach(map -> { - map = new HashMap<>(map); - map.put(cEntry.getKey(), cVal); - acc.add(map); - }); - } - else - acc.addAll(cEntry.getValue().stream() - ., ScalarValue>>map(cVal -> singletonMap(comp, cVal)) - 
.collect(toConcurrentSet())); - }, (accLeft, accRight) -> { - if (accLeft.isEmpty() && accRight.isEmpty()) - return accLeft; - else if (accLeft.isEmpty()) - return accRight; - else if (accRight.isEmpty()) - return accLeft; - - // Merge each of the maps on the left with each of the maps on the right - return Utils.getStream(accLeft) - .map(mapLeft -> Utils.getStream(accRight) - .map(mapRight -> { - HashMap, ScalarValue> result = new HashMap<>(mapLeft); - result.putAll(mapRight); - return result; - }) - ).collect(concatenating(ORDERED)) - .collect(toConcurrentSet()); - }, EnumSet.of(UNORDERED))); - - if (LOGGER.isTraceEnabled()) - LOGGER.trace("Analytic produced {} for datapoint {}:", collected, original); + if (collected.size() == 1) + return Stream.of(new SimpleEntry<>(collected.iterator().next(), original)); return Utils.getStream(collected) - .map(toEntryWithValue(map -> original)); + .map(v -> new SimpleEntry<>(v, original)); } - private Entry, Collection>>, DataPoint> applyToWindow(DataPoint[] partition, int index) + private Entry>, DataPoint> applyToWindow(DataPoint[] partition, int index) { int safeInf = max(0, safeSum(index, inf)); int safeSup = 1 + min(partition.length - 1, safeSum(index, sup)); - Map, Stream>> windows = new HashMap<>(); - for (DataStructureComponent component: components.keySet()) - { - Stream> stream = safeInf < safeSup ? Arrays.stream(partition, safeInf, safeSup).map(dp -> dp.get(component)) : Stream.empty(); - if (!Utils.SEQUENTIAL) - stream = stream.parallel(); - windows.put(component, stream); - } + Stream window = safeInf < safeSup ? 
Arrays.stream(partition, safeInf, safeSup) : Stream.empty(); + if (!Utils.SEQUENTIAL) + window = window.parallel(); LOGGER.trace("\tAnalysis over {} datapoints for datapoint {}", safeSup - safeInf, partition[index]); + var processed = processSingleComponent(partition[index], extractor, collector, finisher, destComponent, window); - final Map, Collection>> atIndex = Utils.getStream(components) - .map(splitting((oldC, newC) -> { - // get the array slice containing all the datapoints in current window - Stream> window = windows.get(oldC); - - // Collector to compute the invocation over current range for the specified component - SerCollector, ?, Collection>> collector = collectingAndThen(collectors.get(oldC), - v -> finishers.get(oldC).apply(v, partition[index].get(oldC))); - - if (LOGGER.isTraceEnabled()) - collector = teeing(toList(), collector, (source, result) -> { - LOGGER.trace("Result on component {} with values {} yield {}", newC, source, result); - return result; - }); - - // Pair the result with the new measure - return new SimpleEntry<>(newC, window.collect(collector)); - })).collect(entriesToMap()); + return new SimpleEntry<>(processed, partition[index]); + } + + private Collection> processSingleComponent(DataPoint dp, SerFunction extractor, + SerCollector collector, SerBiFunction>> finisher, DataStructureComponent newC, Stream window) + { + // Collector to compute the invocation over current range for the specified component + T extracted = extractor.apply(dp); + SerCollector>> withFinisher = collectingAndThen(collector, v -> finisher.apply(v, extracted)); - return new SimpleEntry<>(atIndex, partition[index]); + if (LOGGER.isTraceEnabled()) + withFinisher = teeing(toList(), withFinisher, (source, result) -> { + LOGGER.trace("Result on component {} with values {} yield {}", newC, source, result); + return result; + }); + + // Pair the result with the new measure + return window.map(extractor).collect(withFinisher); } protected static int safeInc(int a) @@ 
-310,7 +255,7 @@ protected static int safeInc(int a) } /* - * Detects overf)lows in sum and caps it to Integer.MAX_VALUE + * Detects overflows in sum and caps it to Integer.MAX_VALUE */ protected static int safeSum(int x, int y) { diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/DataStructureBuilder.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/DataStructureBuilder.java index 4e404d057..e3f34ce8a 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/DataStructureBuilder.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/dataset/DataStructureBuilder.java @@ -160,6 +160,10 @@ private DataStructureImpl(Set> components) this.byRole = components.stream() .collect(groupingBy(DataStructureComponent::getRole, DataStructureBuilder::createEmptyStructure, toSet())); this.byRole.get(Attribute.class).addAll(byRole.get(ViralAttribute.class)); + + int totalSize = byRole.values().stream().mapToInt(Collection::size).sum(); + if (totalSize != components.size()) + throw new IllegalStateException(totalSize + " != " + components.size()); } @Override diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/CriterionDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/CriterionDomainSubset.java index 894dcb94a..454dd4f6a 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/CriterionDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/CriterionDomainSubset.java @@ -19,6 +19,8 @@ */ package it.bancaditalia.oss.vtl.impl.types.domain; +import java.io.Serializable; + import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.model.data.ScalarValue; import it.bancaditalia.oss.vtl.model.domain.DescribedDomainSubset; @@ -115,4 +117,10 @@ else if (!parent.equals(other.parent)) return false; return true; } + + @Override + public Class 
getRepresentation() + { + return parent.getRepresentation(); + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireBooleanDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireBooleanDomainSubset.java index 67d2b33be..d0fd3fece 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireBooleanDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireBooleanDomainSubset.java @@ -21,6 +21,8 @@ import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.BOOLEANDS; +import java.io.Serializable; + import it.bancaditalia.oss.vtl.exceptions.VTLCastException; import it.bancaditalia.oss.vtl.impl.types.data.BooleanValue; import it.bancaditalia.oss.vtl.impl.types.data.IntegerValue; @@ -132,4 +134,11 @@ public Variable getDefaultVariable() { return new BooleanVariable("bool_var"); } + + + @Override + public Class getRepresentation() + { + return Boolean.class; + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDateDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDateDomainSubset.java index e3e5119ee..c5d125a0a 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDateDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDateDomainSubset.java @@ -22,6 +22,7 @@ import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.DATEDS; import java.io.Serializable; +import java.time.LocalDate; import it.bancaditalia.oss.vtl.impl.types.data.DateValue; import it.bancaditalia.oss.vtl.impl.types.data.NullValue; @@ -73,4 +74,10 @@ public static EntireDateDomainSubset getInstance() { return INSTANCE; } + + @Override + public Class getRepresentation() + { + return LocalDate.class; + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDurationDomainSubset.java 
b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDurationDomainSubset.java index 6c764af78..a53f32e35 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDurationDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireDurationDomainSubset.java @@ -82,4 +82,11 @@ public boolean equals(Object obj) { return obj != null && obj.getClass() == getClass(); } + + @Override + public Class getRepresentation() + { + // TODO + throw new UnsupportedOperationException(); + } } \ No newline at end of file diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireIntegerDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireIntegerDomainSubset.java index f40eecb0f..926eef679 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireIntegerDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireIntegerDomainSubset.java @@ -75,4 +75,10 @@ else if (value instanceof IntegerValue) else throw new VTLCastException(this, value); } + + @Override + public Class getRepresentation() + { + return Long.class; + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireNumberDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireNumberDomainSubset.java index 9c3c13c47..91aa324dc 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireNumberDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireNumberDomainSubset.java @@ -19,9 +19,11 @@ */ package it.bancaditalia.oss.vtl.impl.types.domain; +import static it.bancaditalia.oss.vtl.config.VTLGeneralProperties.isUseBigDecimal; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.NUMBERDS; import java.io.Serializable; +import java.math.BigDecimal; import 
it.bancaditalia.oss.vtl.exceptions.VTLCastException; import it.bancaditalia.oss.vtl.impl.types.data.NullValue; @@ -68,4 +70,10 @@ else if (value instanceof NumberValue) else throw new VTLCastException(this, value); } + + @Override + public Class getRepresentation() + { + return isUseBigDecimal() ? BigDecimal.class : Double.class; + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireStringDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireStringDomainSubset.java index 495255ec7..13f7e0a40 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireStringDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireStringDomainSubset.java @@ -66,4 +66,10 @@ else if (isAssignableFrom(value.getDomain())) else throw new VTLCastException(this, value); } + + @Override + public Class getRepresentation() + { + return String.class; + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimeDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimeDomainSubset.java index fd76895ce..e4c6d6923 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimeDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimeDomainSubset.java @@ -72,4 +72,11 @@ public boolean isComparableWith(ValueDomain other) { return other instanceof TimeDomain; } + + @Override + public Class getRepresentation() + { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimePeriodDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimePeriodDomainSubset.java index 61abf8e2f..c329fd9cf 100644 --- 
a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimePeriodDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/EntireTimePeriodDomainSubset.java @@ -19,6 +19,8 @@ */ package it.bancaditalia.oss.vtl.impl.types.domain; +import java.io.Serializable; + import it.bancaditalia.oss.vtl.exceptions.VTLCastException; import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.impl.types.data.TimePeriodValue; @@ -70,4 +72,11 @@ public String toString() { return "time_period"; } + + @Override + public Class getRepresentation() + { + // TODO Auto-generated method stub + throw new UnsupportedOperationException(); + } } \ No newline at end of file diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NonNullDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NonNullDomainSubset.java index 1a00c4d4b..6952dd813 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NonNullDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NonNullDomainSubset.java @@ -19,6 +19,8 @@ */ package it.bancaditalia.oss.vtl.impl.types.domain; +import java.io.Serializable; + import it.bancaditalia.oss.vtl.exceptions.VTLCastException; import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.model.data.ScalarValue; @@ -107,4 +109,10 @@ else if (!subsetWithNull.equals(other.subsetWithNull)) return false; return true; } + + @Override + public Class getRepresentation() + { + return subsetWithNull.getRepresentation(); + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NullDomain.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NullDomain.java index b48dc0126..7ae3348a3 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NullDomain.java +++ 
b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/NullDomain.java @@ -19,6 +19,8 @@ */ package it.bancaditalia.oss.vtl.impl.types.domain; +import java.io.Serializable; + import it.bancaditalia.oss.vtl.model.data.ScalarValue; import it.bancaditalia.oss.vtl.model.data.Variable; import it.bancaditalia.oss.vtl.model.domain.ValueDomain; @@ -74,4 +76,10 @@ public Variable getDefaultVariable() { throw new UnsupportedOperationException(); } + + @Override + public Class getRepresentation() + { + throw new UnsupportedOperationException(); + } } \ No newline at end of file diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/RegExpDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/RegExpDomainSubset.java index 14376bbb9..7da2a9069 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/RegExpDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/RegExpDomainSubset.java @@ -80,7 +80,7 @@ public String getName() String str = (String) value.get(); if (regexp.test(str)) - return new StringValue<>(str, this); + return StringValue.of(str, this); else throw new VTLCastException(this, value); } @@ -126,4 +126,10 @@ public Variable getDefaultVariable() { return new DefaultVariable<>(this); } + + @Override + public Class getRepresentation() + { + return parent.getRepresentation(); + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StringCodeList.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StringCodeList.java index d2100b223..702935305 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StringCodeList.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StringCodeList.java @@ -179,4 +179,11 @@ public Variable getDefaultVariable() { return new DefaultVariable<>(this); } + + + @Override + public Class getRepresentation() + { + return 
String.class; + } } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StrlenDomainSubset.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StrlenDomainSubset.java index 1c9219d5c..38298dfb0 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StrlenDomainSubset.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/domain/StrlenDomainSubset.java @@ -58,7 +58,7 @@ public boolean isAssignableFrom(ValueDomain other) protected ScalarValue, StringDomain> castCasted(StringValue casted) { if (test(casted)) - return new StringValue<>(casted.get(), this); + return StringValue.of(casted.get(), this); else throw new VTLCastException(this, casted); } diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/AnalyticOperator.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/AnalyticOperator.java index cd8151d34..8c8bb0095 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/AnalyticOperator.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/AnalyticOperator.java @@ -19,6 +19,7 @@ */ package it.bancaditalia.oss.vtl.impl.types.operators; +import static it.bancaditalia.oss.vtl.impl.types.data.NumberValueImpl.createNumberValue; import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.NULLDS; import static it.bancaditalia.oss.vtl.util.SerCollectors.averagingDouble; import static it.bancaditalia.oss.vtl.util.SerCollectors.collectingAndThen; @@ -27,16 +28,21 @@ import static it.bancaditalia.oss.vtl.util.SerCollectors.mapping; import static it.bancaditalia.oss.vtl.util.SerCollectors.maxBy; import static it.bancaditalia.oss.vtl.util.SerCollectors.minBy; -import static it.bancaditalia.oss.vtl.util.SerCollectors.reducing; import static it.bancaditalia.oss.vtl.util.SerCollectors.summingDouble; import static it.bancaditalia.oss.vtl.util.SerCollectors.toList; +import static 
java.util.stream.Collector.Characteristics.CONCURRENT; import static java.util.stream.Collector.Characteristics.UNORDERED; +import java.io.Serializable; +import java.math.BigDecimal; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import it.bancaditalia.oss.vtl.config.VTLGeneralProperties; import it.bancaditalia.oss.vtl.impl.types.data.DoubleValue; import it.bancaditalia.oss.vtl.impl.types.data.IntegerValue; import it.bancaditalia.oss.vtl.impl.types.data.NullValue; @@ -46,14 +52,21 @@ import it.bancaditalia.oss.vtl.model.data.DataStructureComponent; import it.bancaditalia.oss.vtl.model.data.NumberValue; import it.bancaditalia.oss.vtl.model.data.ScalarValue; +import it.bancaditalia.oss.vtl.model.domain.BooleanDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.DateDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.IntegerDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.NumberDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.StringDomainSubset; +import it.bancaditalia.oss.vtl.model.domain.ValueDomainSubset; import it.bancaditalia.oss.vtl.util.SerBiFunction; import it.bancaditalia.oss.vtl.util.SerCollector; +import it.bancaditalia.oss.vtl.util.SerFunction; public enum AnalyticOperator { COUNT("count", (dp, m) -> null, collectingAndThen(counting(), IntegerValue::of)), - SUM("sum", collectingAndThen(filtering(v -> !(v instanceof NullValue), summingDouble(v -> ((NumberValue)v).get().doubleValue())), DoubleValue::of)), - AVG("avg", collectingAndThen(filtering(v -> !(v instanceof NullValue), averagingDouble(v -> ((NumberValue)v).get().doubleValue())), DoubleValue::of)), + SUM("sum", collectingAndThen(filtering(v -> v != null && !(v instanceof NullValue), summingDouble(v -> ((NumberValue)v).get().doubleValue())), DoubleValue::of)), + AVG("avg", collectingAndThen(filtering(v -> v != null && !(v 
instanceof NullValue), averagingDouble(v -> ((NumberValue)v).get().doubleValue())), DoubleValue::of)), MEDIAN("median", collectingAndThen(mapping(NumberValue.class::cast, mapping(NumberValue::get, mapping(Number.class::cast, mapping(Number::doubleValue, toList())))), l -> { List c = new ArrayList<>(l); @@ -61,8 +74,8 @@ public enum AnalyticOperator int s = c.size(); return NumberValueImpl.createNumberValue(s % 2 == 0 ? c.get(s / 2) : (c.get(s /2) + c.get(s / 2 + 1)) / 2); })), - MIN("min", collectingAndThen(filtering(v -> !(v instanceof NullValue), minBy(ScalarValue::compareTo)), v -> v.orElse(NullValue.instance(NULLDS)))), - MAX("max", collectingAndThen(filtering(v -> !(v instanceof NullValue), maxBy(ScalarValue::compareTo)), v -> v.orElse(NullValue.instance(NULLDS)))), + MIN("min", collectingAndThen(filtering(v -> v != null && !(v instanceof NullValue), minBy(ScalarValue::compareTo)), v -> v.orElse(NullValue.instance(NULLDS)))), + MAX("max", collectingAndThen(filtering(v -> v != null && !(v instanceof NullValue), maxBy(ScalarValue::compareTo)), v -> v.orElse(NullValue.instance(NULLDS)))), // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance VAR_POP("var_pop", collectingAndThen(mapping(v -> ((NumberValue)v).get().doubleValue(), SerCollector.of( () -> new double[3], @@ -99,10 +112,10 @@ public enum AnalyticOperator return acuA; }, acu -> acu[2] / (acu[0] + 1.0), EnumSet.of(UNORDERED))), NumberValueImpl::createNumberValue)), - STDDEV_POP("stddev_pop", collectingAndThen(VAR_POP.getReducer(), dv -> NumberValueImpl.createNumberValue(Math.sqrt((Double) dv.get())))), - STDDEV_SAMP("stddev_var", collectingAndThen(VAR_SAMP.getReducer(), dv -> NumberValueImpl.createNumberValue(Math.sqrt((Double) dv.get())))), - FIRST_VALUE("first_value", collectingAndThen(reducing((a, b) -> a), o -> o.orElse(NullValue.instance(NULLDS)))), - LAST_VALUE("last_value", collectingAndThen(reducing((a, b) -> b), o -> o.orElse(NullValue.instance(NULLDS)))); + 
STDDEV_POP("stddev_pop", collectingAndThen(VAR_POP.reducer, dv -> createNumberValue(Math.sqrt((Double) dv.get())))), + STDDEV_SAMP("stddev_var", collectingAndThen(VAR_SAMP.reducer, dv -> createNumberValue(Math.sqrt((Double) dv.get())))), + FIRST_VALUE("first_value", null), + LAST_VALUE("last_value", null); private final SerCollector, ?, ScalarValue> reducer; private final SerBiFunction, ScalarValue> extractor; @@ -122,14 +135,37 @@ private AnalyticOperator(String name, this.reducer = reducer; } - public SerCollector, ?, ScalarValue> getReducer() + public SerCollector, ?, ScalarValue> getReducer(DataStructureComponent comp) { - return reducer; + SerFunction>, ScalarValue> firstLastFinisher = h -> h.value != null ? h.value : NullValue.instanceFrom(comp); + + if (this == FIRST_VALUE) + return SerCollector.of(() -> new Holder>(getReprClass(comp)), + Holder::setFirst, Holder::mergeFirst, firstLastFinisher, EnumSet.of(CONCURRENT)); + else if (this == LAST_VALUE) + return SerCollector.of(() -> new Holder>(getReprClass(comp)), + Holder::setLast, Holder::mergeLast, firstLastFinisher, EnumSet.of(CONCURRENT)); + else + return reducer; } - - public SerCollector> getReducer(DataStructureComponent measure) + + private Class getReprClass(DataStructureComponent comp) { - return mapping(dp -> extractor.apply(dp, measure), filtering(v -> !(v instanceof NullValue), reducer)); + Class reprType; + ValueDomainSubset domain = comp.getVariable().getDomain(); + if (domain instanceof IntegerDomainSubset) + reprType = Long.class; + else if (domain instanceof NumberDomainSubset) + reprType = VTLGeneralProperties.isUseBigDecimal() ? 
BigDecimal.class : Double.class; + else if (domain instanceof StringDomainSubset) + reprType = String.class; + else if (domain instanceof DateDomainSubset) + reprType = LocalDate.class; + else if (domain instanceof BooleanDomainSubset) + reprType = Boolean.class; + else + throw new UnsupportedOperationException("Analytic invocation not implemented for components of domain " + domain); + return reprType; } @Override @@ -142,4 +178,52 @@ public String toString() { return extractor; } + + /* Public to be used by spark encoder mechanism */ + public static final class Holder implements Serializable + { + private static final long serialVersionUID = 1L; + + public final Class reprType; + + private AtomicBoolean isSet = new AtomicBoolean(false); + private volatile T value; + + private Holder(Class reprType) + { + this.reprType = reprType; + } + + public T get() + { + return value; + } + + public AtomicBoolean isSet() + { + return isSet; + } + + private void setLast(T newValue) + { + isSet.set(true); + value = newValue; + } + + private void setFirst(T newValue) + { + if (!isSet.compareAndExchange(false, true)) + value = newValue; + } + + private Holder mergeLast(Holder other) + { + return other.isSet.get() ? other : this; + } + + private Holder mergeFirst(Holder other) + { + return isSet.get() ? 
this : other; + } + } } \ No newline at end of file diff --git a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/ComparisonOperator.java b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/ComparisonOperator.java index d9b9a0eae..55476bfc3 100644 --- a/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/ComparisonOperator.java +++ b/vtl-types/src/main/java/it/bancaditalia/oss/vtl/impl/types/operators/ComparisonOperator.java @@ -21,24 +21,23 @@ import static it.bancaditalia.oss.vtl.impl.types.domain.Domains.BOOLEANDS; -import java.util.function.IntPredicate; - import it.bancaditalia.oss.vtl.impl.types.data.BooleanValue; import it.bancaditalia.oss.vtl.impl.types.data.NullValue; import it.bancaditalia.oss.vtl.impl.types.domain.EntireBooleanDomainSubset; import it.bancaditalia.oss.vtl.model.data.ScalarValue; import it.bancaditalia.oss.vtl.model.domain.BooleanDomain; import it.bancaditalia.oss.vtl.util.SerBiFunction; +import it.bancaditalia.oss.vtl.util.SerIntPredicate; public enum ComparisonOperator implements SerBiFunction, ScalarValue, ScalarValue> { EQ("=", c -> c == 0), NE("<>", c -> c != 0), GT(">", c -> c > 0), GE(">=", c -> c >= 0), LT("<", c -> c < 0), LE("<=", c -> c <= 0); - private final IntPredicate lambda; + private final SerIntPredicate lambda; private final String name; - private ComparisonOperator(String name, IntPredicate lambda) + private ComparisonOperator(String name, SerIntPredicate lambda) { this.lambda = lambda; this.name = name;