diff --git a/metafacture-json/build.gradle b/metafacture-json/build.gradle
index 1ffa26d2..01a5b23a 100644
--- a/metafacture-json/build.gradle
+++ b/metafacture-json/build.gradle
@@ -22,7 +22,9 @@ dependencies {
   implementation 'com.fasterxml.jackson.core:jackson-core:2.13.0'
   implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.0'
   implementation 'com.jayway.jsonpath:json-path:2.6.0'
+  implementation 'com.github.erosb:everit-json-schema:1.14.1'
   testImplementation 'junit:junit:4.12'
   testImplementation 'org.mockito:mockito-core:2.5.5'
+  testImplementation 'com.github.tomakehurst:wiremock-jre8:2.33.2'
   testRuntimeOnly 'org.slf4j:slf4j-simple:1.7.21'
 }
diff --git a/metafacture-json/src/main/java/org/metafacture/json/JsonValidator.java b/metafacture-json/src/main/java/org/metafacture/json/JsonValidator.java
new file mode 100644
index 00000000..8db6d0bb
--- /dev/null
+++ b/metafacture-json/src/main/java/org/metafacture/json/JsonValidator.java
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2021, 2023 Fabian Steeg, hbz
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.metafacture.json;
+
+import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.MetafactureException;
+import org.metafacture.framework.ObjectReceiver;
+import org.metafacture.framework.annotations.Description;
+import org.metafacture.framework.annotations.In;
+import org.metafacture.framework.annotations.Out;
+import org.metafacture.framework.helpers.DefaultObjectPipe;
+
+import org.everit.json.schema.Schema;
+import org.everit.json.schema.ValidationException;
+import org.everit.json.schema.loader.SchemaClient;
+import org.everit.json.schema.loader.SchemaLoader;
+import org.everit.json.schema.loader.SchemaLoader.SchemaLoaderBuilder;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONTokener;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+/**
+ * Validate JSON against a given schema, pass only valid input to the receiver.
+ * <p>
+ * The schema is loaded once at construction time, from a URL or, if the location
+ * cannot be read as a URL, from the class path. Input that fails to parse or to
+ * validate is logged, counted, optionally written to a file, and not forwarded.
+ *
+ * @author Fabian Steeg (fsteeg)
+ */
+@Description("Validate JSON against a given schema, send only valid input to the receiver. Pass the schema location to validate against. "
+        + "Write valid and/or invalid output to locations specified with `writeValid` and `writeInvalid`. "
+        + "Set the JSON key for the record ID value with `idKey` (for logging output, defaults to `id`).")
+@In(String.class)
+@Out(String.class)
+@FluxCommand("validate-json")
+public final class JsonValidator extends DefaultObjectPipe<String, ObjectReceiver<String>> {
+
+    private static final Logger LOG = LoggerFactory.getLogger(JsonValidator.class);
+    private static final String DEFAULT_ID_KEY = "id";
+    private Schema schema;
+    private long fail;
+    private long success;
+    private FileWriter writeInvalid;
+    private FileWriter writeValid;
+    private String idKey = DEFAULT_ID_KEY;
+
+    /**
+     * Creates a validator and loads the schema it validates against.
+     *
+     * @param url The URL of the schema to validate against.
+     * @throws MetafactureException if the schema cannot be found or is not valid JSON
+     */
+    public JsonValidator(final String url) {
+        initSchema(url);
+    }
+
+    /**
+     * Sets the file that valid records are written to, one record per line.
+     *
+     * @param writeValid The location to write valid data to.
+     * @throws MetafactureException if the file cannot be opened for writing
+     */
+    public void setWriteValid(final String writeValid) {
+        this.writeValid = fileWriter(writeValid);
+    }
+
+    /**
+     * Sets the file that invalid records are written to, one record per line.
+     *
+     * @param writeInvalid The location to write invalid data to.
+     * @throws MetafactureException if the file cannot be opened for writing
+     */
+    public void setWriteInvalid(final String writeInvalid) {
+        this.writeInvalid = fileWriter(writeInvalid);
+    }
+
+    /**
+     * Sets the key whose value identifies a record in the log output.
+     *
+     * @param idKey The JSON key for the record ID value.
+     */
+    public void setIdKey(final String idKey) {
+        this.idKey = idKey;
+    }
+
+    /**
+     * Parses one JSON record and forwards it to the receiver only if it is valid.
+     *
+     * @param json The JSON record to validate.
+     */
+    @Override
+    public void process(final String json) {
+        try {
+            validate(json, new JSONObject(json) /* throws JSONException on syntax error */);
+        }
+        catch (final JSONException e) {
+            handleInvalid(json, null, e.getMessage());
+        }
+    }
+
+    /** Forwards and counts a record that matches the schema, reports it as invalid otherwise. */
+    private void validate(final String json, final JSONObject object) {
+        try {
+            schema.validate(object); // throws ValidationException if invalid
+            getReceiver().process(json);
+            ++success;
+            write(json, writeValid);
+        }
+        catch (final ValidationException e) {
+            handleInvalid(json, object, e.getAllMessages().toString());
+        }
+    }
+
+    @Override
+    protected void onCloseStream() {
+        try {
+            close(writeInvalid);
+        }
+        finally {
+            // Close the second writer even if closing the first one failed.
+            close(writeValid);
+        }
+        LOG.debug("Success: {}, Fail: {}", success, fail);
+        super.onCloseStream();
+    }
+
+    /** Loads the schema from a URL; falls back to the class path if it cannot be read as a URL. */
+    private void initSchema(final String schemaUrl) {
+        if (schema != null) {
+            return;
+        }
+        SchemaLoaderBuilder schemaLoader = SchemaLoader.builder();
+        try {
+            final URL url = new URL(schemaUrl);
+            schemaLoader = schemaLoader.schemaJson(jsonFrom(url.openStream()))
+                    .resolutionScope(baseFor(url.toString()));
+        }
+        catch (final IOException e) {
+            LOG.debug("Could not read as URL: {}, trying to load from class path", schemaUrl);
+            final InputStream resource = getClass().getResourceAsStream(schemaUrl);
+            if (resource == null) {
+                throw new MetafactureException("Schema not found as URL or on class path: " + schemaUrl, e);
+            }
+            schemaLoader = schemaLoader.schemaClient(SchemaClient.classPathAwareClient())
+                    .schemaJson(jsonFrom(resource))
+                    .resolutionScope("classpath://" + baseFor(schemaUrl));
+        }
+        schema = schemaLoader.build().load().build();
+    }
+
+    /** Parses the stream as a JSON object and closes the stream afterwards. */
+    private JSONObject jsonFrom(final InputStream inputStream) {
+        try (InputStream in = inputStream) {
+            return new JSONObject(new JSONTokener(in));
+        }
+        catch (final JSONException | IOException e) {
+            throw new MetafactureException(e.getMessage(), e);
+        }
+    }
+
+    /** Returns the path up to and including its last slash (empty if there is none). */
+    private String baseFor(final String path) {
+        return path.substring(0, path.lastIndexOf('/') + 1);
+    }
+
+    /** Opens a writer for the given location, wrapping failures in a MetafactureException. */
+    private FileWriter fileWriter(final String fileLocation) {
+        try {
+            return new FileWriter(fileLocation);
+        }
+        catch (final IOException e) {
+            throw new MetafactureException(e.getMessage(), e);
+        }
+    }
+
+    /** Logs and counts an invalid record and writes it to the invalid output, if configured. */
+    private void handleInvalid(final String json, final JSONObject object,
+            final String errorMessage) {
+        LOG.info("Invalid JSON: {} in {}", errorMessage, object != null ? object.opt(idKey) : json);
+        ++fail;
+        write(json, writeInvalid);
+    }
+
+    /** Appends the record and a newline to the writer; does nothing if the writer is null. */
+    private void write(final String json, final FileWriter fileWriter) {
+        if (fileWriter != null) {
+            try {
+                fileWriter.append(json);
+                fileWriter.append("\n");
+            }
+            catch (final IOException e) {
+                throw new MetafactureException(e.getMessage(), e);
+            }
+        }
+    }
+
+    /** Closes the writer if it is not null. */
+    private void close(final FileWriter fileWriter) {
+        if (fileWriter != null) {
+            try {
+                fileWriter.close();
+            }
+            catch (final IOException e) {
+                throw new MetafactureException(e.getMessage(), e);
+            }
+        }
+    }
+
+}
diff --git a/metafacture-json/src/main/resources/flux-commands.properties b/metafacture-json/src/main/resources/flux-commands.properties
index 2d9cedee..27bc9a13 100644
--- a/metafacture-json/src/main/resources/flux-commands.properties
+++ b/metafacture-json/src/main/resources/flux-commands.properties
@@ -15,3 +15,4 @@
 #
 encode-json org.metafacture.json.JsonEncoder
 decode-json org.metafacture.json.JsonDecoder
+validate-json org.metafacture.json.JsonValidator
diff --git a/metafacture-json/src/test/java/org/metafacture/json/JsonValidatorTest.java b/metafacture-json/src/test/java/org/metafacture/json/JsonValidatorTest.java
new file mode 100644
index 00000000..35474b93
--- /dev/null
+++ 
b/metafacture-json/src/test/java/org/metafacture/json/JsonValidatorTest.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright 2021, 2023 Fabian Steeg, hbz
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.metafacture.json;
+
+import static org.hamcrest.CoreMatchers.both;
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertThat;
+import static com.github.tomakehurst.wiremock.client.WireMock.stubFor;
+import static com.github.tomakehurst.wiremock.client.WireMock.request;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.metafacture.framework.MetafactureException;
+import org.metafacture.framework.ObjectReceiver;
+import org.mockito.InOrder;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.MockitoAnnotations;
+
+import com.github.tomakehurst.wiremock.client.WireMock;
+import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
+import com.github.tomakehurst.wiremock.junit.WireMockRule;
+
+
+/**
+ * Tests for {@link JsonValidator}.
+ * <p>
+ * Every test runs once per schema location: a class path resource, a file URL
+ * and an HTTP URL served by WireMock.
+ *
+ * @author Fabian Steeg
+ *
+ */
+@RunWith(Parameterized.class)
+public final class JsonValidatorTest {
+
+    private static final String MAIN_SCHEMA = "/schemas/schema.json";
+    private static final String ID_SCHEMA = "/schemas/id.json";
+    private static final String JSON_VALID = "{\"id\":\"http://example.org/\"}";
+    private static final String JSON_INVALID_MISSING_REQUIRED = "{}";
+    private static final String JSON_INVALID_URI_FORMAT = "{\"id\":\"example.org/\"}";
+    private static final String JSON_INVALID_DUPLICATE_KEY = "{\"id\":\"val\",\"id\":\"val\"}";
+    private static final String JSON_INVALID_SYNTAX_ERROR = "{\"id1\":\"val\",\"id2\":\"val\"";
+
+    private JsonValidator validator;
+
+    @Mock
+    private ObjectReceiver<String> receiver;
+    private InOrder inOrder;
+    private Function<Object, String> schemaLocationGetter;
+
+    @Rule
+    public WireMockRule wireMockRule = new WireMockRule(WireMockConfiguration.wireMockConfig()
+            .jettyAcceptors(Runtime.getRuntime().availableProcessors()).dynamicPort());
+
+    /** Supplies one schema location getter per test run; each getter receives the WireMock rule. */
+    @Parameterized.Parameters(name = "{index}")
+    public static Collection<Object[]> siteMaps() {
+        return Arrays.asList((Object[][]) (new Function[][] { //
+                // Pass the schema to each test as path on classpath, file url, and http url:
+                { (Object rule) -> MAIN_SCHEMA },
+                { (Object rule) -> JsonValidatorTest.class.getResource(MAIN_SCHEMA).toString() },
+                { (Object rule) -> ((WireMockRule) rule).baseUrl() + MAIN_SCHEMA } }));
+    }
+
+    public JsonValidatorTest(Function<Object, String> schemaLocationGetter) {
+        this.schemaLocationGetter = schemaLocationGetter;
+    }
+
+    @Before
+    public void setup() throws IOException {
+        MockitoAnnotations.initMocks(this);
+        wireMock(MAIN_SCHEMA, ID_SCHEMA);
+        String schemaLocation = schemaLocationGetter.apply(wireMockRule);
+        validator = new JsonValidator(schemaLocation);
+        validator.setReceiver(receiver);
+        inOrder = Mockito.inOrder(receiver);
+    }
+
+    /** Stubs a GET request for each location, answering with the class path resource of that name. */
+    private void wireMock(final String... schemaLocations) throws IOException {
+        for (String schemaLocation : schemaLocations) {
+            stubFor(request("GET", WireMock.urlEqualTo(schemaLocation)).willReturn(
+                    WireMock.ok().withBody(readToString(getClass().getResource(schemaLocation)))
+                            .withHeader("Content-type", "application/json")));
+        }
+    }
+
+    /** Reads the content behind the URL as UTF-8, joining lines with newlines; closes the reader. */
+    private String readToString(final URL url) throws IOException {
+        try (BufferedReader reader = new BufferedReader(
+                new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
+            return reader.lines().collect(Collectors.joining("\n"));
+        }
+    }
+
+    @Test
+    public void callWireMockSchema() throws MalformedURLException, IOException {
+        final String schemaContent = readToString(new URL(wireMockRule.baseUrl() + MAIN_SCHEMA));
+        assertThat(schemaContent, both(containsString("$schema")).and(containsString("$ref")));
+    }
+
+    @Test
+    public void testShouldValidate() {
+        validator.process(JSON_VALID);
+        inOrder.verify(receiver, Mockito.calls(1)).process(JSON_VALID);
+    }
+
+    @Test
+    public void testShouldInvalidateMissingRequired() {
+        validator.process(JSON_INVALID_MISSING_REQUIRED);
+        inOrder.verifyNoMoreInteractions();
+    }
+
+    @Test
+    public void testShouldInvalidateUriFormat() {
+        validator.process(JSON_INVALID_URI_FORMAT);
+        inOrder.verifyNoMoreInteractions();
+    }
+
+    @Test
+    public void testShouldInvalidateDuplicateKey() {
+        validator.process(JSON_INVALID_DUPLICATE_KEY);
+        inOrder.verifyNoMoreInteractions();
+    }
+
+    @Test
+    public void testShouldInvalidateSyntaxError() {
+        validator.process(JSON_INVALID_SYNTAX_ERROR);
+        inOrder.verifyNoMoreInteractions();
+    }
+
+    @Test(expected = MetafactureException.class)
+    public void testShouldCatchMissingSchemaFile() {
+        new JsonValidator("").process("{}");
+    }
+
+    @Test(expected = MetafactureException.class)
+    public void testShouldCatchMissingValidOutputFile() {
+        validator.setWriteValid("");
+        validator.process(JSON_INVALID_MISSING_REQUIRED);
+    }
+
+    @Test(expected = MetafactureException.class)
+    public void testShouldCatchMissingInvalidOutputFile() {
+        validator.setWriteInvalid("");
+        validator.process(JSON_INVALID_MISSING_REQUIRED);
+    }
+
+    @After
+    public void cleanup() {
+        validator.closeStream();
+    }
+
+}
diff --git a/metafacture-json/src/test/resources/schemas/id.json b/metafacture-json/src/test/resources/schemas/id.json
new file mode 100644
index 00000000..255cefcf
--- /dev/null
+++ b/metafacture-json/src/test/resources/schemas/id.json
@@ -0,0 +1,8 @@
+{
+  "$id": "id.json",
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "URL",
+  "description": "The URL/URI of the resource",
+  "type": "string",
+  "format": "uri"
+}
diff --git a/metafacture-json/src/test/resources/schemas/schema.json b/metafacture-json/src/test/resources/schemas/schema.json
new file mode 100644
index 00000000..fac9e0a5
--- /dev/null
+++ b/metafacture-json/src/test/resources/schemas/schema.json
@@ -0,0 +1,13 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "schema.json",
+  "type": "object",
+  "properties": {
+    "id": {
+      "$ref": "id.json"
+    }
+  },
+  "required": [
+    "id"
+  ]
+}