Skip to content

Commit d3463e4

Browse files
committed
Text Classification JUnit Tests
Signed-off-by: andreadimaio <[email protected]>
1 parent 5115c7f commit d3463e4

File tree

8 files changed

+3430
-10
lines changed

8 files changed

+3430
-10
lines changed
Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
/*
2+
* Copyright IBM Corp. 2025 - 2025
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
package com.ibm.watsonx.ai.it;
6+
7+
import static java.util.Objects.isNull;
8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertFalse;
10+
import static org.junit.jupiter.api.Assertions.assertNotNull;
11+
import static org.junit.jupiter.api.Assertions.assertNull;
12+
import static org.junit.jupiter.api.Assertions.assertThrows;
13+
import static org.junit.jupiter.api.Assertions.assertTrue;
14+
import java.nio.file.Path;
15+
import org.junit.jupiter.api.Test;
16+
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
17+
import com.ibm.watsonx.ai.core.exeception.WatsonxException;
18+
import com.ibm.watsonx.ai.textprocessing.KvpFields;
19+
import com.ibm.watsonx.ai.textprocessing.KvpFields.KvpField;
20+
import com.ibm.watsonx.ai.textprocessing.Language;
21+
import com.ibm.watsonx.ai.textprocessing.Schema;
22+
import com.ibm.watsonx.ai.textprocessing.SemanticConfig.SchemaMergeStrategy;
23+
import com.ibm.watsonx.ai.textprocessing.textclassification.TextClassificationDeleteParameters;
24+
import com.ibm.watsonx.ai.textprocessing.textclassification.TextClassificationParameters;
25+
import com.ibm.watsonx.ai.textprocessing.textclassification.TextClassificationSemanticConfig;
26+
import com.ibm.watsonx.ai.textprocessing.textclassification.TextClassificationService;
27+
28+
@EnabledIfEnvironmentVariable(named = "WATSONX_API_KEY", matches = ".+")
29+
@EnabledIfEnvironmentVariable(named = "WATSONX_PROJECT_ID", matches = ".+")
30+
@EnabledIfEnvironmentVariable(named = "WATSONX_URL", matches = ".+")
31+
@EnabledIfEnvironmentVariable(named = "WATSONX_DOCUMENT_REFERENCE_CONNECTION_ID", matches = ".+")
32+
@EnabledIfEnvironmentVariable(named = "WATSONX_DOCUMENT_REFERENCE_BUCKET", matches = ".+")
33+
@EnabledIfEnvironmentVariable(named = "CLOUD_OBJECT_STORAGE_URL", matches = ".+")
34+
public class ClassificationServiceIT {
35+
36+
static final String API_KEY = System.getenv("WATSONX_API_KEY");
37+
static final String PROJECT_ID = System.getenv("WATSONX_PROJECT_ID");
38+
static final String URL = System.getenv("WATSONX_URL");
39+
static final String DOCUMENT_REFERENCE_CONNECTION_ID = System.getenv("WATSONX_DOCUMENT_REFERENCE_CONNECTION_ID");
40+
static final String DOCUMENT_REFERENCE_BUCKET = System.getenv("WATSONX_DOCUMENT_REFERENCE_BUCKET");
41+
static final String CLOUD_OBJECT_STORAGE_URL = System.getenv("CLOUD_OBJECT_STORAGE_URL");
42+
43+
static final TextClassificationService classificationService = TextClassificationService.builder()
44+
.baseUrl(URL)
45+
.apiKey(API_KEY)
46+
.cosUrl(CLOUD_OBJECT_STORAGE_URL)
47+
.projectId(PROJECT_ID)
48+
.documentReference(DOCUMENT_REFERENCE_CONNECTION_ID, DOCUMENT_REFERENCE_BUCKET)
49+
.logRequests(true)
50+
.logResponses(true)
51+
.build();
52+
53+
@Test
54+
void test_upload_and_start_classification_with_file() throws Exception {
55+
56+
var file = Path.of(ClassLoader.getSystemResource("invoice.pdf").toURI()).toFile();
57+
58+
var parameters = TextClassificationParameters.builder()
59+
.languages(Language.ENGLISH)
60+
.build();
61+
62+
var response = classificationService.uploadAndStartClassification(file, parameters);
63+
assertNotNull(response.entity());
64+
assertNull(response.entity().custom());
65+
assertNotNull(response.entity().documentReference().connection());
66+
assertNotNull(response.entity().documentReference().connection().id());
67+
assertNotNull(response.entity().parameters());
68+
assertNotNull(response.entity().parameters().languages());
69+
assertTrue(response.entity().parameters().languages().size() == 1);
70+
assertNotNull(response.entity().results());
71+
assertNotNull(response.entity().results().status());
72+
assertNotNull(response.entity().results().numberPagesProcessed());
73+
assertNotNull(response.metadata().id());
74+
assertNotNull(response.metadata().createdAt());
75+
assertNotNull(response.metadata().projectId());
76+
77+
var status = response.entity().results().status();
78+
while (!status.equals("failed") && !status.equals("completed")) {
79+
Thread.sleep(2000);
80+
response = classificationService.fetchClassificationRequest(response.metadata().id());
81+
status = response.entity().results().status();
82+
}
83+
84+
assertEquals("completed", status);
85+
assertNotNull(response.entity());
86+
assertNull(response.entity().custom());
87+
assertNotNull(response.entity().documentReference().connection());
88+
assertNotNull(response.entity().documentReference().connection().id());
89+
assertNotNull(response.entity().parameters());
90+
assertNotNull(response.entity().parameters().languages());
91+
assertTrue(response.entity().parameters().languages().size() == 1);
92+
assertNotNull(response.entity().results());
93+
assertNotNull(response.entity().results().completedAt());
94+
assertNotNull(response.entity().results().runningAt());
95+
assertNotNull(response.entity().results().status());
96+
assertNotNull(response.entity().results().numberPagesProcessed());
97+
assertNotNull(response.metadata().id());
98+
assertNotNull(response.metadata().createdAt());
99+
assertNotNull(response.metadata().modifiedAt());
100+
assertNotNull(response.metadata().projectId());
101+
assertTrue(response.entity().results().documentClassified());
102+
assertEquals("Invoice", response.entity().results().documentType());
103+
104+
assertTrue(classificationService.deleteFile(DOCUMENT_REFERENCE_BUCKET, "invoice.pdf"));
105+
}
106+
107+
@Test
108+
void test_upload_extract_and_fetch_with_file() throws Exception {
109+
110+
var file = Path.of(ClassLoader.getSystemResource("invoice.pdf").toURI()).toFile();
111+
112+
var parameters = TextClassificationParameters.builder()
113+
.languages(Language.ENGLISH)
114+
.build();
115+
116+
var result = classificationService.uploadClassifyAndFetch(file, parameters);
117+
assertNull(result.error());
118+
assertNotNull(result.completedAt());
119+
assertNotNull(result.numberPagesProcessed());
120+
assertNotNull(result.runningAt());
121+
assertTrue(result.documentClassified());
122+
assertEquals("Invoice", result.documentType());
123+
assertEquals("completed", result.status());
124+
125+
parameters = TextClassificationParameters.builder()
126+
.removeUploadedFile(true)
127+
.build();
128+
129+
result = classificationService.uploadClassifyAndFetch(file, parameters);
130+
assertEquals("Invoice", result.documentType());
131+
132+
// Wait for async deletion
133+
Thread.sleep(500);
134+
}
135+
136+
@Test
137+
void test_delete_request() throws Exception {
138+
139+
var file = Path.of(ClassLoader.getSystemResource("invoice.pdf").toURI()).toFile();
140+
141+
var parameters = TextClassificationParameters.builder()
142+
.languages(Language.ENGLISH)
143+
.build();
144+
145+
var response = classificationService.uploadAndStartClassification(file, parameters);
146+
assertTrue(
147+
classificationService.deleteRequest(
148+
response.metadata().id(),
149+
TextClassificationDeleteParameters.builder()
150+
.hardDelete(true)
151+
.build()
152+
)
153+
);
154+
155+
var ex = assertThrows(WatsonxException.class, () -> classificationService.fetchClassificationRequest(response.metadata().id()));
156+
assertEquals(404, ex.statusCode());
157+
}
158+
159+
@Test
160+
void test_upload_classify_and_fetch_with_inputstream() throws Exception {
161+
162+
var filename = "invoice.pdf";
163+
var inputstream = ClassLoader.getSystemResourceAsStream(filename);
164+
165+
var parameters = TextClassificationParameters.builder()
166+
.languages(Language.ENGLISH)
167+
.build();
168+
169+
var result = classificationService.uploadClassifyAndFetch(inputstream, filename);
170+
assertEquals("Invoice", result.documentType());
171+
assertTrue(classificationService.deleteFile(DOCUMENT_REFERENCE_BUCKET, filename));
172+
173+
parameters = TextClassificationParameters.builder()
174+
.removeUploadedFile(true)
175+
.build();
176+
177+
inputstream = ClassLoader.getSystemResourceAsStream(filename);
178+
result = classificationService.uploadClassifyAndFetch(inputstream, filename, parameters);
179+
assertEquals("Invoice", result.documentType());
180+
181+
// Wait for async deletion
182+
Thread.sleep(500);
183+
}
184+
185+
@Test
186+
void test_classification_with_semantic_config() throws Exception {
187+
188+
var invoice = Path.of(ClassLoader.getSystemResource("invoice.pdf").toURI()).toFile();
189+
var unclassified = Path.of(ClassLoader.getSystemResource("test.pdf").toURI()).toFile();
190+
191+
var fields = KvpFields.builder()
192+
.add("invoice_date", KvpField.of("The date when the invoice was issued.", "2024-07-10"))
193+
.add("invoice_number", KvpField.of("The unique number identifying the invoice.", "INV-2024-001"))
194+
.add("total_amount", KvpField.of("The total amount to be paid.", "1250.50"))
195+
.build();
196+
197+
var semanticConfig = TextClassificationSemanticConfig.builder()
198+
.schemasMergeStrategy(SchemaMergeStrategy.REPLACE)
199+
.schemas(
200+
Schema.builder()
201+
.documentDescription("A vendor-issued invoice listing purchased items, prices, and payment information")
202+
.documentType("My-Invoice")
203+
.fields(fields)
204+
.additionalPromptInstructions("The document contains a table with all the data")
205+
.build()
206+
).build();
207+
208+
var parameters = TextClassificationParameters.builder()
209+
.languages(Language.ENGLISH)
210+
.semanticConfig(semanticConfig)
211+
.build();
212+
213+
var result = classificationService.uploadClassifyAndFetch(invoice, parameters);
214+
assertTrue(result.documentClassified());
215+
assertEquals("My-Invoice", result.documentType());
216+
217+
result = classificationService.uploadClassifyAndFetch(unclassified, parameters);
218+
assertFalse(result.documentClassified());
219+
assertTrue(isNull(result.documentType()) || result.documentType().isBlank());
220+
}
221+
}

modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/TextExtractionServiceIT.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
import com.ibm.watsonx.ai.core.auth.AuthenticationProvider;
1616
import com.ibm.watsonx.ai.core.auth.iam.IAMAuthenticator;
1717
import com.ibm.watsonx.ai.core.exeception.WatsonxException;
18+
import com.ibm.watsonx.ai.textprocessing.Language;
1819
import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionDeleteParameters;
1920
import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionParameters;
20-
import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionParameters.Language;
2121
import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionParameters.Mode;
2222
import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionParameters.Type;
2323
import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionService;
@@ -29,7 +29,7 @@
2929
@EnabledIfEnvironmentVariable(named = "WATSONX_DOCUMENT_REFERENCE_CONNECTION_ID", matches = ".+")
3030
@EnabledIfEnvironmentVariable(named = "WATSONX_DOCUMENT_REFERENCE_BUCKET", matches = ".+")
3131
@EnabledIfEnvironmentVariable(named = "WATSONX_RESULTS_REFERENCE_CONNECTION_ID", matches = ".+")
32-
@EnabledIfEnvironmentVariable(named = "WATSONX_DOCUMENT_REFERENCE_BUCKET", matches = ".+")
32+
@EnabledIfEnvironmentVariable(named = "WATSONX_RESULTS_REFERENCE_BUCKET", matches = ".+")
3333
@EnabledIfEnvironmentVariable(named = "CLOUD_OBJECT_STORAGE_URL", matches = ".+")
3434
public class TextExtractionServiceIT {
3535

@@ -60,7 +60,7 @@ public class TextExtractionServiceIT {
6060
@Test
6161
void test_upload_and_start_extraction_with_file() throws Exception {
6262

63-
var file = Path.of(getClass().getClassLoader().getResource("test.pdf").toURI()).toFile();
63+
var file = Path.of(ClassLoader.getSystemResource("test.pdf").toURI()).toFile();
6464

6565
var parameters = TextExtractionParameters.builder()
6666
.languages(Language.ENGLISH)
@@ -119,7 +119,7 @@ void test_upload_and_start_extraction_with_file() throws Exception {
119119
@Test
120120
void test_upload_extract_and_fetch_with_file() throws Exception {
121121

122-
var file = Path.of(getClass().getClassLoader().getResource("test.pdf").toURI()).toFile();
122+
var file = Path.of(ClassLoader.getSystemResource("test.pdf").toURI()).toFile();
123123

124124
var parameters = TextExtractionParameters.builder()
125125
.languages(Language.ENGLISH)
@@ -151,8 +151,7 @@ void test_upload_extract_and_fetch_with_file() throws Exception {
151151
@Test
152152
void test_delete_request() throws Exception {
153153

154-
155-
var file = Path.of(getClass().getClassLoader().getResource("test.pdf").toURI()).toFile();
154+
var file = Path.of(ClassLoader.getSystemResource("test.pdf").toURI()).toFile();
156155

157156
var parameters = TextExtractionParameters.builder()
158157
.languages(Language.ENGLISH)
@@ -176,7 +175,7 @@ void test_delete_request() throws Exception {
176175
void test_upload_extract_and_fetch_with_inputstream() throws Exception {
177176

178177
var filename = "test.pdf";
179-
var inputstream = getClass().getClassLoader().getResourceAsStream(filename);
178+
var inputstream = ClassLoader.getSystemResourceAsStream(filename);
180179

181180
var parameters = TextExtractionParameters.builder()
182181
.languages(Language.ENGLISH)
@@ -196,7 +195,7 @@ void test_upload_extract_and_fetch_with_inputstream() throws Exception {
196195
.removeOutputFile(true)
197196
.build();
198197

199-
inputstream = getClass().getClassLoader().getResourceAsStream(filename);
198+
inputstream = ClassLoader.getSystemResourceAsStream(filename);
200199
text = textExtractionService.uploadExtractAndFetch(inputstream, filename, parameters);
201200
assertEquals("PDF TEST", text);
202201

@@ -209,7 +208,7 @@ void test_upload_extract_and_fetch_with_inputstream() throws Exception {
209208
@Test
210209
void test_multiple_outputs() throws Exception {
211210

212-
var file = Path.of(getClass().getClassLoader().getResource("test.pdf").toURI()).toFile();
211+
var file = Path.of(ClassLoader.getSystemResource("test.pdf").toURI()).toFile();
213212

214213
var parameters = TextExtractionParameters.builder()
215214
.requestedOutputs(Type.PLAIN_TEXT, Type.JSON, Type.HTML)
@@ -238,7 +237,7 @@ void test_multiple_outputs() throws Exception {
238237
@Test
239238
void test_ocr() throws Exception {
240239

241-
var file = Path.of(getClass().getClassLoader().getResource("ocr.jpg").toURI()).toFile();
240+
var file = Path.of(ClassLoader.getSystemResource("ocr.jpg").toURI()).toFile();
242241
var parameters = TextExtractionParameters.builder()
243242
.mode(Mode.HIGH_QUALITY)
244243
.requestedOutputs(Type.PLAIN_TEXT)

0 commit comments

Comments
 (0)