From 9743b72ec308d703eba805ac3934d084b5c59ecb Mon Sep 17 00:00:00 2001 From: "Daniel A. Werning" <33833393+dwerning@users.noreply.github.com> Date: Fri, 17 May 2024 08:10:20 +0200 Subject: [PATCH] Update v2.1.3 --- .../tla/backend/es/model/LemmaEntity.java | 18 +-- .../tla/backend/es/model/SentenceEntity.java | 14 +- .../tla/backend/es/model/ThsEntryEntity.java | 13 -- .../backend/es/model/meta/ModelConfig.java | 2 - .../tla/backend/es/query/ESQueryBuilder.java | 2 +- .../es/query/LemmaSearchQueryBuilder.java | 10 +- .../es/query/SentenceSearchQueryBuilder.java | 11 ++ .../java/tla/backend/es/query/SortSpec.java | 127 +++++++++--------- .../tla/backend/service/LemmaService.java | 31 +---- 9 files changed, 103 insertions(+), 125 deletions(-) diff --git a/src/main/java/tla/backend/es/model/LemmaEntity.java b/src/main/java/tla/backend/es/model/LemmaEntity.java index 46d08893..1f3877c5 100644 --- a/src/main/java/tla/backend/es/model/LemmaEntity.java +++ b/src/main/java/tla/backend/es/model/LemmaEntity.java @@ -27,11 +27,11 @@ import tla.backend.es.model.parts.Transcription; import tla.backend.es.model.parts.Glyphs; import tla.domain.dto.LemmaDto; +import tla.domain.dto.LemmaDto.TimeSpan; import tla.domain.model.meta.BTSeClass; import tla.domain.model.meta.TLADTO; @Data -@SuperBuilder @TLADTO(LemmaDto.class) @BTSeClass("BTSLemmaEntry") @ToString(callSuper = true) @@ -59,7 +59,7 @@ public class LemmaEntity extends TLAEntity { @Field(type = FieldType.Object) @JsonAlias({"time_span"}) - private AttestedTimeSpan timeSpan; + private TimeSpan timeSpan; @Field(type = FieldType.Keyword) private Integer attestedSentencesCount; @@ -71,18 +71,4 @@ public class LemmaEntity extends TLAEntity { public LemmaEntity() { this.words = Collections.emptyList(); } - - @Getter - @Setter - @NoArgsConstructor - @AllArgsConstructor - @JsonInclude(Include.NON_NULL) - public static class AttestedTimeSpan { - - @Field(type = FieldType.Integer) - private Integer begin; - - @Field(type = 
FieldType.Integer) - private Integer end; - } } diff --git a/src/main/java/tla/backend/es/model/SentenceEntity.java b/src/main/java/tla/backend/es/model/SentenceEntity.java index 693555d1..75550aeb 100644 --- a/src/main/java/tla/backend/es/model/SentenceEntity.java +++ b/src/main/java/tla/backend/es/model/SentenceEntity.java @@ -108,7 +108,7 @@ public static class Context { @Field(type = FieldType.Text) private String paragraph; /** - * This sentence's positon in the containing text's array of sentences, starting + * This sentence's position in the containing text's array of sentences, starting * with 0. */ @JsonAlias("pos") @@ -118,7 +118,17 @@ public static class Context { * How many variants of a single sentences exist (due to ambivalences). */ @Field(type = FieldType.Integer) - private int variants; + private int variants; + /** + * Latest possible date + */ + @Field(type = FieldType.Integer) + private int notAfter; + /** + * Earliest possible date + */ + @Field(type = FieldType.Integer) + private int notBefore; } } \ No newline at end of file diff --git a/src/main/java/tla/backend/es/model/ThsEntryEntity.java b/src/main/java/tla/backend/es/model/ThsEntryEntity.java index d2725097..494cefaf 100644 --- a/src/main/java/tla/backend/es/model/ThsEntryEntity.java +++ b/src/main/java/tla/backend/es/model/ThsEntryEntity.java @@ -26,7 +26,6 @@ import tla.backend.es.model.parts.Translations; import tla.domain.dto.ThsEntryDto; import tla.domain.model.Passport; -import tla.domain.model.extern.AttestedTimespan; import tla.domain.model.meta.BTSeClass; import tla.domain.model.meta.TLADTO; @@ -144,16 +143,4 @@ public List extractTimespan() { Collections.sort(years); return years; } - - /** - * creates a DTO object representing the timespan covered by a thesaurus term. 
- */ - public AttestedTimespan.Period toAttestedPeriod() { - List years = this.extractTimespan(); - return AttestedTimespan.Period.builder() - .begin(years.get(0)) - .end(years.get(1)) - .ref(this.toDTOReference()) - .build(); - } } diff --git a/src/main/java/tla/backend/es/model/meta/ModelConfig.java b/src/main/java/tla/backend/es/model/meta/ModelConfig.java index 0d4a05e6..c822aeeb 100644 --- a/src/main/java/tla/backend/es/model/meta/ModelConfig.java +++ b/src/main/java/tla/backend/es/model/meta/ModelConfig.java @@ -47,7 +47,6 @@ import tla.domain.dto.ThsEntryDto; import tla.domain.dto.meta.AbstractDto; import tla.domain.model.SentenceToken; -import tla.domain.model.extern.AttestedTimespan; import tla.domain.model.meta.AbstractBTSBaseClass; import tla.domain.model.meta.BTSeClass; import tla.domain.model.meta.TLADTO; @@ -256,7 +255,6 @@ protected static ModelMapper initModelMapper() { ).addMapping( LemmaEntity::getRevisionState, LemmaDto::setReviewState ); - modelMapper.createTypeMap(LemmaEntity.AttestedTimeSpan.class, AttestedTimespan.Period.class); modelMapper.createTypeMap(ThsEntryEntity.class, ThsEntryDto.class) .addMappings( m -> m.using(translationsToMapConverter).map( diff --git a/src/main/java/tla/backend/es/query/ESQueryBuilder.java b/src/main/java/tla/backend/es/query/ESQueryBuilder.java index b58302c3..ecb577ab 100644 --- a/src/main/java/tla/backend/es/query/ESQueryBuilder.java +++ b/src/main/java/tla/backend/es/query/ESQueryBuilder.java @@ -54,7 +54,7 @@ public NativeSearchQuery buildNativeSearchQuery(Pageable page) { ).withTrackTotalHits( page.isPaged() ).withSorts( - this.getSortSpec().primary() + this.sortSpec.Sorting() ); log.info("query: {}", this.getNativeRootQueryBuilder()); this.getNativeAggregationBuilders().forEach( diff --git a/src/main/java/tla/backend/es/query/LemmaSearchQueryBuilder.java b/src/main/java/tla/backend/es/query/LemmaSearchQueryBuilder.java index dd131b58..939df45b 100644 --- 
a/src/main/java/tla/backend/es/query/LemmaSearchQueryBuilder.java +++ b/src/main/java/tla/backend/es/query/LemmaSearchQueryBuilder.java @@ -10,9 +10,11 @@ import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.Operator; +import org.elasticsearch.search.sort.SortOrder; import lombok.Getter; import tla.backend.es.model.LemmaEntity; +import tla.backend.es.query.SortSpec.FieldOrder; import tla.backend.service.ModelClass; import tla.domain.command.TypeSpec; import tla.domain.command.TranscriptionSpec; @@ -228,10 +230,14 @@ public void setBibliography(String bibliography) { public void setSort(String sort) { super.setSort(sort); - if (sortSpec.field.equals("root")) { - sortSpec.field = "relations.root.name"; + // the sort key "root" is shorthand for the field relations.root.name + if (!sortSpec.FieldOrders.isEmpty() && "root".equals(sortSpec.FieldOrders.get(0).field)) { + sortSpec.FieldOrders.get(0).field = "relations.root.name"; } - // nicht einheitlich/elegant gel?st: die anderen vier Suchf?lle sind so gel?st, + // sortKey is appended as a further sort criterion after the requested one + sortSpec.addFieldOrder(new FieldOrder("sortKey", SortOrder.ASC)); + + // nicht einheitlich/elegant gel?st: die anderen vier Suchf?lle sind so gel?st, // dass in der URL der Name der Variablen erscheint ("sortKey" bzw. "timeSpan.begin", // gefolgt von "_asc"/"_desc", aber nicht hier bei "relations.root.name" // die Angaben in der URL sollten generell sprechend sein "transliteration_asc", ..., "timespan_asc", ... 
diff --git a/src/main/java/tla/backend/es/query/SentenceSearchQueryBuilder.java b/src/main/java/tla/backend/es/query/SentenceSearchQueryBuilder.java index 17ba5d8b..7ba615ce 100644 --- a/src/main/java/tla/backend/es/query/SentenceSearchQueryBuilder.java +++ b/src/main/java/tla/backend/es/query/SentenceSearchQueryBuilder.java @@ -7,12 +7,14 @@ import org.apache.lucene.search.join.ScoreMode; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.sort.SortOrder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import tla.backend.es.model.SentenceEntity; import tla.backend.es.model.SentenceEntity.Context; import tla.backend.es.model.parts.Token; +import tla.backend.es.query.SortSpec.FieldOrder; import tla.backend.service.ModelClass; import tla.domain.command.PassportSpec; import tla.domain.command.SentenceSearch.TokenSpec; @@ -23,6 +25,13 @@ public class SentenceSearchQueryBuilder extends ESQueryBuilder implements MultiLingQueryBuilder { public final static String AGG_ID_TEXT_IDS = "text_ids"; + + public void setSort(String sort) { + super.setSort(sort); + sortSpec.addFieldOrder(new FieldOrder("context.textId", SortOrder.ASC)); + sortSpec.addFieldOrder(new FieldOrder("context.position", SortOrder.ASC)); + } + public void setContext(Context context) { BoolQueryBuilder textQuery = boolQuery(); @@ -35,6 +44,8 @@ public void setContext(Context context) { } } + + /* public void setTokens(Collection tokens) { if (tokens != null) { diff --git a/src/main/java/tla/backend/es/query/SortSpec.java b/src/main/java/tla/backend/es/query/SortSpec.java index f0a52d23..0d0a33c9 100644 --- a/src/main/java/tla/backend/es/query/SortSpec.java +++ b/src/main/java/tla/backend/es/query/SortSpec.java @@ -1,6 +1,7 @@ package tla.backend.es.query; import java.util.Arrays; +import java.util.ArrayList; import org.elasticsearch.search.sort.ScoreSortBuilder; import org.elasticsearch.search.sort.SortBuilder; @@ 
-14,73 +15,79 @@ * Representation of search order specifications. */ @NoArgsConstructor -@AllArgsConstructor public class SortSpec { - public static final String DELIMITER = "_"; - /** - * an empty sort specification instance, whose {@link #primary()} method just returns - * a standard {@link ScoreSortBuilder}. - */ - public static final SortSpec DEFAULT = new SortSpec(); + public static final String DELIMITER = "_"; + /** + * a sort specification instance created without any field orders, for which + * {@link #Sorting()} yields an empty list of sort builders. + */ + public static final SortSpec DEFAULT = new SortSpec(); - /** - * name of field by whose value to order. - */ - protected String field; - /** - * sort order (i.e. {@link SortOrder.ASC} or {@link SortOrder.DESC}) - */ - protected SortOrder order; + protected ArrayList<FieldOrder> FieldOrders = new ArrayList<FieldOrder>(); - /** - * Create new sort spec configured for ascending order ({@link SortOrder.ASC}) on given field. - */ - public SortSpec(String field) { - this(field, SortOrder.ASC); - } + /** + * Create new sort spec whose only sort criterion is the given field order + * (field name plus sort direction). + */ + public SortSpec(FieldOrder FieldOrder) { + this.FieldOrders.add(FieldOrder); + } - /** - * Create a new sort specification instance with given field name and sort order ("asc"/"desc"). - */ - public SortSpec(String field, String order) { - this( - field, - order.toLowerCase().equals("desc") ? SortOrder.DESC : SortOrder.ASC - ); - } + public SortSpec(ArrayList<FieldOrder> FieldOrders) { + this.FieldOrders = FieldOrders; + } - /** - * Create a sort spec instance from a string consisting of a field name, followed by an order specifier (asc/desc), - * seperated by the delimiter character defined in {@link #DELIMITER}. 
- */ - public static SortSpec from(String source) { - if (source != null) { - String[] segm = source.split(DELIMITER); - String field = String.join( - DELIMITER, - Arrays.asList(segm).subList(0, segm.length - 1) - ); - if (segm.length > 1) { - return new SortSpec(field, segm[segm.length - 1]); - } else { - return new SortSpec(segm[0]); - } - } else { - return new SortSpec("id"); - } - } + public void addFieldOrder(FieldOrder FieldOrder) { + this.FieldOrders.add(FieldOrder); + } - public SortBuilder primary() { - if (this.field != null) { - return SortBuilders.fieldSort(this.field).order(this.order); - } else { - return SortBuilders.scoreSort(); - } - } + public static class FieldOrder { + /** + * name of field by whose value to order. + */ + protected String field; + /** + * sort order (i.e. {@link SortOrder#ASC} or {@link SortOrder#DESC}) + */ + protected SortOrder order; - public SortBuilder secondary() { - return SortBuilders.fieldSort("id").order(this.order); - } + public FieldOrder(String field, SortOrder order) { + this.field = field; + this.order = order; + } + } + /** + * Create a sort spec instance from a string consisting of a field name, + * followed by an order specifier (asc/desc), separated by the delimiter + * character defined in {@link #DELIMITER}. 
+ */ + public static SortSpec from(String source) { + + if (source != null) { + String[] segm = source.split(DELIMITER); + String field = String.join(DELIMITER, Arrays.asList(segm).subList(0, segm.length - 1)); + // only an explicit "desc" suffix (in any letter case) selects descending order + if (segm.length > 1) { + if (!segm[segm.length - 1].equalsIgnoreCase("desc")) { + return new SortSpec(new FieldOrder(field, SortOrder.ASC)); + } else { + return new SortSpec(new FieldOrder(field, SortOrder.DESC)); + } + } else { + return new SortSpec(new FieldOrder(segm[0], SortOrder.ASC)); + } + } else { + return new SortSpec(new FieldOrder("id", SortOrder.ASC)); + } + } + + public ArrayList<SortBuilder<?>> Sorting() { + ArrayList<SortBuilder<?>> allSortBuilders = new ArrayList<SortBuilder<?>>(); + for (FieldOrder fieldOrder : FieldOrders) { + allSortBuilders.add(SortBuilders.fieldSort(fieldOrder.field).order(fieldOrder.order)); + } + return allSortBuilders; + } } \ No newline at end of file diff --git a/src/main/java/tla/backend/service/LemmaService.java b/src/main/java/tla/backend/service/LemmaService.java index 694f7cfe..25adfde5 100644 --- a/src/main/java/tla/backend/service/LemmaService.java +++ b/src/main/java/tla/backend/service/LemmaService.java @@ -32,9 +32,7 @@ import tla.domain.dto.extern.SingleDocumentWrapper; import tla.domain.dto.meta.AbstractDto; import tla.domain.model.Language; -import tla.domain.model.extern.AttestedTimespan; -import tla.domain.model.extern.AttestedTimespan.AttestationStats; -import tla.domain.model.extern.AttestedTimespan.Period; + @Service @ModelClass(value = LemmaEntity.class, path = "lemma") @@ -60,7 +58,7 @@ public ElasticsearchRepository getRepo() { * that lemma attestations are computed from occurrences and put into the * wrapped lemma DTO. 
* - * @see {@link #computeAttestedTimespans(String)} + * @see {@link #setAttestationsAndPeriod(String)} */ @Override public SingleDocumentWrapper getDetails(String id) { @@ -68,33 +66,10 @@ public SingleDocumentWrapper getDetails(String id) { if (lemma == null) { return null; } - SingleDocumentWrapper wrapper = super.getDetails(id); - ((LemmaDto) wrapper.getDoc()).setAttestations( - this.computeAttestedTimespans((LemmaDto) wrapper.getDoc()) - ); + SingleDocumentWrapper wrapper = super.getDetails(id); return wrapper; } - /** - * count sentences and texts containing the specified lemma. - */ - public List computeAttestedTimespans(LemmaDto dto) { - ESQueryResult sentenceSearchResult = searchService.register( - new SentencesContainingLemmaOccurrenceQueryBuilder(dto.getId()) - ).run(SearchService.UNPAGED); - Period attestedPeriod = dto.getTimeSpan(); - AttestationStats counts = AttestationStats.builder().count(0 - //sentenceSearchResult.getAggregation(SentenceSearchQueryBuilder.AGG_ID_TEXT_IDS).size() - ).texts(0 - //sentenceSearchResult.getAggregation(SentenceSearchQueryBuilder.AGG_ID_TEXT_IDS).size() - ).sentences( - sentenceSearchResult.getHitCount() - ).build(); - return List.of( - AttestedTimespan.builder().period(attestedPeriod).attestations(counts).build() - ); - } - public Map getMostFrequent(int limit) { SearchResponse response = this.searchService.query(SentenceEntity.class, matchAllQuery(), AggregationBuilders.nested("aggs", "tokens").subAggregation(AggregationBuilders.terms("lemmata")