Skip to content

Commit

Permalink
OAK-11412 - Indexing job: delay creation of index writers (Lucene and…
Browse files Browse the repository at this point in the history
… Elastic) until the start of the indexing phase (#2008)
  • Loading branch information
nfsantos authored Jan 22, 2025
1 parent 593d4c1 commit 2ddb40a
Show file tree
Hide file tree
Showing 30 changed files with 196 additions and 222 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,17 @@ public LuceneIndexDefinition(NodeState root, NodeState defn, String indexPath) {
this.codec = createCodec();
}

public static Builder newBuilder(NodeState root, NodeState defn, String indexPath){
return (Builder)new Builder()
public static Builder newLuceneBuilder(NodeState root, NodeState defn, String indexPath){
return (Builder) new Builder()
.root(root)
.defn(defn)
.indexPath(indexPath);
}

public static class Builder extends IndexDefinition.Builder {
public static class Builder extends IndexDefinition.Builder<LuceneIndexDefinition> {
@Override
public LuceneIndexDefinition build() {
return (LuceneIndexDefinition)super.build();
return super.build();
}

@Override
Expand All @@ -97,7 +97,7 @@ public LuceneIndexDefinition.Builder reindex() {
}

@Override
protected IndexDefinition createInstance(NodeState indexDefnStateToUse) {
protected LuceneIndexDefinition createInstance(NodeState indexDefnStateToUse) {
return new LuceneIndexDefinition(root, indexDefnStateToUse, version, uid, indexPath);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ public class LuceneIndexEditorProvider implements IndexEditorProvider {
private boolean nrtIndexingEnabled;
private LuceneIndexWriterConfig writerConfig = new LuceneIndexWriterConfig();


/**
* Number of indexed Lucene documents that can be held in memory.
* This ensures that for very large commit memory consumption
Expand Down Expand Up @@ -125,6 +124,7 @@ public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier,
this(indexCopier, indexTracker, extractedTextCache, augmentorFactory, mountInfoProvider,
ActiveDeletedBlobCollectorFactory.NOOP, null, null);
}

public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier,
@Nullable IndexTracker indexTracker,
ExtractedTextCache extractedTextCache,
Expand Down Expand Up @@ -203,7 +203,7 @@ public Editor getIndexEditor(

if (indexDefinition == null) {
indexDefinition = LuceneIndexDefinition
.newBuilder(root, definition.getNodeState(), indexPath)
.newLuceneBuilder(root, definition.getNodeState(), indexPath)
.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ private void computeAsyncIndexInfo(NodeState idxState, String indexPath, LuceneI
}

private void computeSize(NodeState idxState, LuceneIndexInfo info) throws IOException {
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(nodeStore.getRoot(), idxState, info.indexPath).build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(nodeStore.getRoot(), idxState, info.indexPath).build();
for (String dirName : idxState.getChildNodeNames()) {
if (NodeStateUtils.isHidden(dirName)) {
// This is true for both read-write index data dir (:data) and the read-only mount (:oak-libs-mount-index-data)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ private Result check(Level level, boolean cleanWorkDir, Closer closer) throws IO

private void checkIndex(Result result, Closer closer) throws IOException {
NodeState idx = NodeStateUtils.getNode(rootState, indexPath);
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(rootState, idx, indexPath).build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(rootState, idx, indexPath).build();
workDir = createWorkDir(workDirRoot, PathUtils.getName(indexPath));

for (String dirName : idx.getChildNodeNames()){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public LuceneIndexDumper(NodeState rootState, String indexPath, File baseDir) {
public void dump() throws IOException {
try (Closer closer = Closer.create()) {
NodeState idx = NodeStateUtils.getNode(rootState, indexPath);
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(rootState, idx, indexPath).build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(rootState, idx, indexPath).build();
indexDir = DirectoryUtils.createIndexDir(baseDir, indexPath);
IndexMeta meta = new IndexMeta(indexPath);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ IndexWriter getWriter() throws IOException {
config.setMergePolicy(definition.getMergePolicy());
writer = localRefWriter = new IndexWriter(directory, config);
genAtStart = getLatestGeneration(directory);
log.trace("IndexWriterConfig for index [{}] is {}", definition.getIndexPath(), config);
log.info("Creating writer for index: {}. Config: {}", definition.getIndexPath(), config);
PERF_LOGGER.end(start, -1, "Created IndexWriter for directory {}", definition);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@
public class LuceneDocumentMakerLargeStringPropertiesLogTest {

ListAppender<ILoggingEvent> listAppender = null;
private final String nodeImplLogger = LuceneDocumentMaker.class.getName();
private final String warnMessage = "String length: {} for property: {} at Node: {} is greater than configured value {}";
private String customStringPropertyThresholdLimit = "9";
private String smallStringProperty = "1234567";
private String largeStringPropertyAsPerCustomThreshold = "1234567890";
private static final String nodeImplLogger = LuceneDocumentMaker.class.getName();
private static final String warnMessage = "String length: {} for property: {} at Node: {} is greater than configured value {}";
private static final String customStringPropertyThresholdLimit = "9";
private static final String smallStringProperty = "1234567";
private static final String largeStringPropertyAsPerCustomThreshold = "1234567890";

@Rule
public TemporarySystemProperty temporarySystemProperty = new TemporarySystemProperty();
Expand Down Expand Up @@ -81,7 +81,7 @@ private LuceneDocumentMaker addPropertyAccordingToType(NodeBuilder test, Type ty
.analyzed()
.valueExcludedPrefixes("/jobs");

LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(root, builder.build(), "/foo").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(root, builder.build(), "/foo").build();
LuceneDocumentMaker docMaker = new LuceneDocumentMaker(defn,
defn.getApplicableIndexingRule("nt:base"), "/x");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.junit.Test;

import static java.util.Arrays.asList;
import java.util.List;

import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;
import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;

public class LuceneDocumentMakerTest {
private NodeState root = INITIAL_CONTENT;
private LuceneIndexDefinitionBuilder builder = new LuceneIndexDefinitionBuilder();
private final NodeState root = INITIAL_CONTENT;
private final LuceneIndexDefinitionBuilder builder = new LuceneIndexDefinitionBuilder();

@Test
public void excludeSingleProperty() throws Exception{
Expand All @@ -43,7 +44,7 @@ public void excludeSingleProperty() throws Exception{
.analyzed()
.valueExcludedPrefixes("/jobs");

LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(root, builder.build(), "/foo").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(root, builder.build(), "/foo").build();
LuceneDocumentMaker docMaker = new LuceneDocumentMaker(defn,
defn.getApplicableIndexingRule("nt:base"), "/x");

Expand All @@ -55,10 +56,10 @@ public void excludeSingleProperty() throws Exception{
test.setProperty("foo", "/jobs/a");
assertNull(docMaker.makeDocument(test.getNodeState()));

test.setProperty("foo", asList("/a", "/jobs/a"), Type.STRINGS);
test.setProperty("foo", List.of("/a", "/jobs/a"), Type.STRINGS);
assertNotNull(docMaker.makeDocument(test.getNodeState()));

test.setProperty("foo", asList("/jobs/a"), Type.STRINGS);
test.setProperty("foo", List.of("/jobs/a"), Type.STRINGS);
assertNull(docMaker.makeDocument(test.getNodeState()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ public class FSDirectoryFactoryTest {
@Rule
public TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));

private NodeState root = INITIAL_CONTENT;
private NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();
private final NodeState root = INITIAL_CONTENT;
private final NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();

@Test
public void singleIndex() throws Exception{
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(root, idx.getNodeState(), "/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(root, idx.getNodeState(), "/fooIndex").build();
FSDirectoryFactory factory = new FSDirectoryFactory(temporaryFolder.getRoot());

Directory dir = factory.newInstance(defn, idx, ":data", false);
Expand All @@ -64,8 +64,8 @@ public void singleIndex() throws Exception{

@Test
public void multiIndexWithSimilarPaths() throws Exception{
LuceneIndexDefinition defn1 = LuceneIndexDefinition.newBuilder(root, idx.getNodeState(), "/content/a/en_us/oak:index/fooIndex").build();
LuceneIndexDefinition defn2 = LuceneIndexDefinition.newBuilder(root, idx.getNodeState(), "/content/b/en_us/oak:index/fooIndex").build();
LuceneIndexDefinition defn1 = LuceneIndexDefinition.newLuceneBuilder(root, idx.getNodeState(), "/content/a/en_us/oak:index/fooIndex").build();
LuceneIndexDefinition defn2 = LuceneIndexDefinition.newLuceneBuilder(root, idx.getNodeState(), "/content/b/en_us/oak:index/fooIndex").build();

FSDirectoryFactory factory = new FSDirectoryFactory(temporaryFolder.getRoot());
factory.newInstance(defn1, idx, ":data", false).close();
Expand All @@ -81,7 +81,7 @@ public void multiIndexWithSimilarPaths() throws Exception{

@Test
public void reuseExistingDir() throws Exception{
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(root, idx.getNodeState(), "/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(root, idx.getNodeState(), "/fooIndex").build();
FSDirectoryFactory factory = new FSDirectoryFactory(temporaryFolder.getRoot());

Directory dir = factory.newInstance(defn, idx, ":data", false);
Expand All @@ -98,7 +98,7 @@ public void reuseExistingDir() throws Exception{

@Test
public void directoryMapping() throws Exception{
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(root, idx.getNodeState(), "/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(root, idx.getNodeState(), "/fooIndex").build();
FSDirectoryFactory factory = new FSDirectoryFactory(temporaryFolder.getRoot());

Directory dir1 = factory.newInstance(defn, idx, ":data", false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@

public class IndexConsistencyCheckerTest {

private NodeState rootState = InitialContentHelper.INITIAL_CONTENT;
private NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();
private final NodeState rootState = InitialContentHelper.INITIAL_CONTENT;
private final NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();


@Rule
Expand Down Expand Up @@ -117,7 +117,7 @@ public void blobsWithSizeMismatch() throws Exception{

@Test
public void validIndexTest() throws Exception{
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
Directory dir = new OakDirectory(idx, ":data", defn, false);
createIndex(dir, 10);

Expand All @@ -142,7 +142,7 @@ public void validIndexTest() throws Exception{

@Test
public void missingFile() throws Exception{
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
Directory dir = new OakDirectory(idx, ":data", defn, false);
createIndex(dir, 10);

Expand All @@ -164,7 +164,7 @@ public void missingFile() throws Exception{

@Test
public void badFile() throws Exception{
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
Directory dir = new OakDirectory(idx, ":data", defn, false);
createIndex(dir, 10);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@
import static org.junit.Assert.assertNotNull;

public class LuceneIndexDumperTest {
private NodeState rootState = InitialContentHelper.INITIAL_CONTENT;
private NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();
private final NodeState rootState = InitialContentHelper.INITIAL_CONTENT;
private final NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();

@Rule
public final TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));

@Test
public void directoryDump() throws Exception{
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(rootState, idx.getNodeState(), "/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(rootState, idx.getNodeState(), "/fooIndex").build();

long size = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,16 @@
import static org.junit.Assert.*;

public class LuceneIndexImporterTest {
private NodeState rootState = InitialContentHelper.INITIAL_CONTENT;
private NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();
private final NodeState rootState = InitialContentHelper.INITIAL_CONTENT;
private final NodeBuilder idx = new LuceneIndexDefinitionBuilder().build().builder();

@Rule
public final TemporaryFolder temporaryFolder = new TemporaryFolder(new File("target"));

@Test
public void exportAndImport() throws Exception{
NodeState baseIndexState = idx.getNodeState();
LuceneIndexDefinition defn = LuceneIndexDefinition.newBuilder(rootState, baseIndexState, "/oak:index/fooIndex").build();
LuceneIndexDefinition defn = LuceneIndexDefinition.newLuceneBuilder(rootState, baseIndexState, "/oak:index/fooIndex").build();

LuceneIndexEditorContext.configureUniqueId(idx);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
Expand Down Expand Up @@ -207,15 +209,15 @@ public static NodeBuilder childBuilder(NodeBuilder nb, String path, boolean crea
return nb;
}

public Set<IndexDefinition> getIndexDefinitions() throws IOException, CommitFailedException {
public List<IndexDefinition> getIndexDefinitions() throws IOException, CommitFailedException {
NodeState checkpointedState = this.retrieveNodeStateForCheckpoint();
NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
NodeBuilder builder = copyOnWriteStore.getRoot().builder();
NodeState root = builder.getNodeState();
this.updateIndexDefinitions(builder);
IndexDefinition.Builder indexDefBuilder = new IndexDefinition.Builder();
IndexDefinition.BaseBuilder indexDefBuilder = new IndexDefinition.BaseBuilder();

Set<IndexDefinition> indexDefinitions = new HashSet<>();
ArrayList<IndexDefinition> indexDefinitions = new ArrayList<>();

for (String indexPath : indexHelper.getIndexPaths()) {
NodeBuilder idxBuilder = IndexerSupport.childBuilder(builder, indexPath, false);
Expand All @@ -226,10 +228,9 @@ public Set<IndexDefinition> getIndexDefinitions() throws IOException, CommitFail
}

/**
* @param indexDefinitions
* @return the set of preferred path elements referenced by the given index definitions.
*/
public Set<String> getPreferredPathElements(Set<IndexDefinition> indexDefinitions) {
public Set<String> getPreferredPathElements(List<IndexDefinition> indexDefinitions) {
Set<String> preferredPathElements = new HashSet<>();
for (IndexDefinition indexDf : indexDefinitions) {
preferredPathElements.addAll(indexDf.getRelativeNodeNames());
Expand All @@ -240,17 +241,15 @@ public Set<String> getPreferredPathElements(Set<IndexDefinition> indexDefinition
/**
* @param indexDefinitions index definitions used to compute the path predicate
* @param typeToRepositoryPath function that converts a value of type {@code T} to a valid repository path ({@code String})
* @param <T>
* @return a filter predicate based on the include/exclude path rules of the given index definitions.
*/
public <T> Predicate<T> getFilterPredicate(Set<IndexDefinition> indexDefinitions, Function<T, String> typeToRepositoryPath) {
public <T> Predicate<T> getFilterPredicate(List<IndexDefinition> indexDefinitions, Function<T, String> typeToRepositoryPath) {
return t -> indexDefinitions.stream().anyMatch(indexDef -> indexDef.getPathFilter().filter(typeToRepositoryPath.apply(t)) != PathFilter.Result.EXCLUDE);
}

/**
* @param pattern custom exclusion regex; paths matching it are filtered out
* @param typeToRepositoryPath function that converts a value of type {@code T} to a valid repository path ({@code String})
* @param <T>
* @return a predicate that tests true for all paths that do not match the provided regex pattern.
*/
public <T> Predicate<T> getFilterPredicateBasedOnCustomRegex(Pattern pattern, Function<T, String> typeToRepositoryPath) {
Expand Down
Loading

0 comments on commit 2ddb40a

Please sign in to comment.