diff --git a/project/build.properties b/project/build.properties
index 6adcdc7..5a9ed92 100755
--- a/project/build.properties
+++ b/project/build.properties
@@ -1 +1 @@
-sbt.version=1.3.3
+sbt.version=1.3.4
diff --git a/src/main/scala/org/bireme/sd/IndexTest.scala b/src/main/scala/org/bireme/sd/IndexTest.scala
new file mode 100644
index 0000000..4cf8ff4
--- /dev/null
+++ b/src/main/scala/org/bireme/sd/IndexTest.scala
@@ -0,0 +1,90 @@
+package org.bireme.sd
+
+import collection.JavaConverters._
+
+import java.io.File
+
+import org.apache.lucene.index.{DirectoryReader, IndexReader, Term}
+import org.apache.lucene.search.{IndexSearcher, TermQuery, TopDocs}
+import org.apache.lucene.store.FSDirectory
+
+import scala.util.{Failure, Success, Try}
+
+/**
+  * Command line check of a Lucene index: searches a term in a document field
+  * and reports the number of hits as the process exit status (0 when nothing
+  * is found or when any error happens, otherwise the hit count capped at 255).
+  */
+object IndexTest extends App {
+  /** Prints the usage message to stderr and exits with status 0. */
+  private def usage(): Unit = {
+    Console.err.println("usage: IndexTest\n" +
+      "\t\t - path to Lucene index\n" +
+      "\t\t - document field\n" +
+      "\t\t[] - term used to verify the number of hits. Default is 'dengue'")
+    System.exit(0)
+  }
+
+  val size = args.length
+  if (size < 2) usage()
+
+  val term = if (size > 2) args(2) else "dengue"
+
+  val hits = Try[Int] {
+    val directory: FSDirectory = FSDirectory.open(new File(args(0)).toPath)
+    // Nested try/finally: the reader and the directory are closed even when
+    // opening the reader or checking the documents throws an exception.
+    try {
+      val ireader: DirectoryReader = DirectoryReader.open(directory)
+      try checkDocs(new Term(args(1), term), ireader)
+      finally ireader.close()
+    } finally directory.close()
+  } match {
+    case Success(h) => h
+    case Failure(ex) =>
+      // Report the cause on stderr instead of hiding it; the status stays 0.
+      Console.err.println(s"IndexTest failed: $ex")
+      0
+  }
+
+  // The exit status is reported in 8 bits, so the hit count is capped at 255.
+  val retValue = if (hits > 255) 255 else hits
+
+  System.exit(retValue)
+
+  /**
+    * Searches the index for a term and reads back every returned document.
+    *
+    * @param term field/value pair to look for (must not be null)
+    * @param ireader open reader of the index to check (must not be null)
+    * @return the total number of hits, saturated at Int.MaxValue
+    * @throws Exception if the index has no documents or the term has no hits
+    */
+  private def checkDocs(term: Term,
+                        ireader: IndexReader): Int = {
+    require(term != null)
+    require(ireader != null)
+
+    val numDocs = ireader.numDocs()
+    if (numDocs <= 0) throw new Exception("numDocs <= 0")
+
+    val query = new TermQuery(term)
+    val isearcher = new IndexSearcher(ireader)
+    val topDocs: TopDocs = isearcher.search(query, 1000)
+    //val totalHits: Long = topDocs.totalHits.value // Lucene 8.0.0
+    val totalHits: Long = topDocs.totalHits
+
+    if (totalHits <= 0) throw new Exception("totalHits <= 0")
+
+    // Read every returned document and the string value of each of its
+    // fields; any exception raised while reading propagates to the caller.
+    topDocs.scoreDocs.foreach { scoreDoc =>
+      val doc = ireader.document(scoreDoc.doc)
+      doc.getFields().asScala.foreach(_.stringValue())
+    }
+
+    // Saturate instead of wrapping around if the count does not fit an Int.
+    math.min(totalHits, Int.MaxValue.toLong).toInt
+  }
+}
+
diff --git a/src/main/scala/org/bireme/sd/LuceneIndexAkka.scala b/src/main/scala/org/bireme/sd/LuceneIndexAkka.scala
index 646cb8b..a41f214 100755
--- a/src/main/scala/org/bireme/sd/LuceneIndexAkka.scala
+++ b/src/main/scala/org/bireme/sd/LuceneIndexAkka.scala
@@ -73,7 +73,7 @@ class LuceneIndexMain(indexPath: String,
   val lastModifiedDoc: MVMap[String, Long] = docLastModified.openMap("modDoc")
   if (fullIndexing) lastModifiedDoc.clear()
 
-  val excludeDays: Int = 2 // Number of days to remove from the last iahx update day
-  val excludeTime: Long = excludeDays * 24 * 60 * 60 * 1000 // excludeDays in miliseconds
+  val excludeDays: Int = 20 // Number of days to remove from the last iahx update day
+  val excludeTime: Long = excludeDays * 24L * 60 * 60 * 1000 // excludeDays in milliseconds (Long arithmetic: Int would overflow from 25 days on)
   val lastIahxModification: Long = getIahxModification.getOrElse(new Date().getTime - excludeTime)
 
diff --git a/src/main/scala/org/bireme/sd/service/Conf.scala b/src/main/scala/org/bireme/sd/service/Conf.scala
index 5411870..79613b7 100755
--- a/src/main/scala/org/bireme/sd/service/Conf.scala
+++ b/src/main/scala/org/bireme/sd/service/Conf.scala
@@ -15,7 +15,7 @@ object Conf {
 
   val maxDocs: Int = 10 // Maximum number of documents to be pre-processed or retrieved
 
-  val lastDays: Option[Int] = Some(7) // Update only docs that are younger (entrance_date flag) than 'lastDays' days"
+  val lastDays: Option[Int] = Some(40) // Update only docs that are younger (entrance_date flag) than 'lastDays' days
 
   val sources: Option[Set[String]] = Some(Set("MEDLINE", "LILACS", "LIS", "colecionaSUS")) // Update only docs whose field 'db' belongs to sources"
 