Skip to content

Commit

Permalink
Merge pull request #27 from pitangainnovare/impl/top100
Browse files Browse the repository at this point in the history
Cria índice metrics para apoiar SciELO Analytics
  • Loading branch information
pitangainnovare authored Jul 17, 2024
2 parents 17ce0c2 + dfc76ae commit 6c9bc37
Show file tree
Hide file tree
Showing 25 changed files with 635 additions and 25 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Solr
index/usage/data
index/data/usage/data/
index/data/log_manager/data
index/data/metrics/data
index/logs

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
7 changes: 4 additions & 3 deletions config/menu.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
WAGTAIL_MENU_APPS_ORDER = {
"collection": 100,
"log_manager": 200,
"tasks": 300,
"unexpected-error": 400,
"metrics": 200,
"log_manager": 300,
"tasks": 400,
"unexpected-error": 500,
}

def get_menu_order(app_name):
Expand Down
29 changes: 22 additions & 7 deletions config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
"collection",
"core",
"log_manager",
"metrics",
"tracker",
]

Expand Down Expand Up @@ -403,7 +404,21 @@
HAYSTACK_CONNECTIONS = {
"default": {
"ENGINE": "haystack.backends.solr_backend.SolrEngine",
"URL": "%s%s" % (env("SOLR_URL", default="http://solr:8983/solr/"), "usage"),
"URL": "%s%s" % (env("SOLR_URL", default="http://solr:8983/solr/"), "metrics"),
"ADMIN_URL": "http://solr:8983/solr/admin/cores",
"SILENTLY_FAIL": False,
"SOLR_TIMEOUT": 10,
},
"metrics": {
"ENGINE": "haystack.backends.solr_backend.SolrEngine",
"URL": "%s%s" % (env("SOLR_URL", default="http://solr:8983/solr/"), "metrics"),
"ADMIN_URL": "http://solr:8983/solr/admin/cores",
"SILENTLY_FAIL": False,
"SOLR_TIMEOUT": 10,
},
"log_manager": {
"ENGINE": "haystack.backends.solr_backend.SolrEngine",
"URL": "%s%s" % (env("SOLR_URL", default="http://solr:8983/solr/"), "log_manager"),
"ADMIN_URL": "http://solr:8983/solr/admin/cores",
"SILENTLY_FAIL": False,
"SOLR_TIMEOUT": 10,
Expand All @@ -421,10 +436,10 @@

SEARCH_PAGINATION_ITEMS_PER_PAGE = 10

SEARCH_FACET_ITEMS_PER_MORE = 5
# SEARCH_FACET_ITEMS_PER_MORE = 5

SEARCH_FACET_LIST = [
"server_time",
"action_name",
"browser_name",
]
# SEARCH_FACET_LIST = [
# "server_time",
# "action_name",
# "browser_name",
# ]
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="congre" version="1.5">
<schema name="log_manager" version="1.5">

<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
Expand Down
File renamed without changes.
6 changes: 6 additions & 0 deletions index/data/log_manager/core.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#Written by CorePropertiesLocator
#Fri Mar 29 13:50:09 UTC 2024
schema=schema.xml
dataDir=data
name=log_manager
config=solrconfig.xml
108 changes: 108 additions & 0 deletions index/data/metrics/conf/schema.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="metrics" version="1.5">

<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />

<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>

<fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>

<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>

<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>

<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>

<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>

<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>

<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

<fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>

<fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>

<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>

<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>

<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
</types>

<fields>
<field name="_version_" type="long" indexed="true" stored="true"/>

<!-- id unique field -->
<field name="id" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- default search -->
<field name="text" type="text" indexed="true" stored="true"/>
<field name="tw" type="text" indexed="true" stored="false" multiValued="true"/>

<!-- haystack -->
<field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- type of solr document: top100articles, tr_j1, tr_j4, ir_a1, ir_a4, gr_j1, gr_j4, lr_j1, lr_j4, cr_j1 -->
<!-- declared as an untokenized string (solr.StrField) so the scope can be matched by its exact value -->
<field name="metric_scope" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- COUNTER R5 metrics -->
<field name="total_item_requests" type="int" indexed="true" stored="true" multiValued="false"/>
<field name="total_item_investigations" type="int" indexed="true" stored="true" multiValued="false"/>
<field name="unique_item_requests" type="int" indexed="true" stored="true" multiValued="false"/>
<field name="unique_item_investigations" type="int" indexed="true" stored="true" multiValued="false"/>

<!-- date of access -->
<field name="year_month_day" type="date" indexed="true" stored="true" multiValued="false"/>

<!-- country of origin of access -->
<field name="country" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- language of the accessed document -->
<field name="document_language" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- collection -->
<field name="collection" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- journal -->
<field name="key_issn" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- document identifier -->
<field name="pid" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- year of publication -->
<field name="yop" type="string" indexed="true" stored="true" multiValued="false"/>

<field name="priority" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- dynamic field definitions -->
<dynamicField name="*" type="text" indexed="true" stored="true" multiValued="false"/>

</fields>

<uniqueKey>id</uniqueKey>
<copyField source="text" dest="tw"/>
<copyField source="*" dest="tw"/>

</schema>
98 changes: 98 additions & 0 deletions index/data/metrics/conf/solrconfig.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
<?xml version="1.0" encoding="UTF-8" ?>

<!--
~ Copyright (c) 2013-2022. LA Referencia / Red CLARA and others
~
~ This program is free software: you can redistribute it and/or modify
~ it under the terms of the GNU Affero General Public License as published by
~ the Free Software Foundation, either version 3 of the License, or
~ (at your option) any later version.
~
~ This program is distributed in the hope that it will be useful,
~ but WITHOUT ANY WARRANTY; without even the implied warranty of
~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
~ GNU Affero General Public License for more details.
~
~ You should have received a copy of the GNU Affero General Public License
~ along with this program. If not, see <http://www.gnu.org/licenses/>.
~
~ This file is part of LA Referencia software platform LRHarvester v4.x
~ For any further information please contact Lautaro Matas <[email protected]>
-->

<!--
This is a stripped down config file used for a simple example...
It is *not* a good example to work from.
-->
<config>
<luceneMatchVersion>LUCENE_42</luceneMatchVersion>
<!-- The DirectoryFactory to use for indexes.
solr.StandardDirectoryFactory, the default, is filesystem based.
solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

<dataDir>${solr.core0.data.dir:}</dataDir>

<!-- To enable dynamic schema REST APIs, use the following for <schemaFactory>:
<schemaFactory class="ManagedIndexSchemaFactory">
<bool name="mutable">true</bool>
<str name="managedSchemaResourceName">managed-schema</str>
</schemaFactory>
When ManagedIndexSchemaFactory is specified, Solr will load the schema from
the resource named in 'managedSchemaResourceName', rather than from schema.xml.
Note that the managed schema resource CANNOT be named schema.xml. If the managed
schema does not exist, Solr will create it after reading schema.xml, then rename
'schema.xml' to 'schema.xml.bak'.
Do NOT hand edit the managed schema - external modifications will be ignored and
overwritten as a result of schema modification REST API calls.
When ManagedIndexSchemaFactory is specified with mutable = true, schema
modification REST API calls will be allowed; otherwise, error responses will be
sent back for these requests.
-->
<schemaFactory class="ClassicIndexSchemaFactory"/>

<updateHandler class="solr.DirectUpdateHandler2">
<updateLog>
<str name="dir">${solr.core0.data.dir:}</str>
</updateLog>
</updateHandler>

<!-- realtime get handler, guaranteed to return the latest stored fields
of any document, without the need to commit or open a new searcher. The current
implementation relies on the updateLog feature being enabled. -->
<requestHandler name="/get" class="solr.RealTimeGetHandler">
<lst name="defaults">
<str name="omitHeader">true</str>
</lst>
</requestHandler>

<requestHandler name="/replication" class="solr.ReplicationHandler" startup="lazy" />

<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" formdataUploadLimitInKB="2048" />
</requestDispatcher>

<requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
<requestHandler name="/analysis/field" startup="lazy" class="solr.FieldAnalysisRequestHandler" />
<requestHandler name="/update" class="solr.UpdateRequestHandler" />

<requestHandler name="/admin/ping" class="solr.PingRequestHandler">
<lst name="invariants">
<str name="q">solrpingquery</str>
</lst>
<lst name="defaults">
<str name="echoParams">all</str>
</lst>
</requestHandler>

<!-- config for the admin interface -->
<admin>
<defaultQuery>solr</defaultQuery>
</admin>

</config>

Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
#Fri Mar 29 13:50:09 UTC 2024
schema=schema.xml
dataDir=data
name=usage
name=metrics
config=solrconfig.xml
8 changes: 4 additions & 4 deletions local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ services:
- solr
volumes:
- .:/app:z
- ../scms_data/usage/log_manager:/data
- ../scms_data/usage/data:/data
env_file:
- ./.envs/.local/.django
- ./.envs/.local/.postgres
Expand Down Expand Up @@ -60,7 +60,7 @@ services:
env_file:
- ./.envs/.local/.minio
volumes:
- ../scms_data/usage/minio:/data
- ../scms_data/minio:/data
command: server --console-address ":9001" /data

postgres:
Expand All @@ -70,8 +70,8 @@ services:
image: scielo_usage_local_postgres
container_name: scielo_usage_local_postgres
volumes:
- ../scms_data/usage/postgresql:/var/lib/postgresql/data:Z
- ../scms_data/usage/postgresql_backup:/backups:z
- ../scms_data/postgresql:/var/lib/postgresql/data:Z
- ../scms_data/postgresql_backup:/backups:z
ports:
- "5439:5432"
env_file:
Expand Down
4 changes: 0 additions & 4 deletions log_manager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,27 +268,23 @@ class CollectionLogFileDateCount(CommonControlField):

year = models.IntegerField(
_('Year'),
max_length=4,
null=False,
blank=False,
)

month = models.IntegerField(
_('Month'),
max_length=2,
null=False,
blank=False,
)

found_log_files = models.IntegerField(
verbose_name=_('Number of Found Valid Log Files'),
max_length=8,
default=0,
)

expected_log_files = models.IntegerField(
verbose_name=_('Number of Expected Valid Log Files'),
max_length=8,
blank=True,
null=True,
)
Expand Down
2 changes: 1 addition & 1 deletion log_manager/search_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ def get_model(self):
return LogProcessedRow

def index_queryset(self, using=None):
return self.get_model().objects.filter(log_file__collection__acron2='wi').all()
return self.get_model().objects.all()
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
{{ object.title }}
{{ object.body }}
{{server_time}}
{{browser_name}}
{{browser_version}}
{{ip}}
{{latitude}}
{{longitude}}
{{action_name}}
Empty file added metrics/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions metrics/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
6 changes: 6 additions & 0 deletions metrics/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


# Django application configuration for the "metrics" app.
class MetricsConfig(AppConfig):
# Primary-key field class used for models in this app that do not declare one explicitly.
default_auto_field = "django.db.models.BigAutoField"
# Python path of the application this configuration applies to.
name = "metrics"
Empty file added metrics/migrations/__init__.py
Empty file.
Loading

0 comments on commit 6c9bc37

Please sign in to comment.